diff --git a/makelaars.md b/makelaars.md index 8685220..1bcbab1 100644 --- a/makelaars.md +++ b/makelaars.md @@ -4,8 +4,8 @@ | Done | Naam | Website | Adres | | [ ] | ---- |------|---------|-------| -| [ ] | Van Silfhout & Hogetoorn Wereldmakelaars | vansilfhout.nl | Ireneboulevard 2 | -| [ ] | Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 | +| [x] | Van Silfhout & Hogetoorn Wereldmakelaars | vansilfhout.nl | Ireneboulevard 2 | +| [x] | Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 | | [x] | Björnd Makelaardij | bjornd.nl | Oude Delft 103 | | [ ] | Hof van Delft Makelaardij | hofvandelftmakelaardij.nl | Wateringsevest 26 | | [ ] | V&W Makelaars Delft | vwmakelaars.nl | Coenderstraat 31 | diff --git a/src/adapters/api.py b/src/adapters/api.py index 450d003..79674d2 100644 --- a/src/adapters/api.py +++ b/src/adapters/api.py @@ -244,6 +244,69 @@ def fetch_moerman() -> list[RawListing]: return listings +# --------------------------------------------------------------------------- +# Van Daal Makelaardij (Delft) +# --------------------------------------------------------------------------- +# OG Online / realtime-listings platform. 
_VANDAAL_BASE = "https://www.vandaalmakelaardij.nl"

# ``statusOrig`` values that are dropped outright (rental outcomes, judging by
# their names) even when the item is flagged as a sale.
_VANDAAL_SKIP = {"rented", "rented_ur"}

# Feed ``statusOrig`` value -> internal status. Values missing from this map
# fall back to "beschikbaar" in fetch_vandaal().
_VANDAAL_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "is_bought": "verkocht",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}


def fetch_vandaal() -> list[RawListing]:
    """Fetch the Van Daal sale listings from the realtime-listings JSON feed.

    An item is skipped when it is not flagged ``isSales``, when its
    ``statusOrig`` is in ``_VANDAAL_SKIP``, when its ``salesPrice`` exceeds
    ``config.MAX_PRICE``, or when it carries no ``url``.

    The adapter is exported through the module's ``SCRAPERS`` mapping under
    the key ``'vandaal'``.

    Returns:
        One ``RawListing`` per remaining item. Empty or zero feed values are
        normalised to ``None``.
    """
    data = fetch_json(
        f"{_VANDAAL_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )

    listings = []
    for item in data or []:
        if not item.get("isSales"):
            continue

        status_orig = item.get("statusOrig")
        if status_orig in _VANDAAL_SKIP:
            continue

        # dict.get() only applies its default when the key is absent, so a
        # JSON null price has to be normalised before it is compared.
        prijs = item.get("salesPrice") or None
        if prijs is not None and prijs > config.MAX_PRICE:
            continue

        # A single malformed item must not abort the whole run.
        path = item.get("url")
        if not path:
            log.warning("vandaal: item zonder url overgeslagen")
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None

        # Coerce to str first so a numeric year does not break the digit
        # check; anything that is not a plain decimal string yields None.
        raw_year = str(item.get("dateOfConstruction") or "").strip()
        bouwjaar = int(raw_year) if raw_year.isdecimal() else None

        listings.append(RawListing(
            url=_VANDAAL_BASE + path,
            source_makelaar="vandaal",
            status=_VANDAAL_STATUS_MAP.get(status_orig, "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=prijs,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            # ``or None`` already maps a plot surface of 0 to None.
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=item.get("energyLabel") or None,
            hero_image_url=item.get("photo") or None,
        ))

    log.info("vandaal: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# Van Silfhout & Hogetoorn Wereldmakelaars (Delft)
# ---------------------------------------------------------------------------

_VANSILFHOUT_BASE = "https://www.vansilfhout.nl"

# Lower-cased status label on the listing card -> internal status. Labels
# missing from this map fall back to "beschikbaar" in fetch_vansilfhout().
_VANSILFHOUT_STATUS_MAP = {
    "te koop": "beschikbaar",
    "onder bod": "onder_bod",
    "verkocht onder voorbehoud": "verkocht",
    "verkocht": "verkocht",
}

# The postcode is embedded in inline JS on the detail page,
# e.g.  objectZipcode': '2624NP'
_VANSILFHOUT_ZIP_RE = re.compile(r"objectZipcode':\s*'([^']+)'")
_VANSILFHOUT_INT_RE = re.compile(r"\d+")


def _vansilfhout_int(text):
    """Return the first run of digits in *text* as an int, or None."""
    if not text:
        return None
    found = _VANSILFHOUT_INT_RE.search(text)
    return int(found.group()) if found else None


def _vansilfhout_specs(node, selector: str) -> dict[str, str]:
    """Collect label/value pairs from the ``<li>`` items matching *selector*.

    Every item holding at least two ``<span>`` elements contributes one
    entry: the first span is the label (lower-cased, trailing colon removed)
    and the last span is the value.
    """
    specs: dict[str, str] = {}
    for li in node.select(selector):
        spans = li.select("span")
        if len(spans) >= 2:
            label = spans[0].get_text(strip=True).rstrip(":").lower()
            specs[label] = spans[-1].get_text(strip=True)
    return specs


def _vansilfhout_hero(card):
    """Return the card's hero image URL, or None when there is none.

    Prefers ``data-lazy-src`` over ``src``; when the result is an inline
    ``data:`` placeholder, the ``<noscript>`` image is used instead.
    """
    img_tag = card.select_one("a.objectcontainerimg img")
    if img_tag is None:
        return None
    hero = img_tag.get("data-lazy-src") or img_tag.get("src") or None
    if hero and hero.startswith("data:"):
        noscript = card.select_one("noscript img")
        # .get() so a noscript <img> without src yields None, not KeyError.
        hero = (noscript.get("src") or None) if noscript else None
    return hero


def _vansilfhout_detail(detail_url: str) -> dict:
    """Fetch a Van Silfhout detail page and extract postcode and specs.

    The postcode is read from inline JS, the remaining fields from the
    ``.shortSpecs`` list. This is best-effort: any failure is logged as a
    warning and an empty dict is returned.

    Returns:
        A dict with the keys ``postcode``, ``bouwjaar``, ``woonoppervlak``,
        ``kamers`` and ``slaapkamers`` (raw strings or None), or ``{}`` when
        the page could not be fetched or parsed.
    """
    try:
        # Imported inside the try so that a missing dependency also degrades
        # to "no detail data" and does not fail the listing.
        import httpx
        from bs4 import BeautifulSoup

        response = httpx.get(
            detail_url,
            headers={"User-Agent": config.USER_AGENT},
            timeout=15,
            follow_redirects=True,
        )
        response.raise_for_status()
        html = response.text

        zip_match = _VANSILFHOUT_ZIP_RE.search(html)
        # Strip spaces so "2624 NP" and "2624NP" are stored the same way.
        postcode = zip_match.group(1).replace(" ", "") if zip_match else None

        specs = _vansilfhout_specs(
            BeautifulSoup(html, "html.parser"), ".shortSpecs li"
        )
        return {
            "postcode": postcode or None,
            "bouwjaar": specs.get("bouwjaar"),
            "woonoppervlak": specs.get("oppervlakte"),
            "kamers": specs.get("kamers"),
            "slaapkamers": specs.get("slaapkamers"),
        }
    except Exception as e:
        log.warning("vansilfhout: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_vansilfhout() -> list[RawListing]:
    """Fetch the Van Silfhout property listings (all listings on one page).

    Each ``article.row`` card supplies status, address, city, price, image
    and fallback specs; the detail page supplies postcode, build year and
    the preferred values for area and room counts. Cards priced above
    ``config.MAX_PRICE`` are skipped before the detail page is requested.
    When ``config.APP_ENV`` is ``"dev"`` the loop stops after the first
    collected listing.

    The adapter is exported through the module's ``SCRAPERS`` mapping under
    the key ``'vansilfhout'``.

    Returns:
        The collected ``RawListing`` objects. A card that fails to parse is
        logged as a warning and skipped.
    """
    from urllib.parse import urljoin

    soup = fetch_soup(f"{_VANSILFHOUT_BASE}/woningaanbod/")
    listings = []

    for card in soup.select("article.row"):
        try:
            a_tag = card.select_one("a.objectcontainerimg")
            href = a_tag.get("href") if a_tag else None
            if not href:
                continue
            # urljoin keeps absolute URLs and resolves relative ones.
            detail_url = urljoin(_VANSILFHOUT_BASE + "/", href)

            # Status
            status_text = (_text(card, "span.objectstatus") or "").strip().lower()
            status = _VANSILFHOUT_STATUS_MAP.get(status_text, "beschikbaar")

            # Address and city
            adres = _text(card, "h2.objecttitle")
            city_el = card.select("a.straatnaamwoonplaats span")
            stad = city_el[-1].get_text(strip=True) if city_el else None

            # Price from the shortSpecs <strong>
            prijs = parse_prijs(_text(card, "ul.shortSpecs li strong"))
            if prijs and prijs > config.MAX_PRICE:
                continue

            # Fallback specs from the card itself.
            woonoppervlak_card = None
            kamers_card = None
            slaapkamers_card = None
            card_specs = _vansilfhout_specs(card, "ul.shortSpecs li")
            for label, value in card_specs.items():
                if "oppervlakt" in label:
                    # Guard against a plot-area row being taken for the
                    # living area.
                    if "perceel" not in label:
                        woonoppervlak_card = parse_m2(value)
                elif "slaapkamer" in label:
                    # Must be tested before "kamer": "slaapkamers" contains
                    # "kamer" and would overwrite the room count.
                    slaapkamers_card = _vansilfhout_int(value)
                elif "kamer" in label:
                    kamers_card = _vansilfhout_int(value)

            hero = _vansilfhout_hero(card)

            detail = _vansilfhout_detail(detail_url)

            # Detail-page values win; card values are the fallback.
            kamers = _vansilfhout_int(detail.get("kamers"))
            if kamers is None:
                kamers = kamers_card

            slaapkamers = _vansilfhout_int(detail.get("slaapkamers"))
            if slaapkamers is None:
                slaapkamers = slaapkamers_card

            detail_m2 = (
                parse_m2(detail["woonoppervlak"])
                if detail.get("woonoppervlak")
                else None
            )

            listings.append(RawListing(
                url=detail_url,
                source_makelaar="vansilfhout",
                status=status,
                adres=adres,
                postcode=detail.get("postcode"),
                stad=stad,
                prijs=prijs,
                hero_image_url=hero,
                # Tolerates values such as "ca. 1930"; a bare int() would
                # raise and discard the whole listing.
                bouwjaar=_vansilfhout_int(detail.get("bouwjaar")),
                woonoppervlak=detail_m2 or woonoppervlak_card,
                kamers=kamers,
                slaapkamers=slaapkamers,
            ))
            if config.APP_ENV == "dev":
                break
        except Exception as e:
            log.warning("vansilfhout: parse fout: %s", e)

    log.info("vansilfhout: %d listings opgehaald", len(listings))
    return listings