add scrapers: Van Daal (API), Van Silfhout (SSR) for Delft

- fetch_vandaal: OG Online API, covers Delft/Rijswijk/Den Haag area,
  includes is_bought→verkocht status mapping
- fetch_vansilfhout: HTML scraper, all listings on single page,
  extracts postcode from embedded JS variable (objectZipcode)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-04 21:39:02 +02:00
parent c92ddb5812
commit d310a7a560
4 changed files with 210 additions and 3 deletions

View File

@@ -244,6 +244,69 @@ def fetch_moerman() -> list[RawListing]:
return listings
# ---------------------------------------------------------------------------
# Van Daal Makelaardij (Delft)
# ---------------------------------------------------------------------------
# The site runs on the OG Online / realtime-listings platform.

# Site root; listing URLs returned by the endpoint are appended to it.
_VANDAAL_BASE = "https://www.vandaalmakelaardij.nl"

# `statusOrig` values for which an item is dropped entirely.
_VANDAAL_SKIP = {"rented", "rented_ur"}

# `statusOrig` value -> internal status, grouped by the internal status.
_VANDAAL_STATUS_MAP = {
    "available": "beschikbaar",
    **dict.fromkeys(("under_bid", "under_option"), "onder_bod"),
    **dict.fromkeys(("is_bought", "sold", "sold_ur"), "verkocht"),
}
def fetch_vandaal() -> list[RawListing]:
    """Fetch the for-sale listings of Van Daal Makelaardij from its JSON endpoint.

    Requests ``/nl/realtime-listings/consumer`` and converts every item that
    is flagged ``isSales``, whose ``statusOrig`` is not in ``_VANDAAL_SKIP``
    and whose ``salesPrice`` does not exceed ``config.MAX_PRICE`` into a
    ``RawListing``. Items without a ``url`` are skipped with a warning.

    Returns:
        The converted listings, in the order the endpoint returned them.
    """
    data = fetch_json(
        f"{_VANDAAL_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )

    listings = []
    for item in data:
        if not item.get("isSales"):
            continue

        status_orig = item.get("statusOrig")
        if status_orig in _VANDAAL_SKIP:
            continue

        # The key can be present with a null value, in which case a
        # ``.get(..., 0)`` default does not apply and ``None > int`` would
        # raise; normalise to None first and only compare real prices.
        prijs = item.get("salesPrice") or None
        if prijs is not None and prijs > config.MAX_PRICE:
            continue

        url_path = item.get("url")
        if not url_path:
            # Without a URL the listing cannot be linked to; skip this item
            # instead of aborting the whole run on a KeyError.
            log.warning("vandaal: item zonder url overgeslagen")
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None

        # The year may arrive as a string or as a number; go through str()
        # so both forms are handled, and use isdecimal() so that only text
        # int() can actually parse is converted.
        raw_year = str(item.get("dateOfConstruction") or "").strip()
        bouwjaar = int(raw_year) if raw_year.isdecimal() else None

        listings.append(RawListing(
            url=_VANDAAL_BASE + url_path,
            source_makelaar="vandaal",
            # Unknown or missing statuses fall back to "beschikbaar".
            status=_VANDAAL_STATUS_MAP.get(status_orig, "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=prijs,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            # ``or None`` already turns a plot surface of 0 into None.
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=item.get("energyLabel") or None,
            hero_image_url=item.get("photo") or None,
        ))

    log.info("vandaal: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — exporteer hier alle actieve API adapters
# ---------------------------------------------------------------------------
@@ -252,4 +315,5 @@ SCRAPERS = {
'bjornd': fetch_bjornd,
'ooms': fetch_ooms,
'moerman': fetch_moerman,
'vandaal': fetch_vandaal,
}

View File

@@ -1038,6 +1038,148 @@ def fetch_schielandborsboom() -> list[RawListing]:
return listings
# ---------------------------------------------------------------------------
# Van Silfhout & Hogetoorn Wereldmakelaars (Delft)
# ---------------------------------------------------------------------------

# Site root; relative detail links are resolved against it.
_VANSILFHOUT_BASE = "https://www.vansilfhout.nl"

# Lower-cased status text from a listing card -> internal status.
_VANSILFHOUT_STATUS_MAP = {
    "te koop": "beschikbaar",
    "onder bod": "onder_bod",
    **dict.fromkeys(("verkocht onder voorbehoud", "verkocht"), "verkocht"),
}
def _vansilfhout_detail(detail_url: str) -> dict:
    """Fetch a Van Silfhout detail page and extract postcode and key specs.

    The postcode is read from inline JavaScript (``objectZipcode': '2624NP'``);
    the other values come from the ``.shortSpecs`` list, whose items look like
    ``<li><span>Label:</span><span>Value</span></li>``.

    Args:
        detail_url: URL of the listing's detail page.

    Returns:
        A dict with the keys ``postcode``, ``bouwjaar``, ``woonoppervlak``,
        ``kamers`` and ``slaapkamers``. The postcode has all whitespace
        removed; the other values are the unparsed text from the page. Any
        value is None when it was not found. An empty dict is returned when
        fetching or parsing fails; the failure is logged as a warning.
    """
    try:
        # Imports stay inside the try block so that a missing optional
        # dependency degrades to "no detail data" like any other failure.
        import re as _re

        import httpx as _httpx
        from bs4 import BeautifulSoup as _BS

        response = _httpx.get(
            detail_url,
            headers={"User-Agent": config.USER_AGENT},
            timeout=15,
            follow_redirects=True,
        )
        response.raise_for_status()
        html = response.text

        # Postcode embedded in JS: objectZipcode': '2624NP'
        match = _re.search(r"objectZipcode':\s*'([^']+)'", html)
        # Drop any whitespace ("2624 NP" -> "2624NP"); a blank value becomes None.
        postcode = ("".join(match.group(1).split()) or None) if match else None

        # shortSpecs: first span is the label (trailing colon removed,
        # lower-cased), last span is the value.
        specs: dict[str, str] = {}
        for li in _BS(html, "html.parser").select(".shortSpecs li"):
            spans = li.select("span")
            if len(spans) < 2:
                continue
            label = spans[0].get_text(strip=True).rstrip(":").lower()
            specs[label] = spans[-1].get_text(strip=True)

        return {
            "postcode": postcode,
            "bouwjaar": specs.get("bouwjaar"),
            "woonoppervlak": specs.get("oppervlakte"),
            "kamers": specs.get("kamers"),
            "slaapkamers": specs.get("slaapkamers"),
        }
    except Exception as e:
        # Deliberately best-effort: the caller still has the card data, so a
        # broken detail page must not abort the scrape.
        log.warning("vansilfhout: detail fetch fout %s: %s", detail_url, e)
        return {}
def _vansilfhout_int(text):
    """Return the first run of digits in *text* as an int, or None if there is none."""
    if not text:
        return None
    match = re.search(r"\d+", str(text))
    return int(match.group()) if match else None


def fetch_vansilfhout() -> list[RawListing]:
    """Fetch the Van Silfhout listings (all listings are on a single page).

    Every ``article.row`` card on ``/woningaanbod/`` is parsed for URL,
    status, address, city, price, living area, room counts and hero image.
    Cards priced above ``config.MAX_PRICE`` are skipped before the detail
    page is requested. Values from the detail page take precedence over the
    card values where both exist.

    A card that fails to parse is logged and skipped. When
    ``config.APP_ENV == "dev"`` the loop stops after the first listing that
    was added.

    Returns:
        The parsed listings in page order.
    """
    from urllib.parse import urljoin

    soup = fetch_soup(f"{_VANSILFHOUT_BASE}/woningaanbod/")
    listings = []
    for card in soup.select("article.row"):
        try:
            a_tag = card.select_one("a.objectcontainerimg")
            if not a_tag or "href" not in a_tag.attrs:
                continue
            # urljoin leaves absolute URLs untouched and resolves both
            # "/path" and "path" style hrefs against the site root.
            detail_url = urljoin(_VANSILFHOUT_BASE + "/", a_tag["href"])

            # Status; unknown texts fall back to "beschikbaar".
            status_text = (_text(card, "span.objectstatus") or "").lower()
            status = _VANSILFHOUT_STATUS_MAP.get(status_text, "beschikbaar")

            # Address and city (the city is the last span in the link).
            adres = _text(card, "h2.objecttitle")
            city_el = card.select("a.straatnaamwoonplaats span")
            stad = city_el[-1].get_text(strip=True) if city_el else None

            # Price from the <strong> inside shortSpecs.
            prijs = parse_prijs(_text(card, "ul.shortSpecs li strong"))
            if prijs and prijs > config.MAX_PRICE:
                continue

            # Area and room counts from the card's shortSpecs; these are
            # fallbacks for when the detail page lacks the value.
            woonoppervlak_card = None
            kamers_card = None
            slaapkamers_card = None
            for li in card.select("ul.shortSpecs li"):
                spans = li.select("span")
                if len(spans) < 2:
                    continue
                label = spans[0].get_text(strip=True).lower()
                val = spans[-1].get_text(strip=True)
                if "oppervlakt" in label:
                    woonoppervlak_card = parse_m2(val)
                elif "slaapkamer" in label:
                    # Checked before "kamer": "slaapkamers" contains "kamer"
                    # and would otherwise overwrite the total room count.
                    slaapkamers_card = _vansilfhout_int(val)
                elif "kamer" in label:
                    kamers_card = _vansilfhout_int(val)

            # Hero image: prefer data-lazy-src, then src; when that is an
            # inline data: placeholder, use the <noscript> image instead.
            hero = None
            img_tag = card.select_one("a.objectcontainerimg img")
            if img_tag is not None:
                hero = img_tag.get("data-lazy-src") or img_tag.get("src") or None
                if hero and hero.startswith("data:"):
                    noscript = card.select_one("noscript img")
                    hero = (noscript.get("src") or None) if noscript else None

            # Empty dict when the detail fetch failed.
            kk = _vansilfhout_detail(detail_url)

            kamers = _vansilfhout_int(kk.get("kamers"))
            if kamers is None:
                kamers = kamers_card
            slaapkamers = _vansilfhout_int(kk.get("slaapkamers"))
            if slaapkamers is None:
                slaapkamers = slaapkamers_card

            # Only hand parse_m2 a real string; the value is absent when the
            # detail fetch failed or the page has no "oppervlakte" entry.
            raw_m2 = kk.get("woonoppervlak")
            woonoppervlak = (parse_m2(raw_m2) if raw_m2 else None) or woonoppervlak_card

            listings.append(RawListing(
                url=detail_url,
                source_makelaar="vansilfhout",
                status=status,
                adres=adres,
                postcode=kk.get("postcode"),
                stad=stad,
                prijs=prijs,
                hero_image_url=hero,
                # Tolerates values such as "ca. 1930" rather than raising
                # and losing the whole listing.
                bouwjaar=_vansilfhout_int(kk.get("bouwjaar")),
                woonoppervlak=woonoppervlak,
                kamers=kamers,
                slaapkamers=slaapkamers,
            ))
            # NOTE(review): presumably limits detail-page requests during
            # development — confirm.
            if config.APP_ENV == "dev":
                break
        except Exception as e:
            # Best-effort per card: one malformed card must not stop the run.
            log.warning("vansilfhout: parse fout: %s", e)

    log.info("vansilfhout: %d listings opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — exporteer hier alle actieve SSR adapters
# ---------------------------------------------------------------------------
@@ -1051,4 +1193,5 @@ SCRAPERS = {
'3dmakelaars': fetch_3dmakelaars,
'dupont': fetch_dupont,
'schielandborsboom': fetch_schielandborsboom,
'vansilfhout': fetch_vansilfhout,
}