add scrapers: Van Daal (API), Van Silfhout (SSR) for Delft
- fetch_vandaal: OG Online API, covers Delft/Rijswijk/Den Haag area, includes is_bought→verkocht status mapping
- fetch_vansilfhout: HTML scraper, all listings on single page, extracts postcode from embedded JS variable (objectZipcode)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,8 +4,8 @@
|
||||
|
||||
| Done | Naam | Website | Adres |
|
||||
|------|------|---------|-------|
|
||||
| [ ] | Van Silfhout & Hogetoorn Wereldmakelaars | vansilfhout.nl | Ireneboulevard 2 |
|
||||
| [ ] | Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 |
|
||||
| [x] | Van Silfhout & Hogetoorn Wereldmakelaars | vansilfhout.nl | Ireneboulevard 2 |
|
||||
| [x] | Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 |
|
||||
| [x] | Björnd Makelaardij | bjornd.nl | Oude Delft 103 |
|
||||
| [ ] | Hof van Delft Makelaardij | hofvandelftmakelaardij.nl | Wateringsevest 26 |
|
||||
| [ ] | V&W Makelaars Delft | vwmakelaars.nl | Coenderstraat 31 |
|
||||
|
||||
@@ -244,6 +244,69 @@ def fetch_moerman() -> list[RawListing]:
|
||||
return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Van Daal Makelaardij (Delft)
|
||||
# ---------------------------------------------------------------------------
|
||||
# OG Online / realtime-listings platform.
|
||||
|
||||
_VANDAAL_BASE = "https://www.vandaalmakelaardij.nl"
|
||||
_VANDAAL_SKIP = {"rented", "rented_ur"}
|
||||
|
||||
_VANDAAL_STATUS_MAP = {
|
||||
"available": "beschikbaar",
|
||||
"under_bid": "onder_bod",
|
||||
"under_option": "onder_bod",
|
||||
"is_bought": "verkocht",
|
||||
"sold": "verkocht",
|
||||
"sold_ur": "verkocht",
|
||||
}
|
||||
|
||||
|
||||
def fetch_vandaal() -> list[RawListing]:
    """Fetch Van Daal Makelaardij's for-sale listings from its JSON endpoint.

    Requests ``/nl/realtime-listings/consumer`` on ``_VANDAAL_BASE`` and turns
    every accepted item into a ``RawListing``.  An item is skipped when it is
    not flagged ``isSales``, when its ``statusOrig`` is in ``_VANDAAL_SKIP``,
    when its ``salesPrice`` exceeds ``config.MAX_PRICE``, or when it carries
    no ``url``.

    Returns:
        The accepted listings, in the order the endpoint returned them.
    """
    data = fetch_json(
        f"{_VANDAAL_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )

    listings = []
    # ``or []`` keeps an empty/None response from raising in the loop.
    for item in data or []:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _VANDAAL_SKIP:
            continue

        # The key can be present with a null value, so normalise before
        # comparing; a missing price never triggers the price filter.
        prijs = item.get("salesPrice") or None
        if prijs is not None and prijs > config.MAX_PRICE:
            continue

        # One item without a URL must not abort the whole fetch.
        path = item.get("url")
        if not path:
            log.warning("vandaal: item zonder url overgeslagen")
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None

        # Accept the year as either a string or a number.
        raw_year = str(item.get("dateOfConstruction") or "").strip()
        bouwjaar = int(raw_year) if raw_year.isdecimal() else None

        listings.append(RawListing(
            url=_VANDAAL_BASE + path,
            source_makelaar="vandaal",
            # Unknown or missing status codes fall back to "beschikbaar".
            status=_VANDAAL_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=prijs,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            # ``or None`` also turns a plot surface of 0 into None.
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=item.get("energyLabel") or None,
            hero_image_url=item.get("photo") or None,
        ))

    log.info("vandaal: %d koopwoningen opgehaald", len(listings))
    return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SCRAPERS — exporteer hier alle actieve API adapters
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -252,4 +315,5 @@ SCRAPERS = {
|
||||
'bjornd': fetch_bjornd,
|
||||
'ooms': fetch_ooms,
|
||||
'moerman': fetch_moerman,
|
||||
'vandaal': fetch_vandaal,
|
||||
}
|
||||
|
||||
@@ -1038,6 +1038,148 @@ def fetch_schielandborsboom() -> list[RawListing]:
|
||||
return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Van Silfhout & Hogetoorn Wereldmakelaars (Delft)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_VANSILFHOUT_BASE = "https://www.vansilfhout.nl"
|
||||
|
||||
_VANSILFHOUT_STATUS_MAP = {
|
||||
"te koop": "beschikbaar",
|
||||
"onder bod": "onder_bod",
|
||||
"verkocht onder voorbehoud": "verkocht",
|
||||
"verkocht": "verkocht",
|
||||
}
|
||||
|
||||
|
||||
def _vansilfhout_detail(detail_url: str) -> dict:
    """Fetch a Van Silfhout detail page and extract the postcode and key specs.

    The postcode is read from the ``objectZipcode`` entry embedded in the
    page's JavaScript.  The other fields come from the ``.shortSpecs`` list,
    whose items look like ``<li><span>Label:</span><span>Value</span></li>``.

    Args:
        detail_url: URL of the listing's detail page.

    Returns:
        A dict with the keys ``postcode``, ``bouwjaar``, ``woonoppervlak``,
        ``kamers`` and ``slaapkamers``.  Values are the unparsed text found on
        the page, or ``None`` when a field is absent.  If fetching or parsing
        fails for any reason, a warning is logged and an empty dict is
        returned so the caller can continue with the overview-card data.
    """
    try:
        # Imported inside the ``try`` so that a missing dependency is handled
        # like any other detail-page failure.
        import re

        import httpx
        from bs4 import BeautifulSoup

        response = httpx.get(
            detail_url,
            headers={"User-Agent": config.USER_AGENT},
            timeout=15,
            follow_redirects=True,
        )
        response.raise_for_status()
        html = response.text
        soup = BeautifulSoup(html, "html.parser")

        # Postcode embedded in JS: objectZipcode': '2624NP'
        match = re.search(r"objectZipcode':\s*'([^']+)'", html)
        # Strip whitespace so "2624 NP" and "2624NP" are stored the same way.
        postcode = (match.group(1).replace(" ", "") or None) if match else None

        specs: dict[str, str] = {}
        for li in soup.select(".shortSpecs li"):
            spans = li.select("span")
            if len(spans) < 2:
                continue
            # Normalise "Label :" / "Label:" to a bare lower-case key.
            label = spans[0].get_text(strip=True).rstrip(":").strip().lower()
            specs[label] = spans[-1].get_text(strip=True)

        return {
            "postcode": postcode,
            "bouwjaar": specs.get("bouwjaar"),
            "woonoppervlak": specs.get("oppervlakte"),
            "kamers": specs.get("kamers"),
            "slaapkamers": specs.get("slaapkamers"),
        }
    except Exception as e:  # deliberate best-effort boundary: log and go on
        log.warning("vansilfhout: detail fetch fout %s: %s", detail_url, e)
        return {}
|
||||
|
||||
|
||||
def _vansilfhout_first_int(text, pattern=r"\d+"):
    """Return the first match of *pattern* in *text* as an int, or None."""
    if not text:
        return None
    found = re.search(pattern, text)
    return int(found.group(0)) if found else None


def fetch_vansilfhout() -> list[RawListing]:
    """Fetch Van Silfhout's listings from the single ``/woningaanbod/`` page.

    Every ``article.row`` card on the overview page yields one ``RawListing``.
    Status, address, city, price, image, living area and room counts are read
    from the card; the detail page (``_vansilfhout_detail``) supplies the
    postcode and construction year and takes precedence for living area and
    room counts.  Cards without a detail link, and cards priced above
    ``config.MAX_PRICE``, are skipped.  A card that fails to parse is logged
    and skipped without aborting the run.

    Returns:
        The listings that were parsed successfully, in page order.
    """
    soup = fetch_soup(f"{_VANSILFHOUT_BASE}/woningaanbod/")
    listings = []

    for card in soup.select("article.row"):
        try:
            a_tag = card.select_one("a.objectcontainerimg")
            if not a_tag or "href" not in a_tag.attrs:
                continue
            detail_url = a_tag["href"]
            if not detail_url.startswith("http"):
                detail_url = _VANSILFHOUT_BASE + detail_url

            # Status: unknown texts fall back to "beschikbaar".
            status_text = (_text(card, "span.objectstatus") or "").lower()
            status = _VANSILFHOUT_STATUS_MAP.get(status_text, "beschikbaar")

            # Address and city (the city is the last span of the link).
            adres = _text(card, "h2.objecttitle")
            city_el = card.select("a.straatnaamwoonplaats span")
            stad = city_el[-1].get_text(strip=True) if city_el else None

            # Price from the <strong> inside shortSpecs.
            prijs = parse_prijs(_text(card, "ul.shortSpecs li strong"))
            if prijs and prijs > config.MAX_PRICE:
                continue

            # Card-level specs, used as a fallback for the detail page.
            woonoppervlak_card = None
            kamers_card = None
            slaapkamers_card = None
            for li in card.select("ul.shortSpecs li"):
                spans = li.select("span")
                if len(spans) < 2:
                    continue
                label = spans[0].get_text(strip=True).lower()
                val = spans[-1].get_text(strip=True)
                if "oppervlakt" in label:
                    woonoppervlak_card = parse_m2(val)
                elif "slaapkamer" in label:
                    # Checked before "kamer": "slaapkamers" contains "kamer"
                    # and must not overwrite the room count.
                    slaapkamers_card = _vansilfhout_first_int(val)
                elif "kamer" in label:
                    kamers_card = _vansilfhout_first_int(val)

            # Hero image: prefer data-lazy-src, fall back to noscript img src
            # when the visible src is an inline "data:" placeholder.
            img_tag = card.select_one("a.objectcontainerimg img")
            hero = None
            if img_tag:
                hero = img_tag.get("data-lazy-src") or img_tag.get("src") or None
                if hero and hero.startswith("data:"):
                    noscript = card.select_one("noscript img")
                    hero = noscript.get("src") if noscript else None

            kk = _vansilfhout_detail(detail_url)

            # Detail-page values win; card values fill the gaps.
            kamers = _vansilfhout_first_int(kk.get("kamers"))
            if kamers is None:
                kamers = kamers_card

            slaapkamers = _vansilfhout_first_int(kk.get("slaapkamers"))
            if slaapkamers is None:
                slaapkamers = slaapkamers_card

            # Take the four-digit year out of free text such as "ca. 1930";
            # a non-numeric value yields None instead of dropping the listing.
            bouwjaar = _vansilfhout_first_int(kk.get("bouwjaar"), r"\b\d{4}\b")

            # Only hand parse_m2 a real string.
            detail_m2 = kk.get("woonoppervlak")
            woonoppervlak = (parse_m2(detail_m2) if detail_m2 else None) or woonoppervlak_card

            listings.append(RawListing(
                url=detail_url,
                source_makelaar="vansilfhout",
                status=status,
                adres=adres,
                postcode=kk.get("postcode"),
                stad=stad,
                prijs=prijs,
                hero_image_url=hero,
                bouwjaar=bouwjaar,
                woonoppervlak=woonoppervlak,
                kamers=kamers,
                slaapkamers=slaapkamers,
            ))

            # In the "dev" environment stop after the first listing
            # (presumably to limit detail-page requests -- confirm).
            if config.APP_ENV == "dev":
                break
        except Exception as e:  # best-effort: one bad card must not stop the run
            log.warning("vansilfhout: parse fout: %s", e)

    log.info("vansilfhout: %d listings opgehaald", len(listings))
    return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1051,4 +1193,5 @@ SCRAPERS = {
|
||||
'3dmakelaars': fetch_3dmakelaars,
|
||||
'dupont': fetch_dupont,
|
||||
'schielandborsboom': fetch_schielandborsboom,
|
||||
'vansilfhout': fetch_vansilfhout,
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ logging.basicConfig(
|
||||
)
|
||||
|
||||
# --- change this to test a different adapter ---
|
||||
ADAPTER = SCRAPERS['schielandborsboom']
|
||||
ADAPTER = SCRAPERS['vansilfhout']
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(f"Testing adapter: {ADAPTER.__name__}")
|
||||
|
||||
Reference in New Issue
Block a user