Compare commits

..

2 Commits

Author SHA1 Message Date
d310a7a560 add scrapers: Van Daal (API), Van Silfhout (SSR) for Delft
- fetch_vandaal: OG Online API, covers Delft/Rijswijk/Den Haag area,
  includes is_bought→verkocht status mapping
- fetch_vansilfhout: HTML scraper, all listings on single page,
  extracts postcode from embedded JS variable (objectZipcode)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 21:39:02 +02:00
c92ddb5812 add scrapers: Moerman & De Jong (API) and Schieland Borsboom (SSR)
- fetch_moerman: OG Online realtime-listings API (same platform as bjornd),
  includes bouwjaar from dateOfConstruction, energielabel, strips postcode space
- fetch_schielandborsboom: paginated HTML scraper filtered to Schiedam,
  fetches #kenmerken detail page for full specs (bouwjaar, kamers, etc.)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 21:34:58 +02:00
4 changed files with 445 additions and 6 deletions

View File

@@ -4,8 +4,8 @@
| Done | Naam | Website | Adres |
|------|------|---------|-------|
| [ ] | Van Silfhout & Hogetoorn Wereldmakelaars | vansilfhout.nl | Ireneboulevard 2 |
| [ ] | Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 |
| [x] | Van Silfhout & Hogetoorn Wereldmakelaars | vansilfhout.nl | Ireneboulevard 2 |
| [x] | Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 |
| [x] | Björnd Makelaardij | bjornd.nl | Oude Delft 103 |
| [ ] | Hof van Delft Makelaardij | hofvandelftmakelaardij.nl | Wateringsevest 26 |
| [ ] | V&W Makelaars Delft | vwmakelaars.nl | Coenderstraat 31 |
@@ -33,9 +33,9 @@
| [x] | 3D Makelaars | 3dmakelaars.nl | Gerrit Verboonstraat 17 |
| [x] | Dupont Makelaars | dupont.nl | Rotterdamsedijk 437 |
| [x] | D&S Makelaardij | densmakelaars.nl | Land van Belofte 50 |
| [ ] | Moerman & De Jong Makelaars | moerman-dejong.nl | Lange Kerkstraat 80B |
| [x] | Moerman & De Jong Makelaars | moerman-dejong.nl | Lange Kerkstraat 80B |
| [ ] | Hagestein Makelaardij | — | Degerfors 54 |
| [ ] | Schieland Borsboom NVM Makelaars | schielandborsboom.nl | (Rotterdam, actief in Schiedam) |
| [x] | Schieland Borsboom NVM Makelaars | schielandborsboom.nl | (Rotterdam, actief in Schiedam) |
## Leiden

View File

@@ -182,6 +182,131 @@ def fetch_ooms() -> list[RawListing]:
log.info("ooms: %d listings opgehaald", len(listings))
return listings
# ---------------------------------------------------------------------------
# Moerman & De Jong Makelaars (Schiedam)
# ---------------------------------------------------------------------------
# Same OG Online / realtime-listings platform as Bjornd.

_MOERMAN_BASE = "https://www.moerman-dejong.nl"

# ``statusOrig`` values for which an item is skipped entirely.
_MOERMAN_SKIP = {"rented", "rented_ur"}

# Maps the API's ``statusOrig`` value onto the status strings stored in RawListing.
_MOERMAN_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}


def fetch_moerman() -> list[RawListing]:
    """Fetch the for-sale listings of Moerman & De Jong from their JSON endpoint.

    Items are dropped when they are not flagged ``isSales``, when their
    ``statusOrig`` is in ``_MOERMAN_SKIP``, when their price exceeds
    ``config.MAX_PRICE`` or when they carry no ``url``.

    Returns:
        One ``RawListing`` per remaining item. Unknown ``statusOrig`` values
        fall back to ``"beschikbaar"``.
    """
    data = fetch_json(
        f"{_MOERMAN_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )

    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _MOERMAN_SKIP:
            continue

        # The key may be present with a null value, so compare only real prices.
        prijs = item.get("salesPrice") or None
        if prijs is not None and prijs > config.MAX_PRICE:
            continue

        # A single malformed item must not abort the whole run.
        path = item.get("url")
        if not path:
            log.warning("moerman: item zonder url overgeslagen")
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None

        # Coerce to str first: the year may arrive as a number or as text.
        raw_year = str(item.get("dateOfConstruction") or "").strip()
        bouwjaar = int(raw_year) if raw_year.isdecimal() else None

        listings.append(RawListing(
            url=_MOERMAN_BASE + path,
            source_makelaar="moerman",
            status=_MOERMAN_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=prijs,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            # ``or None`` already turns a plot size of 0 into None.
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=item.get("energyLabel") or None,
            hero_image_url=item.get("photo") or None,
        ))

    log.info("moerman: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# Van Daal Makelaardij (Delft)
# ---------------------------------------------------------------------------
# OG Online / realtime-listings platform.

_VANDAAL_BASE = "https://www.vandaalmakelaardij.nl"

# ``statusOrig`` values for which an item is skipped entirely.
_VANDAAL_SKIP = {"rented", "rented_ur"}

# Maps the API's ``statusOrig`` value onto the status strings stored in RawListing.
_VANDAAL_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "is_bought": "verkocht",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}


def fetch_vandaal() -> list[RawListing]:
    """Fetch the for-sale listings of Van Daal Makelaardij from their JSON endpoint.

    Items are dropped when they are not flagged ``isSales``, when their
    ``statusOrig`` is in ``_VANDAAL_SKIP``, when their price exceeds
    ``config.MAX_PRICE`` or when they carry no ``url``.

    Returns:
        One ``RawListing`` per remaining item. Unknown ``statusOrig`` values
        fall back to ``"beschikbaar"``.
    """
    data = fetch_json(
        f"{_VANDAAL_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )

    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _VANDAAL_SKIP:
            continue

        # The key may be present with a null value, so compare only real prices.
        prijs = item.get("salesPrice") or None
        if prijs is not None and prijs > config.MAX_PRICE:
            continue

        # A single malformed item must not abort the whole run.
        path = item.get("url")
        if not path:
            log.warning("vandaal: item zonder url overgeslagen")
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None

        # Coerce to str first: the year may arrive as a number or as text.
        raw_year = str(item.get("dateOfConstruction") or "").strip()
        bouwjaar = int(raw_year) if raw_year.isdecimal() else None

        listings.append(RawListing(
            url=_VANDAAL_BASE + path,
            source_makelaar="vandaal",
            status=_VANDAAL_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=prijs,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            # ``or None`` already turns a plot size of 0 into None.
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=item.get("energyLabel") or None,
            hero_image_url=item.get("photo") or None,
        ))

    log.info("vandaal: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — exporteer hier alle actieve API adapters
# ---------------------------------------------------------------------------
@@ -189,4 +314,6 @@ def fetch_ooms() -> list[RawListing]:
# Registry of the API adapters: adapter key -> fetch function.
SCRAPERS = {
    "bjornd": fetch_bjornd,
    "ooms": fetch_ooms,
    "moerman": fetch_moerman,
    "vandaal": fetch_vandaal,
}

View File

@@ -870,6 +870,316 @@ def fetch_dupont() -> list[RawListing]:
return listings
# ---------------------------------------------------------------------------
# Schieland Borsboom NVM Makelaars (Rotterdam, active in Schiedam)
# ---------------------------------------------------------------------------

_SCHIELAND_BASE = "https://www.schielandborsboom.nl"

# Maps the site's lower-cased status text onto the status strings stored in RawListing.
_SCHIELAND_STATUS_MAP = {
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht o.v.": "verkocht",
    "verkocht": "verkocht",
}


def _schieland_detail(detail_url: str) -> dict:
    """Read the specs ("kenmerken") from a Schieland Borsboom detail page.

    Args:
        detail_url: Absolute URL of the listing's detail page.

    Returns:
        A dict with the keys ``postcode``, ``status``, ``woningtype``,
        ``bouwjaar``, ``woonoppervlak``, ``perceeloppervlak``, ``kamers``,
        ``slaapkamers`` and ``energielabel``. Spec values are the raw page
        text (or None when absent); ``status`` is lower-cased. On any error a
        warning is logged and an empty dict is returned.
    """
    try:
        page = fetch_soup(detail_url)

        # The postcode sits in the status paragraph, e.g. "3117 DP Schiedam".
        status_p = page.select_one("div.house__status p")
        postcode = None
        if status_p:
            postcode = _extract_postcode(status_p.get_text())

        # Each spec row looks like <li><strong>label</strong><span>value</span></li>.
        specs: dict[str, str] = {}
        section = page.select_one("#kenmerken")
        rows = section.select("li") if section else []
        for row in rows:
            label_tag = row.select_one("strong")
            value_tag = row.select_one("span")
            if not (label_tag and value_tag):
                continue
            # Drop links nested in the value (e.g. "Hypotheek berekenen").
            for link in value_tag.select("a"):
                link.decompose()
            key = label_tag.get_text(strip=True).lower()
            specs[key] = value_tag.get_text(strip=True)

        # Result key -> spec label as it appears on the page (lower-cased).
        field_labels = {
            "woningtype": "soort bouw",
            "bouwjaar": "bouwjaar",
            "woonoppervlak": "woonoppervlakte",
            "perceeloppervlak": "perceeloppervlakte",
            "kamers": "aantal kamers",
            "slaapkamers": "aantal slaapkamers",
            "energielabel": "energielabel",
        }
        result = {
            "postcode": postcode,
            "status": specs.get("status", "").lower(),
        }
        for field, label in field_labels.items():
            result[field] = specs.get(label)
        return result
    except Exception as exc:
        log.warning("schielandborsboom: detail fetch fout %s: %s", detail_url, exc)
        return {}
# Number of cards on a full overview page; a shorter page ends the pagination.
_SCHIELAND_PAGE_SIZE = 18


def _schieland_first_int(text):
    """Return the first run of digits in *text* as an int, or None if there is none."""
    match = re.search(r"\d+", text or "")
    return int(match.group()) if match else None


def _schieland_parse_card(card):
    """Turn one overview card into a RawListing.

    Returns None when the card has no link, is not located in Schiedam or is
    priced above ``config.MAX_PRICE``. The detail page is only fetched for
    cards that pass those filters.
    """
    a_tag = card.select_one("a.card__anchor")
    if not a_tag or "href" not in a_tag.attrs:
        return None
    detail_url = a_tag["href"]
    if not detail_url.startswith("http"):
        detail_url = _SCHIELAND_BASE + detail_url

    # Filter: only Schiedam.
    stad_el = card.select_one("p.house-place")
    stad = stad_el.get_text(strip=True) if stad_el else None
    if not stad or stad.lower() != "schiedam":
        return None

    # The status is the extra CSS class next to "card-house__thumb".
    thumb = card.select_one("div.card-house__thumb")
    status_classes = thumb.get("class", []) if thumb else []
    status_text = next(
        (c for c in status_classes if c != "card-house__thumb"), "beschikbaar"
    ).lower()
    status = _SCHIELAND_STATUS_MAP.get(status_text, "beschikbaar")

    prijs = parse_prijs(_text(card, "p.price"))
    if prijs and prijs > config.MAX_PRICE:
        return None
    adres = _text(card, "h4.house-street")

    # Hero image from the medium-size <source>; fall back to the <img> when the
    # <source> is missing or carries no srcset.
    src_tag = card.select_one('picture source[media="(min-width:100px)"]')
    hero = (src_tag.get("srcset") if src_tag else None) or _src(card, "img")
    if hero and not hero.startswith("http"):
        hero = _SCHIELAND_BASE + hero

    # Data icons on the card: surface, bedrooms, energy label.
    woonoppervlak_card = None
    slaapkamers_card = None
    energielabel_card = None
    for data_div in card.select("div.data"):
        txt = data_div.get_text(strip=True)
        if data_div.select_one("i.icon-surface"):
            woonoppervlak_card = parse_m2(txt)
        elif data_div.select_one("i.icon-bedrooms"):
            slaapkamers_card = _schieland_first_int(txt)
        elif data_div.select_one("i.icon-label"):
            energielabel_card = txt.strip() or None

    # The detail page carries the full specs and overrides the card values.
    kk = _schieland_detail(detail_url)
    if kk.get("status"):
        status = _SCHIELAND_STATUS_MAP.get(kk["status"], status)

    # "5 kamers" -> 5
    kamers = _schieland_first_int(kk.get("kamers"))

    # "3" or "3 slaapkamers" -> 3; keep the card value when the detail has none.
    slaapkamers = _schieland_first_int(kk.get("slaapkamers"))
    if slaapkamers is None:
        slaapkamers = slaapkamers_card

    # Take the first four-digit number so that free-text values such as
    # "ca. 1930" do not raise and cause the whole listing to be dropped.
    year_match = re.search(r"\b\d{4}\b", kk.get("bouwjaar") or "")
    bouwjaar = int(year_match.group()) if year_match else None

    return RawListing(
        url=detail_url,
        source_makelaar="schielandborsboom",
        status=status,
        adres=adres,
        postcode=kk.get("postcode"),
        stad=stad,
        prijs=prijs,
        hero_image_url=hero,
        woningtype=kk.get("woningtype"),
        bouwjaar=bouwjaar,
        woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
        perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
        kamers=kamers,
        slaapkamers=slaapkamers,
        energielabel=kk.get("energielabel") or energielabel_card,
    )


def fetch_schielandborsboom() -> list[RawListing]:
    """Fetch Schieland Borsboom NVM listings (for sale only, Schiedam only).

    Walks the paginated overview until a page has no cards or fewer than
    ``_SCHIELAND_PAGE_SIZE`` cards. A card that fails to parse is logged and
    skipped. When ``config.APP_ENV`` is ``"dev"`` the run stops after the
    first listing.

    Returns:
        The collected ``RawListing`` objects.
    """
    listings = []
    dev_mode = config.APP_ENV == "dev"
    page = 1
    while True:
        if page == 1:
            url = f"{_SCHIELAND_BASE}/wonen?sure_koop_huur=koop"
        else:
            url = f"{_SCHIELAND_BASE}/wonen/page/{page}/?sure_koop_huur=koop"

        # NOTE(review): when the last page is exactly full, one more page is
        # requested; confirm fetch_soup copes with the site's response for an
        # out-of-range page instead of raising.
        soup = fetch_soup(url)
        cards = soup.select("div.card.card--house")
        if not cards:
            break

        stop_after_first = False
        for card in cards:
            try:
                listing = _schieland_parse_card(card)
            except Exception as e:
                log.warning("schielandborsboom: parse fout: %s", e)
                continue
            if listing is None:
                continue
            listings.append(listing)
            if dev_mode:
                # Leave the pagination loop too, not only the card loop;
                # otherwise dev runs fetch one detail page per overview page.
                stop_after_first = True
                break

        if stop_after_first or len(cards) < _SCHIELAND_PAGE_SIZE:
            break
        page += 1

    log.info("schielandborsboom: %d listings opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# Van Silfhout & Hogetoorn Wereldmakelaars (Delft)
# ---------------------------------------------------------------------------

_VANSILFHOUT_BASE = "https://www.vansilfhout.nl"

# Maps the site's lower-cased status text onto the status strings stored in RawListing.
_VANSILFHOUT_STATUS_MAP = {
    "te koop": "beschikbaar",
    "onder bod": "onder_bod",
    "verkocht onder voorbehoud": "verkocht",
    "verkocht": "verkocht",
}


def _vansilfhout_detail(detail_url: str) -> dict:
    """Fetch a Van Silfhout detail page and extract the postcode and short specs.

    The postcode is read from a JavaScript variable embedded in the raw HTML,
    which is why the page is fetched directly instead of only being parsed
    into a soup.

    Args:
        detail_url: Absolute URL of the listing's detail page.

    Returns:
        A dict with the keys ``postcode``, ``bouwjaar``, ``woonoppervlak``,
        ``kamers`` and ``slaapkamers``. The postcode has its spaces removed;
        the other values are the raw page text (or None when absent). On any
        error a warning is logged and an empty dict is returned.
    """
    try:
        # Imported here, inside the try, so that a missing package degrades to
        # an empty result like any other failure.
        import httpx
        from bs4 import BeautifulSoup

        response = httpx.get(
            detail_url,
            headers={"User-Agent": config.USER_AGENT},
            timeout=15,
            follow_redirects=True,
        )
        response.raise_for_status()
        html = response.text
        soup = BeautifulSoup(html, "html.parser")

        # Postcode embedded in JS: objectZipcode': '2624NP'
        match = re.search(r"objectZipcode':\s*'([^']+)'", html)
        # Strip spaces so the format matches the other adapters ("2624 NP" -> "2624NP").
        postcode = (match.group(1).replace(" ", "") or None) if match else None

        # shortSpecs rows: <li><span>Label:</span><span>Value</span></li>
        kv: dict[str, str] = {}
        for li in soup.select(".shortSpecs li"):
            spans = li.select("span")
            if len(spans) < 2:
                continue
            label = spans[0].get_text(strip=True).rstrip(":").lower()
            kv[label] = spans[-1].get_text(strip=True)

        return {
            "postcode": postcode,
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("oppervlakte"),
            "kamers": kv.get("kamers"),
            "slaapkamers": kv.get("slaapkamers"),
        }
    except Exception as e:
        log.warning("vansilfhout: detail fetch fout %s: %s", detail_url, e)
        return {}
def _vansilfhout_first_int(text):
    """Return the first run of digits in *text* as an int, or None if there is none."""
    match = re.search(r"\d+", text or "")
    return int(match.group()) if match else None


def _vansilfhout_parse_card(card):
    """Turn one overview card into a RawListing.

    Returns None when the card has no link or is priced above
    ``config.MAX_PRICE``. The detail page is only fetched for cards that pass
    those filters.
    """
    a_tag = card.select_one("a.objectcontainerimg")
    if not a_tag or "href" not in a_tag.attrs:
        return None
    detail_url = a_tag["href"]
    if not detail_url.startswith("http"):
        detail_url = _VANSILFHOUT_BASE + detail_url

    status_text = (_text(card, "span.objectstatus") or "").lower()
    status = _VANSILFHOUT_STATUS_MAP.get(status_text, "beschikbaar")

    # Address, and the city from the last span of the street/city link.
    adres = _text(card, "h2.objecttitle")
    city_el = card.select("a.straatnaamwoonplaats span")
    stad = city_el[-1].get_text(strip=True) if city_el else None

    # Price from the <strong> inside shortSpecs.
    prijs = parse_prijs(_text(card, "ul.shortSpecs li strong"))
    if prijs and prijs > config.MAX_PRICE:
        return None

    # Area and room count from the card's shortSpecs rows.
    woonoppervlak_card = None
    kamers_card = None
    for li in card.select("ul.shortSpecs li"):
        spans = li.select("span")
        if len(spans) < 2:
            continue
        label = spans[0].get_text(strip=True).lower()
        val = spans[-1].get_text(strip=True)
        if "oppervlakt" in label:
            woonoppervlak_card = parse_m2(val)
        elif "kamer" in label and "slaapkamer" not in label:
            # "kamer" is also a substring of "slaapkamers"; a bedroom row must
            # not overwrite the room count.
            kamers_card = _vansilfhout_first_int(val)

    # Hero image: prefer data-lazy-src; when only an inline data: placeholder
    # is available, use the <noscript> image instead.
    img_tag = card.select_one("a.objectcontainerimg img")
    hero = None
    if img_tag:
        hero = img_tag.get("data-lazy-src") or img_tag.get("src") or None
    if hero and hero.startswith("data:"):
        noscript = card.select_one("noscript img")
        hero = (noscript.get("src") or None) if noscript else None

    kk = _vansilfhout_detail(detail_url)

    # Detail values win; the card's room count is the fallback.
    kamers = _vansilfhout_first_int(kk.get("kamers"))
    if kamers is None:
        kamers = kamers_card
    slaapkamers = _vansilfhout_first_int(kk.get("slaapkamers"))

    # Take the first four-digit number so that free-text values such as
    # "ca. 1930" do not raise and cause the whole listing to be dropped.
    year_match = re.search(r"\b\d{4}\b", kk.get("bouwjaar") or "")
    bouwjaar = int(year_match.group()) if year_match else None

    return RawListing(
        url=detail_url,
        source_makelaar="vansilfhout",
        status=status,
        adres=adres,
        postcode=kk.get("postcode"),
        stad=stad,
        prijs=prijs,
        hero_image_url=hero,
        bouwjaar=bouwjaar,
        woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
        kamers=kamers,
        slaapkamers=slaapkamers,
    )


def fetch_vansilfhout() -> list[RawListing]:
    """Fetch the Van Silfhout listings (all listings are on a single page).

    A card that fails to parse is logged and skipped. When ``config.APP_ENV``
    is ``"dev"`` the run stops after the first listing.

    Returns:
        The collected ``RawListing`` objects.
    """
    soup = fetch_soup(f"{_VANSILFHOUT_BASE}/woningaanbod/")
    dev_mode = config.APP_ENV == "dev"

    listings = []
    for card in soup.select("article.row"):
        try:
            listing = _vansilfhout_parse_card(card)
        except Exception as e:
            log.warning("vansilfhout: parse fout: %s", e)
            continue
        if listing is None:
            continue
        listings.append(listing)
        if dev_mode:
            break

    log.info("vansilfhout: %d listings opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — exporteer hier alle actieve SSR adapters
# ---------------------------------------------------------------------------
@@ -882,4 +1192,6 @@ SCRAPERS = {
'dens': fetch_dens,
'3dmakelaars': fetch_3dmakelaars,
'dupont': fetch_dupont,
'schielandborsboom': fetch_schielandborsboom,
'vansilfhout': fetch_vansilfhout,
}

View File

@@ -16,7 +16,7 @@ logging.basicConfig(
)
# --- change this to test a different adapter ---
ADAPTER = SCRAPERS['dupont']
ADAPTER = SCRAPERS['vansilfhout']
if __name__ == "__main__":
print(f"Testing adapter: {ADAPTER.__name__}")