add scrapers: Moerman & De Jong (API) and Schieland Borsboom (SSR)

- fetch_moerman: OG Online realtime-listings API (same platform as bjornd),
  includes bouwjaar from dateOfConstruction, energielabel, strips postcode space
- fetch_schielandborsboom: paginated HTML scraper filtered to Schiedam,
  fetches #kenmerken detail page for full specs (bouwjaar, kamers, etc.)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-04 21:34:58 +02:00
parent edd2580919
commit c92ddb5812
4 changed files with 236 additions and 4 deletions

View File

@@ -870,6 +870,174 @@ def fetch_dupont() -> list[RawListing]:
return listings
# ---------------------------------------------------------------------------
# Schieland Borsboom NVM Makelaars (Rotterdam, actief in Schiedam)
# ---------------------------------------------------------------------------
# Site root; relative links and image paths scraped from the pages are joined onto it.
_SCHIELAND_BASE = "https://www.schielandborsboom.nl"

# Lower-cased status label as shown by the site -> normalised listing status.
# Written grouped by target status so synonyms sit next to each other.
_SCHIELAND_STATUS_MAP = {
    site_label: normalised
    for normalised, site_labels in (
        ("beschikbaar", ("beschikbaar",)),
        ("onder_bod", ("onder bod", "onder optie")),
        ("verkocht", ("verkocht o.v.", "verkocht")),
    )
    for site_label in site_labels
}
def _schieland_detail(detail_url: str) -> dict:
    """Scrape one Schieland Borsboom detail page into a dict of raw fields.

    Args:
        detail_url: Absolute URL of the listing's detail page.

    Returns:
        A dict with the keys ``postcode``, ``status``, ``woningtype``,
        ``bouwjaar``, ``woonoppervlak``, ``perceeloppervlak``, ``kamers``,
        ``slaapkamers`` and ``energielabel``. ``status`` is a lower-cased
        string ("" when absent); the other kenmerken values are the raw page
        text or ``None`` when the label is not present. Any exception while
        fetching or parsing is logged as a warning and ``{}`` is returned.
    """
    try:
        page = fetch_soup(detail_url)

        # The status paragraph holds the address line, e.g. "3117 DP Schiedam".
        address_p = page.select_one("div.house__status p")
        postcode = None
        if address_p:
            postcode = _extract_postcode(address_p.get_text())

        # Rows in #kenmerken look like
        # <li><strong>label</strong><span>value</span></li>.
        features: dict[str, str] = {}
        section = page.select_one("#kenmerken")
        rows = section.select("li") if section else []
        for row in rows:
            key_tag = row.select_one("strong")
            val_tag = row.select_one("span")
            if not (key_tag and val_tag):
                continue
            # Remove links nested in the value (e.g. "Hypotheek berekenen")
            # so their text does not end up in the scraped value.
            for link in val_tag.select("a"):
                link.decompose()
            features[key_tag.get_text(strip=True).lower()] = val_tag.get_text(strip=True)

        result = {
            "postcode": postcode,
            "status": features.get("status", "").lower(),
        }
        # Output field -> lower-cased label used on the page.
        for field, page_label in (
            ("woningtype", "soort bouw"),
            ("bouwjaar", "bouwjaar"),
            ("woonoppervlak", "woonoppervlakte"),
            ("perceeloppervlak", "perceeloppervlakte"),
            ("kamers", "aantal kamers"),
            ("slaapkamers", "aantal slaapkamers"),
            ("energielabel", "energielabel"),
        ):
            result[field] = features.get(page_label)
        return result
    except Exception as e:
        log.warning("schielandborsboom: detail fetch fout %s: %s", detail_url, e)
        return {}
# Number of cards on a full overview page; a shorter page is the last one.
_SCHIELAND_PAGE_SIZE = 18


def _schieland_first_int(text: "str | None") -> "int | None":
    """Return the first run of digits in *text* as an int, or None if absent."""
    match = re.search(r"\d+", text or "")
    return int(match.group()) if match else None


def _schieland_card_status(card) -> str:
    """Derive the normalised status from the classes on the card thumbnail.

    Returns the mapped status of the first class that is a known status label,
    or "beschikbaar" when the thumbnail is missing or no class matches.
    """
    thumb = card.select_one("div.card-house__thumb")
    for css_class in (thumb.get("class", []) if thumb else []):
        # A class token cannot contain spaces, so multi-word labels such as
        # "onder bod" can only appear hyphenated; presumably the site writes
        # them like "onder-bod" — TODO confirm against the live markup.
        label = css_class.lower().replace("-", " ")
        if label in _SCHIELAND_STATUS_MAP:
            return _SCHIELAND_STATUS_MAP[label]
    return "beschikbaar"


def fetch_schielandborsboom() -> list[RawListing]:
    """Fetch Schieland Borsboom NVM listings (koop only, Schiedam).

    Walks the paginated overview, keeps cards whose place is Schiedam and whose
    price does not exceed ``config.MAX_PRICE``, and enriches each one with the
    kenmerken from its detail page (card values are used as fallback).

    Pagination stops when a page has no cards, has fewer than a full page of
    cards, or contains no detail URL that was not already seen (guards against
    a site that keeps serving the same page). When ``config.APP_ENV`` is
    "dev", scraping stops after the first listing.

    Returns:
        The collected ``RawListing`` objects. A card that fails to parse is
        logged and skipped.

    Raises:
        Whatever ``fetch_soup`` raises for an overview page; that call is
        deliberately not guarded so a failed run is visible to the caller.
    """
    listings = []
    seen_urls = set()
    page = 1
    while True:
        if page == 1:
            url = f"{_SCHIELAND_BASE}/wonen?sure_koop_huur=koop"
        else:
            url = f"{_SCHIELAND_BASE}/wonen/page/{page}/?sure_koop_huur=koop"
        soup = fetch_soup(url)
        cards = soup.select("div.card.card--house")
        if not cards:
            break

        new_on_page = 0
        stop_early = False
        for card in cards:
            try:
                a_tag = card.select_one("a.card__anchor")
                if not a_tag or "href" not in a_tag.attrs:
                    continue
                detail_url = a_tag["href"]
                if not detail_url.startswith("http"):
                    detail_url = _SCHIELAND_BASE + detail_url

                # Skip listings already handled on an earlier page.
                if detail_url in seen_urls:
                    continue
                seen_urls.add(detail_url)
                new_on_page += 1

                # Filter: only Schiedam
                stad_el = card.select_one("p.house-place")
                stad = stad_el.get_text(strip=True) if stad_el else None
                if not stad or stad.lower() != "schiedam":
                    continue

                status = _schieland_card_status(card)

                # Price filter runs before the detail fetch to save a request.
                prijs = parse_prijs(_text(card, "p.price"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                adres = _text(card, "h4.house-street")

                # Hero image from picture source (medium size); fall back to
                # the <img> when the source or its srcset is missing.
                src_tag = card.select_one('picture source[media="(min-width:100px)"]')
                hero = (src_tag.get("srcset") if src_tag else None) or _src(card, "img")
                if hero and not hero.startswith("http"):
                    hero = _SCHIELAND_BASE + hero

                # Data icons on card: surface, bedrooms, energy label
                woonoppervlak_card = None
                slaapkamers_card = None
                energielabel_card = None
                for data_div in card.select("div.data"):
                    txt = data_div.get_text(strip=True)
                    if data_div.select_one("i.icon-surface"):
                        woonoppervlak_card = parse_m2(txt)
                    elif data_div.select_one("i.icon-bedrooms"):
                        slaapkamers_card = _schieland_first_int(txt)
                    elif data_div.select_one("i.icon-label"):
                        energielabel_card = txt.strip() or None

                # Fetch detail page for full kenmerken ({} when it failed)
                kk = _schieland_detail(detail_url)

                # Refine status from detail page; keep the card status when
                # the detail label is unknown.
                if kk.get("status"):
                    status = _SCHIELAND_STATUS_MAP.get(kk["status"], status)

                # "5 kamers" -> 5
                kamers = _schieland_first_int(kk.get("kamers"))

                # "3" or "3 slaapkamers" -> 3; card value when not parseable
                slaapkamers = _schieland_first_int(kk.get("slaapkamers"))
                if slaapkamers is None:
                    slaapkamers = slaapkamers_card

                # Take the first 4-digit year so values that are not a bare
                # integer (e.g. a range) do not raise and drop the listing.
                year_match = re.search(r"\d{4}", kk.get("bouwjaar") or "")
                bouwjaar = int(year_match.group()) if year_match else None

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="schielandborsboom",
                    status=status,
                    adres=adres,
                    postcode=kk.get("postcode"),
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=bouwjaar,
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=kamers,
                    slaapkamers=slaapkamers,
                    energielabel=kk.get("energielabel") or energielabel_card,
                ))
                if config.APP_ENV == "dev":
                    # One listing is enough in dev; also stop paginating.
                    stop_early = True
                    break
            except Exception as e:
                log.warning("schielandborsboom: parse fout: %s", e)

        if stop_early or new_on_page == 0 or len(cards) < _SCHIELAND_PAGE_SIZE:
            break
        page += 1

    log.info("schielandborsboom: %d listings opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — exporteer hier alle actieve SSR adapters
# ---------------------------------------------------------------------------
@@ -882,4 +1050,5 @@ SCRAPERS = {
'dens': fetch_dens,
'3dmakelaars': fetch_3dmakelaars,
'dupont': fetch_dupont,
'schielandborsboom': fetch_schielandborsboom,
}