add scrapers: Moerman & De Jong (API) and Schieland Borsboom (SSR)
- fetch_moerman: OG Online realtime-listings API (same platform as bjornd); includes bouwjaar (from dateOfConstruction) and energielabel, and strips the space from the postcode
- fetch_schielandborsboom: paginated HTML scraper filtered to Schiedam; fetches each detail page's #kenmerken section for full specs (bouwjaar, kamers, etc.)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -870,6 +870,174 @@ def fetch_dupont() -> list[RawListing]:
|
||||
return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Schieland Borsboom NVM Makelaars (Rotterdam, active in Schiedam)
# ---------------------------------------------------------------------------

# Site root. Used to build the overview page URLs and prepended to relative
# detail-page and image URLs.
_SCHIELAND_BASE = "https://www.schielandborsboom.nl"

# Maps a lowercased status label found on the site to the status value stored
# on a listing. "onder optie" is folded into "onder_bod" and "verkocht o.v."
# into "verkocht"; labels missing from this map are handled by the caller's
# fallback value.
_SCHIELAND_STATUS_MAP = {
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht o.v.": "verkocht",
    "verkocht": "verkocht",
}
|
||||
|
||||
|
||||
def _schieland_detail(detail_url: str) -> dict:
    """Fetch a Schieland Borsboom detail page and extract its characteristics.

    The ``#kenmerken`` section is read as a list of
    ``<li><strong>label</strong><span>value</span></li>`` rows, keyed by the
    lowercased label text.

    Args:
        detail_url: URL of the listing's detail page.

    Returns:
        A dict with the keys ``postcode``, ``status``, ``woningtype``,
        ``bouwjaar``, ``woonoppervlak``, ``perceeloppervlak``, ``kamers``,
        ``slaapkamers`` and ``energielabel``. ``status`` is a lowercased
        string (empty when the page has no status row); the other
        characteristics are the raw text from the page or ``None`` when
        absent. An empty dict is returned when anything goes wrong; the
        failure is logged as a warning instead of being raised.
    """
    # Output field -> lowercased label as it appears in the #kenmerken rows.
    label_for_field = (
        ("woningtype", "soort bouw"),
        ("bouwjaar", "bouwjaar"),
        ("woonoppervlak", "woonoppervlakte"),
        ("perceeloppervlak", "perceeloppervlakte"),
        ("kamers", "aantal kamers"),
        ("slaapkamers", "aantal slaapkamers"),
        ("energielabel", "energielabel"),
    )

    try:
        page = fetch_soup(detail_url)

        # The postcode lives in the status block's paragraph,
        # e.g. "3117 DP Schiedam".
        address_p = page.select_one("div.house__status p")
        postcode = None
        if address_p:
            postcode = _extract_postcode(address_p.get_text())

        specs: dict[str, str] = {}
        section = page.select_one("#kenmerken")
        rows = section.select("li") if section else []
        for row in rows:
            label_tag = row.select_one("strong")
            value_tag = row.select_one("span")
            if not (label_tag and value_tag):
                continue
            # Drop nested links (e.g. "Hypotheek berekenen") so their text
            # does not end up in the value.
            for link in value_tag.select("a"):
                link.decompose()
            value_text = value_tag.get_text(strip=True)
            specs[label_tag.get_text(strip=True).lower()] = value_text

        details = {
            "postcode": postcode,
            "status": specs.get("status", "").lower(),
        }
        for field, label in label_for_field:
            details[field] = specs.get(label)
        return details
    except Exception as e:
        log.warning("schielandborsboom: detail fetch fout %s: %s", detail_url, e)
        return {}
|
||||
|
||||
|
||||
def fetch_schielandborsboom() -> list[RawListing]:
    """Fetch Schieland Borsboom NVM listings (for sale only, Schiedam).

    Walks the paginated ``/wonen`` overview filtered on
    ``sure_koop_huur=koop``. A card is kept only when its place is Schiedam
    and its price, if one could be parsed, does not exceed
    ``config.MAX_PRICE``. Each kept card is enriched with the detail-page
    characteristics from ``_schieland_detail``; detail values take
    precedence and the values shown on the card are the fallback.

    Pagination stops on a page without cards or with fewer cards than a full
    page. When ``config.APP_ENV`` is ``"dev"`` the whole scrape stops after
    the first listing has been collected.

    Returns:
        The collected listings. A card that fails to parse is logged as a
        warning and skipped.

    Raises:
        Whatever ``fetch_soup`` raises for an overview page; that call is
        not guarded here.
    """
    full_page_size = 18  # a page with fewer cards is treated as the last one
    listings = []
    page = 1

    while True:
        if page == 1:
            url = f"{_SCHIELAND_BASE}/wonen?sure_koop_huur=koop"
        else:
            url = f"{_SCHIELAND_BASE}/wonen/page/{page}/?sure_koop_huur=koop"

        soup = fetch_soup(url)
        cards = soup.select("div.card.card--house")
        if not cards:
            break

        stop_after_first = False
        for card in cards:
            try:
                a_tag = card.select_one("a.card__anchor")
                if not a_tag or "href" not in a_tag.attrs:
                    continue
                detail_url = a_tag["href"]
                if not detail_url.startswith("http"):
                    detail_url = _SCHIELAND_BASE + detail_url

                # Filter: only Schiedam
                stad_el = card.select_one("p.house-place")
                stad = stad_el.get_text(strip=True) if stad_el else None
                if not stad or stad.lower() != "schiedam":
                    continue

                # Status comes from the extra CSS class on the thumbnail div;
                # without one the listing counts as available.
                thumb = card.select_one("div.card-house__thumb")
                status_classes = thumb.get("class", []) if thumb else []
                status_text = next(
                    (c for c in status_classes if c != "card-house__thumb"),
                    "beschikbaar",
                ).lower()
                status = _SCHIELAND_STATUS_MAP.get(status_text, "beschikbaar")

                # Price: skip listings above the configured maximum. Listings
                # without a parsed price are kept.
                prijs = parse_prijs(_text(card, "p.price"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                adres = _text(card, "h4.house-street")

                # Hero image from the picture source (medium size). Fall back
                # to the <img> when the source tag or its srcset is missing,
                # instead of failing the whole listing on a KeyError.
                src_tag = card.select_one('picture source[media="(min-width:100px)"]')
                hero = (src_tag.get("srcset") if src_tag else None) or _src(card, "img")
                if hero and not hero.startswith("http"):
                    hero = _SCHIELAND_BASE + hero

                # Data icons on the card: surface, bedrooms, energy label
                woonoppervlak_card = None
                slaapkamers_card = None
                energielabel_card = None
                for data_div in card.select("div.data"):
                    txt = data_div.get_text(strip=True)
                    if data_div.select_one("i.icon-surface"):
                        woonoppervlak_card = parse_m2(txt)
                    elif data_div.select_one("i.icon-bedrooms"):
                        m = re.search(r"(\d+)", txt)
                        slaapkamers_card = int(m.group(1)) if m else None
                    elif data_div.select_one("i.icon-label"):
                        energielabel_card = txt.strip() or None

                # Fetch the detail page for the full characteristics
                kk = _schieland_detail(detail_url)

                # Refine the status from the detail page when it is a known label
                if kk.get("status"):
                    status = _SCHIELAND_STATUS_MAP.get(kk["status"], status)

                # Parse rooms: "5 kamers" -> 5
                kamers = None
                if kk.get("kamers"):
                    m = re.search(r"(\d+)", kk["kamers"])
                    kamers = int(m.group(1)) if m else None

                # Parse bedrooms: "3" or "3 slaapkamers" -> 3
                slaapkamers = slaapkamers_card
                if kk.get("slaapkamers"):
                    m = re.search(r"(\d+)", kk["slaapkamers"])
                    slaapkamers = int(m.group(1)) if m else slaapkamers_card

                # Parse construction year: take the first four-digit number so
                # that values such as "ca. 1935" yield 1935 rather than raising
                # and discarding the entire listing.
                bouwjaar = None
                if kk.get("bouwjaar"):
                    m = re.search(r"\b(\d{4})\b", kk["bouwjaar"])
                    bouwjaar = int(m.group(1)) if m else None

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="schielandborsboom",
                    status=status,
                    adres=adres,
                    postcode=kk.get("postcode"),
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=bouwjaar,
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=kamers,
                    slaapkamers=slaapkamers,
                    energielabel=kk.get("energielabel") or energielabel_card,
                ))
                if config.APP_ENV == "dev":
                    # One listing is enough in dev; remember this so the
                    # pagination loop stops as well, not just this page.
                    stop_after_first = True
                    break
            except Exception as e:
                log.warning("schielandborsboom: parse fout: %s", e)

        if stop_after_first:
            break
        if len(cards) < full_page_size:
            break
        page += 1

    log.info("schielandborsboom: %d listings opgehaald", len(listings))
    return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -882,4 +1050,5 @@ SCRAPERS = {
|
||||
'dens': fetch_dens,
|
||||
'3dmakelaars': fetch_3dmakelaars,
|
||||
'dupont': fetch_dupont,
|
||||
'schielandborsboom': fetch_schielandborsboom,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user