add scrapers: 88makelaars, Borgdorff (SSR) + Elzenaar, DOEN (OG Online API) for Den Haag

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-04 23:30:06 +02:00
parent 6beae1133b
commit 75c5b6f26d
3 changed files with 420 additions and 8 deletions

View File

@@ -46,14 +46,14 @@
| Done | Naam | Website | Adres |
|------|------|---------|-------|
| [ ] | Yuvam Makelaardij | yuvammakelaardij.nl | — |
| [ ] | 88 Makelaars | 88makelaars.nl | — |
| [ ] | DIVA Makelaars | divamakelaars.nl | — |
| [ ] | Elzenaar NVM Makelaars | elzenaar.com | — |
| [ ] | Frisia Makelaars | frisiamakelaars.nl | — |
| [ ] | Borgdorff Makelaars | borgdorff.nl | — (vestiging Den Haag) |
| [ ] | SMASH Makelaars | smashmakelaars.nl | — |
| [ ] | DOEN NVM Makelaars | doenmakelaars.com | Doezastraat 30 (Leiden, ook actief in Den Haag) |
| [skip] | Yuvam Makelaardij | yuvammakelaardij.nl | — (connection refused) |
| [x] | 88 Makelaars | 88makelaars.nl | — |
| [skip] | DIVA Makelaars | divamakelaars.nl | — (alleen Maartensdijk, niet Den Haag) |
| [x] | Elzenaar NVM Makelaars | elzenaar.com | — |
| [skip] | Frisia Makelaars | frisiamakelaars.nl | — (SPA/Vue, geen API) |
| [x] | Borgdorff Makelaars | borgdorff.nl | — (vestiging Den Haag) |
| [skip] | SMASH Makelaars | smashmakelaars.nl | — (te klein, geen API) |
| [x] | DOEN NVM Makelaars | doenmakelaars.com | Doezastraat 30 (Leiden, ook actief in Den Haag) |
## Leiden

View File

@@ -307,6 +307,135 @@ def fetch_vandaal() -> list[RawListing]:
return listings
# ---------------------------------------------------------------------------
# Elzenaar NVM Makelaars (Den Haag) — OG Online platform
# ---------------------------------------------------------------------------
# Same platform as bjornd/moerman/vandaal.

# Site root; the feed endpoint and each item's relative `url` are appended to it.
_ELZENAAR_BASE = "https://www.elzenaar.com"
# `statusOrig` values for which fetch_elzenaar skips the feed item
# (presumably rented-out objects, judging by the names).
_ELZENAAR_SKIP = {"rented", "rented_ur"}
# Only feed items whose `city` is in this set are kept.
_ELZENAAR_CITIES = {"Den Haag", "Voorburg", "Rijswijk"}
# Feed `statusOrig` -> status stored on the RawListing. Values missing here
# fall back to "beschikbaar" in fetch_elzenaar.
_ELZENAAR_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}
def fetch_elzenaar() -> list[RawListing]:
    """Fetch Elzenaar for-sale listings from the realtime-listings JSON feed.

    A feed item is dropped when it is not a sale (``isSales`` falsy), when its
    ``statusOrig`` is in ``_ELZENAAR_SKIP``, when its ``city`` is not in
    ``_ELZENAAR_CITIES``, when its price exceeds ``config.MAX_PRICE`` or when
    it carries no ``url``.

    Returns:
        One ``RawListing`` per remaining feed item. Falsy feed values
        (missing, null, 0, empty string) are stored as ``None``.
    """
    data = fetch_json(
        f"{_ELZENAAR_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )
    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _ELZENAAR_SKIP:
            continue
        if item.get("city") not in _ELZENAAR_CITIES:
            continue
        # `or 0` also covers an explicit null price; `.get(key, 0)` would
        # return None there and the comparison would raise TypeError.
        if (item.get("salesPrice") or 0) > config.MAX_PRICE:
            continue

        path = item.get("url")
        if not path:
            # One malformed item must not abort the whole scrape.
            log.warning("elzenaar: item zonder url overgeslagen")
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None

        # The year may arrive as a string or as a JSON number; normalise to
        # str before testing it. isdecimal() accepts exactly what int() parses.
        raw_year = str(item.get("dateOfConstruction") or "").strip()
        bouwjaar = int(raw_year) if raw_year.isdecimal() else None

        listings.append(RawListing(
            url=_ELZENAAR_BASE + path,
            source_makelaar="elzenaar",
            status=_ELZENAAR_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=item.get("salesPrice") or None,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            # `or None` already turns a 0 plot surface into None.
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=item.get("energyLabel") or None,
            hero_image_url=item.get("photo") or None,
        ))
    log.info("elzenaar: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# DOEN NVM Makelaars (Den Haag / Leiden / Voorburg) — OG Online platform
# ---------------------------------------------------------------------------

# Site root; the feed endpoint and each item's relative `url` are appended to it.
_DOEN_BASE = "https://www.doenmakelaars.com"
# `statusOrig` values for which fetch_doen skips the feed item
# (presumably rented-out objects, judging by the names).
_DOEN_SKIP = {"rented", "rented_ur"}
# Only feed items whose `city` is in this set are kept.
_DOEN_CITIES = {"Den Haag", "Leiden", "Voorburg", "Leidschendam", "Rijswijk", "Wassenaar", "Zoetermeer"}
# Feed `statusOrig` -> status stored on the RawListing. Values missing here
# fall back to "beschikbaar" in fetch_doen.
_DOEN_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}
def fetch_doen() -> list[RawListing]:
    """Fetch DOEN for-sale listings from the realtime-listings JSON feed.

    A feed item is dropped when it is not a sale (``isSales`` falsy), when its
    ``statusOrig`` is in ``_DOEN_SKIP``, when its ``city`` is not in
    ``_DOEN_CITIES``, when its price exceeds ``config.MAX_PRICE`` or when it
    carries no ``url``.

    Returns:
        One ``RawListing`` per remaining feed item. Falsy feed values
        (missing, null, 0, empty string) are stored as ``None``.
    """
    data = fetch_json(
        f"{_DOEN_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )
    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _DOEN_SKIP:
            continue
        if item.get("city") not in _DOEN_CITIES:
            continue
        # `or 0` also covers an explicit null price; `.get(key, 0)` would
        # return None there and the comparison would raise TypeError.
        if (item.get("salesPrice") or 0) > config.MAX_PRICE:
            continue

        path = item.get("url")
        if not path:
            # One malformed item must not abort the whole scrape.
            log.warning("doen: item zonder url overgeslagen")
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None

        # The year may arrive as a string or as a JSON number; normalise to
        # str before testing it. isdecimal() accepts exactly what int() parses.
        raw_year = str(item.get("dateOfConstruction") or "").strip()
        bouwjaar = int(raw_year) if raw_year.isdecimal() else None

        listings.append(RawListing(
            url=_DOEN_BASE + path,
            source_makelaar="doen",
            status=_DOEN_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=item.get("salesPrice") or None,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            # `or None` already turns a 0 plot surface into None.
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=item.get("energyLabel") or None,
            hero_image_url=item.get("photo") or None,
        ))
    log.info("doen: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — exporteer hier alle actieve API adapters
# ---------------------------------------------------------------------------
@@ -316,4 +445,6 @@ SCRAPERS = {
'ooms': fetch_ooms,
'moerman': fetch_moerman,
'vandaal': fetch_vandaal,
'elzenaar': fetch_elzenaar,
'doen': fetch_doen,
}

View File

@@ -1595,6 +1595,285 @@ def fetch_olsthoorn() -> list[RawListing]:
return listings
# ---------------------------------------------------------------------------
# 88 Makelaars (Den Haag) — Custom WordPress theme
# ---------------------------------------------------------------------------
# Cards on /ons-aanbod/page/{N}/; details in div.listing_detail kv pairs.

# Site root; overview page URLs are built from it and relative detail links
# are prefixed with it.
_88_BASE = "https://88makelaars.nl"
# Lower-cased ribbon text on an overview card -> status stored on the
# RawListing. Text missing here falls back to "beschikbaar" in
# fetch_88makelaars.
_88_STATUS_MAP = {
    "te koop": "beschikbaar",
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht onder voorbehoud": "verkocht",
    "verkocht": "verkocht",
}
def _88makelaars_detail(detail_url: str) -> dict:
    """Fetch an 88makelaars detail page and extract selected features.

    Every ``div.listing_detail`` whose text contains a colon is read as a
    ``label: value`` pair; labels are lower-cased before lookup.

    Args:
        detail_url: Absolute URL of the listing's detail page.

    Returns:
        A dict with the keys ``postcode``, ``slaapkamers``, ``woonoppervlak``,
        ``energielabel`` and ``woningtype``. ``postcode`` is normalised to
        four digits plus two upper-case letters without separator; the other
        values are the raw page strings, or ``None`` when the label is absent.
        An empty dict is returned when fetching or parsing fails (best effort;
        the failure is logged as a warning).
    """
    try:
        soup = fetch_soup(detail_url)
        kv: dict[str, str] = {}
        for div in soup.select("div.listing_detail"):
            label, sep, value = div.get_text(strip=True).partition(":")
            if sep:
                kv[label.strip().lower()] = value.strip()

        # Capture digits and letters separately so any whitespace between
        # them (tab, non-breaking space) is dropped, not only plain spaces.
        # The word boundaries keep "2511 DEN HAAG" from yielding "2511DE" and
        # a five-digit number from contributing its last four digits.
        pc_match = re.search(
            r"\b(\d{4})\s*([A-Z]{2})\b", (kv.get("postcode") or "").upper()
        )
        postcode = "".join(pc_match.groups()) if pc_match else None

        return {
            "postcode": postcode,
            "slaapkamers": kv.get("slaapkamers"),
            "woonoppervlak": kv.get("woning grootte"),
            "energielabel": kv.get("energieklasse"),
            "woningtype": kv.get("soort woning"),
        }
    except Exception as e:
        log.warning("88makelaars: detail fetch fout %s: %s", detail_url, e)
        return {}
def fetch_88makelaars() -> list[RawListing]:
    """Fetch 88 Makelaars listings (Den Haag only).

    Walks the paginated ``/ons-aanbod/`` overview. For every card located in
    Den Haag and priced at or below ``config.MAX_PRICE`` the detail page is
    fetched via ``_88makelaars_detail`` to add postcode, bedrooms, living
    area, energy label and property type.

    Paging stops at the first page without cards or with fewer than 10 cards.
    A card that fails to parse is logged and skipped.

    Returns:
        One ``RawListing`` per accepted card.
    """
    listings = []
    page = 1
    while True:
        if page == 1:
            url = f"{_88_BASE}/ons-aanbod/"
        else:
            url = f"{_88_BASE}/ons-aanbod/page/{page}/"
        soup = fetch_soup(url)
        cards = soup.select("div.property_listing")
        if not cards:
            break

        for card in cards:
            try:
                # URL from carousel
                a_tag = card.select_one(".property_unit_carousel a[href]")
                if not a_tag:
                    continue
                detail_url = a_tag["href"]
                if not detail_url.startswith("http"):
                    detail_url = _88_BASE + detail_url

                # City — last link in property_location_image
                loc_links = card.select(".property_location_image a")
                stad = loc_links[-1].get_text(strip=True) if loc_links else None
                if not stad or stad.lower() != "den haag":
                    continue

                # Price — filter before the detail request is made
                prijs = parse_prijs(_text(card, ".listing_unit_price_wrapper"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status
                status_text = (_text(card, ".ribbon-inside") or "").lower()
                status = _88_STATUS_MAP.get(status_text, "beschikbaar")

                # Address
                adres = _text(card, "h4 a") or _text(card, "h4")

                # Surface + rooms
                woonoppervlak_card = parse_m2(_text(card, "span.infosize"))
                kamers_card = None
                rooms_txt = _text(card, "span.inforoom")
                if rooms_txt:
                    m = re.search(r"(\d+)", rooms_txt)
                    kamers_card = int(m.group(1)) if m else None

                # Hero: first active carousel image
                img = card.select_one(".item.active img")
                hero = (img.get("src") or img.get("data-original")) if img else None

                kk = _88makelaars_detail(detail_url)

                # The detail value is free text; take its first number so a
                # value such as "3 slaapkamers" no longer raises ValueError
                # and costs us the whole listing.
                bed_match = re.search(r"\d+", kk.get("slaapkamers") or "")
                slaapkamers = int(bed_match.group(0)) if bed_match else None

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="88makelaars",
                    status=status,
                    adres=adres,
                    postcode=kk.get("postcode"),
                    stad="Den Haag",
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    # Prefer the detail-page area; fall back to the card.
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    kamers=kamers_card,
                    slaapkamers=slaapkamers,
                    energielabel=kk.get("energielabel"),
                ))
                # In dev, stop processing this page after the first stored
                # listing (limits the number of detail requests).
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                log.warning("88makelaars: parse fout: %s", e)

        # Fewer than 10 cards is treated as the last overview page.
        if len(cards) < 10:
            break
        page += 1

    log.info("88makelaars: %d listings opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# Borgdorff Makelaars (Den Haag / Westland) — SURE WordPress plugin
# ---------------------------------------------------------------------------
# Covers Den Haag ('s-gravenhage), Monster, Naaldwijk etc. Filter for Den Haag.
# Same SURE plugin as Schieland Borsboom but uses a.card--house (double dash).
# No postcode on detail page.

# Site root; overview page URLs are built from it and relative links and
# image paths are prefixed with it.
_BORGDORFF_BASE = "https://www.borgdorff.nl"
# Lower-cased city names on a card that count as Den Haag.
_BORGDORFF_DEN_HAAG = {"'s-gravenhage", "den haag"}
# CSS class on the card's label span -> status stored on the RawListing.
# Without a matching class fetch_borgdorff starts from "beschikbaar".
_BORGDORFF_BADGE_MAP = {
    "badge--info": "beschikbaar",
    "badge--warning": "onder_bod",
    "badge--danger": "verkocht",
}
def _borgdorff_detail(detail_url: str) -> dict:
    """Read the feature list of a Borgdorff detail page.

    Each ``#kenmerken li`` holding at least two ``<span>`` elements is taken
    as a (label, value) pair; labels are lower-cased before lookup.

    Args:
        detail_url: Absolute URL of the listing's detail page.

    Returns:
        A dict with the keys ``status`` (lower-cased, ``""`` when absent),
        ``woningtype``, ``bouwjaar``, ``woonoppervlak``, ``perceeloppervlak``,
        ``slaapkamers`` and ``energielabel``, holding raw page strings or
        ``None``. An empty dict is returned when fetching or parsing fails
        (the failure is logged as a warning).
    """
    try:
        document = fetch_soup(detail_url)

        features: dict[str, str] = {}
        for row in document.select("#kenmerken li"):
            cells = row.select("span")
            if len(cells) < 2:
                continue
            name = cells[0].get_text(strip=True).lower()
            features[name] = cells[1].get_text(strip=True)

        lookup = features.get
        # Several label spellings are tried in order; the first non-empty wins.
        woningtype = lookup("soort woonhuis") or lookup("soort woning") or lookup("soort bouw")
        woonoppervlak = lookup("gebruiksoppervlakte wonen") or lookup("gebruiksoppervlakte")

        result = {}
        result["status"] = lookup("status", "").lower()
        result["woningtype"] = woningtype
        result["bouwjaar"] = lookup("bouwjaar")
        result["woonoppervlak"] = woonoppervlak
        result["perceeloppervlak"] = lookup("perceeloppervlakte")
        result["slaapkamers"] = lookup("aantal slaapkamers")
        result["energielabel"] = lookup("energielabel")
        return result
    except Exception as e:
        log.warning("borgdorff: detail fetch fout %s: %s", detail_url, e)
        return {}
def fetch_borgdorff() -> list[RawListing]:
    """Fetch Borgdorff listings; only Den Haag / 's-gravenhage, only koop.

    Walks the paginated ``/wonen`` overview filtered on ``sure_koop_huur=koop``.
    For every card whose city is in ``_BORGDORFF_DEN_HAAG`` and whose price is
    at or below ``config.MAX_PRICE`` the detail page is fetched via
    ``_borgdorff_detail`` to refine the status and add the remaining features.

    Paging stops at the first page without cards or with fewer than 15 cards.
    A card that fails to parse is logged and skipped.

    Returns:
        One ``RawListing`` per accepted card; ``postcode`` is always ``None``.
    """
    # Status wording on the detail page -> internal status. Built once
    # instead of once per card.
    detail_status_map = {
        "beschikbaar": "beschikbaar",
        "onder bod": "onder_bod",
        "onder optie": "onder_bod",
        "verkocht": "verkocht",
    }

    listings = []
    page = 1
    while True:
        if page == 1:
            url = f"{_BORGDORFF_BASE}/wonen?sure_koop_huur=koop"
        else:
            url = f"{_BORGDORFF_BASE}/wonen/page/{page}/?sure_koop_huur=koop"
        soup = fetch_soup(url)
        cards = soup.select("a.card--house")
        if not cards:
            break

        for card in cards:
            try:
                href = card.get("href", "")
                if not href:
                    continue
                detail_url = href if href.startswith("http") else _BORGDORFF_BASE + href

                # Filter: only Den Haag
                stad_el = card.select_one("p.lead-two")
                stad = stad_el.get_text(strip=True) if stad_el else None
                if not stad or stad.lower() not in _BORGDORFF_DEN_HAAG:
                    continue

                # Price — filter early, before the detail request is made
                prijs = parse_prijs(_text(card, "p.strong"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status from badge class
                label_span = card.select_one("span.card-house__label")
                status = "beschikbaar"
                if label_span:
                    for cls in label_span.get("class", []):
                        if cls in _BORGDORFF_BADGE_MAP:
                            status = _BORGDORFF_BADGE_MAP[cls]
                            break

                # Address
                adres = _text(card, "h4")

                # Hero: largest source srcset. A srcset may hold several
                # comma-separated candidates, each optionally followed by a
                # width/density descriptor; keep only the first URL so the
                # stored value is a usable image URL.
                src_tag = card.select_one('picture source[media="(min-width:1280px)"]')
                candidates = (src_tag.get("srcset") or "").split() if src_tag else []
                hero = candidates[0].rstrip(",") if candidates else None
                if not hero:
                    img = card.select_one("img[data-src]")
                    hero = img.get("data-src") if img else None
                if hero and not hero.startswith("http"):
                    hero = _BORGDORFF_BASE + hero

                # Surface + bedrooms from data icons
                woonoppervlak_card = None
                slaapkamers_card = None
                for data_div in card.select("div.data"):
                    inner = data_div.select_one("p.small")
                    if not inner:
                        continue
                    txt = inner.get_text(strip=True)
                    if data_div.select_one("i.icon-surface"):
                        woonoppervlak_card = parse_m2(txt)
                    elif data_div.select_one("i.icon-bed"):
                        m = re.search(r"(\d+)", txt)
                        slaapkamers_card = int(m.group(1)) if m else None

                kk = _borgdorff_detail(detail_url)

                # Refine status from detail page; unknown wording keeps the
                # badge-derived status.
                if kk.get("status"):
                    status = detail_status_map.get(kk["status"], status)

                # Detail values are free text. Accept the construction year
                # only when it is a plain four-digit year, so a period or
                # other wording becomes None instead of raising ValueError
                # and costing us the whole listing.
                year_match = re.fullmatch(r"\d{4}", (kk.get("bouwjaar") or "").strip())
                bouwjaar = int(year_match.group(0)) if year_match else None

                # First number in the bedrooms value; fall back to the card
                # when the detail page has none.
                bed_match = re.search(r"\d+", kk.get("slaapkamers") or "")
                slaapkamers = int(bed_match.group(0)) if bed_match else slaapkamers_card

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="borgdorff",
                    status=status,
                    adres=adres,
                    postcode=None,  # not exposed by broker
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=bouwjaar,
                    # Prefer the detail-page area; fall back to the card.
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    slaapkamers=slaapkamers,
                    energielabel=kk.get("energielabel"),
                ))
                # In dev, stop processing this page after the first stored
                # listing (limits the number of detail requests).
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                log.warning("borgdorff: parse fout: %s", e)

        # Fewer than 15 cards is treated as the last overview page.
        if len(cards) < 15:
            break
        page += 1

    log.info("borgdorff: %d listings opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — exporteer hier alle actieve SSR adapters
# ---------------------------------------------------------------------------
@@ -1615,4 +1894,6 @@ SCRAPERS = {
'post': fetch_post,
'morris': fetch_morris,
'olsthoorn': fetch_olsthoorn,
'88makelaars': fetch_88makelaars,
'borgdorff': fetch_borgdorff,
}