add scrapers: 88makelaars, Borgdorff (SSR) + Elzenaar, DOEN (OG Online API) for Den Haag
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1595,6 +1595,285 @@ def fetch_olsthoorn() -> list[RawListing]:
|
||||
return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 88 Makelaars (Den Haag) — Custom WordPress theme
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cards on /ons-aanbod/page/{N}/; details in div.listing_detail kv pairs.
|
||||
|
||||
_88_BASE = "https://88makelaars.nl"
|
||||
|
||||
_88_STATUS_MAP = {
|
||||
"te koop": "beschikbaar",
|
||||
"beschikbaar": "beschikbaar",
|
||||
"onder bod": "onder_bod",
|
||||
"onder optie": "onder_bod",
|
||||
"verkocht onder voorbehoud": "verkocht",
|
||||
"verkocht": "verkocht",
|
||||
}
|
||||
|
||||
|
||||
def _88makelaars_detail(detail_url: str) -> dict:
    """Scrape the characteristics of a single 88 Makelaars detail page.

    Every ``div.listing_detail`` element whose text contains a colon is split
    at the first colon into a lower-cased label and a value.

    Args:
        detail_url: Absolute URL of the listing's detail page.

    Returns:
        A dict with the keys ``postcode``, ``slaapkamers``, ``woonoppervlak``,
        ``energielabel`` and ``woningtype``; a value is ``None`` when the page
        does not provide it. The postcode is upper-cased with spaces removed;
        all other values are the raw strings from the page. An empty dict is
        returned when anything goes wrong (the error is logged as a warning),
        so a failing detail page never aborts the caller.
    """
    try:
        page = fetch_soup(detail_url)

        fields: dict[str, str] = {}
        for block in page.select("div.listing_detail"):
            label, colon, value = block.get_text(strip=True).partition(":")
            if not colon:
                # No "label: value" structure in this element.
                continue
            fields[label.strip().lower()] = value.strip()

        # Dutch postcode: four digits followed by two letters.
        postcode_source = (fields.get("postcode") or "").upper()
        found = re.search(r"\d{4}\s*[A-Z]{2}", postcode_source)
        postcode = None
        if found:
            postcode = found.group(0).replace(" ", "")

        return {
            "postcode": postcode,
            "slaapkamers": fields.get("slaapkamers"),
            "woonoppervlak": fields.get("woning grootte"),
            "energielabel": fields.get("energieklasse"),
            "woningtype": fields.get("soort woning"),
        }
    except Exception as exc:
        # Best effort: the card data is still usable without the details.
        log.warning("88makelaars: detail fetch fout %s: %s", detail_url, exc)
        return {}
|
||||
|
||||
|
||||
def fetch_88makelaars() -> list[RawListing]:
    """Fetch 88 Makelaars listings (Den Haag only).

    Walks the paginated overview (``/ons-aanbod/`` and
    ``/ons-aanbod/page/{N}/``), keeps the cards whose city is "den haag" and
    whose price does not exceed ``config.MAX_PRICE``, and enriches each kept
    card with the data from its detail page.

    Returns:
        The collected ``RawListing`` objects.

    Raises:
        Whatever ``fetch_soup`` raises for an overview page; only per-card
        errors are caught (and logged) here.
    """
    listings = []
    page = 1

    while True:
        if page == 1:
            url = f"{_88_BASE}/ons-aanbod/"
        else:
            url = f"{_88_BASE}/ons-aanbod/page/{page}/"
        soup = fetch_soup(url)
        cards = soup.select("div.property_listing")
        if not cards:
            break

        for card in cards:
            try:
                # URL from carousel
                a_tag = card.select_one(".property_unit_carousel a[href]")
                if not a_tag:
                    continue
                detail_url = a_tag["href"]
                if not detail_url.startswith("http"):
                    detail_url = _88_BASE + detail_url

                # City — last link in property_location_image
                loc_links = card.select(".property_location_image a")
                stad = loc_links[-1].get_text(strip=True) if loc_links else None
                if not stad or stad.lower() != "den haag":
                    continue

                # Price — filter before the (expensive) detail fetch
                prijs = parse_prijs(_text(card, ".listing_unit_price_wrapper"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status; unknown or missing ribbon text counts as available
                status_text = (_text(card, ".ribbon-inside") or "").lower()
                status = _88_STATUS_MAP.get(status_text, "beschikbaar")

                # Address
                adres = _text(card, "h4 a") or _text(card, "h4")

                # Surface + rooms
                woonoppervlak_card = parse_m2(_text(card, "span.infosize"))
                kamers_card = None
                rooms_txt = _text(card, "span.inforoom")
                if rooms_txt:
                    m = re.search(r"(\d+)", rooms_txt)
                    kamers_card = int(m.group(1)) if m else None

                # Hero: first active carousel image
                img = card.select_one(".item.active img")
                hero = (img.get("src") or img.get("data-original")) if img else None

                kk = _88makelaars_detail(detail_url)

                # The detail value is free text; take its first integer rather
                # than int() on the whole string, which would raise on e.g.
                # "3 slaapkamers" and make the except below drop the listing.
                bedrooms_match = re.search(r"\d+", kk.get("slaapkamers") or "")
                slaapkamers = int(bedrooms_match.group()) if bedrooms_match else None

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="88makelaars",
                    status=status,
                    adres=adres,
                    postcode=kk.get("postcode"),
                    stad="Den Haag",
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    # Prefer the detail page; fall back to the card value.
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    kamers=kamers_card,
                    slaapkamers=slaapkamers,
                    energielabel=kk.get("energielabel"),
                ))
                # In dev, stop processing this page after the first accepted
                # listing. This only leaves the card loop; paging below still
                # continues based on the page size.
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                # One broken card must not abort the whole scrape.
                log.warning("88makelaars: parse fout: %s", e)

        # A page with fewer than 10 cards is treated as the last page.
        if len(cards) < 10:
            break
        page += 1

    log.info("88makelaars: %d listings opgehaald", len(listings))
    return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Borgdorff Makelaars (Den Haag / Westland) — SURE WordPress plugin
|
||||
# ---------------------------------------------------------------------------
|
||||
# Covers Den Haag ('s-gravenhage), Monster, Naaldwijk etc. Filter for Den Haag.
|
||||
# Same SURE plugin as Schieland Borsboom but uses a.card--house (double dash).
|
||||
# No postcode on detail page.
|
||||
|
||||
_BORGDORFF_BASE = "https://www.borgdorff.nl"
|
||||
_BORGDORFF_DEN_HAAG = {"'s-gravenhage", "den haag"}
|
||||
|
||||
_BORGDORFF_BADGE_MAP = {
|
||||
"badge--info": "beschikbaar",
|
||||
"badge--warning": "onder_bod",
|
||||
"badge--danger": "verkocht",
|
||||
}
|
||||
|
||||
|
||||
def _borgdorff_detail(detail_url: str) -> dict:
    """Scrape the characteristics of a single Borgdorff detail page.

    Each ``#kenmerken li`` with at least two ``span`` children is read as a
    (label, value) pair: the first span is the lower-cased label, the second
    the value.

    Args:
        detail_url: Absolute URL of the listing's detail page.

    Returns:
        A dict with the keys ``status`` (lower-cased, ``""`` when absent),
        ``woningtype``, ``bouwjaar``, ``woonoppervlak``, ``perceeloppervlak``,
        ``slaapkamers`` and ``energielabel``; values are the raw strings from
        the page or ``None`` when missing. An empty dict is returned when
        anything goes wrong (the error is logged as a warning).
    """
    try:
        page = fetch_soup(detail_url)

        fields: dict[str, str] = {}
        for item in page.select("#kenmerken li"):
            cells = item.select("span")
            if len(cells) < 2:
                # Not a label/value row.
                continue
            name = cells[0].get_text(strip=True).lower()
            fields[name] = cells[1].get_text(strip=True)

        # The site uses different labels for the property type and the living
        # area; take the first one that has a value.
        woningtype = (
            fields.get("soort woonhuis")
            or fields.get("soort woning")
            or fields.get("soort bouw")
        )
        woonoppervlak = (
            fields.get("gebruiksoppervlakte wonen")
            or fields.get("gebruiksoppervlakte")
        )

        return {
            "status": fields.get("status", "").lower(),
            "woningtype": woningtype,
            "bouwjaar": fields.get("bouwjaar"),
            "woonoppervlak": woonoppervlak,
            "perceeloppervlak": fields.get("perceeloppervlakte"),
            "slaapkamers": fields.get("aantal slaapkamers"),
            "energielabel": fields.get("energielabel"),
        }
    except Exception as exc:
        # Best effort: the card data is still usable without the details.
        log.warning("borgdorff: detail fetch fout %s: %s", detail_url, exc)
        return {}
|
||||
|
||||
|
||||
def fetch_borgdorff() -> list[RawListing]:
    """Fetch Borgdorff listings; only Den Haag / 's-gravenhage, only koop.

    Walks the paginated overview (``/wonen`` and ``/wonen/page/{N}/`` with
    ``sure_koop_huur=koop``), keeps the cards whose city is in
    ``_BORGDORFF_DEN_HAAG`` and whose price does not exceed
    ``config.MAX_PRICE``, and enriches each kept card with the data from its
    detail page.

    Returns:
        The collected ``RawListing`` objects. ``postcode`` is always ``None``.

    Raises:
        Whatever ``fetch_soup`` raises for an overview page; only per-card
        errors are caught (and logged) here.
    """
    # Status wording on the detail page -> normalized status. Built once
    # instead of once per card.
    detail_status_map = {
        "beschikbaar": "beschikbaar",
        "onder bod": "onder_bod",
        "onder optie": "onder_bod",
        "verkocht": "verkocht",
    }

    listings = []
    page = 1

    while True:
        if page == 1:
            url = f"{_BORGDORFF_BASE}/wonen?sure_koop_huur=koop"
        else:
            url = f"{_BORGDORFF_BASE}/wonen/page/{page}/?sure_koop_huur=koop"

        soup = fetch_soup(url)
        cards = soup.select("a.card--house")
        if not cards:
            break

        for card in cards:
            try:
                href = card.get("href", "")
                if not href:
                    continue
                detail_url = href if href.startswith("http") else _BORGDORFF_BASE + href

                # Filter: only Den Haag
                stad_el = card.select_one("p.lead-two")
                stad = stad_el.get_text(strip=True) if stad_el else None
                if not stad or stad.lower() not in _BORGDORFF_DEN_HAAG:
                    continue

                # Price — filter early, before the detail fetch
                prijs = parse_prijs(_text(card, "p.strong"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status from badge class; no known badge means available
                label_span = card.select_one("span.card-house__label")
                status = "beschikbaar"
                if label_span:
                    for cls in label_span.get("class", []):
                        if cls in _BORGDORFF_BADGE_MAP:
                            status = _BORGDORFF_BADGE_MAP[cls]
                            break

                # Address
                adres = _text(card, "h4")

                # Hero: largest <source>. A srcset may list several candidates
                # ("url 1x, url 2x"), so keep only the first candidate's URL
                # instead of storing the whole attribute as the image URL.
                src_tag = card.select_one('picture source[media="(min-width:1280px)"]')
                srcset = (src_tag.get("srcset") or "") if src_tag else ""
                candidates = srcset.split()
                hero = candidates[0].rstrip(",") if candidates else None
                if not hero:
                    img = card.select_one("img[data-src]")
                    hero = img.get("data-src") if img else None
                if hero and not hero.startswith("http"):
                    hero = _BORGDORFF_BASE + hero

                # Surface + bedrooms from data icons
                woonoppervlak_card = None
                slaapkamers_card = None
                for data_div in card.select("div.data"):
                    inner = data_div.select_one("p.small")
                    if not inner:
                        continue
                    txt = inner.get_text(strip=True)
                    if data_div.select_one("i.icon-surface"):
                        woonoppervlak_card = parse_m2(txt)
                    elif data_div.select_one("i.icon-bed"):
                        m = re.search(r"(\d+)", txt)
                        slaapkamers_card = int(m.group(1)) if m else None

                kk = _borgdorff_detail(detail_url)

                # Refine status from detail page; unknown wording keeps the
                # status derived from the badge.
                if kk.get("status"):
                    status = detail_status_map.get(kk["status"], status)

                # Detail values are free text. int() on the whole string would
                # raise on values such as "1906-1930" and make the except below
                # drop the listing, so extract the digits instead. For a range
                # the first year is used as an approximation.
                year_match = re.search(r"\d{4}", kk.get("bouwjaar") or "")
                bouwjaar = int(year_match.group()) if year_match else None
                bedrooms_match = re.search(r"\d+", kk.get("slaapkamers") or "")
                slaapkamers = (
                    int(bedrooms_match.group()) if bedrooms_match else slaapkamers_card
                )

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="borgdorff",
                    status=status,
                    adres=adres,
                    postcode=None,  # not exposed by broker
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=bouwjaar,
                    # Prefer the detail page; fall back to the card value.
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    slaapkamers=slaapkamers,
                    energielabel=kk.get("energielabel"),
                ))
                # In dev, stop processing this page after the first accepted
                # listing. This only leaves the card loop; paging below still
                # continues based on the page size.
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                # One broken card must not abort the whole scrape.
                log.warning("borgdorff: parse fout: %s", e)

        # A page with fewer than 15 cards is treated as the last page.
        if len(cards) < 15:
            break
        page += 1

    log.info("borgdorff: %d listings opgehaald", len(listings))
    return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1615,4 +1894,6 @@ SCRAPERS = {
|
||||
'post': fetch_post,
|
||||
'morris': fetch_morris,
|
||||
'olsthoorn': fetch_olsthoorn,
|
||||
'88makelaars': fetch_88makelaars,
|
||||
'borgdorff': fetch_borgdorff,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user