add some more makelaars, and some more infra

This commit is contained in:
2026-04-03 15:49:42 +02:00
parent 26d9d936f4
commit 17b35d1997
9 changed files with 928 additions and 70 deletions

View File

@@ -106,11 +106,87 @@ def fetch_bjornd() -> list[RawListing]:
log.info("bjornd: %d koopwoningen opgehaald", len(listings))
return listings
# ---------------------------------------------------------------------------
# Ooms
# ---------------------------------------------------------------------------

# Site root: base of the listings endpoint and prefix for image paths.
_OOMS_BASE = "https://ooms.com"

# Only objects whose "place" is one of these cities are kept.
_OOMS_CITIES = {
    "Delft",
    "Schiedam",
    "Rotterdam",
    "Leiden",
    "Voorburg",
    "Pijnacker",
}

# Availability statuses that cause an object to be dropped (rented out).
_OOMS_SKIP_STATUS = {
    "verhuurd",
    "verhuurd onder voorbehoud",
}

# Raw availability status -> status value stored on the listing.
_OOMS_STATUS_MAP = {
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht": "verkocht",
    "verkocht onder voorbehoud": "verkocht",
}
def _ooms_hero_image(item: dict) -> str | None:
    """Return the URL of the widest rendition of the object's first image, or None."""
    images = item.get("realworks_main_images") or item.get("realworks_images") or []
    if not images:
        return None
    sizes = images[0].get("sizes") or []
    # `or 0` keeps max() from comparing None with int when a width is null.
    best = max(sizes, key=lambda s: s.get("width") or 0, default=None)
    path = best.get("imageUrl") if best else None
    if not path:
        return None
    # Only site-relative paths get the base prefix; absolute URLs pass through.
    return path if path.startswith("http") else _OOMS_BASE + path


def fetch_ooms() -> list[RawListing]:
    """Fetch the for-sale listings published on the Ooms JSON endpoint.

    An object is kept only when it is a "buy" object, its place is in
    ``_OOMS_CITIES``, its price does not exceed ``config.MAX_PRICE`` and its
    availability status is not in ``_OOMS_SKIP_STATUS``.

    Fields that are present but ``null`` in the JSON are treated the same as
    missing fields, so a single incomplete object cannot abort the whole run.

    Returns:
        One ``RawListing`` per accepted object.
    """
    data = fetch_json(f"{_OOMS_BASE}/api/properties/available.json")
    listings = []

    for item in data.get("objects", []):
        if item.get("buy_or_rent") != "buy":
            continue
        if item.get("place") not in _OOMS_CITIES:
            continue
        # A null price must never reach the comparison (None > int raises).
        if (item.get("buy_price") or 0) > config.MAX_PRICE:
            continue

        # Normalise before matching: the lookup tables use lower-case keys.
        status_raw = (item.get("availability_status") or "").strip().lower()
        if status_raw in _OOMS_SKIP_STATUS:
            continue

        url = item.get("url")
        if not url:
            # Without a URL the listing cannot be identified; skip just this one.
            log.warning("ooms: object zonder url overgeslagen: %s", item.get("object_code"))
            continue

        # Join only the non-empty parts so a null field never shows up as "None".
        address_parts = (
            item.get("street_name"),
            item.get("house_number"),
            item.get("house_number_addition"),
        )
        adres = " ".join(str(part) for part in address_parts if part).strip()

        listings.append(RawListing(
            url=url,
            source_makelaar="ooms",
            datum_aanmelding=(item.get("publish_date") or "")[:10] or None,
            status=_OOMS_STATUS_MAP.get(status_raw, "beschikbaar"),
            adres=adres or None,
            postcode=(item.get("zip_code") or "").replace(" ", "") or None,
            stad=item.get("place") or None,
            prijs=item.get("buy_price") or None,
            woningtype=(
                item.get("appartment_characteristic")
                or item.get("residential_building_type")
                or None
            ),
            woonoppervlak=item.get("usable_area_living_function") or None,
            # `or None` already turns a 0 m² parcel into "unknown".
            perceeloppervlak=item.get("parcel_surface") or None,
            kamers=item.get("amount_of_rooms") or None,
            slaapkamers=item.get("amount_of_bedrooms") or None,
            hero_image_url=_ooms_hero_image(item),
            extra={
                "office": (item.get("office") or {}).get("name"),
                "locations": item.get("locations"),
                "garden_types": item.get("garden_types"),
                "lat": item.get("lat"),
                "lng": item.get("lng"),
                "object_code": item.get("object_code"),
            },
        ))

    log.info("ooms: %d listings opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — register every active API adapter here
# ---------------------------------------------------------------------------
# Makelaar key -> zero-argument function that returns that makelaar's listings.
SCRAPERS = {
    "bjornd": fetch_bjornd,
    "ooms": fetch_ooms,
}

View File

@@ -65,51 +65,127 @@ def parse_m2(text: str | None) -> int | None:
# ---------------------------------------------------------------------------
# Björn & Dries adapter (bjornd.nl)
# Realworks CMS (shared)
# ---------------------------------------------------------------------------
# TODO: fill in the real CSS selectors after inspecting the page.
# This is a structural template — the selectors are placeholders.
BJORND_BASE = "https://www.bjornd.nl"
BJORND_AANBOD = BJORND_BASE + "/aanbod"

# Lower-cased status banner text -> status value stored on the listing.
_REALWORKS_STATUS_MAP = {
    "te koop": "beschikbaar",
    "nieuw": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht o.v.": "verkocht",
    "verkocht": "verkocht",
}
def fetch_bjornd_demo() -> list[RawListing]:
    """Template scraper for the Björn & Dries listings page.

    NOTE(review): the CSS selectors used here are placeholders that still
    have to be matched against the real HTML structure.

    Returns:
        One ``RawListing`` per card that could be parsed; a card that raises
        while being parsed is logged and skipped.
    """
    soup = fetch_soup(BJORND_AANBOD)
    listings = []

    # Adjust the selector to the real HTML structure.
    for card in soup.select(".property-card"):  # <- adjust
        try:
            a_tag = card.select_one("a[href]")
            if not a_tag:
                continue
            url = a_tag["href"]
            if not url.startswith("http"):
                url = BJORND_BASE + url

            adres = _text(card, ".property-address")  # <- adjust
            postcode = _extract_postcode(_text(card, ".property-location"))
            prijs = parse_prijs(_text(card, ".property-price"))
            opp = parse_m2(_text(card, ".property-area"))
            img = _src(card, "img")

            listings.append(RawListing(
                url=url,
                source_makelaar="bjornd",
                adres=adres,
                postcode=postcode,
                stad=_infer_stad(postcode),
                prijs=prijs,
                woonoppervlak=opp,
                hero_image_url=img,
            ))
        except Exception as e:
            log.warning("Fout bij parsen bjornd card: %s", e)

    return listings


def _realworks_detail(detail_url: str, makelaar: str) -> dict:
    """Fetch a Realworks detail page and extract its characteristics.

    Args:
        detail_url: Absolute URL of the listing's detail page.
        makelaar: Broker key, used only to prefix the warning log line.

    Returns:
        A dict with the keys ``woningtype``, ``bouwjaar``, ``woonoppervlak``,
        ``perceeloppervlak``, ``kamers``, ``slaapkamers`` and ``energielabel``
        holding the raw page text (``None`` when a label is absent), or an
        empty dict when fetching or parsing fails.
    """
    try:
        soup = fetch_soup(detail_url)

        # Build a lower-cased label -> value map from all .kenmerk spans.
        kv: dict[str, str] = {}
        for kenmerk in soup.select("span.kenmerk"):
            label_el = kenmerk.select_one("span.kenmerkName")
            value_el = kenmerk.select_one("span.kenmerkValue")
            if label_el and value_el:
                label = label_el.get_text(strip=True).lower()
                kv[label] = value_el.get_text(strip=True)

        return {
            "woningtype": kv.get("type woning"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("woonoppervlakte"),
            "perceeloppervlak": kv.get("perceeloppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energieklasse"),
        }
    except Exception as e:
        # Deliberately best-effort: a broken detail page must not drop the listing.
        log.warning("%s: detail fetch fout %s: %s", makelaar, detail_url, e)
        return {}


def _int_or_none(text: str | None) -> int | None:
    """Convert page text to int; return None for empty or non-numeric text."""
    if not text:
        return None
    try:
        return int(text.strip())
    except ValueError:
        return None


def fetch_realworks(base_url: str, makelaar: str, *, max_pages: int = 100) -> list[RawListing]:
    """Generic fetcher for brokers whose site runs on the Realworks CMS.

    Walks the overview pages via ``/pagina-{n}/`` and fetches the detail page
    of every listing found on them.

    Args:
        base_url: Site root without trailing slash, e.g. ``https://example.nl``.
        makelaar: Broker key stored on every listing and used in log lines.
        max_pages: Upper bound on the number of overview pages requested, so a
            site that keeps serving full pages cannot cause an endless loop.

    Returns:
        One ``RawListing`` per overview card that could be parsed; a card that
        raises while being parsed is logged and skipped.
    """
    listings_path = f"/aanbod/woningaanbod/-{config.MAX_PRICE}/koop"
    listings = []
    seen_urls: set[str] = set()

    for page in range(1, max_pages + 1):
        url = f"{base_url}{listings_path}/pagina-{page}/"
        soup = fetch_soup(url)
        cards = soup.select("li.aanbodEntry")
        if not cards:
            break

        new_on_page = 0
        for card in cards:
            try:
                a_tag = card.select_one("a.aanbodEntryLink")
                if not a_tag:
                    continue
                href = a_tag["href"]
                # Prefix only site-relative links; absolute hrefs are used as-is.
                listing_url = href if href.startswith("http") else base_url + href
                if listing_url in seen_urls:
                    # Already handled on this or an earlier page.
                    continue
                seen_urls.add(listing_url)
                new_on_page += 1

                adres = _text(card, ".street-address")
                postcode = (_text(card, ".postal-code") or "").replace(" ", "") or None
                stad = _text(card, ".locality")
                prijs = parse_prijs(_text(card, ".koopprijs .kenmerkValue"))

                status_text = (_text(card, ".objectstatusbanner") or "").lower()
                status = _REALWORKS_STATUS_MAP.get(status_text, "beschikbaar")

                img_tag = card.select_one(".hoofdfoto img")
                # .get(): an <img> without src should not discard the listing.
                hero = img_tag.get("src") if img_tag else None

                kk = _realworks_detail(listing_url, makelaar)

                listings.append(RawListing(
                    url=listing_url,
                    source_makelaar=makelaar,
                    adres=adres,
                    postcode=postcode,
                    stad=stad,
                    prijs=prijs,
                    status=status,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=_int_or_none(kk.get("bouwjaar")),
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=_int_or_none(kk.get("kamers")),
                    slaapkamers=_int_or_none(kk.get("slaapkamers")),
                    energielabel=kk.get("energielabel"),
                ))
            except Exception as e:
                log.warning("%s: parse fout: %s", makelaar, e)

        # A page with fewer than 10 cards is taken to be the last one
        # (NOTE(review): presumably the overview page size — confirm). A page
        # with no unseen listings means the site is repeating itself.
        if len(cards) < 10 or new_on_page == 0:
            break
    else:
        log.warning("%s: paginering gestopt na %d pagina's", makelaar, max_pages)

    log.info("%s: %d listings opgehaald", makelaar, len(listings))
    return listings
# ---------------------------------------------------------------------------
# Anke Bodewes Makelaardij
# ---------------------------------------------------------------------------
def fetch_ankebodewes() -> list[RawListing]:
    """Return the Anke Bodewes listings via the shared Realworks fetcher."""
    return fetch_realworks(
        base_url="https://www.ankebodewes.nl",
        makelaar="ankebodewes",
    )
# ---------------------------------------------------------------------------
# Woongoed Makelaars Schiedam
# ---------------------------------------------------------------------------
def fetch_woongoed() -> list[RawListing]:
    """Return the Woongoed Makelaars listings via the shared Realworks fetcher."""
    return fetch_realworks(
        base_url="https://www.woongoedmakelaars.nl",
        makelaar="woongoed",
    )
# ---------------------------------------------------------------------------
# SSR helper utils
# ---------------------------------------------------------------------------
@@ -150,5 +226,6 @@ def _infer_stad(postcode: str | None) -> str | None:
# ---------------------------------------------------------------------------
# Makelaar key -> zero-argument function that returns that makelaar's listings.
SCRAPERS = {
    "bjornd_demo": fetch_bjornd_demo,
    "ankebodewes": fetch_ankebodewes,
    "woongoed": fetch_woongoed,
}