add some more makelaars, and some more infra
This commit is contained in:
@@ -106,11 +106,87 @@ def fetch_bjornd() -> list[RawListing]:
|
||||
log.info("bjornd: %d koopwoningen opgehaald", len(listings))
|
||||
return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ooms
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Root URL of the Ooms website; API paths and image paths are appended to it.
_OOMS_BASE = "https://ooms.com"

# Only objects whose "place" is one of these cities are kept.
_OOMS_CITIES = {
    "Delft",
    "Leiden",
    "Pijnacker",
    "Rotterdam",
    "Schiedam",
    "Voorburg",
}

# Availability statuses that cause an object to be skipped.
_OOMS_SKIP_STATUS = {
    "verhuurd",
    "verhuurd onder voorbehoud",
}

# Ooms availability status -> internal listing status.
_OOMS_STATUS_MAP = {
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht": "verkocht",
    "verkocht onder voorbehoud": "verkocht",
}
|
||||
|
||||
|
||||
def fetch_ooms() -> list[RawListing]:
    """Fetch the for-sale listings published through the Ooms JSON API.

    Reads ``/api/properties/available.json`` below ``_OOMS_BASE`` and converts
    every object under its ``"objects"`` key with ``_ooms_listing``. Objects
    that are filtered out are dropped silently; an object that cannot be
    converted is logged as a warning and skipped, so one malformed entry does
    not abort the whole run.

    Returns:
        One ``RawListing`` per accepted object.
    """
    data = fetch_json(f"{_OOMS_BASE}/api/properties/available.json")
    listings = []

    # `or []` also covers an explicit null under "objects".
    for item in data.get("objects") or []:
        try:
            listing = _ooms_listing(item)
        except Exception as e:
            log.warning("ooms: parse fout: %s", e)
            continue
        if listing is not None:
            listings.append(listing)

    log.info("ooms: %d listings opgehaald", len(listings))
    return listings


def _ooms_hero_image(item: dict) -> str | None:
    """Return the absolute URL of the widest rendition of the first image, if any."""
    images = item.get("realworks_main_images") or item.get("realworks_images") or []
    if not images:
        return None
    sizes = images[0].get("sizes") or []
    # `or 0` keeps max() from comparing None with an int when a width is null.
    best = max(sizes, key=lambda s: s.get("width") or 0, default=None)
    if not best or not best.get("imageUrl"):
        return None
    return _OOMS_BASE + best["imageUrl"]


def _ooms_listing(item: dict) -> RawListing | None:
    """Convert one Ooms API object to a ``RawListing``.

    Returns None when the object is not for sale, lies outside
    ``_OOMS_CITIES``, costs more than ``config.MAX_PRICE`` or has a status
    listed in ``_OOMS_SKIP_STATUS``.

    Raises:
        KeyError: if the object has no ``"url"`` key.
    """
    if item.get("buy_or_rent") != "buy":
        return None
    if item.get("place") not in _OOMS_CITIES:
        return None
    # `or 0` guards against an explicit null price, which would otherwise
    # raise TypeError in the comparison.
    if (item.get("buy_price") or 0) > config.MAX_PRICE:
        return None

    # The lookup tables hold lowercase keys, so compare case-insensitively.
    status_raw = (item.get("availability_status") or "").strip().lower()
    if status_raw in _OOMS_SKIP_STATUS:
        return None

    hnr = item.get("house_number", "")
    add = item.get("house_number_addition") or ""
    adres = f"{item.get('street_name', '')} {hnr}{(' ' + add) if add else ''}".strip()

    return RawListing(
        url=item["url"],
        source_makelaar="ooms",
        # Keep only the first 10 characters of the publish date string.
        datum_aanmelding=(item.get("publish_date") or "")[:10] or None,
        status=_OOMS_STATUS_MAP.get(status_raw, "beschikbaar"),
        adres=adres or None,
        postcode=(item.get("zip_code") or "").replace(" ", "") or None,
        stad=item.get("place") or None,
        prijs=item.get("buy_price") or None,
        woningtype=item.get("appartment_characteristic") or item.get("residential_building_type") or None,
        woonoppervlak=item.get("usable_area_living_function") or None,
        # `or None` already maps a parcel surface of 0 to None.
        perceeloppervlak=item.get("parcel_surface") or None,
        kamers=item.get("amount_of_rooms") or None,
        slaapkamers=item.get("amount_of_bedrooms") or None,
        hero_image_url=_ooms_hero_image(item),
        extra={
            "office": (item.get("office") or {}).get("name"),
            "locations": item.get("locations"),
            "garden_types": item.get("garden_types"),
            "lat": item.get("lat"),
            "lng": item.get("lng"),
            "object_code": item.get("object_code"),
        },
    )
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SCRAPERS — exporteer hier alle actieve API adapters
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Registry of the active API adapters: scraper name -> fetch function.
SCRAPERS = {
    "bjornd": fetch_bjornd,
    "ooms": fetch_ooms,
}
|
||||
|
||||
@@ -65,51 +65,127 @@ def parse_m2(text: str | None) -> int | None:
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Björn & Dries adapter (bjornd.nl)
|
||||
# Realworks CMS (shared)
|
||||
# ---------------------------------------------------------------------------
|
||||
# TODO: vul de echte CSS selectors in na inspectie van de pagina.
|
||||
# Dit is een structureel sjabloon — de selectors zijn placeholders.
|
||||
|
||||
# Root URL of the bjornd.nl site and the URL of its listings overview page.
BJORND_BASE = "https://www.bjornd.nl"
BJORND_AANBOD = BJORND_BASE + "/aanbod"

# Lowercased status banner text on a Realworks site -> internal listing status.
_REALWORKS_STATUS_MAP = {
    "te koop": "beschikbaar",
    "nieuw": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht o.v.": "verkocht",
    "verkocht": "verkocht",
}
|
||||
|
||||
|
||||
def fetch_bjornd_demo() -> list[RawListing]:
|
||||
soup = fetch_soup(BJORND_AANBOD)
|
||||
def _realworks_detail(detail_url: str, makelaar: str) -> dict:
    """Scrape the characteristics ("kenmerken") of one Realworks detail page.

    Every ``span.kenmerk`` element that contains both a ``span.kenmerkName``
    and a ``span.kenmerkValue`` contributes one label/value pair; labels are
    lowercased before lookup. The values in the result are the raw page texts,
    or None for a label that was not found.

    Args:
        detail_url: URL of the detail page to fetch.
        makelaar: Broker name, used only in the warning log message.

    Returns:
        A dict with the keys ``woningtype``, ``bouwjaar``, ``woonoppervlak``,
        ``perceeloppervlak``, ``kamers``, ``slaapkamers`` and ``energielabel``,
        or an empty dict when anything goes wrong.
    """
    # Result key -> lowercased label as it appears on the page.
    wanted = (
        ("woningtype", "type woning"),
        ("bouwjaar", "bouwjaar"),
        ("woonoppervlak", "woonoppervlakte"),
        ("perceeloppervlak", "perceeloppervlakte"),
        ("kamers", "aantal kamers"),
        ("slaapkamers", "aantal slaapkamers"),
        ("energielabel", "energieklasse"),
    )
    try:
        page = fetch_soup(detail_url)

        found: dict[str, str] = {}
        for row in page.select("span.kenmerk"):
            name_el = row.select_one("span.kenmerkName")
            value_el = row.select_one("span.kenmerkValue")
            if not (name_el and value_el):
                continue
            found[name_el.get_text(strip=True).lower()] = value_el.get_text(strip=True)

        return {field: found.get(label) for field, label in wanted}
    except Exception as e:
        # Best effort: a failing detail page must not lose the listing itself.
        log.warning("%s: detail fetch fout %s: %s", makelaar, detail_url, e)
        return {}
|
||||
|
||||
|
||||
def fetch_realworks(
    base_url: str,
    makelaar: str,
    *,
    page_size: int = 10,
    max_pages: int = 50,
) -> list[RawListing]:
    """Generic fetcher for brokers whose site runs on the Realworks CMS.

    Walks the for-sale overview (``/aanbod/woningaanbod/-{MAX_PRICE}/koop``)
    page by page via ``/pagina-{n}/``, reads the basic fields from each
    ``li.aanbodEntry`` card and enriches them with ``_realworks_detail``.
    A card that cannot be parsed is logged as a warning and skipped.

    Pagination stops at the first page that has no cards, has fewer than
    ``page_size`` cards, or contains no listing URL that was not seen before;
    it never requests more than ``max_pages`` pages.

    Args:
        base_url: Site root without trailing slash, e.g. ``https://www.example.nl``.
        makelaar: Broker name stored as ``source_makelaar`` and used in log messages.
        page_size: Number of cards on a full overview page.
        max_pages: Upper bound on the number of overview pages requested.

    Returns:
        One ``RawListing`` per distinct listing URL that could be parsed.
    """
    from urllib.parse import urljoin

    listings_path = f"/aanbod/woningaanbod/-{config.MAX_PRICE}/koop"
    listings = []
    seen_urls: set[str] = set()

    for page in range(1, max_pages + 1):
        page_url = f"{base_url}{listings_path}/pagina-{page}/"
        soup = fetch_soup(page_url)
        cards = soup.select("li.aanbodEntry")
        if not cards:
            break

        new_on_page = 0
        for card in cards:
            try:
                a_tag = card.select_one("a.aanbodEntryLink")
                if not a_tag or not a_tag.get("href"):
                    continue
                # urljoin handles root-relative, relative and absolute hrefs.
                listing_url = urljoin(page_url, a_tag["href"])
                if listing_url in seen_urls:
                    continue
                seen_urls.add(listing_url)
                new_on_page += 1

                adres = _text(card, ".street-address")
                postcode = (_text(card, ".postal-code") or "").replace(" ", "") or None
                stad = _text(card, ".locality")
                prijs = parse_prijs(_text(card, ".koopprijs .kenmerkValue"))

                status_text = (_text(card, ".objectstatusbanner") or "").strip().lower()
                status = _REALWORKS_STATUS_MAP.get(status_text, "beschikbaar")

                img_tag = card.select_one(".hoofdfoto img")
                # .get() so an <img> without a src attribute does not drop the listing.
                hero = (img_tag.get("src") or None) if img_tag else None

                kk = _realworks_detail(listing_url, makelaar)

                listings.append(RawListing(
                    url=listing_url,
                    source_makelaar=makelaar,
                    adres=adres,
                    postcode=postcode,
                    stad=stad,
                    prijs=prijs,
                    status=status,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=_realworks_int(kk.get("bouwjaar")),
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=_realworks_int(kk.get("kamers")),
                    slaapkamers=_realworks_int(kk.get("slaapkamers")),
                    energielabel=kk.get("energielabel"),
                ))
            except Exception as e:
                log.warning("%s: parse fout: %s", makelaar, e)

        # A short page is the last one; a page without any new URL means the
        # site is repeating itself, so continuing would loop forever.
        if new_on_page == 0 or len(cards) < page_size:
            break
    else:
        log.warning("%s: gestopt na %d pagina's", makelaar, max_pages)

    log.info("%s: %d listings opgehaald", makelaar, len(listings))
    return listings


def _realworks_int(text: str | None) -> int | None:
    """Return the first run of digits in *text* as an int, or None if there is none."""
    import re

    match = re.search(r"\d+", text or "")
    return int(match.group()) if match else None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Anke Bodewes Makelaardij
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def fetch_ankebodewes() -> list[RawListing]:
    """Fetch the listings of Anke Bodewes Makelaardij via the generic Realworks fetcher."""
    return fetch_realworks(
        base_url="https://www.ankebodewes.nl",
        makelaar="ankebodewes",
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Woongoed Makelaars Schiedam
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def fetch_woongoed() -> list[RawListing]:
    """Fetch the listings of Woongoed Makelaars via the generic Realworks fetcher."""
    return fetch_realworks(
        base_url="https://www.woongoedmakelaars.nl",
        makelaar="woongoed",
    )
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SSR helper utils
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -150,5 +226,6 @@ def _infer_stad(postcode: str | None) -> str | None:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Registry of the active scrapers in this module: scraper name -> fetch function.
SCRAPERS = {
    "bjornd_demo": fetch_bjornd_demo,
    "ankebodewes": fetch_ankebodewes,
    "woongoed": fetch_woongoed,
}
|
||||
|
||||
Reference in New Issue
Block a user