give in to the vibe

This commit is contained in:
2026-04-03 16:32:00 +02:00
parent 4f37a1dd37
commit 18c01139c2
4 changed files with 123 additions and 3 deletions

View File

@@ -303,6 +303,124 @@ def fetch_dewittegarantiemakelaars() -> list[RawListing]:
return listings
# ---------------------------------------------------------------------------
# Makelaardij Wassenaar (Schiedam)
# ---------------------------------------------------------------------------
# Realworks CMS. Listings page has JSON-LD (Residence) with url/address/price/photo.
# Detail pages have span.kenmerk with Wassenaar-specific label names.
# Site root; listing URLs found on the overview page are appended to this.
_WASSENAAR_BASE: str = "https://www.makelaardijwassenaar.nl"

# Lower-cased status banner text -> status value stored on the listing.
_WASSENAAR_STATUS_MAP: dict[str, str] = {
    # Available
    "te koop": "beschikbaar",
    "nieuw": "beschikbaar",
    # Under offer / sold subject to conditions
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht o.v.": "onder_bod",
    "verkocht onder voorbehoud": "onder_bod",
    # Sold
    "verkocht": "verkocht",
}
def _wassenaar_detail(detail_url: str) -> dict:
    """Fetch a Realworks detail page and pull out the Wassenaar kenmerken.

    Every ``span.kenmerk`` element that has both a ``span.kenmerkName`` and a
    ``span.kenmerkValue`` child contributes one label/value pair; labels are
    lower-cased before being looked up.

    Returns a dict with the keys ``woningtype``, ``bouwjaar``,
    ``woonoppervlak``, ``perceeloppervlak``, ``kamers``, ``slaapkamers`` and
    ``energielabel``. Each value is the raw text from the page, or ``None``
    when that label is absent. If anything raises, a warning is logged and an
    empty dict is returned instead.
    """
    # Result key -> lower-cased label as it appears on the detail page.
    label_for_field = {
        "woningtype": "soort object",
        "bouwjaar": "bouwjaar",
        "woonoppervlak": "woonoppervlakte",
        "perceeloppervlak": "perceeloppervlakte",
        "kamers": "aantal kamers",
        "slaapkamers": "aantal slaapkamers",
        "energielabel": "energieklasse",
    }
    try:
        page = fetch_soup(detail_url)
        features: dict[str, str] = {}
        for row in page.select("span.kenmerk"):
            name_el = row.select_one("span.kenmerkName")
            value_el = row.select_one("span.kenmerkValue")
            if not (name_el and value_el):
                continue
            features[name_el.get_text(strip=True).lower()] = value_el.get_text(strip=True)
        return {field: features.get(label) for field, label in label_for_field.items()}
    except Exception as exc:
        log.warning("wassenaar: detail fetch fout %s: %s", detail_url, exc)
        return {}
def _wassenaar_int(text: str | None) -> int | None:
    """Return the first run of digits in ``text`` as an int, or None if there is none.

    Detail-page values are free text, so a bare ``int()`` raises on anything
    that is not purely numeric; this helper degrades to ``None`` instead.
    """
    import re as _re

    found = _re.search(r"\d+", text or "")
    return int(found.group()) if found else None


def _wassenaar_overview_cards(soup) -> tuple[dict[str, str], dict[str, str]]:
    """Collect status and thumbnail per listing href from the overview page.

    Each listing has two ``a.aanbodEntryLink`` anchors with the same href; the
    first one carries the status banner and photo, so only the first
    occurrence of an href is inspected.

    Returns ``(status_by_url, photo_by_url)``. ``photo_by_url`` only contains
    hrefs for which a non-empty, non-placeholder image ``src`` was found.
    """
    status_by_url: dict[str, str] = {}
    photo_by_url: dict[str, str] = {}
    for link in soup.select("a.aanbodEntryLink[href]"):
        href = link["href"]
        if href in status_by_url:
            continue
        banner = link.select_one(".objectstatusbanner")
        banner_text = banner.get_text(strip=True).lower() if banner else ""
        # A missing or unrecognised banner is treated as available.
        status_by_url[href] = _WASSENAAR_STATUS_MAP.get(banner_text, "beschikbaar")
        img = link.select_one("span.hoofdfoto img")
        src = img.get("src", "") if img else ""
        # "geenfotobeschikbaar" ("no photo available") is presumably the
        # site's placeholder image; it is not stored as a thumbnail.
        if src and "geenfotobeschikbaar" not in src:
            photo_by_url[href] = src
    return status_by_url, photo_by_url


def fetch_wassenaar() -> list[RawListing]:
    """Scrape the Makelaardij Wassenaar for-sale overview into RawListing objects.

    The overview page is fetched once. Status and thumbnail come from the
    listing anchors; url, address, price and photo come from the JSON-LD
    ``Residence`` blocks on the same page. For every listing that passes the
    price filter the detail page is fetched via ``_wassenaar_detail`` to fill
    in the remaining fields.

    Listings priced above ``config.MAX_PRICE`` are skipped. A failure while
    processing one JSON-LD block is logged as a warning and only that block
    is skipped. Numeric detail fields (bouwjaar, kamers, slaapkamers) that
    contain no digits become ``None`` rather than discarding the listing.

    Returns:
        The listings that could be parsed, in page order.
    """
    import json as _json

    soup = fetch_soup(f"{_WASSENAAR_BASE}/aanbod/woningaanbod/-{config.MAX_PRICE}/koop/")

    # First pass: status + thumbnail per relative url.
    status_by_url, photo_by_url = _wassenaar_overview_cards(soup)

    # Second pass: parse JSON-LD blocks (one per listing).
    seen: set[str] = set()
    listings: list[RawListing] = []
    for tag in soup.select('script[type="application/ld+json"]'):
        try:
            ld = _json.loads(tag.string)
            if ld.get("@type") != "Residence":
                continue
            rel_url = ld.get("url", "")
            if not rel_url or rel_url in seen:
                continue
            seen.add(rel_url)
            detail_url = _WASSENAAR_BASE + rel_url

            address = ld.get("address", {})
            postcode = address.get("postalCode", "").replace(" ", "") or None

            # The price sits in the first potentialAction entry that has a
            # non-empty priceSpecification.
            price_spec = next(
                (
                    action.get("priceSpecification", {})
                    for action in ld.get("potentialAction", [])
                    if action.get("priceSpecification")
                ),
                {},
            )
            prijs = int(price_spec["price"]) if price_spec.get("price") else None
            if prijs and prijs > config.MAX_PRICE:
                continue

            # Prefer the JSON-LD photo; fall back to the overview thumbnail.
            hero = ld.get("photo") or photo_by_url.get(rel_url)
            status = status_by_url.get(rel_url, "beschikbaar")
            kk = _wassenaar_detail(detail_url)

            listings.append(RawListing(
                url=detail_url,
                source_makelaar="wassenaar",
                status=status,
                adres=address.get("streetAddress") or None,
                postcode=postcode,
                stad=address.get("addressLocality") or None,
                prijs=prijs,
                hero_image_url=hero,
                woningtype=kk.get("woningtype"),
                bouwjaar=_wassenaar_int(kk.get("bouwjaar")),
                woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                kamers=_wassenaar_int(kk.get("kamers")),
                slaapkamers=_wassenaar_int(kk.get("slaapkamers")),
                energielabel=kk.get("energielabel"),
            ))
        except Exception as e:
            # Best-effort scraper boundary: one bad block must not abort the run.
            log.warning("wassenaar: parse fout: %s", e)

    log.info("wassenaar: %d listings opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SSR helper utils
# ---------------------------------------------------------------------------
@@ -345,5 +463,6 @@ def _infer_stad(postcode: str | None) -> str | None:
# Maps each scraper key to the function that fetches that site's listings.
SCRAPERS = {
    'ankebodewes': fetch_ankebodewes,
    'woongoed': fetch_woongoed,
    'dewittegarantiemakelaars': fetch_dewittegarantiemakelaars,
    'wassenaar': fetch_wassenaar,
}