give in to the vibe
This commit is contained in:
@@ -303,6 +303,124 @@ def fetch_dewittegarantiemakelaars() -> list[RawListing]:
|
||||
return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Makelaardij Wassenaar (Schiedam)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Realworks CMS. Listings page has JSON-LD (Residence) with url/address/price/photo.
|
||||
# Detail pages have span.kenmerk with Wassenaar-specific label names.
|
||||
|
||||
# Root of the Makelaardij Wassenaar website; relative listing URLs are appended to it.
_WASSENAAR_BASE = "https://www.makelaardijwassenaar.nl"

# Lower-cased status banner text -> internal status value.
# Built from a "status -> banner labels" table so each internal status is
# written once; the resulting key order is the same as listing them flat.
_WASSENAAR_STATUS_MAP = {
    label: status
    for status, labels in (
        ("beschikbaar", ("te koop", "nieuw")),
        (
            "onder_bod",
            (
                "onder bod",
                "onder optie",
                "verkocht o.v.",
                "verkocht onder voorbehoud",
            ),
        ),
        ("verkocht", ("verkocht",)),
    )
    for label in labels
}
|
||||
|
||||
|
||||
def _wassenaar_detail(detail_url: str) -> dict:
    """Scrape the kenmerken of one Wassenaar (Realworks) detail page.

    Every ``span.kenmerk`` element that contains both a ``span.kenmerkName``
    and a ``span.kenmerkValue`` contributes one label/value pair; labels are
    lower-cased before lookup.

    Args:
        detail_url: URL of the detail page, passed to ``fetch_soup``.

    Returns:
        A dict with the keys ``woningtype``, ``bouwjaar``, ``woonoppervlak``,
        ``perceeloppervlak``, ``kamers``, ``slaapkamers`` and ``energielabel``.
        Values are the raw text from the page, or ``None`` when the label is
        absent. If anything raises while fetching or parsing, a warning is
        logged and an empty dict is returned instead.
    """
    # Output field name -> lower-cased Wassenaar label on the detail page.
    label_for_field = (
        ("woningtype", "soort object"),
        ("bouwjaar", "bouwjaar"),
        ("woonoppervlak", "woonoppervlakte"),
        ("perceeloppervlak", "perceeloppervlakte"),
        ("kamers", "aantal kamers"),
        ("slaapkamers", "aantal slaapkamers"),
        ("energielabel", "energieklasse"),
    )
    try:
        page = fetch_soup(detail_url)
        kenmerken: dict[str, str] = {}
        for row in page.select("span.kenmerk"):
            name_el = row.select_one("span.kenmerkName")
            value_el = row.select_one("span.kenmerkValue")
            if not (name_el and value_el):
                # Incomplete row: nothing usable to record.
                continue
            kenmerken[name_el.get_text(strip=True).lower()] = value_el.get_text(strip=True)
        return {field: kenmerken.get(label) for field, label in label_for_field}
    except Exception as e:
        # Best-effort: a broken detail page must not abort the whole scrape.
        log.warning("wassenaar: detail fetch fout %s: %s", detail_url, e)
        return {}
|
||||
|
||||
|
||||
def _wassenaar_int(value: str | None) -> int | None:
    """Return the first run of digits in *value* as an int, or None if there is none."""
    import re as _re

    if not value:
        return None
    found = _re.search(r"\d+", value)
    return int(found.group()) if found else None


def fetch_wassenaar() -> list[RawListing]:
    """Scrape the Makelaardij Wassenaar overview page into RawListing objects.

    The overview page is fetched once. Status and thumbnail are read from the
    ``a.aanbodEntryLink`` anchors; url, address, price and photo come from the
    ``application/ld+json`` blocks of type ``Residence``. For every accepted
    listing the detail page is fetched via ``_wassenaar_detail`` for the
    remaining kenmerken.

    Returns:
        One RawListing per unique JSON-LD ``url``. Listings whose price is
        above ``config.MAX_PRICE`` are skipped. A JSON-LD block that fails to
        parse is logged as a warning and skipped.

    Note:
        The ``fetch_soup`` call for the overview page is outside any
        ``try``, so whatever it raises propagates to the caller.
    """
    import json as _json

    soup = fetch_soup(f"{_WASSENAAR_BASE}/aanbod/woningaanbod/-{config.MAX_PRICE}/koop/")

    # First pass: collect status + thumbnail per relative url.
    # Each listing has two a.aanbodEntryLink with the same href;
    # the first has the status banner + photo, the second has address + price.
    status_by_url: dict[str, str] = {}
    photo_by_url: dict[str, str] = {}
    for a in soup.select("a.aanbodEntryLink[href]"):
        href = a["href"]
        if href in status_by_url:
            continue
        banner = a.select_one(".objectstatusbanner")
        status_text = banner.get_text(strip=True).lower() if banner else ""
        # Unknown or missing banner text is treated as available.
        status_by_url[href] = _WASSENAAR_STATUS_MAP.get(status_text, "beschikbaar")
        img = a.select_one("span.hoofdfoto img")
        if img:
            src = img.get("src", "")
            # Skip a missing src and the site's "no photo" placeholder image.
            if src and "geenfotobeschikbaar" not in src:
                photo_by_url[href] = src

    # Second pass: parse JSON-LD blocks (one per listing).
    seen: set[str] = set()
    listings: list[RawListing] = []
    for tag in soup.select('script[type="application/ld+json"]'):
        try:
            payload = tag.string
            if not payload:
                # Empty script tag: nothing to parse, and not worth a warning.
                continue
            ld = _json.loads(payload)
            if not isinstance(ld, dict) or ld.get("@type") != "Residence":
                continue
            rel_url = ld.get("url", "")
            if not rel_url or rel_url in seen:
                continue
            seen.add(rel_url)

            detail_url = _WASSENAAR_BASE + rel_url
            # "or" guards against explicit JSON nulls as well as missing keys.
            address = ld.get("address") or {}
            postcode = (address.get("postalCode") or "").replace(" ", "") or None

            # The price lives in the first potentialAction that has a priceSpecification.
            price_spec = next(
                (
                    action.get("priceSpecification", {})
                    for action in ld.get("potentialAction", [])
                    if action.get("priceSpecification")
                ),
                {},
            )
            prijs = int(price_spec["price"]) if price_spec.get("price") else None
            if prijs and prijs > config.MAX_PRICE:
                continue

            hero = ld.get("photo") or photo_by_url.get(rel_url) or None
            status = status_by_url.get(rel_url, "beschikbaar")
            kk = _wassenaar_detail(detail_url)

            listings.append(RawListing(
                url=detail_url,
                source_makelaar="wassenaar",
                status=status,
                adres=address.get("streetAddress") or None,
                postcode=postcode,
                stad=address.get("addressLocality") or None,
                prijs=prijs,
                hero_image_url=hero,
                woningtype=kk.get("woningtype"),
                # Tolerant parsing: free-text values such as "ca. 1930" must
                # not raise and cause the whole listing to be dropped.
                bouwjaar=_wassenaar_int(kk.get("bouwjaar")),
                woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                kamers=_wassenaar_int(kk.get("kamers")),
                slaapkamers=_wassenaar_int(kk.get("slaapkamers")),
                energielabel=kk.get("energielabel"),
            ))
        except Exception as e:
            # Best-effort per listing: log and move on to the next JSON-LD block.
            log.warning("wassenaar: parse fout: %s", e)

    log.info("wassenaar: %d listings opgehaald", len(listings))
    return listings
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SSR helper utils
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -345,5 +463,6 @@ def _infer_stad(postcode: str | None) -> str | None:
|
||||
# Maps a scraper key to the function that fetches that source's listings.
SCRAPERS = {
    'ankebodewes': fetch_ankebodewes,
    'woongoed': fetch_woongoed,
    'dewittegarantiemakelaars': fetch_dewittegarantiemakelaars,
    'wassenaar': fetch_wassenaar,
}
|
||||
|
||||
Reference in New Issue
Block a user