|
|
|
|
@@ -303,6 +303,124 @@ def fetch_dewittegarantiemakelaars() -> list[RawListing]:
|
|
|
|
|
return listings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Makelaardij Wassenaar (Schiedam)
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Realworks CMS. Listings page has JSON-LD (Residence) with url/address/price/photo.
|
|
|
|
|
# Detail pages have span.kenmerk with Wassenaar-specific label names.
|
|
|
|
|
|
|
|
|
|
# Site root. fetch_wassenaar() builds the overview URL from it and appends the
# relative listing URLs found in the JSON-LD blocks to get detail-page URLs.
_WASSENAAR_BASE = "https://www.makelaardijwassenaar.nl"

# Maps the lower-cased text of a listing's status banner on the overview page
# to the normalised status stored on the listing. Banner texts that are not in
# this table are treated as "beschikbaar" by fetch_wassenaar().
_WASSENAAR_STATUS_MAP = {
    "te koop": "beschikbaar",
    "nieuw": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht o.v.": "onder_bod",
    "verkocht onder voorbehoud": "onder_bod",
    "verkocht": "verkocht",
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _wassenaar_detail(detail_url: str) -> dict:
    """Fetch a Realworks detail page and pull out the Wassenaar-specific kenmerken.

    Every ``span.kenmerk`` element that contains both a ``span.kenmerkName``
    and a ``span.kenmerkValue`` contributes one entry, keyed by the
    lower-cased label text. The result maps our own field names to the raw
    (unparsed) value text, or ``None`` when the page has no such label.

    Args:
        detail_url: Absolute URL of the listing's detail page.

    Returns:
        A dict with the keys ``woningtype``, ``bouwjaar``, ``woonoppervlak``,
        ``perceeloppervlak``, ``kamers``, ``slaapkamers`` and ``energielabel``.
        If anything goes wrong while fetching or parsing, a warning is logged
        and an empty dict is returned instead.
    """
    # (our field name, label as shown on the Wassenaar detail page)
    field_labels = (
        ("woningtype", "soort object"),
        ("bouwjaar", "bouwjaar"),
        ("woonoppervlak", "woonoppervlakte"),
        ("perceeloppervlak", "perceeloppervlakte"),
        ("kamers", "aantal kamers"),
        ("slaapkamers", "aantal slaapkamers"),
        ("energielabel", "energieklasse"),
    )
    try:
        page = fetch_soup(detail_url)
        kenmerken: dict[str, str] = {}
        for row in page.select("span.kenmerk"):
            name_el = row.select_one("span.kenmerkName")
            value_el = row.select_one("span.kenmerkValue")
            if not (name_el and value_el):
                continue
            label = name_el.get_text(strip=True).lower()
            kenmerken[label] = value_el.get_text(strip=True)
        return {field: kenmerken.get(label) for field, label in field_labels}
    except Exception as err:
        # Best effort: a broken detail page must not abort the whole scrape.
        log.warning("wassenaar: detail fetch fout %s: %s", detail_url, err)
        return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _wassenaar_int(value) -> int | None:
    """Return the first run of digits in *value* as an int, or None if there is none.

    Detail-page values are free text, so a plain ``int()`` would raise on
    anything that is not purely numeric.
    """
    import re as _re

    if value is None:
        return None
    match = _re.search(r"\d+", str(value))
    return int(match.group()) if match else None


def fetch_wassenaar() -> list[RawListing]:
    """Scrape the Makelaardij Wassenaar for-sale overview into RawListing objects.

    The overview page is read in two passes:

    1. The ``a.aanbodEntryLink`` anchors give the status banner and thumbnail
       per listing href.
    2. The JSON-LD ``Residence`` blocks give url, address and price. For each
       one the detail page is fetched via ``_wassenaar_detail`` to fill in the
       remaining fields.

    Returns:
        One RawListing per unique JSON-LD ``Residence`` url. Listings whose
        price exceeds ``config.MAX_PRICE`` are skipped. A listing that fails
        to parse is logged as a warning and skipped; the other listings are
        still returned.
    """
    import json as _json

    soup = fetch_soup(f"{_WASSENAAR_BASE}/aanbod/woningaanbod/-{config.MAX_PRICE}/koop/")

    # First pass: collect status + thumbnail per relative url.
    # Each listing has two a.aanbodEntryLink with the same href;
    # the first has the status banner + photo, the second has address + price.
    status_by_url: dict[str, str] = {}
    photo_by_url: dict[str, str] = {}
    for a in soup.select("a.aanbodEntryLink[href]"):
        href = a["href"]
        if href in status_by_url:
            continue
        banner = a.select_one(".objectstatusbanner")
        status_text = banner.get_text(strip=True).lower() if banner else ""
        status_by_url[href] = _WASSENAAR_STATUS_MAP.get(status_text, "beschikbaar")
        img = a.select_one("span.hoofdfoto img")
        src = img.get("src", "") if img else ""
        # Ignore a missing/empty src as well as the "no photo available"
        # placeholder, so an empty string never ends up as the hero image.
        if src and "geenfotobeschikbaar" not in src:
            photo_by_url[href] = src

    # Second pass: parse JSON-LD blocks (one per listing).
    seen: set[str] = set()
    listings = []
    for tag in soup.select('script[type="application/ld+json"]'):
        try:
            # An empty script tag has no string to parse.
            if not tag.string:
                continue
            ld = _json.loads(tag.string)
            # JSON-LD may also be a list or another type; only dicts are handled.
            if not isinstance(ld, dict) or ld.get("@type") != "Residence":
                continue
            rel_url = ld.get("url", "")
            if not rel_url or rel_url in seen:
                continue
            seen.add(rel_url)

            detail_url = _WASSENAAR_BASE + rel_url
            address = ld.get("address")
            if not isinstance(address, dict):
                address = {}
            postcode = (address.get("postalCode") or "").replace(" ", "") or None

            # potentialAction may be a single object instead of a list.
            actions = ld.get("potentialAction") or []
            if isinstance(actions, dict):
                actions = [actions]
            price_spec = next(
                (act["priceSpecification"] for act in actions
                 if isinstance(act, dict) and act.get("priceSpecification")),
                {},
            )
            prijs = int(price_spec["price"]) if price_spec.get("price") else None
            if prijs and prijs > config.MAX_PRICE:
                continue

            hero = ld.get("photo") or photo_by_url.get(rel_url)
            status = status_by_url.get(rel_url, "beschikbaar")
            kk = _wassenaar_detail(detail_url)

            listings.append(RawListing(
                url=detail_url,
                source_makelaar="wassenaar",
                status=status,
                adres=address.get("streetAddress") or None,
                postcode=postcode,
                stad=address.get("addressLocality") or None,
                prijs=prijs,
                hero_image_url=hero,
                woningtype=kk.get("woningtype"),
                # Tolerant parsing: one odd detail value must not cost us the
                # whole listing.
                bouwjaar=_wassenaar_int(kk.get("bouwjaar")),
                woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                kamers=_wassenaar_int(kk.get("kamers")),
                slaapkamers=_wassenaar_int(kk.get("slaapkamers")),
                energielabel=kk.get("energielabel"),
            ))
        except Exception as e:
            # Best effort per listing: log and continue with the next block.
            log.warning("wassenaar: parse fout: %s", e)

    log.info("wassenaar: %d listings opgehaald", len(listings))
    return listings
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# SSR helper utils
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
@@ -345,5 +463,6 @@ def _infer_stad(postcode: str | None) -> str | None:
|
|
|
|
|
# Registry of scrapers: maps each makelaar key to its fetch function.
SCRAPERS = {
    'ankebodewes': fetch_ankebodewes,
    'woongoed': fetch_woongoed,
    'dewittegarantiemakelaars': fetch_dewittegarantiemakelaars,
    'wassenaar': fetch_wassenaar,
}
|
|
|
|
|
|