From 18c01139c2c77671e025e51d72639eef31b0f669 Mon Sep 17 00:00:00 2001 From: Mark Kalsbeek Date: Fri, 3 Apr 2026 16:32:00 +0200 Subject: [PATCH] give in to the vibe --- makelaars.md | 2 +- shell.nix | 1 + src/adapters/ssr.py | 121 ++++++++++++++++++++++++++++++++++++++++- tests/test_adapters.py | 2 +- 4 files changed, 123 insertions(+), 3 deletions(-) diff --git a/makelaars.md b/makelaars.md index a1e5ca0..0e4f74c 100644 --- a/makelaars.md +++ b/makelaars.md @@ -29,7 +29,7 @@ | [x] | Woongoed Makelaars Schiedam | woongoedmakelaars.nl | Oranjestraat 93 | | [x] | Ooms Makelaars Schiedam | ooms.com | Gerrit Verboonstraat 2 | | [x] | De Witte Garantiemakelaars | dewittegarantiemakelaars.nl | Philippusweg 2 | -| [ ] | Makelaardij Wassenaar | makelaardijwassenaar.nl | Gerrit Verboonstraat 12 | +| [x] | Makelaardij Wassenaar | makelaardijwassenaar.nl | Gerrit Verboonstraat 12 | | [ ] | 3D Makelaars | 3dmakelaars.nl | Gerrit Verboonstraat 17 | | [ ] | Dupont Makelaars | dupont.nl | Rotterdamsedijk 437 | | [ ] | D&S Makelaardij | densmakelaars.nl | Land van Belofte 50 | diff --git a/shell.nix b/shell.nix index 91fd9a9..4948499 100644 --- a/shell.nix +++ b/shell.nix @@ -7,6 +7,7 @@ pkgs.mkShell { beautifulsoup4 lxml ])) + pkgs.claude-code ]; shellHook = '' diff --git a/src/adapters/ssr.py b/src/adapters/ssr.py index d1f56a1..22d5fd7 100644 --- a/src/adapters/ssr.py +++ b/src/adapters/ssr.py @@ -303,6 +303,124 @@ def fetch_dewittegarantiemakelaars() -> list[RawListing]: return listings +# --------------------------------------------------------------------------- +# Makelaardij Wassenaar (Schiedam) +# --------------------------------------------------------------------------- +# Realworks CMS. Listings page has JSON-LD (Residence) with url/address/price/photo. +# Detail pages have span.kenmerk with Wassenaar-specific label names. + +_WASSENAAR_BASE = "https://www.makelaardijwassenaar.nl" + +_WASSENAAR_STATUS_MAP = { + "te koop": "beschikbaar", + "nieuw": "beschikbaar", + "onder bod": "onder_bod", + "onder optie": "onder_bod", + "verkocht o.v.": "onder_bod", + "verkocht onder voorbehoud": "onder_bod", + "verkocht": "verkocht", +} + + +def _wassenaar_detail(detail_url: str) -> dict: + """Fetch Realworks detail page; extract kenmerken with Wassenaar-specific labels.""" + try: + soup = fetch_soup(detail_url) + kv: dict[str, str] = {} + for kenmerk in soup.select("span.kenmerk"): + label_el = kenmerk.select_one("span.kenmerkName") + value_el = kenmerk.select_one("span.kenmerkValue") + if label_el and value_el: + kv[label_el.get_text(strip=True).lower()] = value_el.get_text(strip=True) + return { + "woningtype": kv.get("soort object"), + "bouwjaar": kv.get("bouwjaar"), + "woonoppervlak": kv.get("woonoppervlakte"), + "perceeloppervlak": kv.get("perceeloppervlakte"), + "kamers": kv.get("aantal kamers"), + "slaapkamers": kv.get("aantal slaapkamers"), + "energielabel": kv.get("energieklasse"), + } + except Exception as e: + log.warning("wassenaar: detail fetch fout %s: %s", detail_url, e) + return {} + + +def fetch_wassenaar() -> list[RawListing]: + import json as _json + soup = fetch_soup(f"{_WASSENAAR_BASE}/aanbod/woningaanbod/-{config.MAX_PRICE}/koop/") + + # First pass: collect status + thumbnail per relative url + # Each listing has two a.aanbodEntryLink with the same href; + # the first has the status banner + photo, the second has address + price. + status_by_url: dict[str, str] = {} + photo_by_url: dict[str, str] = {} + for a in soup.select("a.aanbodEntryLink[href]"): + href = a["href"] + if href in status_by_url: + continue + banner = a.select_one(".objectstatusbanner") + status_text = banner.get_text(strip=True).lower() if banner else "" + status_by_url[href] = _WASSENAAR_STATUS_MAP.get(status_text, "beschikbaar") + img = a.select_one("span.hoofdfoto img") + if img: + src = img.get("src", "") + if "geenfotobeschikbaar" not in src: + photo_by_url[href] = src + + # Second pass: parse JSON-LD blocks (one per listing) + seen: set[str] = set() + listings = [] + for tag in soup.select('script[type="application/ld+json"]'): + try: + ld = _json.loads(tag.string) + if ld.get("@type") != "Residence": + continue + rel_url = ld.get("url", "") + if not rel_url or rel_url in seen: + continue + seen.add(rel_url) + + detail_url = _WASSENAAR_BASE + rel_url + address = ld.get("address", {}) + postcode = address.get("postalCode", "").replace(" ", "") or None + + price_spec = next( + (a.get("priceSpecification", {}) for a in ld.get("potentialAction", []) + if a.get("priceSpecification")), + {} + ) + prijs = int(price_spec["price"]) if price_spec.get("price") else None + if prijs and prijs > config.MAX_PRICE: + continue + + hero = ld.get("photo") or photo_by_url.get(rel_url) + status = status_by_url.get(rel_url, "beschikbaar") + kk = _wassenaar_detail(detail_url) + + listings.append(RawListing( + url=detail_url, + source_makelaar="wassenaar", + status=status, + adres=address.get("streetAddress") or None, + postcode=postcode, + stad=address.get("addressLocality") or None, + prijs=prijs, + hero_image_url=hero, + woningtype=kk.get("woningtype"), + bouwjaar=int(kk["bouwjaar"]) if kk.get("bouwjaar") else None, + woonoppervlak=parse_m2(kk.get("woonoppervlak")), + perceeloppervlak=parse_m2(kk.get("perceeloppervlak")), + kamers=int(kk["kamers"]) if kk.get("kamers") else None, + slaapkamers=int(kk["slaapkamers"]) if kk.get("slaapkamers") else None, + energielabel=kk.get("energielabel"), + )) + except Exception as e: + log.warning("wassenaar: parse fout: %s", e) + + log.info("wassenaar: %d listings opgehaald", len(listings)) + return listings + # --------------------------------------------------------------------------- # SSR helper utils # --------------------------------------------------------------------------- @@ -345,5 +463,6 @@ def _infer_stad(postcode: str | None) -> str | None: SCRAPERS = { 'ankebodewes': fetch_ankebodewes, 'woongoed': fetch_woongoed, - 'dewittegarantiemakelaars': fetch_dewittegarantiemakelaars + 'dewittegarantiemakelaars': fetch_dewittegarantiemakelaars, + 'wassenaar': fetch_wassenaar, } diff --git a/tests/test_adapters.py b/tests/test_adapters.py index 75eeb78..db16e63 100644 --- a/tests/test_adapters.py +++ b/tests/test_adapters.py @@ -16,7 +16,7 @@ logging.basicConfig( ) # --- change this to test a different adapter --- -ADAPTER = SCRAPERS['dewittegarantiemakelaars'] +ADAPTER = SCRAPERS['wassenaar'] if __name__ == "__main__": print(f"Testing adapter: {ADAPTER.__name__}")