From 1841412c93cf2e490cc6366467162efa2b096544 Mon Sep 17 00:00:00 2001 From: Mark Kalsbeek Date: Mon, 13 Apr 2026 22:58:30 +0200 Subject: [PATCH] Update schielandborsboom scraper for new URL structure - Change listings URL to /wonen/zoeken/heel-nederland/prijs=200000-300000/schiedam/ - Update status extraction to read from card-house__status badge text - Update detail parser to handle house-features__block HTML structure - Test verified: successfully fetches Aleidastraat 130 A listing --- src/adapters/ssr/sure.py | 71 ++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/src/adapters/ssr/sure.py b/src/adapters/ssr/sure.py index bfc89f7..52b18b5 100644 --- a/src/adapters/ssr/sure.py +++ b/src/adapters/ssr/sure.py @@ -38,29 +38,39 @@ def _schieland_detail(detail_url: str) -> dict: postcode_el = soup.select_one("div.house__status p") postcode = _extract_postcode(postcode_el.get_text()) if postcode_el else None - # Parse #kenmerken section:
<li><strong>label</strong><span>value</span></li>
  • + # Parse house-features__block sections: div.house-features__block > ul > li kv: dict[str, str] = {} - kenmerken = soup.select_one("#kenmerken") - if kenmerken: - for li in kenmerken.select("li"): - label_el = li.select_one("strong") - value_el = li.select_one("span") - if label_el and value_el: - # Strip nested links (e.g. "Hypotheek berekenen") - for a in value_el.select("a"): - a.decompose() - kv[label_el.get_text(strip=True).lower()] = value_el.get_text(strip=True) + for block in soup.select("div.house-features__block"): + h4 = block.select_one("h4") + if not h4: + continue + section_title = h4.get_text(strip=True).lower() + + for li in block.select("ul > li"): + strong = li.select_one("strong") + span = li.select_one("span") + if not strong or not span: + continue + + label = strong.get_text(strip=True).lower() + value = span.get_text(strip=True) + + # Remove links from value + for a in span.select("a"): + value = value.replace(a.get_text(strip=True), "").strip() + + kv[f"{section_title}.{label}"] = value return { "postcode": postcode, - "status": kv.get("status", "").lower(), - "woningtype": kv.get("soort bouw"), - "bouwjaar": kv.get("bouwjaar"), - "woonoppervlak": kv.get("woonoppervlakte"), - "perceeloppervlak": kv.get("perceeloppervlakte"), - "kamers": kv.get("aantal kamers"), - "slaapkamers": kv.get("aantal slaapkamers"), - "energielabel": kv.get("energielabel"), + "status": kv.get("overdracht.status", "").lower(), + "woningtype": kv.get("bouwvorm.soort bouw"), + "bouwjaar": kv.get("bouwvorm.bouwjaar"), + "woonoppervlak": kv.get("indeling.woonoppervlakte"), + "perceeloppervlak": kv.get("indeling.perceeloppervlakte"), + "kamers": kv.get("indeling.aantal kamers"), + "slaapkamers": kv.get("indeling.aantal slaapkamers"), + "energielabel": kv.get("energie & installatie.energielabel"), } except Exception as e: log.warning("schielandborsboom: detail fetch fout %s: %s", detail_url, e) @@ -74,9 +84,9 @@ def fetch_schielandborsboom() -> list[RawListing]: while 
True: if page == 1: - url = f"{_SCHIELAND_BASE}/wonen?sure_koop_huur=koop" + url = f"{_SCHIELAND_BASE}/wonen/zoeken/heel-nederland/prijs=200000-300000/schiedam/" else: - url = f"{_SCHIELAND_BASE}/wonen/page/{page}/?sure_koop_huur=koop" + url = f"{_SCHIELAND_BASE}/wonen/zoeken/heel-nederland/prijs=200000-300000/schiedam/?pagina={page}" soup = fetch_soup(url) cards = soup.select("div.card.card--house") @@ -98,13 +108,18 @@ def fetch_schielandborsboom() -> list[RawListing]: if not stad or stad.lower() != "schiedam": continue - # Status from card-house__thumb second class - thumb = card.select_one("div.card-house__thumb") - status_classes = thumb.get("class", []) if thumb else [] - status_text = next( - (c for c in status_classes if c != "card-house__thumb"), "beschikbaar" - ).lower() - status = _SCHIELAND_STATUS_MAP.get(status_text, "beschikbaar") + # Status from card-house__status badge + status_el = card.select_one("div.card-house__status") + status_text = status_el.get_text(strip=True).lower() if status_el else "" + # Check for known status keywords in badge text + if "beschikbaar" in status_text: + status = "beschikbaar" + elif "onder bod" in status_text: + status = "onder_bod" + elif "verkocht" in status_text: + status = "verkocht" + else: + status = "beschikbaar" # Price prijs = parse_prijs(_text(card, "p.price"))