Update schielandborsboom scraper for new URL structure

- Change listings URL to /wonen/zoeken/heel-nederland/prijs=200000-300000/schiedam/ (subsequent pages now use the ?pagina=N query parameter instead of /page/N/)
- Update status extraction to read from card-house__status badge text
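- Update detail parser to read the div.house-features__block sections; feature keys are now prefixed with the section title (e.g. "overdracht.status", "bouwvorm.bouwjaar")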
- Verified by test: the scraper successfully fetches the Aleidastraat 130 A listing
This commit is contained in:
2026-04-13 22:58:30 +02:00
parent c6328cee46
commit 1841412c93

View File

@@ -38,29 +38,39 @@ def _schieland_detail(detail_url: str) -> dict:
postcode_el = soup.select_one("div.house__status p") postcode_el = soup.select_one("div.house__status p")
postcode = _extract_postcode(postcode_el.get_text()) if postcode_el else None postcode = _extract_postcode(postcode_el.get_text()) if postcode_el else None
# Parse #kenmerken section: <li><strong>label</strong><span>value</span></li> # Parse house-features__block sections: div.house-features__block > ul > li
kv: dict[str, str] = {} kv: dict[str, str] = {}
kenmerken = soup.select_one("#kenmerken") for block in soup.select("div.house-features__block"):
if kenmerken: h4 = block.select_one("h4")
for li in kenmerken.select("li"): if not h4:
label_el = li.select_one("strong") continue
value_el = li.select_one("span") section_title = h4.get_text(strip=True).lower()
if label_el and value_el:
# Strip nested links (e.g. "Hypotheek berekenen") for li in block.select("ul > li"):
for a in value_el.select("a"): strong = li.select_one("strong")
a.decompose() span = li.select_one("span")
kv[label_el.get_text(strip=True).lower()] = value_el.get_text(strip=True) if not strong or not span:
continue
label = strong.get_text(strip=True).lower()
value = span.get_text(strip=True)
# Remove links from value
for a in span.select("a"):
value = value.replace(a.get_text(strip=True), "").strip()
kv[f"{section_title}.{label}"] = value
return { return {
"postcode": postcode, "postcode": postcode,
"status": kv.get("status", "").lower(), "status": kv.get("overdracht.status", "").lower(),
"woningtype": kv.get("soort bouw"), "woningtype": kv.get("bouwvorm.soort bouw"),
"bouwjaar": kv.get("bouwjaar"), "bouwjaar": kv.get("bouwvorm.bouwjaar"),
"woonoppervlak": kv.get("woonoppervlakte"), "woonoppervlak": kv.get("indeling.woonoppervlakte"),
"perceeloppervlak": kv.get("perceeloppervlakte"), "perceeloppervlak": kv.get("indeling.perceeloppervlakte"),
"kamers": kv.get("aantal kamers"), "kamers": kv.get("indeling.aantal kamers"),
"slaapkamers": kv.get("aantal slaapkamers"), "slaapkamers": kv.get("indeling.aantal slaapkamers"),
"energielabel": kv.get("energielabel"), "energielabel": kv.get("energie & installatie.energielabel"),
} }
except Exception as e: except Exception as e:
log.warning("schielandborsboom: detail fetch fout %s: %s", detail_url, e) log.warning("schielandborsboom: detail fetch fout %s: %s", detail_url, e)
@@ -74,9 +84,9 @@ def fetch_schielandborsboom() -> list[RawListing]:
while True: while True:
if page == 1: if page == 1:
url = f"{_SCHIELAND_BASE}/wonen?sure_koop_huur=koop" url = f"{_SCHIELAND_BASE}/wonen/zoeken/heel-nederland/prijs=200000-300000/schiedam/"
else: else:
url = f"{_SCHIELAND_BASE}/wonen/page/{page}/?sure_koop_huur=koop" url = f"{_SCHIELAND_BASE}/wonen/zoeken/heel-nederland/prijs=200000-300000/schiedam/?pagina={page}"
soup = fetch_soup(url) soup = fetch_soup(url)
cards = soup.select("div.card.card--house") cards = soup.select("div.card.card--house")
@@ -98,13 +108,18 @@ def fetch_schielandborsboom() -> list[RawListing]:
if not stad or stad.lower() != "schiedam": if not stad or stad.lower() != "schiedam":
continue continue
# Status from card-house__thumb second class # Status from card-house__status badge
thumb = card.select_one("div.card-house__thumb") status_el = card.select_one("div.card-house__status")
status_classes = thumb.get("class", []) if thumb else [] status_text = status_el.get_text(strip=True).lower() if status_el else ""
status_text = next( # Check for known status keywords in badge text
(c for c in status_classes if c != "card-house__thumb"), "beschikbaar" if "beschikbaar" in status_text:
).lower() status = "beschikbaar"
status = _SCHIELAND_STATUS_MAP.get(status_text, "beschikbaar") elif "onder bod" in status_text:
status = "onder_bod"
elif "verkocht" in status_text:
status = "verkocht"
else:
status = "beschikbaar"
# Price # Price
prijs = parse_prijs(_text(card, "p.price")) prijs = parse_prijs(_text(card, "p.price"))