# ---------------------------------------------------------------------------
# Provenance: reformatted from a whitespace-collapsed git patch.
#   Commit:  918042d27ee4a6a3a64fb6db444498e95155205c
#   Author:  Mark Kalsbeek
#   Date:    Fri, 3 Apr 2026 16:42:52 +0200
#   Subject: Add D&S Makelaars scraper (Schiedam)
#   Co-Authored-By: Claude Haiku 4.5
#
# Commit message: fetches 51+ listings from D&S with full details; paginates
# through /aanbod/koopwoningen, extracts the property postcode from the Google
# Maps iframe URL, parses all kenmerken (features) from the detail pages, and
# includes price, address, rooms, area, build year and energy label.
#
# Other changes carried by the same patch (not reproducible as code here):
#   * makelaars.md: the row
#       | D&S Makelaardij | densmakelaars.nl | Land van Belofte 50 |
#     changes its checkbox from "[ ]" to "[x]".
#   * src/adapters/ssr.py, SCRAPERS dict: the entry  'dens': fetch_dens,
#     is added after  'wassenaar': fetch_wassenaar,
#
# The code below is the section added to src/adapters/ssr.py (after
# _infer_stad). It relies on names that module already provides: re,
# BeautifulSoup, config, log, fetch_soup, _text, parse_prijs, parse_m2,
# RawListing and _infer_stad.
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# D&S Makelaars (Schiedam)
# ---------------------------------------------------------------------------

_DS_BASE = "https://www.densmakelaars.nl"

# Lower-cased status text as shown on the site -> internal status code.
_DS_STATUS_MAP = {
    "onder bod": "onder_bod",
    "te koop": "beschikbaar",
    "nieuw": "beschikbaar",
    "beschikbaar": "beschikbaar",
    "verkocht": "verkocht",
}

# Postcode inside the Google Maps embed URL:
#   q=<street>,<4 digits><separator><2 letters>,<city>
# The separator may be literal whitespace or a URL-encoded space ("+" / "%20").
_DS_POSTCODE_RE = re.compile(r"q=.+?,(\d{4})(?:\s|\+|%20)*([A-Z]{2}),")


def _ds_int(value: str | None) -> int | None:
    """Return the first run of digits in *value* as an int, or None.

    Tolerates decorated values such as "ca. 1930" or "5 kamers", which a bare
    ``int()`` would reject.
    """
    if not value:
        return None
    match = re.search(r"\d+", value)
    return int(match.group()) if match else None


def _ds_postcode(html_text: str) -> str | None:
    """Return the postcode found in the maps URL of *html_text* as "1234AB", or None."""
    match = _DS_POSTCODE_RE.search(html_text)
    if match is None:
        return None
    return f"{match.group(1)}{match.group(2)}"


def _ds_kenmerken(soup) -> dict[str, str]:
    """Collect <dt>/<dd> pairs of *soup* into a lower-cased label -> value map."""
    dts = soup.select("dt")
    kv: dict[str, str] = {}
    for dt in dts:
        # Pair a label with the <dd> that directly follows it. Stopping at the
        # next <dt> prevents a label without a value from taking the value
        # that belongs to the label after it.
        sibling = dt.find_next_sibling(["dt", "dd"])
        if sibling is not None and sibling.name == "dd":
            kv[dt.get_text(strip=True).lower()] = sibling.get_text(strip=True)

    if not kv:
        # Markup where <dt> and <dd> are not siblings: pair them by position.
        for dt, dd in zip(dts, soup.select("dd")):
            kv[dt.get_text(strip=True).lower()] = dd.get_text(strip=True)
    return kv


def _ds_detail(detail_url: str, html_text: str | None = None) -> dict:
    """Fetch a D&S detail page and extract its kenmerken and postcode.

    Args:
        detail_url: Absolute URL of the detail page; also used in log output.
        html_text: Already-downloaded HTML of the page. When None the page is
            fetched from *detail_url*.

    Returns:
        A dict with the keys ``status``, ``woningtype``, ``bouwjaar``,
        ``woonoppervlak``, ``kamers``, ``slaapkamers``, ``energielabel`` and
        ``postcode``. Values are the raw strings from the page, or None when
        the page does not provide them. On any failure a warning is logged and
        an empty dict is returned (best effort: the caller still has the data
        from the overview card).
    """
    try:
        if html_text is None:
            import httpx

            r = httpx.get(
                detail_url,
                headers={"User-Agent": config.USER_AGENT},
                timeout=15,
                follow_redirects=True,
            )
            # Do not parse an error page as if it were a listing.
            r.raise_for_status()
            html_text = r.text

        soup = BeautifulSoup(html_text, "html.parser")
        kv = _ds_kenmerken(soup)

        # A missing status stays None so the caller keeps the status it read
        # from the overview card instead of having it reset to "beschikbaar".
        status = kv.get("status")
        return {
            "status": status.lower() if status else None,
            "woningtype": kv.get("soort woning"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("woonoppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energielabel"),
            "postcode": _ds_postcode(html_text),
        }
    except Exception as e:
        log.warning("dens: detail fetch fout %s: %s", detail_url, e)
        return {}


def _ds_card_url(card) -> str | None:
    """Return the absolute detail URL of an overview *card*, or None if it has no link."""
    from urllib.parse import urljoin

    a_tag = card.select_one("a.property")
    href = a_tag.get("href") if a_tag else None
    if not href:
        return None
    # urljoin leaves absolute URLs untouched and also resolves relative hrefs
    # that lack a leading slash.
    return urljoin(_DS_BASE + "/", href)


def _ds_listing(card, detail_url: str) -> RawListing:
    """Build a RawListing from an overview *card* plus its detail page."""
    status_label = (_text(card, "span.label") or "beschikbaar").strip().lower()
    status = _DS_STATUS_MAP.get(status_label, "beschikbaar")

    adres = _text(card, "h3")
    stad = _text(card, "h4")
    prijs = parse_prijs(_text(card, "div.price"))

    # Area and room counts from the card footer; used as fallback values when
    # the detail page does not provide them.
    woonoppervlak = None
    kamers = None
    slaapkamers = None
    for span in card.select("div.footer span"):
        text = span.get_text(strip=True)
        lowered = text.lower()
        if "m²" in text:
            woonoppervlak = parse_m2(text)
        elif "kamers" in lowered:
            count = _ds_int(text)
            if count is None:
                continue
            # "slaapkamers" contains "kamers": keep bedrooms and rooms apart.
            if "slaapkamer" in lowered:
                slaapkamers = count
            else:
                kamers = count

    img_tag = card.select_one("img")
    hero = img_tag.get("src") if img_tag else None

    detail = _ds_detail(detail_url)
    postcode = detail.get("postcode")

    # The detail page wins only when it states a status this adapter knows.
    if detail.get("status"):
        status = _DS_STATUS_MAP.get(detail["status"], status)

    detail_m2 = detail.get("woonoppervlak")
    detail_kamers = _ds_int(detail.get("kamers"))
    detail_slaapkamers = _ds_int(detail.get("slaapkamers"))

    return RawListing(
        url=detail_url,
        source_makelaar="dens",
        adres=adres,
        postcode=postcode,
        stad=stad or _infer_stad(postcode),
        prijs=prijs,
        status=status,
        hero_image_url=hero,
        woningtype=detail.get("woningtype"),
        bouwjaar=_ds_int(detail.get("bouwjaar")),
        woonoppervlak=(parse_m2(detail_m2) if detail_m2 else None) or woonoppervlak,
        kamers=detail_kamers if detail_kamers is not None else kamers,
        slaapkamers=(
            detail_slaapkamers if detail_slaapkamers is not None else slaapkamers
        ),
        energielabel=detail.get("energielabel"),
    )


def fetch_dens() -> list[RawListing]:
    """Fetch D&S Makelaars listings with full detail pages.

    Walks ``/aanbod/koopwoningen?page=N`` starting at page 1 and requests the
    detail page of every card. A card that fails to parse is logged and
    skipped; it does not abort the run.

    Returns:
        One RawListing per distinct detail URL, in page order.
    """
    listings: list[RawListing] = []
    seen_urls: set[str] = set()
    page = 1

    while True:
        url = f"{_DS_BASE}/aanbod/koopwoningen?page={page}"
        soup = fetch_soup(url)
        cards = soup.select(".col-12.col-md-4.object-wrapper")
        if not cards:
            break

        new_on_page = 0
        for card in cards:
            try:
                detail_url = _ds_card_url(card)
                if detail_url is None or detail_url in seen_urls:
                    continue
                seen_urls.add(detail_url)
                new_on_page += 1
                listings.append(_ds_listing(card, detail_url))
            except Exception as e:
                log.warning("dens: parse fout: %s", e)

        # A short page is treated as the last one. A page that adds nothing
        # new also ends the walk, so a site that repeats its last page for
        # out-of-range page numbers cannot cause an endless loop.
        if len(cards) < 10 or new_on_page == 0:
            break
        page += 1

    log.info("dens: %d listings opgehaald", len(listings))
    return listings