"""
adapters/ssr.py — HTML/SSR-based makelaars

Each scraper is a function () -> list[RawListing].
Add new ones at the bottom and register them in SCRAPERS.
"""

import logging
import re
import time

import httpx
from bs4 import BeautifulSoup

import config
from huizenbot import RawListing

log = logging.getLogger("huizenbot.ssr")


# ---------------------------------------------------------------------------
# Shared HTTP helper
# ---------------------------------------------------------------------------

def _retry_after_seconds(value: str | None, default: int = 60) -> int:
    """Turn a Retry-After header value into a non-negative number of seconds.

    Retry-After may carry an HTTP-date instead of delta-seconds; anything that
    is not a plain integer (or is missing) falls back to ``default``.
    """
    if value is None:
        return default
    try:
        seconds = int(value)
    except (TypeError, ValueError):
        return default
    # time.sleep() rejects negative values.
    return max(seconds, 0)


def fetch_soup(url: str, *, params: dict | None = None) -> BeautifulSoup:
    """GET ``url`` and return the response body parsed with html.parser.

    A 429 response is retried after sleeping for the Retry-After delay
    (60 seconds when the header is absent or not an integer); at most three
    requests are made.

    Raises:
        httpx.HTTPStatusError: for any other 4xx/5xx response.
        RuntimeError: when all three attempts were answered with 429.
    """
    for _attempt in range(3):
        r = httpx.get(
            url,
            params=params,
            headers={"User-Agent": config.USER_AGENT},
            timeout=15,
            follow_redirects=True,
        )
        if r.status_code == 429:
            wait = _retry_after_seconds(r.headers.get("Retry-After"))
            log.warning("429 op %s, wacht %ds", url, wait)
            time.sleep(wait)
            continue
        r.raise_for_status()
        return BeautifulSoup(r.text, "html.parser")
    raise RuntimeError(f"Blijvend 429 op {url}")


# ---------------------------------------------------------------------------
# Parse helpers
# ---------------------------------------------------------------------------

def parse_prijs(text: str | None) -> int | None:
    """'€ 325.000 k.k.' → 325000

    All non-digit characters are dropped; returns None for empty input or
    input without digits.
    """
    if not text:
        return None
    digits = re.sub(r"[^\d]", "", text)
    return int(digits) if digits else None


def parse_m2(text: str | None) -> int | None:
    """'87 m²' → 87

    Dots are removed first (thousands separators), then the first run of
    digits is returned; None when there is none.
    """
    if not text:
        return None
    m = re.search(r"(\d+)", text.replace(".", ""))
    return int(m.group(1)) if m else None


# ---------------------------------------------------------------------------
# Realworks CMS (shared)
# ---------------------------------------------------------------------------

# Lower-cased status banner text → internal status value.
_REALWORKS_STATUS_MAP = {
    "te koop": "beschikbaar",
    "nieuw": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht o.v.": "verkocht",
    "verkocht": "verkocht",
}


def _realworks_detail(detail_url: str, makelaar: str) -> dict:
    """Fetch a Realworks detail page and extract kenmerken.

    Args:
        detail_url: absolute URL of the detail page.
        makelaar: broker name, only used in the warning log line.

    Returns:
        Dict with the raw (string or None) values for woningtype, bouwjaar,
        woonoppervlak, perceeloppervlak, kamers, slaapkamers and energielabel.
        Empty dict on any failure (best effort; the failure is logged).
    """
    try:
        soup = fetch_soup(detail_url)

        # Build a label→value map from all .kenmerk spans
        kv: dict[str, str] = {}
        for kenmerk in soup.select("span.kenmerk"):
            label_el = kenmerk.select_one("span.kenmerkName")
            value_el = kenmerk.select_one("span.kenmerkValue")
            if label_el and value_el:
                label = label_el.get_text(strip=True).lower()
                kv[label] = value_el.get_text(strip=True)

        return {
            "woningtype": kv.get("type woning"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("woonoppervlakte"),
            "perceeloppervlak": kv.get("perceeloppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energieklasse"),
        }
    except Exception as e:
        log.warning("%s: detail fetch fout %s: %s", makelaar, detail_url, e)
        return {}


def fetch_realworks(base_url: str, makelaar: str) -> list[RawListing]:
    """Generic fetcher for Realworks CMS brokers.

    Paginates via /pagina-{n}/ and fetches the detail page of every listing.
    Pagination stops at the first page without cards or with fewer than 10.
    When config.APP_ENV is "dev" the function stops after the first listing
    that was parsed successfully (no further cards, no further pages).

    Args:
        base_url: scheme + host of the broker site, without trailing slash.
        makelaar: value stored in RawListing.source_makelaar and used in logs.

    Returns:
        The collected listings. A card that fails to parse is logged and
        skipped.

    Raises:
        Whatever fetch_soup raises for a listings page (detail-page failures
        are swallowed by _realworks_detail).
    """
    listings_path = f"/aanbod/woningaanbod/-{config.MAX_PRICE}/koop"
    listings = []
    page = 1

    while True:
        url = f"{base_url}{listings_path}/pagina-{page}/"
        soup = fetch_soup(url)
        cards = soup.select("li.aanbodEntry")
        if not cards:
            break

        dev_stop = False
        for card in cards:
            try:
                a_tag = card.select_one("a.aanbodEntryLink")
                if not a_tag:
                    continue
                href = a_tag["href"]
                # Same rule as the other scrapers: only prefix relative links.
                listing_url = href if href.startswith("http") else base_url + href

                adres = _text(card, ".street-address")
                postcode = (_text(card, ".postal-code") or "").replace(" ", "") or None
                stad = _text(card, ".locality")
                prijs = parse_prijs(_text(card, ".koopprijs .kenmerkValue"))

                status_text = (_text(card, ".objectstatusbanner") or "").lower()
                status = _REALWORKS_STATUS_MAP.get(status_text, "beschikbaar")

                # A photo without src must not cost us the whole listing.
                img_tag = card.select_one(".hoofdfoto img")
                hero = img_tag.get("src") if img_tag else None

                kk = _realworks_detail(listing_url, makelaar)

                listings.append(RawListing(
                    url=listing_url,
                    source_makelaar=makelaar,
                    adres=adres,
                    postcode=postcode,
                    stad=stad,
                    prijs=prijs,
                    status=status,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=int(kk["bouwjaar"]) if kk.get("bouwjaar") else None,
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=int(kk["kamers"]) if kk.get("kamers") else None,
                    slaapkamers=int(kk["slaapkamers"]) if kk.get("slaapkamers") else None,
                    energielabel=kk.get("energielabel"),
                ))
                if config.APP_ENV == "dev":
                    # One listing is enough in dev; also stop paginating.
                    dev_stop = True
                    break
            except Exception as e:
                log.warning("%s: parse fout: %s", makelaar, e)

        if dev_stop or len(cards) < 10:
            break
        page += 1

    log.info("%s: %d listings opgehaald", makelaar, len(listings))
    return listings


# ---------------------------------------------------------------------------
# Anke Bodewes Makelaardij
# ---------------------------------------------------------------------------

def fetch_ankebodewes() -> list[RawListing]:
    """Scrape Anke Bodewes Makelaardij via the generic Realworks fetcher."""
    return fetch_realworks("https://www.ankebodewes.nl", "ankebodewes")


# ---------------------------------------------------------------------------
# Woongoed Makelaars Schiedam
# ---------------------------------------------------------------------------

def fetch_woongoed() -> list[RawListing]:
    """Scrape Woongoed Makelaars via the generic Realworks fetcher."""
    return fetch_realworks("https://www.woongoedmakelaars.nl", "woongoed")


# ---------------------------------------------------------------------------
# De Witte Garantiemakelaars
# ---------------------------------------------------------------------------

_DEWITTE_BASE = "https://dewittegarantiemakelaars.nl"

# CSS class of the status pill on a card → internal status value.
# Any other "bg-*" class (or no pill at all) is treated as "onder_bod".
_DEWITTE_PILL_MAP = {
    "bg-fun-green": "beschikbaar",
    "bg-sold": "verkocht",
}

# JSON-LD "@type" of itemOffered → internal woningtype.
_DEWITTE_TYPE_MAP = {
    "Apartment": "appartement",
    "House": "woning",
    "SingleFamilyResidence": "woning",
    "Residence": "woning",
}


def _dewitte_jsonld(detail_url: str) -> dict:
    """Fetch detail page and return parsed JSON-LD dict, or {} on failure.

    Only the first ``application/ld+json`` script is read. A payload that is
    not a JSON object (e.g. a top-level array) is logged and reported as {}
    so callers can rely on the dict return type.
    """
    import json

    try:
        soup = fetch_soup(detail_url)
        tag = soup.select_one('script[type="application/ld+json"]')
        if not tag:
            log.warning("dewitte: geen JSON-LD op %s", detail_url)
            return {}
        data = json.loads(tag.string)
        if not isinstance(data, dict):
            log.warning(
                "dewitte: JSON-LD fout %s: %s",
                detail_url,
                f"unexpected top-level {type(data).__name__}",
            )
            return {}
        return data
    except Exception as e:
        log.warning("dewitte: JSON-LD fout %s: %s", detail_url, e)
        return {}


def fetch_dewittegarantiemakelaars() -> list[RawListing]:
    """Scrape the De Witte Garantiemakelaars buy listings.

    Walks ``/woningaanbod?...&page={n}`` until a page has no cards or fewer
    than 10. Status comes from the card's pill class; everything else comes
    from the JSON-LD block on the detail page. Listings without JSON-LD or
    priced above config.MAX_PRICE are skipped. When config.APP_ENV is "dev"
    the function stops after the first listing that was parsed successfully.

    Raises:
        Whatever fetch_soup raises for a listings page.
    """
    listings = []
    page = 1

    while True:
        url = (
            f"{_DEWITTE_BASE}/woningaanbod"
            f"?buy_rent=buy&buy_price=1-{config.MAX_PRICE}&page={page}"
        )
        soup = fetch_soup(url)
        cards = soup.select("div.card.card--property")
        if not cards:
            break

        dev_stop = False
        for card in cards:
            try:
                a_tag = card.select_one("a.card__anchor")
                if not a_tag:
                    continue
                detail_url = a_tag["href"]
                if not detail_url.startswith("http"):
                    detail_url = _DEWITTE_BASE + detail_url

                pill = card.select_one("span.pill")
                pill_classes = pill.get("class", []) if pill else []
                status_key = next(
                    (c for c in pill_classes if c.startswith("bg-")), None
                )
                status = _DEWITTE_PILL_MAP.get(status_key, "onder_bod")

                ld = _dewitte_jsonld(detail_url)
                if not ld:
                    continue

                # "or {}" / "or ''" also covers keys that are present but null.
                offered = ld.get("itemOffered") or {}
                address = offered.get("address") or {}
                floor_size = offered.get("floorSize") or {}

                postcode = (address.get("postalCode") or "").replace(" ", "") or None
                stad = address.get("addressLocality") or None
                adres = address.get("streetAddress") or None

                prijs = ld.get("price")
                if prijs and int(prijs) > config.MAX_PRICE:
                    continue

                woningtype = _DEWITTE_TYPE_MAP.get(offered.get("@type", ""))
                woonoppervlak = int(floor_size["value"]) if floor_size.get("value") else None
                kamers = offered.get("numberOfRooms")
                bouwjaar = offered.get("yearBuilt")

                # Full-res image from JSON-LD, fall back to card thumbnail
                hero = ld.get("image")
                if not hero:
                    img = card.select_one("picture img")
                    hero = img["src"] if img else None

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="dewittegarantiemakelaars",
                    status=status,
                    adres=adres,
                    postcode=postcode,
                    stad=stad,
                    prijs=int(prijs) if prijs else None,
                    woningtype=woningtype,
                    woonoppervlak=woonoppervlak,
                    kamers=int(kamers) if kamers else None,
                    bouwjaar=int(bouwjaar) if bouwjaar else None,
                    hero_image_url=hero,
                ))
                if config.APP_ENV == "dev":
                    # One listing is enough in dev; also stop paginating.
                    dev_stop = True
                    break
            except Exception as e:
                log.warning("dewitte: parse fout: %s", e)

        if dev_stop or len(cards) < 10:
            break
        page += 1

    log.info("dewittegarantiemakelaars: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# Makelaardij Wassenaar (Schiedam)
# ---------------------------------------------------------------------------
# Realworks CMS. Listings page has JSON-LD (Residence) with url/address/price/photo.
# Detail pages have span.kenmerk with Wassenaar-specific label names.
_WASSENAAR_BASE = "https://www.makelaardijwassenaar.nl"

# Lower-cased status banner text → internal status value.
_WASSENAAR_STATUS_MAP = {
    "te koop": "beschikbaar",
    "nieuw": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht o.v.": "onder_bod",
    "verkocht onder voorbehoud": "onder_bod",
    "verkocht": "verkocht",
}


def _wassenaar_detail(detail_url: str) -> dict:
    """Fetch Realworks detail page; extract kenmerken with Wassenaar-specific labels.

    Returns the raw (string or None) values; {} on any failure (logged).
    """
    try:
        soup = fetch_soup(detail_url)
        kv: dict[str, str] = {}
        for kenmerk in soup.select("span.kenmerk"):
            label_el = kenmerk.select_one("span.kenmerkName")
            value_el = kenmerk.select_one("span.kenmerkValue")
            if label_el and value_el:
                kv[label_el.get_text(strip=True).lower()] = value_el.get_text(strip=True)
        return {
            "woningtype": kv.get("soort object"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("woonoppervlakte"),
            "perceeloppervlak": kv.get("perceeloppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energieklasse"),
        }
    except Exception as e:
        log.warning("wassenaar: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_wassenaar() -> list[RawListing]:
    """Scrape Makelaardij Wassenaar (single listings page, no pagination).

    Status and thumbnail are read from the HTML cards; url, address, price
    and photo from the per-listing JSON-LD "Residence" blocks; kenmerken from
    each detail page. Listings above config.MAX_PRICE are skipped. When
    config.APP_ENV is "dev" only the first listing is collected.
    """
    import json as _json

    soup = fetch_soup(f"{_WASSENAAR_BASE}/aanbod/woningaanbod/-{config.MAX_PRICE}/koop/")

    # First pass: collect status + thumbnail per relative url
    # Each listing has two a.aanbodEntryLink with the same href;
    # the first has the status banner + photo, the second has address + price.
    status_by_url: dict[str, str] = {}
    photo_by_url: dict[str, str] = {}
    for a in soup.select("a.aanbodEntryLink[href]"):
        href = a["href"]
        if href in status_by_url:
            continue
        banner = a.select_one(".objectstatusbanner")
        status_text = banner.get_text(strip=True).lower() if banner else ""
        status_by_url[href] = _WASSENAAR_STATUS_MAP.get(status_text, "beschikbaar")
        img = a.select_one("span.hoofdfoto img")
        if img:
            src = img.get("src", "")
            if "geenfotobeschikbaar" not in src:
                photo_by_url[href] = src

    # Second pass: parse JSON-LD blocks (one per listing)
    seen: set[str] = set()
    listings = []
    for tag in soup.select('script[type="application/ld+json"]'):
        try:
            ld = _json.loads(tag.string)
            if ld.get("@type") != "Residence":
                continue
            rel_url = ld.get("url", "")
            if not rel_url or rel_url in seen:
                continue
            seen.add(rel_url)

            detail_url = _WASSENAAR_BASE + rel_url
            # "or" also covers keys that are present but null.
            address = ld.get("address") or {}
            postcode = (address.get("postalCode") or "").replace(" ", "") or None

            price_spec = next(
                (
                    action.get("priceSpecification", {})
                    for action in ld.get("potentialAction", [])
                    if action.get("priceSpecification")
                ),
                {},
            )
            prijs = int(price_spec["price"]) if price_spec.get("price") else None
            if prijs and prijs > config.MAX_PRICE:
                continue

            hero = ld.get("photo") or photo_by_url.get(rel_url)
            status = status_by_url.get(rel_url, "beschikbaar")

            kk = _wassenaar_detail(detail_url)

            listings.append(RawListing(
                url=detail_url,
                source_makelaar="wassenaar",
                status=status,
                adres=address.get("streetAddress") or None,
                postcode=postcode,
                stad=address.get("addressLocality") or None,
                prijs=prijs,
                hero_image_url=hero,
                woningtype=kk.get("woningtype"),
                bouwjaar=int(kk["bouwjaar"]) if kk.get("bouwjaar") else None,
                woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                kamers=int(kk["kamers"]) if kk.get("kamers") else None,
                slaapkamers=int(kk["slaapkamers"]) if kk.get("slaapkamers") else None,
                energielabel=kk.get("energielabel"),
            ))
            if config.APP_ENV == "dev":
                break
        except Exception as e:
            log.warning("wassenaar: parse fout: %s", e)

    log.info("wassenaar: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# SSR helper utils
# ---------------------------------------------------------------------------

def _text(soup, selector: str) -> str | None:
    """Stripped text of the first element matching ``selector``, or None."""
    el = soup.select_one(selector)
    return el.get_text(strip=True) if el else None


def _src(soup, selector: str) -> str | None:
    """``src`` (else ``data-src``) of the first element matching ``selector``."""
    el = soup.select_one(selector)
    if el is None:
        return None
    return el.get("src") or el.get("data-src")


def _extract_postcode(text: str | None) -> str | None:
    """Find the first 'NNNN AA' / 'NNNNAA' postcode in ``text`` (upper-case
    letters only) and return it without the space; None when absent."""
    if not text:
        return None
    m = re.search(r"\b(\d{4}\s?[A-Z]{2})\b", text)
    return m.group(1).replace(" ", "") if m else None


def _infer_stad(postcode: str | None) -> str | None:
    """Simple mapping based on postcode range — extend as needed.

    Returns None for empty input, for a postcode whose first four characters
    are not a number, and for numbers outside the known ranges.
    """
    if not postcode:
        return None
    try:
        code = int(postcode[:4])
    except ValueError:
        # Malformed postcode: "unknown city" instead of crashing the caller.
        return None
    if 2600 <= code <= 2629:
        return "Delft"
    # NOTE(review): 3131–3135 may belong to Vlaardingen rather than
    # Schiedam — confirm the upper bound.
    if 3100 <= code <= 3135:
        return "Schiedam"
    return None


# ---------------------------------------------------------------------------
# D&S Makelaars (Schiedam)
# ---------------------------------------------------------------------------

_DS_BASE = "https://www.densmakelaars.nl"

# Lower-cased status label → internal status value.
_DS_STATUS_MAP = {
    "onder bod": "onder_bod",
    "te koop": "beschikbaar",
    "nieuw": "beschikbaar",
    "beschikbaar": "beschikbaar",
    "verkocht": "verkocht",
}


def _ds_detail(detail_url: str, html_text: str | None = None) -> dict:
    """Fetch D&S detail page and extract all kenmerken from <dt>/<dd> pairs
    and postcode from maps URL.

    Args:
        detail_url: absolute URL of the detail page.
        html_text: already-downloaded HTML; when None the page is fetched.

    Returns:
        Dict with status (lower-cased, "" when the page has no status row),
        woningtype, bouwjaar, woonoppervlak, kamers, slaapkamers,
        energielabel (raw strings or None) and postcode. {} on any failure,
        including an HTTP error status (logged).
    """
    try:
        # If html_text not provided, fetch it
        if html_text is None:
            r = httpx.get(
                detail_url,
                headers={"User-Agent": config.USER_AGENT},
                timeout=15,
                follow_redirects=True,
            )
            # Do not parse an error page as if it were listing data.
            r.raise_for_status()
            html_text = r.text

        soup = BeautifulSoup(html_text, "html.parser")

        # Parse <dt>/<dd> pairs into a label → value map.
        # The n-th <dt> on the page is paired with the n-th <dd>.
        kv: dict[str, str] = {}
        for dt, dd in zip(soup.select("dt"), soup.select("dd")):
            kv[dt.get_text(strip=True).lower()] = dd.get_text(strip=True)

        # Extract postcode from Google Maps URL in iframe src
        # Pattern: q=...POSTCODE...,CITY where POSTCODE is 4 digits + 2 letters
        postcode = None
        m = re.search(r'q=.+?,(\d{4})\s+([A-Z]{2}),', html_text)
        if m:
            postcode = f"{m.group(1)}{m.group(2)}"

        return {
            # No default here: a missing status row must not override the
            # status the caller already read from the listing card.
            "status": kv.get("status", "").lower(),
            "woningtype": kv.get("soort woning"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("woonoppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energielabel"),
            "postcode": postcode,
        }
    except Exception as e:
        log.warning("dens: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_dens() -> list[RawListing]:
    """Fetch D&S Makelaars listings with full detail pages.

    Walks ``/aanbod/koopwoningen?page={n}`` until a page has no cards or
    fewer than 10. Card data is the fallback; detail-page data wins where
    present. When config.APP_ENV is "dev" the function stops after the first
    listing that was parsed successfully.

    Raises:
        Whatever fetch_soup raises for a listings page.
    """
    listings = []
    page = 1

    while True:
        url = f"{_DS_BASE}/aanbod/koopwoningen?page={page}"
        soup = fetch_soup(url)
        cards = soup.select(".col-12.col-md-4.object-wrapper")
        if not cards:
            break

        dev_stop = False
        for card in cards:
            try:
                # Extract URL
                a_tag = card.select_one("a.property")
                if not a_tag or "href" not in a_tag.attrs:
                    continue
                detail_url = a_tag["href"]
                if not detail_url.startswith("http"):
                    detail_url = _DS_BASE + detail_url

                # Extract listing page data
                status_label = _text(card, "span.label") or "beschikbaar"
                status_label = status_label.strip().lower()
                status = _DS_STATUS_MAP.get(status_label, "beschikbaar")

                adres = _text(card, "h3")
                stad = _text(card, "h4")
                prijs = parse_prijs(_text(card, "div.price"))

                # Extract area and rooms from footer
                woonoppervlak = None
                kamers = None
                for span in card.select("div.footer span"):
                    text = span.get_text(strip=True)
                    if "m²" in text:
                        woonoppervlak = parse_m2(text)
                    elif "kamers" in text.lower():
                        m = re.search(r"(\d+)", text)
                        if m:
                            kamers = int(m.group(1))

                # Extract hero image
                img_tag = card.select_one("img")
                hero = img_tag["src"] if img_tag else None

                # Fetch and parse detail page
                detail_data = _ds_detail(detail_url)

                # Use postcode from detail data (extracted from Google Maps URL)
                postcode = detail_data.get("postcode")

                # Determine status from detail page if available
                if detail_data.get("status"):
                    status = _DS_STATUS_MAP.get(detail_data["status"], status)

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="dens",
                    adres=adres,
                    postcode=postcode,
                    stad=stad or _infer_stad(postcode),
                    prijs=prijs,
                    status=status,
                    hero_image_url=hero,
                    woningtype=detail_data.get("woningtype"),
                    bouwjaar=int(detail_data["bouwjaar"]) if detail_data.get("bouwjaar") else None,
                    woonoppervlak=parse_m2(detail_data.get("woonoppervlak")) or woonoppervlak,
                    kamers=int(detail_data["kamers"]) if detail_data.get("kamers") else kamers,
                    slaapkamers=int(detail_data["slaapkamers"]) if detail_data.get("slaapkamers") else None,
                    energielabel=detail_data.get("energielabel"),
                ))
                if config.APP_ENV == "dev":
                    # One listing is enough in dev; also stop paginating.
                    dev_stop = True
                    break
            except Exception as e:
                log.warning("dens: parse fout: %s", e)

        if dev_stop or len(cards) < 10:
            break
        page += 1

    log.info("dens: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# 3D Makelaars (Schiedam/Vlaardingen)
# ---------------------------------------------------------------------------

_3D_BASE = "https://3dmakelaars.nl"


def _3dmakelaars_detail(detail_url: str) -> dict:
    """Fetch 3dmakelaars detail page and extract structured info block.

    Returns already-converted values (ints for kamers, slaapkamers, bouwjaar
    and woonoppervlak) plus woningtype and postcode; {} on any failure,
    including a value that cannot be converted (logged).
    """
    try:
        soup = fetch_soup(detail_url)

        # Parse structured info block: span (label) + p (value) pairs.
        # The misspelled class names are the site's own.
        kv: dict[str, str] = {}
        for li in soup.select("div.tl-adiltional-inforamtion ul.tl-adiltional-listed li"):
            label_el = li.select_one("span")
            value_el = li.select_one("p")
            if label_el and value_el:
                kv[label_el.get_text(strip=True).lower()] = value_el.get_text(strip=True)

        # Extract postcode from first description paragraph
        postcode = None
        p_tag = soup.select_one(".omschrijving > p:nth-child(1)")
        if p_tag:
            postcode = _extract_postcode(p_tag.get_text())

        return {
            "kamers": int(kv["aantal kamers"].split()[0]) if "aantal kamers" in kv else None,
            "slaapkamers": int(kv["aantal slaapkamers"].split()[0]) if "aantal slaapkamers" in kv else None,
            "bouwjaar": int(kv["bouwjaar"]) if "bouwjaar" in kv else None,
            "woningtype": kv.get("bouwvorm"),
            "woonoppervlak": parse_m2(kv.get("oppervlakte")),
            "postcode": postcode,
        }
    except Exception as e:
        log.warning("3dmakelaars: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_3dmakelaars() -> list[RawListing]:
    """Fetch 3D Makelaars listings with pagination.

    Pagination stops at the first page without cards or with fewer than 7.
    When config.APP_ENV is "dev" the function stops after the first listing
    that was parsed successfully.

    Raises:
        Whatever fetch_soup raises for a listings page.
    """
    listings = []
    page = 1

    while True:
        url = (
            f"{_3D_BASE}/woningen-te-koop-in-schiedam-en-vlaardingen"
            f"?kamers=&oppervlakte=&woonplaats=&video=&prijs=3&page={page}"
        )
        soup = fetch_soup(url)
        cards = soup.select("div.tl-properties-item")
        if not cards:
            break

        dev_stop = False
        for card in cards:
            try:
                # Extract detail URL from onclick attribute
                onclick = card.get("onclick", "")
                detail_url = None
                if "window.location" in onclick:
                    m = re.search(r"window\.location\s*=\s*['\"]([^'\"]+)['\"]", onclick)
                    if m:
                        detail_url = _3D_BASE + m.group(1)
                if not detail_url:
                    continue

                # Extract listing-level info
                # NOTE(review): the address is read from h3.price and the
                # price from span.address — the selectors look swapped;
                # confirm against the site's markup before changing.
                adres = _text(card, "h3.price")
                prijs = parse_prijs(_text(card, "span.address"))

                # Extract rooms and area from meta list
                kamers = None
                woonoppervlak = None
                for li in card.select("ul.tl-meta-listed > li"):
                    text = li.get_text(strip=True)
                    if "kamers" in text.lower():
                        m = re.search(r"(\d+)", text)
                        if m:
                            kamers = int(m.group(1))
                    elif "m²" in text or "m2" in text:
                        woonoppervlak = parse_m2(text)

                # Extract image
                img_tag = card.select_one("img")
                hero = img_tag["src"] if img_tag else None
                if hero and not hero.startswith("http"):
                    hero = _3D_BASE + hero

                # Fetch detail page for full info
                detail_data = _3dmakelaars_detail(detail_url)

                # Postcode from detail page, fallback to extraction from address
                postcode = detail_data.get("postcode")
                if not postcode and adres:
                    postcode = _extract_postcode(adres)

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="3dmakelaars",
                    adres=adres,
                    postcode=postcode,
                    stad=_infer_stad(postcode),
                    prijs=prijs,
                    woningtype=detail_data.get("woningtype"),
                    bouwjaar=detail_data.get("bouwjaar"),
                    woonoppervlak=woonoppervlak or detail_data.get("woonoppervlak"),
                    kamers=kamers or detail_data.get("kamers"),
                    slaapkamers=detail_data.get("slaapkamers"),
                    hero_image_url=hero,
                ))
                if config.APP_ENV == "dev":
                    # One listing is enough in dev; also stop paginating.
                    dev_stop = True
                    break
            except Exception as e:
                log.warning("3dmakelaars: parse fout: %s", e)

        if dev_stop or len(cards) < 7:
            break
        page += 1

    log.info("3dmakelaars: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# Dupont ERA Makelaars (Schiedam/Rotterdam)
# ---------------------------------------------------------------------------

_DUPONT_BASE = "https://www.dupont.nl"

# Lower-cased status label → internal status value.
_DUPONT_STATUS_MAP = {
    "te koop": "beschikbaar",
    "nieuw": "beschikbaar",
    "onder bod": "onder_bod",
    "verkocht onder voorbehoud": "onder_bod",
    "verkocht": "verkocht",
}


def _dupont_detail(detail_url: str) -> dict:
    """Fetch Dupont detail page and extract kenmerken from dt/dd pairs.

    Returns raw (string or None) values plus the postcode; {} on any
    failure (logged).
    """
    try:
        soup = fetch_soup(detail_url)

        # Parse dt/dd pairs into label → value map.
        # The n-th <dt> on the page is paired with the n-th <dd>.
        kv: dict[str, str] = {}
        for dt, dd in zip(soup.select("dt"), soup.select("dd")):
            kv[dt.get_text(strip=True).lower()] = dd.get_text(strip=True)

        # Extract postcode from small tag (format: "NNNN AA CITY")
        postcode = None
        small_tag = soup.select_one("section div.container-fluid small")
        if small_tag:
            postcode = _extract_postcode(small_tag.get_text())

        return {
            "postcode": postcode,
            "woningtype": kv.get("soort woning"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("woonoppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energielabel"),
        }
    except Exception as e:
        log.warning("dupont: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_dupont() -> list[RawListing]:
    """Fetch Dupont ERA Makelaars listings with pagination and detail pages.

    Pagination stops at the first page without cards or with fewer than 10.
    When config.APP_ENV is "dev" the function stops after the first listing
    that was parsed successfully.

    Raises:
        Whatever fetch_soup raises for a listings page.
    """
    listings = []
    page = 1

    while True:
        url = f"{_DUPONT_BASE}/aanbod/koopwoningen?page={page}"
        soup = fetch_soup(url)
        cards = soup.select("article.object")
        if not cards:
            break

        dev_stop = False
        for card in cards:
            try:
                # Extract URL
                a_tag = card.select_one("a[href]")
                if not a_tag or "href" not in a_tag.attrs:
                    continue
                detail_url = a_tag["href"]
                if not detail_url.startswith("http"):
                    detail_url = _DUPONT_BASE + detail_url

                # Extract listing-level data
                adres = _text(card, "h3")
                stad = _text(card, "h4")
                prijs = parse_prijs(_text(card, "div.price"))

                # Extract status from label
                status_label = _text(card, "div.label") or "beschikbaar"
                status_label = status_label.strip().lower()
                status = _DUPONT_STATUS_MAP.get(status_label, "beschikbaar")

                # Extract image
                img_tag = card.select_one("img.img-responsive")
                hero = img_tag["src"] if img_tag else None
                if hero and not hero.startswith("http"):
                    hero = _DUPONT_BASE + hero

                # Fetch detail page for full data
                detail_data = _dupont_detail(detail_url)

                # Use postcode from detail if available
                postcode = detail_data.get("postcode")

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="dupont",
                    adres=adres,
                    postcode=postcode,
                    stad=stad or _infer_stad(postcode),
                    prijs=prijs,
                    status=status,
                    hero_image_url=hero,
                    woningtype=detail_data.get("woningtype"),
                    bouwjaar=int(detail_data["bouwjaar"]) if detail_data.get("bouwjaar") else None,
                    woonoppervlak=parse_m2(detail_data.get("woonoppervlak")),
                    kamers=int(detail_data["kamers"]) if detail_data.get("kamers") else None,
                    slaapkamers=int(detail_data["slaapkamers"]) if detail_data.get("slaapkamers") else None,
                    energielabel=detail_data.get("energielabel"),
                ))
                if config.APP_ENV == "dev":
                    # One listing is enough in dev; also stop paginating.
                    dev_stop = True
                    break
            except Exception as e:
                log.warning("dupont: parse fout: %s", e)

        if dev_stop or len(cards) < 10:
            break
        page += 1

    log.info("dupont: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# Schieland Borsboom NVM Makelaars (Rotterdam, active in Schiedam)
# ---------------------------------------------------------------------------

_SCHIELAND_BASE = "https://www.schielandborsboom.nl"

# Lower-cased status text → internal status value.
_SCHIELAND_STATUS_MAP = {
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht o.v.": "verkocht",
    "verkocht": "verkocht",
}


def _schieland_detail(detail_url: str) -> dict:
    """Fetch Schieland Borsboom detail page and extract kenmerken.

    Returns raw (string or None) values, the postcode and a lower-cased
    status ("" when absent); {} on any failure (logged).
    """
    try:
        soup = fetch_soup(detail_url)

        # Postcode from house__status p (e.g. "3117 DP Schiedam")
        postcode_el = soup.select_one("div.house__status p")
        postcode = _extract_postcode(postcode_el.get_text()) if postcode_el else None

        # Parse #kenmerken section: <li><strong>label</strong><span>value</span></li>
        kv: dict[str, str] = {}
        kenmerken = soup.select_one("#kenmerken")
        if kenmerken:
            for li in kenmerken.select("li"):
                label_el = li.select_one("strong")
                value_el = li.select_one("span")
                if label_el and value_el:
                    # Strip nested links (e.g. "Hypotheek berekenen")
                    for a in value_el.select("a"):
                        a.decompose()
                    kv[label_el.get_text(strip=True).lower()] = value_el.get_text(strip=True)

        return {
            "postcode": postcode,
            "status": kv.get("status", "").lower(),
            "woningtype": kv.get("soort bouw"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("woonoppervlakte"),
            "perceeloppervlak": kv.get("perceeloppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energielabel"),
        }
    except Exception as e:
        log.warning("schielandborsboom: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_schielandborsboom() -> list[RawListing]:
    """Fetch Schieland Borsboom NVM listings (koop only, Schiedam).

    Page 1 lives at ``/wonen``, later pages at ``/wonen/page/{n}/``.
    Pagination stops at the first page without cards or with fewer than 18.
    Cards outside Schiedam or above config.MAX_PRICE are skipped. When
    config.APP_ENV is "dev" the function stops after the first listing that
    was parsed successfully.

    Raises:
        Whatever fetch_soup raises for a listings page.
    """
    listings = []
    page = 1

    while True:
        if page == 1:
            url = f"{_SCHIELAND_BASE}/wonen?sure_koop_huur=koop"
        else:
            url = f"{_SCHIELAND_BASE}/wonen/page/{page}/?sure_koop_huur=koop"
        soup = fetch_soup(url)
        cards = soup.select("div.card.card--house")
        if not cards:
            break

        dev_stop = False
        for card in cards:
            try:
                a_tag = card.select_one("a.card__anchor")
                if not a_tag or "href" not in a_tag.attrs:
                    continue
                detail_url = a_tag["href"]
                if not detail_url.startswith("http"):
                    detail_url = _SCHIELAND_BASE + detail_url

                # Filter: only Schiedam
                stad_el = card.select_one("p.house-place")
                stad = stad_el.get_text(strip=True) if stad_el else None
                if not stad or stad.lower() != "schiedam":
                    continue

                # Status from card-house__thumb second class
                thumb = card.select_one("div.card-house__thumb")
                status_classes = thumb.get("class", []) if thumb else []
                status_text = next(
                    (c for c in status_classes if c != "card-house__thumb"),
                    "beschikbaar",
                ).lower()
                status = _SCHIELAND_STATUS_MAP.get(status_text, "beschikbaar")

                # Price
                prijs = parse_prijs(_text(card, "p.price"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                adres = _text(card, "h4.house-street")

                # Hero image from picture source (medium size)
                src_tag = card.select_one('picture source[media="(min-width:100px)"]')
                hero = src_tag["srcset"] if src_tag else _src(card, "img")
                if hero and not hero.startswith("http"):
                    hero = _SCHIELAND_BASE + hero

                # Data icons on card: surface, bedrooms, energy label
                woonoppervlak_card = None
                slaapkamers_card = None
                energielabel_card = None
                for data_div in card.select("div.data"):
                    txt = data_div.get_text(strip=True)
                    if data_div.select_one("i.icon-surface"):
                        woonoppervlak_card = parse_m2(txt)
                    elif data_div.select_one("i.icon-bedrooms"):
                        m = re.search(r"(\d+)", txt)
                        slaapkamers_card = int(m.group(1)) if m else None
                    elif data_div.select_one("i.icon-label"):
                        energielabel_card = txt.strip() or None

                # Fetch detail page for full kenmerken
                kk = _schieland_detail(detail_url)

                # Refine status from detail page
                if kk.get("status"):
                    status = _SCHIELAND_STATUS_MAP.get(kk["status"], status)

                # Parse kamers: "5 kamers" → 5
                kamers = None
                if kk.get("kamers"):
                    m = re.search(r"(\d+)", kk["kamers"])
                    kamers = int(m.group(1)) if m else None

                # Parse slaapkamers: "3" or "3 slaapkamers" → 3
                slaapkamers = slaapkamers_card
                if kk.get("slaapkamers"):
                    m = re.search(r"(\d+)", kk["slaapkamers"])
                    slaapkamers = int(m.group(1)) if m else slaapkamers_card

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="schielandborsboom",
                    status=status,
                    adres=adres,
                    postcode=kk.get("postcode"),
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=int(kk["bouwjaar"]) if kk.get("bouwjaar") else None,
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=kamers,
                    slaapkamers=slaapkamers,
                    energielabel=kk.get("energielabel") or energielabel_card,
                ))
                if config.APP_ENV == "dev":
                    # One listing is enough in dev; also stop paginating.
                    dev_stop = True
                    break
            except Exception as e:
                log.warning("schielandborsboom: parse fout: %s", e)

        if dev_stop or len(cards) < 18:
            break
        page += 1

    log.info("schielandborsboom: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# Van Silfhout & Hogetoorn Wereldmakelaars (Delft)
# ---------------------------------------------------------------------------

_VANSILFHOUT_BASE = "https://www.vansilfhout.nl"

# Lower-cased status text → internal status value.
_VANSILFHOUT_STATUS_MAP = {
    "te koop": "beschikbaar",
    "onder bod": "onder_bod",
    "verkocht onder voorbehoud": "verkocht",
    "verkocht": "verkocht",
}


def _vansilfhout_detail(detail_url: str) -> dict:
    """Fetch Van Silfhout detail page; extract postcode from JS and specs from shortSpecs.

    The page is fetched with httpx directly (not fetch_soup) because the
    postcode is matched against the raw HTML text.

    Returns raw (string or None) values for postcode, bouwjaar,
    woonoppervlak, kamers and slaapkamers; {} on any failure (logged).
    """
    try:
        r = httpx.get(
            detail_url,
            headers={"User-Agent": config.USER_AGENT},
            timeout=15,
            follow_redirects=True,
        )
        r.raise_for_status()
        html = r.text
        soup = BeautifulSoup(html, "html.parser")

        # Postcode embedded in JS: objectZipcode': '2624NP'
        m = re.search(r"objectZipcode':\s*'([^']+)'", html)
        postcode = m.group(1) if m else None

        # shortSpecs: <li><span>Label:</span><span>Value</span></li>
        kv: dict[str, str] = {}
        for li in soup.select(".shortSpecs li"):
            spans = li.select("span")
            if len(spans) >= 2:
                label = spans[0].get_text(strip=True).rstrip(":").lower()
                kv[label] = spans[-1].get_text(strip=True)

        return {
            "postcode": postcode,
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("oppervlakte"),
            "kamers": kv.get("kamers"),
            "slaapkamers": kv.get("slaapkamers"),
        }
    except Exception as e:
        log.warning("vansilfhout: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_vansilfhout() -> list[RawListing]:
    """Fetch Van Silfhout woningaanbod (all listings on a single page).

    Listings above config.MAX_PRICE are skipped. When config.APP_ENV is
    "dev" only the first listing is collected.
    """
    soup = fetch_soup(f"{_VANSILFHOUT_BASE}/woningaanbod/")
    listings = []

    for card in soup.select("article.row"):
        try:
            a_tag = card.select_one("a.objectcontainerimg")
            if not a_tag or "href" not in a_tag.attrs:
                continue
            detail_url = a_tag["href"]
            if not detail_url.startswith("http"):
                detail_url = _VANSILFHOUT_BASE + detail_url

            # Status
            status_text = (_text(card, "span.objectstatus") or "").lower()
            status = _VANSILFHOUT_STATUS_MAP.get(status_text, "beschikbaar")

            # Address and city
            adres = _text(card, "h2.objecttitle")
            city_el = card.select("a.straatnaamwoonplaats span")
            stad = city_el[-1].get_text(strip=True) if city_el else None

            # Price from shortSpecs strong
            prijs = parse_prijs(_text(card, "ul.shortSpecs li strong"))
            if prijs and prijs > config.MAX_PRICE:
                continue

            # Area and rooms from shortSpecs
            woonoppervlak_card = None
            kamers_card = None
            for li in card.select("ul.shortSpecs li"):
                spans = li.select("span")
                if len(spans) >= 2:
                    label = spans[0].get_text(strip=True).lower()
                    val = spans[-1].get_text(strip=True)
                    if "oppervlakt" in label:
                        woonoppervlak_card = parse_m2(val)
                    elif "kamer" in label:
                        m = re.search(r"(\d+)", val)
                        kamers_card = int(m.group(1)) if m else None

            # Hero image: prefer data-lazy-src, fall back to noscript img src
            img_tag = card.select_one("a.objectcontainerimg img")
            hero = None
            if img_tag:
                hero = img_tag.get("data-lazy-src") or img_tag.get("src") or None
                if hero and hero.startswith("data:"):
                    noscript = card.select_one("noscript img")
                    hero = noscript["src"] if noscript else None

            kk = _vansilfhout_detail(detail_url)

            # Parse kamers/slaapkamers from detail
            kamers = kamers_card
            if kk.get("kamers"):
                m = re.search(r"(\d+)", kk["kamers"])
                kamers = int(m.group(1)) if m else kamers_card

            slaapkamers = None
            if kk.get("slaapkamers"):
                m = re.search(r"(\d+)", kk["slaapkamers"])
                slaapkamers = int(m.group(1)) if m else None

            listings.append(RawListing(
                url=detail_url,
                source_makelaar="vansilfhout",
                status=status,
                adres=adres,
                postcode=kk.get("postcode"),
                stad=stad,
                prijs=prijs,
                hero_image_url=hero,
                bouwjaar=int(kk["bouwjaar"]) if kk.get("bouwjaar") else None,
                woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                kamers=kamers,
                slaapkamers=slaapkamers,
            ))
            if config.APP_ENV == "dev":
                break
        except Exception as e:
            log.warning("vansilfhout: parse fout: %s", e)

    log.info("vansilfhout: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# V&W Makelaars Delft / ZO Makelaars (Delft) — Realworks CMS
# ---------------------------------------------------------------------------

def fetch_vwmakelaars() -> list[RawListing]:
    """Scrape V&W Makelaars via the generic Realworks fetcher."""
    return fetch_realworks("https://www.vwmakelaars.nl", "vwmakelaars")


def fetch_zomakelaars() -> list[RawListing]:
    """Scrape ZO Makelaars via the generic Realworks fetcher."""
    return fetch_realworks("https://www.zomakelaars.nl", "zomakelaars")


# ---------------------------------------------------------------------------
# Roepman Makelaardij NVM (Delft)
# ---------------------------------------------------------------------------
# Realworks CMS, but with div.aanbodEntry instead of li.aanbodEntry.
# The price is in JSON-LD (same structure as Wassenaar).
_ROEPMAN_BASE = "https://www.roepman.nl"


def fetch_roepman() -> list[RawListing]:
    """Fetch Roepman listings from the paginated overview plus detail pages.

    Per overview page the status banner and thumbnail of every
    ``div.aanbodEntry`` card are collected by relative URL. The listings
    themselves come from the JSON-LD ``Residence`` blocks on the same page
    (address, price, photo), enriched with ``_realworks_detail``.

    Returns:
        The parsed listings. Entries above ``config.MAX_PRICE`` are skipped.
        A JSON-LD block that fails to parse is logged and skipped.

    Raises:
        Whatever ``fetch_soup`` raises for an overview page.
    """
    import json as _json

    listings_path = f"/aanbod/woningaanbod/-{config.MAX_PRICE}/koop"
    listings = []
    page = 1

    while True:
        url = f"{_ROEPMAN_BASE}{listings_path}/pagina-{page}/"
        soup = fetch_soup(url)
        cards = soup.select("div.aanbodEntry")
        if not cards:
            break

        # Collect status + photo per relative url
        status_by_url: dict[str, str] = {}
        photo_by_url: dict[str, str] = {}
        for card in cards:
            a_tag = card.select_one("a.aanbodEntryLink[href]")
            if not a_tag:
                continue
            href = a_tag["href"]
            if href in status_by_url:
                continue
            banner = card.select_one(".objectstatusbanner")
            status_text = banner.get_text(strip=True).lower() if banner else ""
            status_by_url[href] = _REALWORKS_STATUS_MAP.get(status_text, "beschikbaar")
            img = card.select_one("img")
            if img:
                src = img.get("src", "")
                # Skip missing src and the "no photo available" placeholder,
                # so the hero image falls back to None, not "".
                if src and "geenfotobeschikbaar" not in src:
                    photo_by_url[href] = src

        # Parse JSON-LD Residence blocks (one per listing)
        seen: set[str] = set()
        for tag in soup.select('script[type="application/ld+json"]'):
            try:
                raw = tag.string
                if not raw:
                    # Empty <script>: nothing to parse.
                    continue
                ld = _json.loads(raw)
                if not isinstance(ld, dict) or ld.get("@type") != "Residence":
                    continue
                rel_url = ld.get("url", "")
                if not rel_url or rel_url in seen:
                    continue
                seen.add(rel_url)
                detail_url = _ROEPMAN_BASE + rel_url

                address = ld.get("address", {})
                postcode = address.get("postalCode", "").replace(" ", "") or None

                # First potentialAction that carries a priceSpecification
                price_spec = next(
                    (
                        a.get("priceSpecification", {})
                        for a in ld.get("potentialAction", [])
                        if a.get("priceSpecification")
                    ),
                    {},
                )
                prijs = int(price_spec["price"]) if price_spec.get("price") else None
                if prijs and prijs > config.MAX_PRICE:
                    continue

                hero = ld.get("photo") or photo_by_url.get(rel_url)
                status = status_by_url.get(rel_url, "beschikbaar")

                kk = _realworks_detail(detail_url, "roepman")

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="roepman",
                    status=status,
                    adres=address.get("streetAddress") or None,
                    postcode=postcode,
                    stad=address.get("addressLocality") or None,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    # parse_m2 returns the first run of digits as int (or
                    # None). Using it for the plain integer fields keeps a
                    # listing when a value carries extra text; int() raised
                    # there and the listing was dropped.
                    bouwjaar=parse_m2(kk.get("bouwjaar")),
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=parse_m2(kk.get("kamers")),
                    slaapkamers=parse_m2(kk.get("slaapkamers")),
                    energielabel=kk.get("energielabel"),
                ))
                # NOTE(review): this only leaves the per-page loop; paging
                # continues in dev mode — confirm that is intended.
                if config.APP_ENV == "dev":
                    break

            except Exception as e:
                log.warning("roepman: parse fout: %s", e)

        # A page with fewer than 10 cards is treated as the last page.
        if len(cards) < 10:
            break
        page += 1

    log.info("roepman: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# Post Makelaardij (v/h Bayense) — Delft & omgeving
# ---------------------------------------------------------------------------
# Custom Tailwind CSS site; covers Delft, Pijnacker, Rijswijk etc.
# Filter for Delft only.

_POST_BASE = "https://www.postmakelaardij.nl"

_POST_STATUS_MAP = {
    "te koop": "beschikbaar",
    "onder bod": "onder_bod",
    "verkocht": "verkocht",
}


def _post_detail(detail_url: str) -> dict:
    """Fetch a Post Makelaardij detail page and extract kenmerken.

    Returns:
        Dict with ``woonoppervlak``, ``perceeloppervlak``, ``slaapkamers``
        (ints or None) and ``energielabel`` (upper-cased suffix of the first
        ``energielabel-…`` CSS class, or None). Empty dict on any failure.
    """
    try:
        soup = fetch_soup(detail_url)

        # Energielabel from CSS class: energielabel-{letter}
        prefix = "energielabel-"
        energielabel = next(
            (
                cls[len(prefix):].upper()
                for el in soup.select("[class]")
                for cls in el.get("class", [])
                if cls.startswith(prefix) and len(cls) > len(prefix)
            ),
            None,
        )

        # Woonoppervlak, perceeloppervlak, slaapkamers from icon spans
        woonoppervlak = None
        perceeloppervlak = None
        slaapkamers = None
        for span in soup.select("span.object-info-icon-text"):
            txt = span.get_text(strip=True)
            # Match keywords case-insensitively so e.g. "Perceel 200 m²" is
            # not mistaken for the living area by the generic m² branch.
            low = txt.lower()
            if "slaapkamer" in low:
                m = re.search(r"(\d+)", txt)
                slaapkamers = int(m.group(1)) if m else None
            elif "perceel" in low:
                perceeloppervlak = parse_m2(txt)
            elif "m²" in low or "m2" in low:
                woonoppervlak = parse_m2(txt)

        return {
            "woonoppervlak": woonoppervlak,
            "perceeloppervlak": perceeloppervlak,
            "slaapkamers": slaapkamers,
            "energielabel": energielabel,
        }
    except Exception as e:
        log.warning("post: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_post() -> list[RawListing]:
    """Fetch Post Makelaardij listings; only Delft, only koop.

    Walks ``/woningaanbod/koop?page=N`` until a page has no ``article``
    cards or fewer than 12 of them. Cards without a link or without a
    three-part postcode/city text are skipped, as are non-Delft cards and
    cards above ``config.MAX_PRICE``.

    Returns:
        The parsed listings; a card that fails to parse is logged and skipped.

    Raises:
        Whatever ``fetch_soup`` raises for an overview page.
    """
    listings = []
    page = 1

    while True:
        url = f"{_POST_BASE}/woningaanbod/koop?page={page}"
        soup = fetch_soup(url)
        cards = soup.select("article")
        if not cards:
            break

        for card in cards:
            try:
                # URL — first link in the card
                a_tag = card.select_one("a[href]")
                if not a_tag:
                    continue
                href = a_tag["href"]
                detail_url = href if href.startswith("http") else _POST_BASE + href

                # Postcode + city from span.custom-postcode-text
                pc_el = card.select_one("span.custom-postcode-text")
                if not pc_el:
                    continue
                pc_parts = pc_el.get_text(strip=True).split()
                if len(pc_parts) < 3:
                    continue
                postcode = pc_parts[0] + pc_parts[1]  # "2613BD"
                stad = " ".join(pc_parts[2:])         # "Delft"

                # Filter: only Delft
                if stad.lower() != "delft":
                    continue

                # Price — filter early, before the detail fetch
                prijs = parse_prijs(_text(card, "span.price-block"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status from span.status text
                status_text = (_text(card, "span.status") or "").lower()
                status = _POST_STATUS_MAP.get(status_text, "beschikbaar")

                # Address
                adres = _text(card, "h4.custom-address-text")

                # Hero: first img in article. .get() so an <img> without a
                # src attribute gives None instead of dropping the listing.
                img = card.select_one("img")
                hero = img.get("src") if img else None

                kk = _post_detail(detail_url)

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="post",
                    status=status,
                    adres=adres,
                    postcode=postcode,
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woonoppervlak=kk.get("woonoppervlak"),
                    perceeloppervlak=kk.get("perceeloppervlak"),
                    slaapkamers=kk.get("slaapkamers"),
                    energielabel=kk.get("energielabel"),
                ))
                # NOTE(review): this only leaves the per-page loop; paging
                # continues in dev mode — confirm that is intended.
                if config.APP_ENV == "dev":
                    break

            except Exception as e:
                log.warning("post: parse fout: %s", e)

        # A page with fewer than 12 cards is treated as the last page.
        if len(cards) < 12:
            break
        page += 1

    log.info("post: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# Morris
# NVM Makelaars (Delft) — Realworks CMS
# ---------------------------------------------------------------------------

def fetch_morris() -> list[RawListing]:
    """Fetch Morris listings through the shared Realworks fetcher."""
    return fetch_realworks("https://www.morrismakelaardij.nl", "morris")


# ---------------------------------------------------------------------------
# Olsthoorn Makelaars Delft (SURE WordPress plugin)
# ---------------------------------------------------------------------------
# Covers Delft, Den Haag, Naaldwijk etc — we filter for Delft only.
# Detail page has no postcode; leave as None.

_OLSTHOORN_BASE = "https://www.olsthoornmakelaars.nl"

# Badge CSS class on the card label -> normalized status
_OLSTHOORN_STATUS_MAP = {
    "badge-available": "beschikbaar",
    "badge-bid": "onder_bod",
    "badge-option": "onder_bod",
    "badge-sold": "verkocht",
}

# Lower-cased "status" kenmerk on the detail page -> normalized status
_OLSTHOORN_DETAIL_STATUS_MAP = {
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht": "verkocht",
}


def _olsthoorn_detail(detail_url: str) -> dict:
    """Fetch Olsthoorn detail page; extract kenmerken from #kenmerken li pairs.

    The first two ``span`` elements of every ``#kenmerken li`` are read as a
    (label, value) pair; labels are lower-cased.

    Returns:
        Dict of raw string values (``status`` lower-cased, others as found or
        None). Empty dict on any failure.
    """
    try:
        soup = fetch_soup(detail_url)

        kv: dict[str, str] = {}
        for li in soup.select("#kenmerken li"):
            spans = li.select("span")
            if len(spans) >= 2:
                label = spans[0].get_text(strip=True).lower()
                value = spans[1].get_text(strip=True)
                kv[label] = value

        return {
            "status": kv.get("status", "").lower(),
            "woningtype": kv.get("soort object") or kv.get("soort woning") or kv.get("soort bouw"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("gebruiksoppervlakte"),
            "perceeloppervlak": kv.get("perceeloppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energielabel"),
        }
    except Exception as e:
        log.warning("olsthoorn: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_olsthoorn() -> list[RawListing]:
    """Fetch Olsthoorn Makelaars listings; only Delft, only koop.

    Walks the ``/wonen`` overview (page 1) and ``/wonen/page/N/`` (later
    pages) with ``sure_koop_huur=koop`` until a page has no ``a.card-house``
    cards or fewer than 15 of them. Detail-page values win over card values
    where both exist; the detail-page status overrides the badge status when
    it is a known label.

    Returns:
        The parsed listings; a card that fails to parse is logged and skipped.

    Raises:
        Whatever ``fetch_soup`` raises for an overview page.
    """
    listings = []
    page = 1

    while True:
        if page == 1:
            url = f"{_OLSTHOORN_BASE}/wonen?sure_koop_huur=koop"
        else:
            url = f"{_OLSTHOORN_BASE}/wonen/page/{page}/?sure_koop_huur=koop"

        soup = fetch_soup(url)
        cards = soup.select("a.card-house")
        if not cards:
            break

        for card in cards:
            try:
                href = card.get("href", "")
                if not href:
                    continue
                detail_url = href if href.startswith("http") else _OLSTHOORN_BASE + href

                # Filter: only Delft
                stad_el = card.select_one("h2.card__title")
                stad = stad_el.get_text(strip=True) if stad_el else None
                if not stad or stad.lower() != "delft":
                    continue

                # Price from bold tag — filter early before detail fetch
                prijs_b = card.select_one("b")
                prijs = parse_prijs(prijs_b.get_text() if prijs_b else None)
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status from badge class on label span
                label_span = card.select_one("span.card-house__label")
                status = "beschikbaar"
                if label_span:
                    for cls in label_span.get("class", []):
                        if cls in _OLSTHOORN_STATUS_MAP:
                            status = _OLSTHOORN_STATUS_MAP[cls]
                            break

                # Address: first <p> directly under div.short--info
                # (internal whitespace collapsed)
                adres_p = card.select("div.short--info > p")
                if adres_p:
                    adres = " ".join(adres_p[0].get_text().split())
                else:
                    adres = None

                # Hero image: data-srcset of the min-width:1024px source
                src_tag = card.select_one('picture source[media="(min-width:1024px)"]')
                hero = src_tag.get("data-srcset") if src_tag else None
                if hero and not hero.startswith("http"):
                    hero = _OLSTHOORN_BASE + hero

                # Woonoppervlak + kamers + energielabel from card data icons
                # NOTE(review): selector reads "date__inner" (not
                # "data__inner") — confirm against the live markup.
                woonoppervlak_card = None
                kamers_card = None
                energielabel_card = None
                for data_div in card.select("div.data"):
                    inner = data_div.select_one("span.date__inner")
                    if not inner:
                        continue
                    txt = inner.get_text(strip=True)
                    if data_div.select_one("i.icon-sizes"):
                        woonoppervlak_card = parse_m2(txt)
                    elif data_div.select_one("i.icon-door"):
                        m = re.search(r"(\d+)", txt)
                        kamers_card = int(m.group(1)) if m else None
                    elif data_div.select_one("i.icon-energylabel"):
                        energielabel_card = txt or None

                kk = _olsthoorn_detail(detail_url)

                # Refine status from detail page
                detail_status = _OLSTHOORN_DETAIL_STATUS_MAP.get(kk.get("status", ""), "")
                if detail_status:
                    status = detail_status

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="olsthoorn",
                    status=status,
                    adres=adres,
                    postcode=None,  # not exposed by broker
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    # parse_m2 returns the first run of digits as int (or
                    # None). Using it for the plain integer fields keeps a
                    # listing when a value carries extra text; int() raised
                    # there and the listing was dropped.
                    bouwjaar=parse_m2(kk.get("bouwjaar")),
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=parse_m2(kk.get("kamers")) or kamers_card,
                    slaapkamers=parse_m2(kk.get("slaapkamers")),
                    energielabel=kk.get("energielabel") or energielabel_card,
                ))
                # NOTE(review): this only leaves the per-page loop; paging
                # continues in dev mode — confirm that is intended.
                if config.APP_ENV == "dev":
                    break

            except Exception as e:
                log.warning("olsthoorn: parse fout: %s", e)

        # A page with fewer than 15 cards is treated as the last page.
        if len(cards) < 15:
            break
        page += 1

    log.info("olsthoorn: %d listings opgehaald", len(listings))
    return listings


#
# ---------------------------------------------------------------------------
# 88 Makelaars (Den Haag) — Custom WordPress theme
# ---------------------------------------------------------------------------
# Cards on /ons-aanbod/page/{N}/; details in div.listing_detail kv pairs.

_88_BASE = "https://88makelaars.nl"

_88_STATUS_MAP = {
    "te koop": "beschikbaar",
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht onder voorbehoud": "verkocht",
    "verkocht": "verkocht",
}


def _88makelaars_detail(detail_url: str) -> dict:
    """Fetch 88makelaars detail page; extract kenmerken from div.listing_detail kv pairs.

    Each ``div.listing_detail`` whose text contains a colon is split at the
    first colon into a lower-cased label and a value.

    Returns:
        Dict with ``postcode`` (four digits + two letters, spaces removed, or
        None) and the raw strings for ``slaapkamers``, ``woonoppervlak``,
        ``energielabel`` and ``woningtype``. Empty dict on any failure.
    """
    try:
        soup = fetch_soup(detail_url)

        kv: dict[str, str] = {}
        for div in soup.select("div.listing_detail"):
            txt = div.get_text(strip=True)
            if ":" in txt:
                label, _, value = txt.partition(":")
                kv[label.strip().lower()] = value.strip()

        raw_pc = kv.get("postcode") or ""
        pc_match = re.search(r"\d{4}\s*[A-Z]{2}", raw_pc.upper())
        postcode = pc_match.group(0).replace(" ", "") if pc_match else None

        return {
            "postcode": postcode,
            "slaapkamers": kv.get("slaapkamers"),
            "woonoppervlak": kv.get("woning grootte"),
            "energielabel": kv.get("energieklasse"),
            "woningtype": kv.get("soort woning"),
        }
    except Exception as e:
        log.warning("88makelaars: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_88makelaars() -> list[RawListing]:
    """Fetch 88 Makelaars listings (Den Haag only).

    Walks ``/ons-aanbod/`` (page 1) and ``/ons-aanbod/page/N/`` (later pages)
    until a page has no ``div.property_listing`` cards or fewer than 10 of
    them. Cards whose city is not "den haag" or whose price exceeds
    ``config.MAX_PRICE`` are skipped before the detail fetch.

    Returns:
        The parsed listings; a card that fails to parse is logged and skipped.

    Raises:
        Whatever ``fetch_soup`` raises for an overview page.
    """
    listings = []
    page = 1

    while True:
        if page == 1:
            url = f"{_88_BASE}/ons-aanbod/"
        else:
            url = f"{_88_BASE}/ons-aanbod/page/{page}/"

        soup = fetch_soup(url)
        cards = soup.select("div.property_listing")
        if not cards:
            break

        for card in cards:
            try:
                # URL from carousel
                a_tag = card.select_one(".property_unit_carousel a[href]")
                if not a_tag:
                    continue
                detail_url = a_tag["href"]
                if not detail_url.startswith("http"):
                    detail_url = _88_BASE + detail_url

                # City — last link in property_location_image
                loc_links = card.select(".property_location_image a")
                stad = loc_links[-1].get_text(strip=True) if loc_links else None
                if not stad or stad.lower() != "den haag":
                    continue

                # Price
                prijs = parse_prijs(_text(card, ".listing_unit_price_wrapper"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status
                status_text = (_text(card, ".ribbon-inside") or "").lower()
                status = _88_STATUS_MAP.get(status_text, "beschikbaar")

                # Address
                adres = _text(card, "h4 a") or _text(card, "h4")

                # Surface + rooms
                woonoppervlak_card = parse_m2(_text(card, "span.infosize"))
                kamers_card = None
                rooms_txt = _text(card, "span.inforoom")
                if rooms_txt:
                    m = re.search(r"(\d+)", rooms_txt)
                    kamers_card = int(m.group(1)) if m else None

                # Hero: first active carousel image (src, else data-original)
                img = card.select_one(".item.active img")
                hero = (img.get("src") or img.get("data-original")) if img else None

                kk = _88makelaars_detail(detail_url)

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="88makelaars",
                    status=status,
                    adres=adres,
                    postcode=kk.get("postcode"),
                    stad="Den Haag",
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    kamers=kamers_card,
                    # parse_m2 returns the first run of digits as int (or
                    # None), so a value with extra text no longer raises and
                    # loses the listing.
                    slaapkamers=parse_m2(kk.get("slaapkamers")),
                    energielabel=kk.get("energielabel"),
                ))
                # NOTE(review): this only leaves the per-page loop; paging
                # continues in dev mode — confirm that is intended.
                if config.APP_ENV == "dev":
                    break

            except Exception as e:
                log.warning("88makelaars: parse fout: %s", e)

        # A page with fewer than 10 cards is treated as the last page.
        if len(cards) < 10:
            break
        page += 1

    log.info("88makelaars: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# Borgdorff Makelaars (Den Haag / Westland) — SURE WordPress plugin
# ---------------------------------------------------------------------------
# Covers Den Haag ('s-gravenhage), Monster, Naaldwijk etc. Filter for Den Haag.
# Same SURE plugin as Schieland Borsboom but uses a.card--house (double dash).
# No postcode on detail page.
_BORGDORFF_BASE = "https://www.borgdorff.nl"

# Lower-cased city names that count as Den Haag
_BORGDORFF_DEN_HAAG = {"'s-gravenhage", "den haag"}

# Badge CSS class on the card label -> normalized status
_BORGDORFF_BADGE_MAP = {
    "badge--info": "beschikbaar",
    "badge--warning": "onder_bod",
    "badge--danger": "verkocht",
}

# Lower-cased "status" kenmerk on the detail page -> normalized status
_BORGDORFF_DETAIL_STATUS_MAP = {
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht": "verkocht",
}


def _borgdorff_detail(detail_url: str) -> dict:
    """Fetch Borgdorff detail page; extract #kenmerken li span pairs.

    The first two ``span`` elements of every ``#kenmerken li`` are read as a
    (label, value) pair; labels are lower-cased.

    Returns:
        Dict of raw string values (``status`` lower-cased, others as found or
        None). Empty dict on any failure.
    """
    try:
        soup = fetch_soup(detail_url)

        kv: dict[str, str] = {}
        for li in soup.select("#kenmerken li"):
            spans = li.select("span")
            if len(spans) >= 2:
                label = spans[0].get_text(strip=True).lower()
                value = spans[1].get_text(strip=True)
                kv[label] = value

        return {
            "status": kv.get("status", "").lower(),
            "woningtype": kv.get("soort woonhuis") or kv.get("soort woning") or kv.get("soort bouw"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("gebruiksoppervlakte wonen") or kv.get("gebruiksoppervlakte"),
            "perceeloppervlak": kv.get("perceeloppervlakte"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energielabel"),
        }
    except Exception as e:
        log.warning("borgdorff: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_borgdorff() -> list[RawListing]:
    """Fetch Borgdorff listings; only Den Haag / 's-gravenhage, only koop.

    Walks the ``/wonen`` overview (page 1) and ``/wonen/page/N/`` (later
    pages) with ``sure_koop_huur=koop`` until a page has no ``a.card--house``
    cards or fewer than 15 of them. Detail-page values win over card values
    where both exist; a known detail-page status overrides the badge status.

    Returns:
        The parsed listings; a card that fails to parse is logged and skipped.

    Raises:
        Whatever ``fetch_soup`` raises for an overview page.
    """
    listings = []
    page = 1

    while True:
        if page == 1:
            url = f"{_BORGDORFF_BASE}/wonen?sure_koop_huur=koop"
        else:
            url = f"{_BORGDORFF_BASE}/wonen/page/{page}/?sure_koop_huur=koop"

        soup = fetch_soup(url)
        cards = soup.select("a.card--house")
        if not cards:
            break

        for card in cards:
            try:
                href = card.get("href", "")
                if not href:
                    continue
                detail_url = href if href.startswith("http") else _BORGDORFF_BASE + href

                # Filter: only Den Haag
                stad_el = card.select_one("p.lead-two")
                stad = stad_el.get_text(strip=True) if stad_el else None
                if not stad or stad.lower() not in _BORGDORFF_DEN_HAAG:
                    continue

                # Price — filter early, before the detail fetch
                prijs = parse_prijs(_text(card, "p.strong"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status from badge class
                # NOTE(review): cards are "a.card--house" but the label is
                # selected as "card-house__label" — confirm against markup.
                label_span = card.select_one("span.card-house__label")
                status = "beschikbaar"
                if label_span:
                    for cls in label_span.get("class", []):
                        if cls in _BORGDORFF_BADGE_MAP:
                            status = _BORGDORFF_BADGE_MAP[cls]
                            break

                # Address
                adres = _text(card, "h4")

                # Hero: srcset of the min-width:1280px source, else the
                # data-src of the first lazy-loaded img
                src_tag = card.select_one('picture source[media="(min-width:1280px)"]')
                hero = src_tag.get("srcset") if src_tag else None
                if not hero:
                    img = card.select_one("img[data-src]")
                    hero = img.get("data-src") if img else None
                if hero and not hero.startswith("http"):
                    hero = _BORGDORFF_BASE + hero

                # Surface + bedrooms from data icons
                woonoppervlak_card = None
                slaapkamers_card = None
                for data_div in card.select("div.data"):
                    inner = data_div.select_one("p.small")
                    if not inner:
                        continue
                    txt = inner.get_text(strip=True)
                    if data_div.select_one("i.icon-surface"):
                        woonoppervlak_card = parse_m2(txt)
                    elif data_div.select_one("i.icon-bed"):
                        m = re.search(r"(\d+)", txt)
                        slaapkamers_card = int(m.group(1)) if m else None

                kk = _borgdorff_detail(detail_url)

                # Refine status from detail page; unknown or missing labels
                # keep the badge status.
                status = _BORGDORFF_DETAIL_STATUS_MAP.get(kk.get("status", ""), status)

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="borgdorff",
                    status=status,
                    adres=adres,
                    postcode=None,  # not exposed by broker
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    # parse_m2 returns the first run of digits as int (or
                    # None). Using it for the plain integer fields keeps a
                    # listing when a value carries extra text; int() raised
                    # there and the listing was dropped.
                    bouwjaar=parse_m2(kk.get("bouwjaar")),
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    slaapkamers=parse_m2(kk.get("slaapkamers")) or slaapkamers_card,
                    energielabel=kk.get("energielabel"),
                ))
                # NOTE(review): this only leaves the per-page loop; paging
                # continues in dev mode — confirm that is intended.
                if config.APP_ENV == "dev":
                    break

            except Exception as e:
                log.warning("borgdorff: parse fout: %s", e)

        # A page with fewer than 15 cards is treated as the last page.
        if len(cards) < 15:
            break
        page += 1

    log.info("borgdorff: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
#
# SCRAPERS — export all active SSR adapters here
# ---------------------------------------------------------------------------
# Registry mapping each broker key to its fetch function. Per the module
# docstring every scraper is a zero-argument function returning
# list[RawListing]; register new adapters by adding an entry below.

SCRAPERS = {
    'ankebodewes': fetch_ankebodewes,
    'woongoed': fetch_woongoed,
    'dewittegarantiemakelaars': fetch_dewittegarantiemakelaars,
    'wassenaar': fetch_wassenaar,
    'dens': fetch_dens,
    '3dmakelaars': fetch_3dmakelaars,
    'dupont': fetch_dupont,
    'schielandborsboom': fetch_schielandborsboom,
    'vansilfhout': fetch_vansilfhout,
    'vwmakelaars': fetch_vwmakelaars,
    'roepman': fetch_roepman,
    'zomakelaars': fetch_zomakelaars,
    'post': fetch_post,
    'morris': fetch_morris,
    'olsthoorn': fetch_olsthoorn,
    '88makelaars': fetch_88makelaars,
    'borgdorff': fetch_borgdorff,
}