diff --git a/makelaars.md b/makelaars.md index beffd3a..3698f5f 100644 --- a/makelaars.md +++ b/makelaars.md @@ -1,4 +1,4 @@ -# Verkoopmakelaars Delft & Schiedam +# Verkoopmakelaars Delft, Leiden, Den Haag & Schiedam ## Delft @@ -13,13 +13,17 @@ | [x] | ZO makelaars | zomakelaars.nl | Van Foreestweg 4 | | [ ] | Marloes Makelaars | — | Maerten Trompstraat 28 | | [ ] | Makelaarskantoor J.E. Mouthaan | — | Julianalaan 43 | -| [ ] | Olsthoorn Makelaars Delft | olsthoornmakelaars.nl | Noordeinde 51 | -| [ ] | Post Makelaardij (v/h Bayense) | postmakelaardij.nl | Spoorsingel 1a | -| [ ] | Morris NVM Makelaars | morrismakelaardij.nl | — | +| [x] | Olsthoorn Makelaars Delft | olsthoornmakelaars.nl | Noordeinde 51 | +| [x] | Post Makelaardij (v/h Bayense) | postmakelaardij.nl | Spoorsingel 1a | +| [x] | Morris NVM Makelaars | morrismakelaardij.nl | — | | [ ] | Prinsenstad Makelaardij | — | — | | [ ] | Oude Delft Makelaardij | — | — | | [ ] | Dijksman Woningmakelaars | — | — | | [ ] | CORPOwonen | — | — | +| [ ] | Bergklis Makelaars | bergklis.nl | — | +| [ ] | Van Gulden Makelaardij | vanguldenmakelaardij.nl | Zaïrestraat 1 | +| [ ] | Van der Togt Makelaardij | vdtmakelaardij.nl | — (Voorburg, actief in Delft) | + ## Schiedam @@ -38,6 +42,19 @@ | [x] | Schieland Borsboom NVM Makelaars | schielandborsboom.nl | (Rotterdam, actief in Schiedam) | +## Den Haag + +| Done | Naam | Website | Adres | +|------|------|---------|-------| +| [ ] | Yuvam Makelaardij | yuvammakelaardij.nl | — | +| [ ] | 88 Makelaars | 88makelaars.nl | — | +| [ ] | DIVA Makelaars | divamakelaars.nl | — | +| [ ] | Elzenaar NVM Makelaars | elzenaar.com | — | +| [ ] | Frisia Makelaars | frisiamakelaars.nl | — | +| [ ] | Borgdorff Makelaars | borgdorff.nl | — (vestiging Den Haag) | +| [ ] | SMASH Makelaars | smashmakelaars.nl | — | +| [ ] | DOEN NVM Makelaars | doenmakelaars.com | Doezastraat 30 (Leiden, ook actief in Den Haag) | + ## Leiden | Done | Naam | Website | Adres | diff --git a/src/adapters/ssr.py 
# src/adapters/ssr.py — adapters added after fetch_roepman()

# ---------------------------------------------------------------------------
# Post Makelaardij (formerly Bayense) — Delft and surroundings
# ---------------------------------------------------------------------------
# Custom Tailwind CSS site; covers Delft, Pijnacker, Rijswijk etc.
# Only Delft listings are kept.

_POST_BASE = "https://www.postmakelaardij.nl"

# A listing page with fewer cards than this is treated as the last page.
_POST_PAGE_SIZE = 12

# Safety cap so a site that keeps returning full pages cannot loop forever.
_POST_MAX_PAGES = 50

_POST_STATUS_MAP = {
    "te koop": "beschikbaar",
    "onder bod": "onder_bod",
    "verkocht": "verkocht",
}

# "2613 BD Delft", "2613BD Delft" and "2613 BDDelft" all split into
# (digits, letters, city).
_POST_LOCATION_RE = re.compile(r"^(\d{4})\s*([A-Z]{2})\s*(\S.*)$")

_POST_ENERGY_PREFIX = "energielabel-"


def _post_split_location(text: str) -> "tuple[str, str] | None":
    """Split a card's postcode text into ``(postcode, city)``.

    Returns ``None`` when the text cannot be split. The postcode is returned
    without the internal space (e.g. ``"2613BD"``).
    """
    text = " ".join(text.split())
    match = _POST_LOCATION_RE.match(text)
    if match:
        return match.group(1) + match.group(2), match.group(3)

    # Fallback: plain whitespace split ("2613 bd Delft").
    parts = text.split()
    if len(parts) < 3:
        return None
    return parts[0] + parts[1], " ".join(parts[2:])


def _post_detail(detail_url: str) -> dict:
    """Fetch a Post Makelaardij detail page and extract its key features.

    Returns a dict with ``woonoppervlak``, ``perceeloppervlak``,
    ``slaapkamers`` and ``energielabel`` (each possibly ``None``). Any
    failure is logged and yields an empty dict, so a broken detail page
    never discards the listing itself.
    """
    try:
        soup = fetch_soup(detail_url)

        # The energy label is only exposed as a CSS class: energielabel-{letter}.
        energielabel = next(
            (
                cls[len(_POST_ENERGY_PREFIX):].upper()
                for el in soup.select("[class]")
                for cls in el.get("class", [])
                if cls.startswith(_POST_ENERGY_PREFIX)
                and len(cls) > len(_POST_ENERGY_PREFIX)
            ),
            None,
        )

        # Living area, plot area and bedrooms come from the icon spans.
        woonoppervlak = None
        perceeloppervlak = None
        slaapkamers = None
        for span in soup.select("span.object-info-icon-text"):
            txt = span.get_text(strip=True)
            lowered = txt.lower()  # match keywords regardless of capitalisation
            if "slaapkamer" in lowered:
                m = re.search(r"(\d+)", txt)
                slaapkamers = int(m.group(1)) if m else None
            elif "perceel" in lowered:
                perceeloppervlak = parse_m2(txt)
            elif "m²" in lowered or "m2" in lowered:
                woonoppervlak = parse_m2(txt)

        return {
            "woonoppervlak": woonoppervlak,
            "perceeloppervlak": perceeloppervlak,
            "slaapkamers": slaapkamers,
            "energielabel": energielabel,
        }
    except Exception as e:
        log.warning("post: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_post() -> list[RawListing]:
    """Fetch Post Makelaardij listings; only Delft, only for-sale objects.

    Walks the paginated overview, skips cards outside Delft or above
    ``config.MAX_PRICE``, and enriches each remaining card with one detail
    page request. In the ``dev`` environment the crawl stops after the first
    accepted listing.
    """
    listings = []
    dev_mode = config.APP_ENV == "dev"

    for page in range(1, _POST_MAX_PAGES + 1):
        url = f"{_POST_BASE}/woningaanbod/koop?page={page}"
        soup = fetch_soup(url)
        cards = soup.select("article")
        if not cards:
            break

        for card in cards:
            try:
                # URL — first link in the image slider.
                a_tag = card.select_one("a[href]")
                if not a_tag:
                    continue
                href = a_tag["href"]
                detail_url = href if href.startswith("http") else _POST_BASE + href

                # Postcode + city from span.custom-postcode-text.
                pc_el = card.select_one("span.custom-postcode-text")
                if not pc_el:
                    continue
                location = _post_split_location(pc_el.get_text(strip=True))
                if location is None:
                    continue
                postcode, stad = location

                # Filter: only Delft.
                if stad.lower() != "delft":
                    continue

                # Price — filter early, before the detail request.
                prijs = parse_prijs(_text(card, "span.price-block"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status from the span.status text; unknown texts count as available.
                status_text = (_text(card, "span.status") or "").lower()
                status = _POST_STATUS_MAP.get(status_text, "beschikbaar")

                adres = _text(card, "h4.custom-address-text")

                # Hero: first img in the article. A missing src attribute must
                # not discard the listing, so use .get() instead of indexing.
                img = card.select_one("img")
                hero = img.get("src") if img else None
                if hero and hero.startswith("/") and not hero.startswith("//"):
                    hero = _POST_BASE + hero

                kk = _post_detail(detail_url)

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="post",
                    status=status,
                    adres=adres,
                    postcode=postcode,
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woonoppervlak=kk.get("woonoppervlak"),
                    perceeloppervlak=kk.get("perceeloppervlak"),
                    slaapkamers=kk.get("slaapkamers"),
                    energielabel=kk.get("energielabel"),
                ))
                if dev_mode:
                    break
            except Exception as e:
                log.warning("post: parse fout: %s", e)

        # In dev one listing is enough; do not keep crawling further pages.
        if dev_mode and listings:
            break
        if len(cards) < _POST_PAGE_SIZE:
            break
    else:
        log.warning("post: maximum van %d pagina's bereikt", _POST_MAX_PAGES)

    log.info("post: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# Morris NVM Makelaars (Delft) — Realworks CMS
# ---------------------------------------------------------------------------

def fetch_morris() -> list[RawListing]:
    """Fetch Morris NVM Makelaars listings through ``fetch_realworks``."""
    return fetch_realworks("https://www.morrismakelaardij.nl", "morris")


# ---------------------------------------------------------------------------
# Olsthoorn Makelaars Delft (SURE WordPress plugin)
# ---------------------------------------------------------------------------
# Covers Delft, Den Haag, Naaldwijk etc — we filter for Delft only.
# The postcode is not extracted for this broker; it is left as None.

_OLSTHOORN_BASE = "https://www.olsthoornmakelaars.nl"

# An overview page with fewer cards than this is treated as the last page.
_OLSTHOORN_PAGE_SIZE = 15

# Safety cap so a site that keeps returning full pages cannot loop forever.
_OLSTHOORN_MAX_PAGES = 50

_OLSTHOORN_STATUS_MAP = {
    "badge-available": "beschikbaar",
    "badge-bid": "onder_bod",
    "badge-option": "onder_bod",
    "badge-sold": "verkocht",
}

_OLSTHOORN_DETAIL_STATUS_MAP = {
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht": "verkocht",
}


def _olsthoorn_int(value):
    """Return the first integer found in *value*, or ``None``.

    Tolerates decorated values such as ``"5 (waarvan 3 slaapkamers)"`` or
    ``"ca. 1930"``; a bare ``int()`` would raise on those and the caller's
    broad ``except`` would then drop the whole listing.
    """
    if not value:
        return None
    match = re.search(r"\d+", str(value))
    return int(match.group()) if match else None


def _olsthoorn_detail(detail_url: str) -> dict:
    """Fetch an Olsthoorn detail page and read the ``#kenmerken`` list.

    Each ``li`` with at least two ``span`` children is taken as a
    label/value pair (label lower-cased). Returns the raw string values
    under fixed keys; any failure is logged and yields an empty dict.
    """
    try:
        soup = fetch_soup(detail_url)
        kv: dict[str, str] = {}
        for li in soup.select("#kenmerken li"):
            spans = li.select("span")
            if len(spans) >= 2:
                label = spans[0].get_text(strip=True).lower()
                kv[label] = spans[1].get_text(strip=True)
        return {
            "status": kv.get("status", "").lower(),
            "woningtype": kv.get("soort object") or kv.get("soort woning") or kv.get("soort bouw"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("gebruiksoppervlakte"),
            "perceeloppervlak": kv.get("perceeloppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energielabel"),
        }
    except Exception as e:
        log.warning("olsthoorn: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_olsthoorn() -> list[RawListing]:
    """Fetch Olsthoorn Makelaars listings; only Delft, only for-sale objects.

    Walks the paginated overview, skips cards outside Delft or above
    ``config.MAX_PRICE``, and merges card data with the detail page (detail
    values win, card values are the fallback). In the ``dev`` environment the
    crawl stops after the first accepted listing.
    """
    listings = []
    dev_mode = config.APP_ENV == "dev"

    for page in range(1, _OLSTHOORN_MAX_PAGES + 1):
        # The first page has no /page/N/ segment.
        if page == 1:
            url = f"{_OLSTHOORN_BASE}/wonen?sure_koop_huur=koop"
        else:
            url = f"{_OLSTHOORN_BASE}/wonen/page/{page}/?sure_koop_huur=koop"

        soup = fetch_soup(url)
        cards = soup.select("a.card-house")
        if not cards:
            break

        for card in cards:
            try:
                href = card.get("href", "")
                if not href:
                    continue
                detail_url = href if href.startswith("http") else _OLSTHOORN_BASE + href

                # Filter: only Delft (the card title holds the city).
                stad_el = card.select_one("h2.card__title")
                stad = stad_el.get_text(strip=True) if stad_el else None
                if not stad or stad.lower() != "delft":
                    continue

                # Price from the bold tag — filter early, before the detail fetch.
                prijs_b = card.select_one("b")
                prijs = parse_prijs(prijs_b.get_text() if prijs_b else None)
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status from the badge class on the label span.
                status = "beschikbaar"
                label_span = card.select_one("span.card-house__label")
                if label_span:
                    for cls in label_span.get("class", []):
                        if cls in _OLSTHOORN_STATUS_MAP:
                            status = _OLSTHOORN_STATUS_MAP[cls]
                            break

                # Address: FIRST p element directly under div.short--info,
                # with internal whitespace collapsed.
                # NOTE(review): verify against the live markup that the first
                # paragraph (not the second) is the address line.
                adres_el = card.select_one("div.short--info > p")
                adres = " ".join(adres_el.get_text().split()) if adres_el else None

                # Hero image: the source meant for the widest viewport. A srcset
                # value may hold "url descriptor, url descriptor"; keep only
                # the first URL.
                src_tag = card.select_one('picture source[media="(min-width:1024px)"]')
                hero = src_tag.get("data-srcset") if src_tag else None
                if hero:
                    candidates = hero.split()
                    hero = candidates[0].rstrip(",") if candidates else None
                if hero and not hero.startswith("http"):
                    hero = _OLSTHOORN_BASE + hero

                # Living area, rooms and energy label from the card's data icons.
                woonoppervlak_card = None
                kamers_card = None
                energielabel_card = None
                for data_div in card.select("div.data"):
                    # NOTE(review): selector is spelled "date__inner" while the
                    # parent is "div.data" — confirm this is not a typo.
                    inner = data_div.select_one("span.date__inner")
                    if not inner:
                        continue
                    txt = inner.get_text(strip=True)
                    if data_div.select_one("i.icon-sizes"):
                        woonoppervlak_card = parse_m2(txt)
                    elif data_div.select_one("i.icon-door"):
                        kamers_card = _olsthoorn_int(txt)
                    elif data_div.select_one("i.icon-energylabel"):
                        energielabel_card = txt or None

                kk = _olsthoorn_detail(detail_url)

                # The detail page status, when recognised, overrides the badge.
                status = _OLSTHOORN_DETAIL_STATUS_MAP.get(kk.get("status", ""), status)

                kamers = _olsthoorn_int(kk.get("kamers"))
                if kamers is None:
                    kamers = kamers_card

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="olsthoorn",
                    status=status,
                    adres=adres,
                    postcode=None,  # not exposed by broker
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=_olsthoorn_int(kk.get("bouwjaar")),
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=kamers,
                    slaapkamers=_olsthoorn_int(kk.get("slaapkamers")),
                    energielabel=kk.get("energielabel") or energielabel_card,
                ))
                if dev_mode:
                    break
            except Exception as e:
                log.warning("olsthoorn: parse fout: %s", e)

        # In dev one listing is enough; do not keep crawling further pages.
        if dev_mode and listings:
            break
        if len(cards) < _OLSTHOORN_PAGE_SIZE:
            break
    else:
        log.warning("olsthoorn: maximum van %d pagina's bereikt", _OLSTHOORN_MAX_PAGES)

    log.info("olsthoorn: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# SCRAPERS — all active SSR adapters are exported here
# ---------------------------------------------------------------------------
# The module-level SCRAPERS dict gains these entries:
#     'post': fetch_post,
#     'morris': fetch_morris,
#     'olsthoorn': fetch_olsthoorn,
# tests/test_adapters.py selects the adapter under test with
# ADAPTER = SCRAPERS['post'].