# ---------------------------------------------------------------------------
# Vandriel Makelaardij (Schiedam) — OG Online / realtime-listings
# ---------------------------------------------------------------------------

_VANDRIEL_BASE = "https://www.vandrielmakelaardij.nl"

# `statusOrig` values that mark rentals; these items are skipped entirely.
_VANDRIEL_SKIP = {"rented", "rented_ur"}

# Maps the API's `statusOrig` value to the status string stored on RawListing.
_VANDRIEL_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}


def fetch_vandriel() -> list[RawListing]:
    """Fetch Vandriel listings from the realtime-listings JSON endpoint.

    An item is kept only when all of the following hold:
    - ``isSales`` is truthy;
    - ``statusOrig`` is not in ``_VANDRIEL_SKIP``;
    - ``city`` equals "schiedam" (case-insensitive);
    - ``salesPrice`` does not exceed ``config.MAX_PRICE`` (a missing or null
      price counts as 0 and therefore passes the filter).

    Items without a ``url`` are skipped with a warning, because the URL is the
    only field that is required to build a listing.

    Returns:
        One RawListing per item that passed the filters. Empty or zero-valued
        optional fields are stored as None.
    """
    data = fetch_json(
        f"{_VANDRIEL_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )

    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _VANDRIEL_SKIP:
            continue
        if (item.get("city") or "").lower() != "schiedam":
            continue
        # `or 0` also covers an explicit null in the JSON; a `.get` default
        # only applies when the key is absent, and `None > int` raises.
        if (item.get("salesPrice") or 0) > config.MAX_PRICE:
            continue

        rel_url = item.get("url")
        if not rel_url:
            # One malformed item must not abort the whole fetch.
            log.warning(
                "vandriel: listing zonder url overgeslagen: %s",
                item.get("address"),
            )
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None

        # str() so a numeric year (JSON number) is accepted as well as "1990".
        raw_year = str(item.get("dateOfConstruction") or "")
        bouwjaar = int(raw_year) if raw_year.isdecimal() else None

        listings.append(RawListing(
            url=_VANDRIEL_BASE + rel_url,
            source_makelaar="vandriel",
            status=_VANDRIEL_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=item.get("salesPrice") or None,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            # `or None` already turns a plot surface of 0 into None.
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=item.get("energyLabel") or None,
            hero_image_url=item.get("photo") or None,
        ))

    log.info("vandriel: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# Shared helpers for the Van Herk and Van Oord adapters
# ---------------------------------------------------------------------------

def _digits_to_int(value) -> "int | None":
    """Return ``int(value)`` when value is a string of decimal digits, else None.

    The detail helpers below return dict keys whose value may be None, so
    callers cannot rely on a ``.get(key, "")`` default to obtain a string.
    """
    if isinstance(value, str) and value.isdecimal():
        return int(value)
    return None


def _first_srcset_url(srcset) -> "str | None":
    """Return the URL of the first image candidate in a ``srcset`` value.

    A srcset is a comma-separated list of "URL [descriptor]" candidates. The
    first whitespace-separated token is the first candidate's URL, possibly
    with a trailing comma when no descriptor follows it.
    """
    tokens = (srcset or "").split()
    if not tokens:
        return None
    return tokens[0].rstrip(",") or None


# ---------------------------------------------------------------------------
# Van Herk Makelaars (Schiedam) — SURE WordPress plugin (card-house)
# ---------------------------------------------------------------------------
# Listings filtered by city + price in URL; pagination via /page/{N}/.
# Detail page: div.features ul.unstyled li with two span elements
# (label + value).

_VANHERK_BASE = "https://www.vanherk.nl"
_VANHERK_LISTINGS = "https://www.vanherk.nl/wonen/aanbod/zoeken/schiedam/200000-300000/"

# A page with fewer cards than this is treated as the last page.
_VANHERK_PAGE_SIZE = 15
# Upper bound on pages fetched, so a site that keeps returning full pages
# cannot make the pagination loop run forever.
_VANHERK_MAX_PAGES = 50

_VANHERK_STATUS_MAP = {
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht": "verkocht",
}


def _vanherk_detail(detail_url: str) -> dict:
    """Fetch a Van Herk detail page and extract kenmerken from div.features.

    Returns:
        A dict with keys "status" (lower-cased, "" when absent) and
        "bouwjaar", "woonoppervlak", "slaapkamers" (raw text, or None when the
        page has no such row). Returns an empty dict when fetching or parsing
        fails; the failure is logged as a warning (best-effort enrichment).
    """
    try:
        soup = fetch_soup(detail_url)
        kv: dict[str, str] = {}
        for li in soup.select("div.features ul.unstyled li"):
            spans = li.select("span")
            if len(spans) >= 2:
                label = spans[0].get_text(strip=True).lower()
                value = spans[1].get_text(strip=True)
                kv[label] = value
        return {
            "status": kv.get("status", "").lower(),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("woonoppervlakte"),
            "slaapkamers": kv.get("aantal slaapkamers"),
        }
    except Exception as e:
        log.warning("vanherk: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_vanherk() -> list[RawListing]:
    """Fetch Van Herk listings from the paginated, pre-filtered overview.

    City and price range are filtered by ``_VANHERK_LISTINGS`` itself; cards
    priced above ``config.MAX_PRICE`` are additionally skipped. Each card is
    enriched with data from its detail page, falling back to the card's own
    values when the detail page lacks them.

    Pagination stops on an empty page, on a page with fewer than
    ``_VANHERK_PAGE_SIZE`` cards, or after ``_VANHERK_MAX_PAGES`` pages.

    Returns:
        One RawListing per card that could be parsed. A card that raises
        during parsing is logged and skipped.
    """
    listings = []

    for page in range(1, _VANHERK_MAX_PAGES + 1):
        if page == 1:
            url = _VANHERK_LISTINGS
        else:
            url = _VANHERK_LISTINGS + f"page/{page}/"

        soup = fetch_soup(url)
        cards = soup.select("a.card-house")
        if not cards:
            break

        for card in cards:
            try:
                href = card.get("href", "")
                if not href:
                    continue
                detail_url = href if href.startswith("http") else _VANHERK_BASE + href

                # City from lead paragraph
                lead = card.select_one("p.lead")
                stad = lead.get_text(strip=True) if lead else None

                # Address from h4 (normalize whitespace incl. non-breaking spaces)
                h4 = card.select_one("h4")
                adres = " ".join(h4.get_text().split()) if h4 else None

                # Price from .subtitle
                subtitle = card.select_one("p.subtitle")
                prijs = parse_prijs(subtitle.get_text() if subtitle else None)
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Hero image: source for the widest breakpoint. Only the first
                # srcset candidate's URL is used, without any descriptor.
                src_tag = card.select_one('picture source[media="(min-width:1280px)"]')
                hero = _first_srcset_url(src_tag.get("srcset")) if src_tag else None
                if hero and not hero.startswith("http"):
                    hero = _VANHERK_BASE + hero

                # Card data icons: surface, bedrooms, energy label
                woonoppervlak_card = None
                slaapkamers_card = None
                energielabel_card = None
                for data_div in card.select("div.data"):
                    classes = data_div.get("class") or []
                    if "d-none" in classes:
                        continue
                    if "data-energie" in classes:
                        inner = data_div.select_one(".date__inner")
                        energielabel_card = inner.get_text(strip=True) if inner else None
                    elif data_div.select_one("i.icon-surface"):
                        inner = data_div.select_one("span.date__inner")
                        woonoppervlak_card = parse_m2(inner.get_text(strip=True) if inner else None)
                    elif data_div.select_one("i.icon-bed"):
                        inner = data_div.select_one("span.date__inner")
                        txt = inner.get_text(strip=True) if inner else None
                        m = re.search(r"(\d+)", txt) if txt else None
                        slaapkamers_card = int(m.group(1)) if m else None

                kk = _vanherk_detail(detail_url)

                status = _VANHERK_STATUS_MAP.get(kk.get("status", ""), "beschikbaar")

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="vanherk",
                    status=status,
                    adres=adres,
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    # Detail values may be None (row absent on the page);
                    # _digits_to_int handles that instead of raising and
                    # thereby dropping the whole listing.
                    bouwjaar=_digits_to_int(kk.get("bouwjaar")),
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    slaapkamers=_digits_to_int(kk.get("slaapkamers")) or slaapkamers_card,
                    energielabel=energielabel_card,
                ))
                # In dev, stop after the first listing of the page to limit
                # the number of detail-page requests.
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                log.warning("vanherk: parse fout: %s", e)

        if len(cards) < _VANHERK_PAGE_SIZE:
            break
    else:
        # Loop ran out of pages without hitting a stop condition.
        log.warning(
            "vanherk: paginalimiet (%d) bereikt; resultaat mogelijk onvolledig",
            _VANHERK_MAX_PAGES,
        )

    log.info("vanherk: %d listings opgehaald", len(listings))
    return listings


# ---------------------------------------------------------------------------
# Van Oord Makelaardij (Delft + Schiedam) — Elementor WordPress
# ---------------------------------------------------------------------------
# Two filtered listing URLs (one per city). Cards are div.e-loop-item.
# Detail page: ul.rw-object-features-list li with label/value spans.

_VANOORD_BASE = "https://www.vanoordmakelaardij.nl"
_VANOORD_LISTINGS = [
    "https://www.vanoordmakelaardij.nl/aanbod/?view=list&plaats=delft&prijs_vanaf=225000&prijs_tot=300000",
    "https://www.vanoordmakelaardij.nl/aanbod/?view=list&plaats=schiedam&prijs_vanaf=225000&prijs_tot=300000",
]

_VANOORD_STATUS_MAP = {
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht": "verkocht",
}


def _vanoord_detail(detail_url: str) -> dict:
    """Fetch a Van Oord detail page and extract kenmerken from rw-object-features-list.

    Returns:
        A dict with keys "status" (lower-cased, "" when absent) and
        "bouwjaar", "woonoppervlak", "kamers", "slaapkamers", "energielabel"
        (raw text, or None when the page has no such row). Returns an empty
        dict when fetching or parsing fails; the failure is logged as a
        warning (best-effort enrichment).
    """
    try:
        soup = fetch_soup(detail_url)
        kv: dict[str, str] = {}
        for li in soup.select("ul.rw-object-features-list li"):
            label_el = li.select_one("span.rw-object-list-label")
            value_el = li.select_one("span.rw-object-list-value")
            if label_el and value_el:
                label = label_el.get_text(strip=True).lower()
                value = value_el.get_text(strip=True)
                kv[label] = value
        return {
            "status": kv.get("status", "").lower(),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("woonoppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("slaapkamers"),
            "energielabel": kv.get("energieklasse"),
        }
    except Exception as e:
        log.warning("vanoord: detail fetch fout %s: %s", detail_url, e)
        return {}


def fetch_vanoord() -> list[RawListing]:
    """Fetch Van Oord listings from the overview pages in ``_VANOORD_LISTINGS``.

    City and price range are filtered by the listing URLs themselves; cards
    priced above ``config.MAX_PRICE`` are additionally skipped. A detail URL
    that appears on more than one overview page is processed once. Each card
    is enriched from its detail page, and a status found there overrides the
    status shown on the card.

    Returns:
        One RawListing per unique card that could be parsed. A card that
        raises during parsing is logged and skipped.
    """
    seen: set[str] = set()
    listings = []

    for listing_url in _VANOORD_LISTINGS:
        soup = fetch_soup(listing_url)
        cards = soup.select("div.e-loop-item")

        for card in cards:
            try:
                # Detail URL from the anchor inside the h3 heading
                a_tag = card.select_one("h3.elementor-heading-title a[href]")
                if not a_tag:
                    continue
                detail_url = a_tag["href"]
                if not detail_url.startswith("http"):
                    detail_url = _VANOORD_BASE + detail_url
                if detail_url in seen:
                    continue
                seen.add(detail_url)

                # Status from rw-status-label widget class
                status_el = card.select_one("[class*='rw-status-label--']")
                status = "beschikbaar"
                if status_el:
                    status_text = status_el.get_text(strip=True).lower()
                    status = _VANOORD_STATUS_MAP.get(status_text, "beschikbaar")

                # City from h4
                h4 = card.select_one("h4.elementor-heading-title")
                stad = h4.get_text(strip=True) if h4 else None

                # Address from the h3 anchor text
                adres = " ".join(a_tag.get_text().split())

                # Price from the first h3 heading that has no anchor child
                prijs = None
                for h3 in card.select("h3.elementor-heading-title"):
                    if not h3.select_one("a"):
                        prijs = parse_prijs(h3.get_text())
                        break
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Card icon list: [0]=surface [1]=rooms [2]=energy
                icon_items = card.select("ul.elementor-icon-list-items li span.elementor-icon-list-text")
                woonoppervlak_card = parse_m2(icon_items[0].get_text()) if len(icon_items) > 0 else None
                kamers_card = None
                if len(icon_items) > 1:
                    m = re.search(r"(\d+)", icon_items[1].get_text())
                    kamers_card = int(m.group(1)) if m else None
                energielabel_card = icon_items[2].get_text(strip=True) if len(icon_items) > 2 else None

                kk = _vanoord_detail(detail_url)

                # A recognised status on the detail page wins over the card.
                detail_status = _VANOORD_STATUS_MAP.get(kk.get("status", ""), "")
                if detail_status:
                    status = detail_status

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="vanoord",
                    status=status,
                    adres=adres,
                    stad=stad,
                    prijs=prijs,
                    # Detail values may be None (row absent on the page);
                    # _digits_to_int handles that instead of raising and
                    # thereby dropping the whole listing.
                    bouwjaar=_digits_to_int(kk.get("bouwjaar")),
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    kamers=_digits_to_int(kk.get("kamers")) or kamers_card,
                    slaapkamers=_digits_to_int(kk.get("slaapkamers")),
                    energielabel=kk.get("energielabel") or energielabel_card,
                ))
                # In dev, stop after the first listing of the page to limit
                # the number of detail-page requests.
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                log.warning("vanoord: parse fout: %s", e)

    log.info("vanoord: %d listings opgehaald", len(listings))
    return listings