""" adapters/api.py — JSON/API-based makelaars Elke scraper is een functie () -> list[RawListing]. Voeg nieuwe toe onderaan en registreer in SCRAPERS. """ import json import logging import re import time import httpx from bs4 import BeautifulSoup import config from huizenbot import RawListing log = logging.getLogger("huizenbot.api") # --------------------------------------------------------------------------- # Gedeelde HTTP helper # --------------------------------------------------------------------------- def fetch_json(url: str, *, params: dict = None, headers: dict = None) -> dict | list: """ GET request met User-Agent, timeout en Retry-After afhandeling. Raises httpx.HTTPError bij aanhoudende fouten. """ hdrs = {"User-Agent": config.USER_AGENT} if headers: hdrs.update(headers) for attempt in range(3): r = httpx.get(url, params=params, headers=hdrs, timeout=15) if r.status_code == 429: wait = int(r.headers.get("Retry-After", 60)) log.warning("429 op %s, wacht %ds", url, wait) time.sleep(wait) continue r.raise_for_status() return r.json() raise RuntimeError(f"Blijvend 429 op {url}") def _og_detail(url: str, makelaar: str) -> dict: """ Fetch an OG Online detail page and extract missing fields. OG Online sites typically expose kenmerken in one of two patterns: 1. A table/list with dt/dd or label/value span pairs 2. An energielabel CSS class (energielabel-A, energielabel-B, etc.) Returns a dict with any fields found; empty dict on failure. 
""" try: r = httpx.get( url, headers={"User-Agent": config.USER_AGENT}, timeout=15, follow_redirects=True, ) r.raise_for_status() soup = BeautifulSoup(r.text, "html.parser") # Pattern 1: energielabel CSS class on any element energielabel = None for el in soup.select("[class]"): for cls in el.get("class", []): if cls.startswith("energielabel-") and cls != "energielabel": energielabel = cls.replace("energielabel-", "").upper() break if energielabel: break # Pattern 2: kenmerken table — try dt/dd pairs first kv: dict[str, str] = {} dts = soup.select("dt") dds = soup.select("dd") for dt, dd in zip(dts, dds): kv[dt.get_text(strip=True).lower()] = dd.get_text(strip=True) # Pattern 3: ul.objectkenmerken / div.kenmerken span pairs if not kv: for li in soup.select("li"): spans = li.select("span") if len(spans) >= 2: kv[spans[0].get_text(strip=True).lower()] = spans[1].get_text(strip=True) if not energielabel: energielabel = ( kv.get("energielabel") or kv.get("energieklasse") or kv.get("energie") ) or None raw_year = kv.get("bouwjaar") or "" bouwjaar = int(raw_year) if raw_year.isdigit() else None return { "energielabel": energielabel, "bouwjaar": bouwjaar, } except Exception as e: log.warning("%s: detail fetch fout %s: %s", makelaar, url, e) return {} # --------------------------------------------------------------------------- # Bjornd # --------------------------------------------------------------------------- _BJORND_BASE = "https://www.bjornd.nl" _BJORND_SKIP = {"rented", "rented_ur"} _STATUS_MAP = { "available": "beschikbaar", "under_bid": "onder_bod", "under_option": "onder_bod", "sold": "verkocht", "sold_ur": "verkocht", } def fetch_bjornd() -> list[RawListing]: data = fetch_json( f"{_BJORND_BASE}/nl/realtime-listings/consumer", headers={"X-Requested-With": "XMLHttpRequest"}, ) listings = [] for item in data: if not item.get("isSales"): continue if item.get("statusOrig") in _BJORND_SKIP: continue if item.get("salesPrice", 0) > config.MAX_PRICE: continue detail_url = 
_BJORND_BASE + item["url"] raw_year = item.get("dateOfConstruction") or "" bouwjaar = int(raw_year) if raw_year.isdigit() else None energielabel = item.get("energyLabel") or None # Fetch detail page when API omits key fields if not energielabel or not bouwjaar: extra_kk = _og_detail(detail_url, "bjornd") energielabel = energielabel or extra_kk.get("energielabel") bouwjaar = bouwjaar or extra_kk.get("bouwjaar") listings.append(RawListing( url=detail_url, source_makelaar="bjornd", status=_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"), adres=item.get("address") or None, postcode=item.get("zipcode") or None, stad=item.get("city") or None, prijs=item.get("salesPrice") or None, woningtype=item.get("type") or None, woonoppervlak=item.get("livingSurface") or None, perceeloppervlak=item.get("plotSurface") or None, kamers=item.get("rooms") or None, slaapkamers=item.get("bedrooms") or None, bouwjaar=bouwjaar, energielabel=energielabel, hero_image_url=item.get("photo") or None, extra=json.dumps({ "balcony": item.get("balcony"), "garden": item.get("garden"), "mainType": item.get("mainType"), "buildType": item.get("buildType"), "district": item.get("district"), "lat": item.get("lat"), "lng": item.get("lng"), "isFurnished": item.get("isFurnished"), "hasOpenHouse": item.get("hasOpenHouse"), "description": item.get("description"), "photos": item.get("photos"), }, ensure_ascii=False), )) if config.APP_ENV == "dev": break log.info("bjornd: %d koopwoningen opgehaald", len(listings)) return listings # --------------------------------------------------------------------------- # Ooms # --------------------------------------------------------------------------- _OOMS_BASE = "https://ooms.com" _OOMS_CITIES = {"Delft", "Schiedam", "Rotterdam", "Leiden", "Voorburg", "Pijnacker"} _OOMS_SKIP_STATUS = {"verhuurd", "verhuurd onder voorbehoud"} _OOMS_STATUS_MAP = { "beschikbaar": "beschikbaar", "onder bod": "onder_bod", "onder optie": "onder_bod", "verkocht": "verkocht", "verkocht 
onder voorbehoud":"verkocht", } def fetch_ooms() -> list[RawListing]: data = fetch_json(f"{_OOMS_BASE}/api/properties/available.json") listings = [] for item in data.get("objects", []): if item.get("buy_or_rent") != "buy": continue if item.get("place") not in _OOMS_CITIES: continue if item.get("buy_price", 0) > config.MAX_PRICE: continue status_raw = item.get("availability_status", "") if status_raw in _OOMS_SKIP_STATUS: continue hnr = item.get("house_number", "") add = item.get("house_number_addition") or "" adres = f"{item.get('street_name', '')} {hnr}{(' ' + add) if add else ''}".strip() main_images = item.get("realworks_main_images") or item.get("realworks_images") or [] hero = None if main_images: sizes = main_images[0].get("sizes") or [] best = max(sizes, key=lambda s: s.get("width", 0), default=None) if best: hero = _OOMS_BASE + best["imageUrl"] perceel = item.get("parcel_surface") or None if perceel == 0: perceel = None listings.append(RawListing( url=item["url"], source_makelaar="ooms", datum_aanmelding=item.get("publish_date", "")[:10] or None, status=_OOMS_STATUS_MAP.get(status_raw, "beschikbaar"), adres=adres or None, postcode=(item.get("zip_code") or "").replace(" ", "") or None, stad=item.get("place") or None, prijs=item.get("buy_price") or None, woningtype=item.get("appartment_characteristic") or item.get("residential_building_type") or None, woonoppervlak=item.get("usable_area_living_function") or None, perceeloppervlak=perceel, kamers=item.get("amount_of_rooms") or None, slaapkamers=item.get("amount_of_bedrooms") or None, hero_image_url=hero, extra={ "office": item.get("office", {}).get("name"), "locations": item.get("locations"), "garden_types": item.get("garden_types"), "lat": item.get("lat"), "lng": item.get("lng"), "object_code": item.get("object_code"), }, )) log.info("ooms: %d listings opgehaald", len(listings)) return listings # --------------------------------------------------------------------------- # Moerman & De Jong Makelaars 
# ---------------------------------------------------------------------------
# Moerman & De Jong Makelaars (Schiedam)
# ---------------------------------------------------------------------------
# Same OG Online / realtime-listings platform as Bjornd.

_MOERMAN_BASE = "https://www.moerman-dejong.nl"
_MOERMAN_SKIP = {"rented", "rented_ur"}
_MOERMAN_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}


def fetch_moerman() -> list[RawListing]:
    """Return Moerman's for-sale listings priced at or below ``config.MAX_PRICE``.

    Rental items and items whose ``statusOrig`` is in ``_MOERMAN_SKIP`` are
    dropped. The detail page is fetched only when the API gives no energy
    label; a construction year found there is used as a fallback. With
    ``config.APP_ENV == "dev"`` the loop stops after the first listing.

    Raises:
        Whatever ``fetch_json`` raises; ``KeyError`` if an item has no "url".
    """
    data = fetch_json(
        f"{_MOERMAN_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )
    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _MOERMAN_SKIP:
            continue
        # `or 0` also covers an explicit JSON null, which `.get(key, 0)`
        # would pass through and make the comparison raise TypeError.
        if (item.get("salesPrice") or 0) > config.MAX_PRICE:
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None
        # `or None` already maps a plot surface of 0 to None.
        perceel = item.get("plotSurface") or None

        # str() keeps this working when the API sends the year as a number.
        raw_year = str(item.get("dateOfConstruction") or "")
        bouwjaar = int(raw_year) if raw_year.isdigit() else None
        energielabel = item.get("energyLabel") or None

        detail_url = _MOERMAN_BASE + item["url"]
        if not energielabel:
            extra_kk = _og_detail(detail_url, "moerman")
            energielabel = extra_kk.get("energielabel")
            # The page is already fetched, so reuse its year if ours is missing.
            bouwjaar = bouwjaar or extra_kk.get("bouwjaar")

        listings.append(RawListing(
            url=detail_url,
            source_makelaar="moerman",
            status=_MOERMAN_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=item.get("salesPrice") or None,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            perceeloppervlak=perceel,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=energielabel,
            hero_image_url=item.get("photo") or None,
        ))
        if config.APP_ENV == "dev":
            break

    log.info("moerman: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# Van Daal Makelaardij (Delft)
# ---------------------------------------------------------------------------
# OG Online / realtime-listings platform.

_VANDAAL_BASE = "https://www.vandaalmakelaardij.nl"
_VANDAAL_SKIP = {"rented", "rented_ur"}
_VANDAAL_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "is_bought": "verkocht",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}


def fetch_vandaal() -> list[RawListing]:
    """Return Van Daal's for-sale listings priced at or below ``config.MAX_PRICE``.

    Rental items and items whose ``statusOrig`` is in ``_VANDAAL_SKIP`` are
    dropped. The detail page is fetched only when the API gives no energy
    label; a construction year found there is used as a fallback. With
    ``config.APP_ENV == "dev"`` the loop stops after the first listing.

    Raises:
        Whatever ``fetch_json`` raises; ``KeyError`` if an item has no "url".
    """
    data = fetch_json(
        f"{_VANDAAL_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )
    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _VANDAAL_SKIP:
            continue
        # `or 0` also covers an explicit JSON null, which `.get(key, 0)`
        # would pass through and make the comparison raise TypeError.
        if (item.get("salesPrice") or 0) > config.MAX_PRICE:
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None
        # `or None` already maps a plot surface of 0 to None.
        perceel = item.get("plotSurface") or None

        # str() keeps this working when the API sends the year as a number.
        raw_year = str(item.get("dateOfConstruction") or "")
        bouwjaar = int(raw_year) if raw_year.isdigit() else None
        energielabel = item.get("energyLabel") or None

        detail_url = _VANDAAL_BASE + item["url"]
        if not energielabel:
            extra_kk = _og_detail(detail_url, "vandaal")
            energielabel = extra_kk.get("energielabel")
            # The page is already fetched, so reuse its year if ours is missing.
            bouwjaar = bouwjaar or extra_kk.get("bouwjaar")

        listings.append(RawListing(
            url=detail_url,
            source_makelaar="vandaal",
            status=_VANDAAL_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=item.get("salesPrice") or None,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            perceeloppervlak=perceel,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=energielabel,
            hero_image_url=item.get("photo") or None,
        ))
        if config.APP_ENV == "dev":
            break

    log.info("vandaal: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# Elzenaar NVM Makelaars (Den Haag) — OG Online platform
# ---------------------------------------------------------------------------
# Same platform as bjornd/moerman/vandaal.

_ELZENAAR_BASE = "https://www.elzenaar.com"
_ELZENAAR_SKIP = {"rented", "rented_ur"}
_ELZENAAR_CITIES = {"Den Haag", "Voorburg", "Rijswijk"}
_ELZENAAR_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}


def fetch_elzenaar() -> list[RawListing]:
    """Return Elzenaar's for-sale listings in ``_ELZENAAR_CITIES`` up to ``config.MAX_PRICE``.

    Rental items, items whose ``statusOrig`` is in ``_ELZENAAR_SKIP`` and
    items outside the configured cities are dropped. The detail page is
    fetched only when the API gives no energy label; a construction year
    found there is used as a fallback. With ``config.APP_ENV == "dev"`` the
    loop stops after the first listing.

    Raises:
        Whatever ``fetch_json`` raises; ``KeyError`` if an item has no "url".
    """
    data = fetch_json(
        f"{_ELZENAAR_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )
    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _ELZENAAR_SKIP:
            continue
        if item.get("city") not in _ELZENAAR_CITIES:
            continue
        # `or 0` also covers an explicit JSON null, which `.get(key, 0)`
        # would pass through and make the comparison raise TypeError.
        if (item.get("salesPrice") or 0) > config.MAX_PRICE:
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None
        # `or None` already maps a plot surface of 0 to None.
        perceel = item.get("plotSurface") or None

        # str() keeps this working when the API sends the year as a number.
        raw_year = str(item.get("dateOfConstruction") or "")
        bouwjaar = int(raw_year) if raw_year.isdigit() else None
        energielabel = item.get("energyLabel") or None

        detail_url = _ELZENAAR_BASE + item["url"]
        if not energielabel:
            extra_kk = _og_detail(detail_url, "elzenaar")
            energielabel = extra_kk.get("energielabel")
            # The page is already fetched, so reuse its year if ours is missing.
            bouwjaar = bouwjaar or extra_kk.get("bouwjaar")

        listings.append(RawListing(
            url=detail_url,
            source_makelaar="elzenaar",
            status=_ELZENAAR_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=item.get("salesPrice") or None,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            perceeloppervlak=perceel,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=energielabel,
            hero_image_url=item.get("photo") or None,
        ))
        if config.APP_ENV == "dev":
            break

    log.info("elzenaar: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# DOEN NVM Makelaars (Den Haag / Leiden / Voorburg) — OG Online platform
# ---------------------------------------------------------------------------

_DOEN_BASE = "https://www.doenmakelaars.com"
_DOEN_SKIP = {"rented", "rented_ur"}
_DOEN_CITIES = {"Den Haag", "Leiden", "Voorburg", "Leidschendam", "Rijswijk", "Wassenaar", "Zoetermeer"}
_DOEN_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}


def fetch_doen() -> list[RawListing]:
    """Return DOEN's for-sale listings in ``_DOEN_CITIES`` up to ``config.MAX_PRICE``.

    Rental items, items whose ``statusOrig`` is in ``_DOEN_SKIP`` and items
    outside the configured cities are dropped. The detail page is fetched
    only when the API gives no energy label; a construction year found there
    is used as a fallback. With ``config.APP_ENV == "dev"`` the loop stops
    after the first listing.

    Raises:
        Whatever ``fetch_json`` raises; ``KeyError`` if an item has no "url".
    """
    data = fetch_json(
        f"{_DOEN_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )
    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _DOEN_SKIP:
            continue
        if item.get("city") not in _DOEN_CITIES:
            continue
        # `or 0` also covers an explicit JSON null, which `.get(key, 0)`
        # would pass through and make the comparison raise TypeError.
        if (item.get("salesPrice") or 0) > config.MAX_PRICE:
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None
        # `or None` already maps a plot surface of 0 to None.
        perceel = item.get("plotSurface") or None

        # str() keeps this working when the API sends the year as a number.
        raw_year = str(item.get("dateOfConstruction") or "")
        bouwjaar = int(raw_year) if raw_year.isdigit() else None
        energielabel = item.get("energyLabel") or None

        detail_url = _DOEN_BASE + item["url"]
        if not energielabel:
            extra_kk = _og_detail(detail_url, "doen")
            energielabel = extra_kk.get("energielabel")
            # The page is already fetched, so reuse its year if ours is missing.
            bouwjaar = bouwjaar or extra_kk.get("bouwjaar")

        listings.append(RawListing(
            url=detail_url,
            source_makelaar="doen",
            status=_DOEN_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=item.get("salesPrice") or None,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            perceeloppervlak=perceel,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=energielabel,
            hero_image_url=item.get("photo") or None,
        ))
        if config.APP_ENV == "dev":
            break

    log.info("doen: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# Vandriel Makelaardij (Schiedam) — OG Online / realtime-listings
# ---------------------------------------------------------------------------

_VANDRIEL_BASE = "https://www.vandrielmakelaardij.nl"
_VANDRIEL_SKIP = {"rented", "rented_ur"}
_VANDRIEL_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}


def fetch_vandriel() -> list[RawListing]:
    """Return Vandriel's for-sale listings in Schiedam up to ``config.MAX_PRICE``.

    Rental items, items whose ``statusOrig`` is in ``_VANDRIEL_SKIP`` and
    items whose city is not "schiedam" (compared case-insensitively) are
    dropped. The detail page is fetched only when the API gives no energy
    label; a construction year found there is used as a fallback. With
    ``config.APP_ENV == "dev"`` the loop stops after the first listing.

    Raises:
        Whatever ``fetch_json`` raises; ``KeyError`` if an item has no "url".
    """
    data = fetch_json(
        f"{_VANDRIEL_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )
    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _VANDRIEL_SKIP:
            continue
        if (item.get("city") or "").lower() != "schiedam":
            continue
        # `or 0` also covers an explicit JSON null, which `.get(key, 0)`
        # would pass through and make the comparison raise TypeError.
        if (item.get("salesPrice") or 0) > config.MAX_PRICE:
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None
        # `or None` already maps a plot surface of 0 to None.
        perceel = item.get("plotSurface") or None

        # str() keeps this working when the API sends the year as a number.
        raw_year = str(item.get("dateOfConstruction") or "")
        bouwjaar = int(raw_year) if raw_year.isdigit() else None
        energielabel = item.get("energyLabel") or None

        detail_url = _VANDRIEL_BASE + item["url"]
        if not energielabel:
            extra_kk = _og_detail(detail_url, "vandriel")
            energielabel = extra_kk.get("energielabel")
            # The page is already fetched, so reuse its year if ours is missing.
            bouwjaar = bouwjaar or extra_kk.get("bouwjaar")

        listings.append(RawListing(
            url=detail_url,
            source_makelaar="vandriel",
            status=_VANDRIEL_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=item.get("salesPrice") or None,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            perceeloppervlak=perceel,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=energielabel,
            hero_image_url=item.get("photo") or None,
        ))
        if config.APP_ENV == "dev":
            break

    log.info("vandriel: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — export every active API adapter here
# ---------------------------------------------------------------------------

# Maps the source name to its zero-argument scraper function.
SCRAPERS = {
    "bjornd": fetch_bjornd,
    "ooms": fetch_ooms,
    "moerman": fetch_moerman,
    "vandaal": fetch_vandaal,
    "elzenaar": fetch_elzenaar,
    "doen": fetch_doen,
    "vandriel": fetch_vandriel,
}