first setup, travel works, bjornd api works

This commit is contained in:
2026-04-03 13:50:28 +02:00
commit 26d9d936f4
19 changed files with 1152 additions and 0 deletions

6
src/adapters/__init__.py Normal file
View File

@@ -0,0 +1,6 @@
from os import wait
from typing import Callable
from adapters.api import SCRAPERS as _API
from adapters.ssr import SCRAPERS as _SSR
# Combined registry of every adapter: name -> zero-argument scraper function.
# Entries from the SSR registry win when both registries define the same name.
# NOTE(review): the `from os import wait` import at the top of this module
# looks unused here and is not available on every platform — confirm and drop.
SCRAPERS: dict[str, Callable] = {**_API, **_SSR}

116
src/adapters/api.py Normal file
View File

@@ -0,0 +1,116 @@
"""
adapters/api.py — JSON/API-based makelaars
Elke scraper is een functie () -> list[RawListing].
Voeg nieuwe toe onderaan en registreer in SCRAPERS.
"""
import json
import logging
import time
import httpx
import config
from huizenbot import RawListing
# Module-level logger shared by all JSON/API adapters in this file.
log = logging.getLogger("huizenbot.api")
# ---------------------------------------------------------------------------
# Gedeelde HTTP helper
# ---------------------------------------------------------------------------
def fetch_json(url: str, *, params: dict | None = None, headers: dict | None = None) -> dict | list:
    """
    Perform a GET request and return the decoded JSON body.

    The request carries the configured User-Agent (extra ``headers`` are
    merged on top of it) and a 15 second timeout. A 429 response is retried,
    for at most three attempts in total, after sleeping for the delay the
    server advertises in ``Retry-After``.

    Args:
        url: Address to request.
        params: Optional query-string parameters.
        headers: Optional extra request headers; they override the defaults.

    Returns:
        The parsed JSON document.

    Raises:
        httpx.HTTPError: for transport failures and for non-429 error
            statuses (raised by ``raise_for_status``).
        RuntimeError: when every attempt was answered with 429.
    """
    max_attempts = 3
    default_wait = 60

    hdrs = {"User-Agent": config.USER_AGENT}
    if headers:
        hdrs.update(headers)

    for attempt in range(1, max_attempts + 1):
        r = httpx.get(url, params=params, headers=hdrs, timeout=15)
        if r.status_code != 429:
            r.raise_for_status()
            return r.json()
        if attempt == max_attempts:
            # Sleeping now would only delay the error below.
            break
        # Retry-After may also be an HTTP-date; fall back to the default
        # delay instead of crashing on int().
        try:
            wait = max(0, int(r.headers.get("Retry-After", default_wait)))
        except ValueError:
            wait = default_wait
        log.warning("429 op %s, wacht %ds", url, wait)
        time.sleep(wait)
    raise RuntimeError(f"Blijvend 429 op {url}")
# ---------------------------------------------------------------------------
# Bjornd
# ---------------------------------------------------------------------------
# Root of the Bjornd website; listing paths from the API are appended to it.
_BJORND_BASE = "https://www.bjornd.nl"

# Values of ``statusOrig`` that are filtered out entirely.
_BJORND_SKIP = {"rented", "rented_ur"}

# Translation of the API's ``statusOrig`` values to the internal status names.
_STATUS_MAP = {
    "available": "beschikbaar",
    **dict.fromkeys(("under_bid", "under_option"), "onder_bod"),
    **dict.fromkeys(("sold", "sold_ur"), "verkocht"),
}
def fetch_bjornd() -> list[RawListing]:
    """
    Fetch the for-sale listings from the Bjornd realtime JSON endpoint.

    An item is skipped when it is not flagged ``isSales``, when its
    ``statusOrig`` is in ``_BJORND_SKIP``, or when its ``salesPrice`` exceeds
    ``config.MAX_PRICE``. Items without a price are kept and get
    ``prijs=None``. Unknown statuses are reported as "beschikbaar".

    Returns:
        One RawListing per remaining item.

    Raises:
        KeyError: when an accepted item has no ``url`` key.
        Whatever ``fetch_json`` raises for the underlying request.
    """
    data = fetch_json(
        f"{_BJORND_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )
    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _BJORND_SKIP:
            continue
        # A missing price is None, and `None > int` raises TypeError; only
        # apply the price ceiling when there is a price to compare.
        price = item.get("salesPrice")
        if price is not None and price > config.MAX_PRICE:
            continue
        listings.append(RawListing(
            url=_BJORND_BASE + item["url"],
            source_makelaar="bjornd",
            status=_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            # `or None` normalises empty strings and zeros to "unknown".
            adres=item.get("address") or None,
            postcode=item.get("zipcode") or None,
            stad=item.get("city") or None,
            prijs=price or None,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            hero_image_url=item.get("photo") or None,
            # Remaining fields are stored as a JSON string.
            extra=json.dumps({
                "balcony": item.get("balcony"),
                "garden": item.get("garden"),
                "mainType": item.get("mainType"),
                "buildType": item.get("buildType"),
                "district": item.get("district"),
                "lat": item.get("lat"),
                "lng": item.get("lng"),
                "isFurnished": item.get("isFurnished"),
                "hasOpenHouse": item.get("hasOpenHouse"),
                "description": item.get("description"),
                "photos": item.get("photos"),
            }, ensure_ascii=False),
        ))
    log.info("bjornd: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — exporteer hier alle actieve API adapters
# ---------------------------------------------------------------------------
# Registry of active API adapters: adapter name -> zero-argument scraper.
SCRAPERS = {"bjornd": fetch_bjornd}

154
src/adapters/ssr.py Normal file
View File

@@ -0,0 +1,154 @@
"""
adapters/ssr.py — HTML/SSR-based makelaars
Elke scraper is een functie () -> list[RawListing].
Voeg nieuwe toe onderaan en registreer in SCRAPERS.
"""
import logging
import re
import time
import httpx
from bs4 import BeautifulSoup
import config
from huizenbot import RawListing
# Module-level logger shared by all HTML/SSR adapters in this file.
log = logging.getLogger("huizenbot.ssr")
# ---------------------------------------------------------------------------
# Gedeelde HTTP helper
# ---------------------------------------------------------------------------
def fetch_soup(url: str, *, params: dict | None = None) -> BeautifulSoup:
    """
    Perform a GET request and parse the response body as HTML.

    The request carries the configured User-Agent, follows redirects and
    uses a 15 second timeout. A 429 response is retried, for at most three
    attempts in total, after sleeping for the delay the server advertises
    in ``Retry-After``.

    Args:
        url: Address to request.
        params: Optional query-string parameters.

    Returns:
        The response text parsed with the "html.parser" backend.

    Raises:
        httpx.HTTPError: for transport failures and for non-429 error
            statuses (raised by ``raise_for_status``).
        RuntimeError: when every attempt was answered with 429.
    """
    max_attempts = 3
    default_wait = 60

    for attempt in range(1, max_attempts + 1):
        r = httpx.get(
            url,
            params=params,
            headers={"User-Agent": config.USER_AGENT},
            timeout=15,
            follow_redirects=True,
        )
        if r.status_code != 429:
            r.raise_for_status()
            return BeautifulSoup(r.text, "html.parser")
        if attempt == max_attempts:
            # Sleeping now would only delay the error below.
            break
        # Retry-After may also be an HTTP-date; fall back to the default
        # delay instead of crashing on int().
        try:
            wait = max(0, int(r.headers.get("Retry-After", default_wait)))
        except ValueError:
            wait = default_wait
        log.warning("429 op %s, wacht %ds", url, wait)
        time.sleep(wait)
    raise RuntimeError(f"Blijvend 429 op {url}")
# ---------------------------------------------------------------------------
# Parse helpers
# ---------------------------------------------------------------------------
def parse_prijs(text: str | None) -> int | None:
"""'€ 325.000 k.k.' → 325000"""
if not text:
return None
digits = re.sub(r"[^\d]", "", text)
return int(digits) if digits else None
def parse_m2(text: str | None) -> int | None:
"""'87 m²' → 87"""
if not text:
return None
m = re.search(r"(\d+)", text.replace(".", ""))
return int(m.group(1)) if m else None
# ---------------------------------------------------------------------------
# Björn & Dries adapter (bjornd.nl)
# ---------------------------------------------------------------------------
# TODO: fill in the real CSS selectors after inspecting the page.
# This is a structural template — the selectors are placeholders.
BJORND_BASE = "https://www.bjornd.nl"
# Overview page that lists the available properties.
BJORND_AANBOD = BJORND_BASE + "/aanbod"
def fetch_bjornd_demo() -> list[RawListing]:
    """
    Scrape the Bjornd overview page into RawListing objects (template).

    The CSS selectors are placeholders and must be adjusted to the real
    page. Each card is parsed on a best-effort basis: a card that fails to
    parse is logged and skipped so that one bad card does not abort the run.

    Returns:
        One RawListing per card that contains a link.
    """
    # Local import: keeps this block self-contained.
    from urllib.parse import urljoin

    soup = fetch_soup(BJORND_AANBOD)
    listings = []
    # Adjust the selector to the real HTML structure
    for card in soup.select(".property-card"):  # ← adjust
        try:
            a_tag = card.select_one("a[href]")
            if not a_tag:
                continue
            # urljoin resolves absolute, root-relative, page-relative and
            # protocol-relative hrefs; plain concatenation only handled
            # hrefs that start with "/" or "http".
            url = urljoin(BJORND_AANBOD, a_tag["href"])
            adres = _text(card, ".property-address")  # ← adjust
            postcode = _extract_postcode(_text(card, ".property-location"))
            prijs = parse_prijs(_text(card, ".property-price"))
            opp = parse_m2(_text(card, ".property-area"))
            img = _src(card, "img")
            listings.append(RawListing(
                url=url,
                source_makelaar="bjornd",
                adres=adres,
                postcode=postcode,
                stad=_infer_stad(postcode),
                prijs=prijs,
                woonoppervlak=opp,
                hero_image_url=img,
            ))
        except Exception as e:
            # Deliberately broad: skip the broken card, keep the others.
            log.warning("Fout bij parsen bjornd card: %s", e)
    return listings
# ---------------------------------------------------------------------------
# SSR helper utils
# ---------------------------------------------------------------------------
def _text(soup, selector: str) -> str | None:
el = soup.select_one(selector)
return el.get_text(strip=True) if el else None
def _src(soup, selector: str) -> str | None:
el = soup.select_one(selector)
if el is None:
return None
return el.get("src") or el.get("data-src")
def _extract_postcode(text: str | None) -> str | None:
if not text:
return None
m = re.search(r"\b(\d{4}\s?[A-Z]{2})\b", text)
return m.group(1).replace(" ", "") if m else None
def _infer_stad(postcode: str | None) -> str | None:
"""Simpele mapping op basis van postcode range — uitbreiden naar wens."""
if not postcode:
return None
code = int(postcode[:4])
if 2600 <= code <= 2629:
return "Delft"
if 3100 <= code <= 3135:
return "Schiedam"
return None
# ---------------------------------------------------------------------------
# SCRAPERS — exporteer hier alle actieve SSR adapters
# ---------------------------------------------------------------------------
# Registry of active SSR adapters: adapter name -> zero-argument scraper.
SCRAPERS = {"bjornd_demo": fetch_bjornd_demo}