first setup, travel works, bjornd api works

This commit is contained in:
2026-04-03 13:50:28 +02:00
commit 26d9d936f4
19 changed files with 1152 additions and 0 deletions

0
src/__init__.py Normal file
View File

6
src/adapters/__init__.py Normal file
View File

@@ -0,0 +1,6 @@
from os import wait  # NOTE(review): not used in this module and os.wait is Unix-only — looks like an accidental auto-import; confirm and remove
from typing import Callable
from adapters.api import SCRAPERS as _API
from adapters.ssr import SCRAPERS as _SSR
# Combined registry of all adapters, keyed by scraper name. With the dict
# union operator the right-hand side wins on duplicate keys, so an SSR
# adapter replaces an API adapter registered under the same name.
SCRAPERS: dict[str,Callable] = _API | _SSR

116
src/adapters/api.py Normal file
View File

@@ -0,0 +1,116 @@
"""
adapters/api.py — JSON/API-based makelaars
Elke scraper is een functie () -> list[RawListing].
Voeg nieuwe toe onderaan en registreer in SCRAPERS.
"""
import json
import logging
import time
import httpx
import config
from huizenbot import RawListing
log = logging.getLogger("huizenbot.api")
# ---------------------------------------------------------------------------
# Gedeelde HTTP helper
# ---------------------------------------------------------------------------
def fetch_json(url: str, *, params: dict | None = None, headers: dict | None = None) -> dict | list:
    """
    GET ``url`` and return the decoded JSON body.

    The configured User-Agent is always sent; ``headers`` are merged on top
    of it. A 429 response is retried (three attempts in total), sleeping for
    the number of seconds given by ``Retry-After`` (60 when the header is
    missing or not a plain integer).

    Raises:
        httpx.HTTPStatusError: for any non-429 error status.
        RuntimeError: when every attempt was answered with 429.
    """
    hdrs = {"User-Agent": config.USER_AGENT}
    if headers:
        hdrs.update(headers)
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        r = httpx.get(url, params=params, headers=hdrs, timeout=15)
        if r.status_code != 429:
            r.raise_for_status()
            return r.json()
        if attempt == max_attempts:
            # No point in sleeping when there is no retry left.
            break
        try:
            wait = max(0, int(r.headers.get("Retry-After", 60)))
        except (TypeError, ValueError):
            # Retry-After may also be an HTTP date; fall back to the default.
            wait = 60
        log.warning("429 op %s, wacht %ds", url, wait)
        time.sleep(wait)
    raise RuntimeError(f"Blijvend 429 op {url}")
# ---------------------------------------------------------------------------
# Bjornd
# ---------------------------------------------------------------------------
# Site root; fetch_bjornd prefixes the API endpoint and each item's "url" with it.
_BJORND_BASE = "https://www.bjornd.nl"
# "statusOrig" values for which fetch_bjornd drops the item.
_BJORND_SKIP = {"rented", "rented_ur"}
# Translates Bjornd "statusOrig" values to the status strings stored on
# RawListing; fetch_bjornd falls back to "beschikbaar" for unmapped values.
_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}
def fetch_bjornd() -> list[RawListing]:
    """
    Fetch for-sale listings from the Bjornd realtime JSON endpoint.

    Items are skipped when they are not flagged ``isSales``, when their
    ``statusOrig`` is in ``_BJORND_SKIP``, when a known price exceeds
    ``config.MAX_PRICE``, or when they carry no ``url``. Items without a
    price are kept (with ``prijs=None``) instead of crashing the comparison.

    ``extra`` is passed as a plain dict: serialising it is left to the
    storage layer, so it is not JSON-encoded twice.
    """
    data = fetch_json(
        f"{_BJORND_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )
    extra_keys = (
        "balcony", "garden", "mainType", "buildType", "district",
        "lat", "lng", "isFurnished", "hasOpenHouse", "description", "photos",
    )
    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _BJORND_SKIP:
            continue
        prijs = item.get("salesPrice") or None
        if prijs is not None and prijs > config.MAX_PRICE:
            continue
        path = item.get("url")
        if not path:
            # Without a URL there is nothing to link to or derive an id from.
            log.warning("bjornd: item zonder url overgeslagen")
            continue
        listings.append(RawListing(
            url=_BJORND_BASE + path,
            source_makelaar="bjornd",
            status=_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=item.get("zipcode") or None,
            stad=item.get("city") or None,
            prijs=prijs,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            hero_image_url=item.get("photo") or None,
            extra={key: item.get(key) for key in extra_keys},
        ))
    log.info("bjornd: %d koopwoningen opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — register every active API adapter here
# ---------------------------------------------------------------------------
# Maps a scraper name to a zero-argument callable returning list[RawListing].
SCRAPERS = {
    'bjornd': fetch_bjornd,
}

154
src/adapters/ssr.py Normal file
View File

@@ -0,0 +1,154 @@
"""
adapters/ssr.py — HTML/SSR-based makelaars
Elke scraper is een functie () -> list[RawListing].
Voeg nieuwe toe onderaan en registreer in SCRAPERS.
"""
import logging
import re
import time
import httpx
from bs4 import BeautifulSoup
import config
from huizenbot import RawListing
log = logging.getLogger("huizenbot.ssr")
# ---------------------------------------------------------------------------
# Gedeelde HTTP helper
# ---------------------------------------------------------------------------
def fetch_soup(url: str, *, params: dict | None = None) -> BeautifulSoup:
    """
    GET ``url`` (following redirects) and parse the body with html.parser.

    A 429 response is retried (three attempts in total), sleeping for the
    number of seconds given by ``Retry-After`` (60 when the header is missing
    or not a plain integer).

    Raises:
        httpx.HTTPStatusError: for any non-429 error status.
        RuntimeError: when every attempt was answered with 429.
    """
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        r = httpx.get(
            url,
            params=params,
            headers={"User-Agent": config.USER_AGENT},
            timeout=15,
            follow_redirects=True,
        )
        if r.status_code != 429:
            r.raise_for_status()
            return BeautifulSoup(r.text, "html.parser")
        if attempt == max_attempts:
            # No point in sleeping when there is no retry left.
            break
        try:
            wait = max(0, int(r.headers.get("Retry-After", 60)))
        except (TypeError, ValueError):
            # Retry-After may also be an HTTP date; fall back to the default.
            wait = 60
        log.warning("429 op %s, wacht %ds", url, wait)
        time.sleep(wait)
    raise RuntimeError(f"Blijvend 429 op {url}")
# ---------------------------------------------------------------------------
# Parse helpers
# ---------------------------------------------------------------------------
def parse_prijs(text: str | None) -> int | None:
"""'€ 325.000 k.k.' → 325000"""
if not text:
return None
digits = re.sub(r"[^\d]", "", text)
return int(digits) if digits else None
def parse_m2(text: str | None) -> int | None:
"""'87 m²' → 87"""
if not text:
return None
m = re.search(r"(\d+)", text.replace(".", ""))
return int(m.group(1)) if m else None
# ---------------------------------------------------------------------------
# Björn & Dries adapter (bjornd.nl)
# ---------------------------------------------------------------------------
# TODO: fill in the real CSS selectors after inspecting the page.
# This is a structural template — the selectors are placeholders.
BJORND_BASE = "https://www.bjornd.nl"
# Listing overview page that fetch_bjornd_demo downloads.
BJORND_AANBOD = f"{BJORND_BASE}/aanbod"
def fetch_bjornd_demo() -> list[RawListing]:
    """
    Scrape the Bjornd overview page into RawListing objects.

    Template adapter: the CSS selectors are placeholders that still have to
    be adjusted to the real markup. A card that fails to parse is logged and
    skipped; cards without a link are skipped silently.
    """
    page = fetch_soup(BJORND_AANBOD)
    results = []
    # Adjust the selector to the real HTML structure.
    for card in page.select(".property-card"):  # ← adjust
        try:
            link = card.select_one("a[href]")
            if not link:
                continue
            href = link["href"]
            # Relative links are made absolute against the site root.
            url = href if href.startswith("http") else BJORND_BASE + href
            adres = _text(card, ".property-address")  # ← adjust
            postcode = _extract_postcode(_text(card, ".property-location"))
            prijs = parse_prijs(_text(card, ".property-price"))
            opp = parse_m2(_text(card, ".property-area"))
            img = _src(card, "img")
            listing = RawListing(
                url=url,
                source_makelaar="bjornd",
                adres=adres,
                postcode=postcode,
                stad=_infer_stad(postcode),
                prijs=prijs,
                woonoppervlak=opp,
                hero_image_url=img,
            )
            results.append(listing)
        except Exception as e:
            log.warning("Fout bij parsen bjornd card: %s", e)
    return results
# ---------------------------------------------------------------------------
# SSR helper utils
# ---------------------------------------------------------------------------
def _text(soup, selector: str) -> str | None:
el = soup.select_one(selector)
return el.get_text(strip=True) if el else None
def _src(soup, selector: str) -> str | None:
el = soup.select_one(selector)
if el is None:
return None
return el.get("src") or el.get("data-src")
def _extract_postcode(text: str | None) -> str | None:
if not text:
return None
m = re.search(r"\b(\d{4}\s?[A-Z]{2})\b", text)
return m.group(1).replace(" ", "") if m else None
def _infer_stad(postcode: str | None) -> str | None:
"""Simpele mapping op basis van postcode range — uitbreiden naar wens."""
if not postcode:
return None
code = int(postcode[:4])
if 2600 <= code <= 2629:
return "Delft"
if 3100 <= code <= 3135:
return "Schiedam"
return None
# ---------------------------------------------------------------------------
# SCRAPERS — register every active SSR adapter here
# ---------------------------------------------------------------------------
# Maps a scraper name to a zero-argument callable returning list[RawListing].
# NOTE(review): fetch_bjornd_demo still uses placeholder selectors but is
# registered as active here — confirm that is intended.
SCRAPERS = {
    'bjornd_demo': fetch_bjornd_demo,
}

25
src/config.py Normal file
View File

@@ -0,0 +1,25 @@
"""
config.py — vul aan met je eigen waarden. Secrets via environment variables.
"""
import os
# Work-location postcodes; geocoded to get the destinations for travel times.
MARK_WERK_POSTCODE = "2629HG"
MICHELLE_WERK_POSTCODE = "3133AV"
# The same work locations as "<city>/<postcode>" strings.
# NOTE(review): presumably the 9292 location format — confirm against the 9292 lookup.
MARK_WERK_9292 = "delft/"+MARK_WERK_POSTCODE
MICHELLE_WERK_9292 = "vlaardingen/"+MICHELLE_WERK_POSTCODE
# Home Assistant webhook; an empty value disables the webhook notification.
HA_WEBHOOK_URL = os.environ.get("HA_WEBHOOK_URL", "")
# SMTP settings for the e-mail notification; an empty SMTP_HOST disables it.
SMTP_HOST = os.environ.get("SMTP_HOST", "")
SMTP_PORT = int(os.environ.get("SMTP_PORT", "587"))
SMTP_FROM = os.environ.get("SMTP_FROM", "")
SMTP_TO = os.environ.get("SMTP_TO", "")
SMTP_USER = os.environ.get("SMTP_USER", "")
# User-Agent sent with the scraper and geocoding requests.
USER_AGENT = "Huizenbot/1.0 (+mark@kalsbeek.dev) persoonlijk gebruik"
# Location of the SQLite database file.
DB_PATH = os.environ.get("DB_PATH", "/data/huizenbot.db")
# The cycling duration reported by the router is divided by this factor.
FIETS_SNELHEID_FACTOR = 1.27
# Listings priced above this amount are skipped by the Bjornd API adapter.
MAX_PRICE = 300_000

374
src/huizenbot.py Normal file
View File

@@ -0,0 +1,374 @@
"""
huizenbot.py — models, db, travel, notify, orchestration
"""
import hashlib
import json
import logging
import os
import smtplib
import sqlite3
import time
from dataclasses import dataclass, field
from datetime import datetime, date
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from typing import Callable, Any
import httpx
import config
from nine292 import ov_minuten_9292
log = logging.getLogger("huizenbot")
# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
@dataclass
class RawListing:
url: str # required
source_makelaar: str = ""
datum_aanmelding: str | None = None
status: str = "beschikbaar" # beschikbaar | onder_bod | verkocht
adres: str | None = None
postcode: str | None = None
stad: str | None = None
prijs: int | None = None
woningtype: str | None = None
woonoppervlak: int | None = None
perceeloppervlak: int | None = None
kamers: int | None = None
slaapkamers: int | None = None
bouwjaar: int | None = None
energielabel: str | None = None
hero_image_url: str | None = None
extra: dict[str, Any] = field(default_factory=dict)
def listing_id(url: str) -> str:
    """Return the SHA-256 hex digest of ``url``; used as the listing's primary key."""
    digest = hashlib.sha256()
    digest.update(url.encode())
    return digest.hexdigest()
# ---------------------------------------------------------------------------
# Database
# ---------------------------------------------------------------------------
# DDL for the single `woningen` table, executed by get_db on every open
# (IF NOT EXISTS makes that idempotent). `id` holds listing_id(url); the
# fiets_*/ov_* columns hold travel times; `extra` holds JSON text.
SCHEMA = """
CREATE TABLE IF NOT EXISTS woningen (
id TEXT PRIMARY KEY,
url TEXT UNIQUE NOT NULL,
source_makelaar TEXT NOT NULL,
first_seen TEXT NOT NULL,
last_seen TEXT NOT NULL,
datum_aanmelding TEXT,
status TEXT NOT NULL DEFAULT 'beschikbaar',
adres TEXT,
postcode TEXT,
stad TEXT,
prijs INTEGER,
woningtype TEXT,
woonoppervlak INTEGER,
perceeloppervlak INTEGER,
kamers INTEGER,
slaapkamers INTEGER,
bouwjaar INTEGER,
energielabel TEXT,
hero_image_url TEXT,
fiets_mark INTEGER,
fiets_michelle INTEGER,
ov_mark INTEGER,
ov_michelle INTEGER,
extra TEXT
);
"""
def get_db(path: str) -> sqlite3.Connection:
    """
    Open the SQLite database at ``path`` and prepare it for use.

    Rows are returned as ``sqlite3.Row``, the journal mode is switched to
    WAL, and the schema script is executed before the connection is returned.
    """
    db = sqlite3.connect(path)
    db.row_factory = sqlite3.Row
    for prepare, statement in (
        (db.execute, "PRAGMA journal_mode=WAL"),
        (db.executescript, SCHEMA),
    ):
        prepare(statement)
    return db
def upsert(conn: sqlite3.Connection, listing: RawListing, travel: dict[str,int]) -> bool:
    """
    Insert a new listing, or refresh ``last_seen`` and ``status`` of a known one.

    Args:
        conn: open connection to the woningen database.
        listing: the scraped listing; its id is derived from ``listing.url``.
        travel: travel times keyed fiets_mark / fiets_michelle / ov_mark /
            ov_michelle; missing keys are stored as NULL. Only used on insert.

    Returns:
        True when the listing was not in the database yet.

    The write runs inside ``with conn`` so it is committed on success and
    rolled back when the statement fails.
    """
    # Local import: this module only imports datetime and date at the top.
    from datetime import timezone

    # Naive-UTC ISO string, same format as before, without the deprecated utcnow().
    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    lid = listing_id(listing.url)
    row = conn.execute("SELECT id FROM woningen WHERE id = ?", (lid,)).fetchone()
    is_new = row is None

    with conn:
        if not is_new:
            conn.execute(
                "UPDATE woningen SET last_seen = ?, status = ? WHERE id = ?",
                (now, listing.status, lid),
            )
            return is_new

        extra = listing.extra
        if not extra:
            extra_json = None
        elif isinstance(extra, str):
            # Already serialised by the adapter; encoding again would nest it.
            extra_json = extra
        else:
            extra_json = json.dumps(extra)

        listing_columns = (
            "url", "source_makelaar", "datum_aanmelding", "status",
            "adres", "postcode", "stad", "prijs", "woningtype",
            "woonoppervlak", "perceeloppervlak", "kamers", "slaapkamers",
            "bouwjaar", "energielabel", "hero_image_url",
        )
        travel_columns = ("fiets_mark", "fiets_michelle", "ov_mark", "ov_michelle")
        values = {name: getattr(listing, name) for name in listing_columns}
        values.update({name: travel.get(name) for name in travel_columns})
        values.update(id=lid, first_seen=now, last_seen=now, extra=extra_json)

        conn.execute("""
            INSERT INTO woningen (
                id, url, source_makelaar, first_seen, last_seen, datum_aanmelding,
                status, adres, postcode, stad,
                prijs, woningtype, woonoppervlak, perceeloppervlak,
                kamers, slaapkamers, bouwjaar, energielabel,
                hero_image_url,
                fiets_mark, fiets_michelle, ov_mark, ov_michelle,
                extra
            ) VALUES (
                :id, :url, :source_makelaar, :first_seen, :last_seen, :datum_aanmelding,
                :status, :adres, :postcode, :stad,
                :prijs, :woningtype, :woonoppervlak, :perceeloppervlak,
                :kamers, :slaapkamers, :bouwjaar, :energielabel,
                :hero_image_url,
                :fiets_mark, :fiets_michelle, :ov_mark, :ov_michelle,
                :extra
            )
        """, values)
    return is_new
# ---------------------------------------------------------------------------
# Travel
# ---------------------------------------------------------------------------
# Successful lookups, keyed by the postcode string exactly as passed in.
_geocode_cache: dict[str, tuple[float, float]] = {}
def geocode(postcode: str) -> tuple[float, float] | None:
    """
    Resolve a postcode to (lat, lon) via the Nominatim search API.

    Cached postcodes are answered immediately. Every real lookup is preceded
    by a one-second sleep to respect Nominatim's rate limit. Returns None
    (after logging) when nothing is found or the request fails; failures are
    not cached.
    """
    cached = _geocode_cache.get(postcode)
    if cached is not None:
        return cached
    time.sleep(1)  # Nominatim rate limit
    query = {"q": postcode + ", Netherlands", "format": "json", "limit": 1}
    try:
        r = httpx.get(
            "https://nominatim.openstreetmap.org/search",
            params=query,
            headers={"User-Agent": config.USER_AGENT},
            timeout=10,
        )
        r.raise_for_status()
        results = r.json()
        if not results:
            log.warning("Geocode geen resultaat voor %s", postcode)
            return None
        best = results[0]
        coords = (float(best["lat"]), float(best["lon"]))
        _geocode_cache[postcode] = coords
        return coords
    except Exception as e:
        log.error("Geocode fout voor %s: %s", postcode, e)
        return None
def fiets_minuten(origin: tuple[float, float], dest: tuple[float, float]) -> int | None:
    """
    Cycling time in minutes via the OSRM bike router at routing.openstreetmap.de.

    Args:
        origin: (lat, lon) of the start point.
        dest: (lat, lon) of the end point.

    Returns:
        The routed duration converted to minutes and divided by
        ``config.FIETS_SNELHEID_FACTOR``, rounded; None (after logging) on
        any failure.
    """
    try:
        olat, olon = origin
        dlat, dlon = dest
        # OSRM expects coordinates as lon,lat.
        url = (
            f"https://routing.openstreetmap.de/routed-bike/route/v1/driving/"
            f"{olon},{olat};{dlon},{dlat}?overview=false"
        )
        # Identify the bot, consistent with the other outgoing requests.
        r = httpx.get(url, headers={"User-Agent": config.USER_AGENT}, timeout=10)
        r.raise_for_status()
        data = r.json()
        seconds = data["routes"][0]["duration"]
        return round(seconds / 60 / config.FIETS_SNELHEID_FACTOR)
    except Exception as e:
        log.error("OSRM fout: %s", e)
        return None
def ov_minuten(from_loc: str, to_loc: str) -> int | None:
    """Public-transport travel time in minutes; delegates unchanged to ``ov_minuten_9292``."""
    minutes = ov_minuten_9292(from_loc, to_loc)
    return minutes
def _next_weekday_morning() -> str:
"""Geeft eerstvolgende doordeweekse dag om 08:30 als Navitia datetime string."""
from datetime import timedelta
d = date.today()
d += timedelta(days=1)
while d.weekday() >= 5: # 5=zaterdag, 6=zondag
d += timedelta(days=1)
return d.strftime("%Y%m%dT083000")
def bereken_reistijden(postcode: str | None) -> dict[str, int]:
"""Bereken alle reistijden voor een woning postcode. Geeft lege dict bij falen."""
if not postcode:
return {}
woning_coords = geocode(postcode)
if not woning_coords:
return {}
werk1 = geocode(config.MARK_WERK_POSTCODE)
werk2 = geocode(config.MICHELLE_WERK_POSTCODE)
result = {}
if werk1:
result["fiets_mark"] = fiets_minuten(woning_coords, werk1)
result["ov_mark"] = ov_minuten(woning_coords, werk1)
if werk2:
result["fiets_michelle"] = fiets_minuten(woning_coords, werk2)
result["ov_michelle"] = ov_minuten(woning_coords, werk2)
return result
# ---------------------------------------------------------------------------
# Notify
# ---------------------------------------------------------------------------
def notify_ha(listing: RawListing, travel: dict[str,int]) -> None:
    """
    Post a new-listing notification to the Home Assistant webhook.

    Does nothing when ``config.HA_WEBHOOK_URL`` is empty. When the POST
    fails for any reason, the error is logged and ``notify_email`` is called
    as a fallback.
    """
    webhook = config.HA_WEBHOOK_URL
    if not webhook:
        return
    payload = {
        "adres": listing.adres,
        "stad": listing.stad,
        "prijs": listing.prijs,
        "status": listing.status,
        "url": listing.url,
        "image": listing.hero_image_url,
    }
    # Missing travel times are sent as null.
    for key in ("fiets_mark", "fiets_michelle", "ov_mark", "ov_michelle"):
        payload[key] = travel.get(key)
    try:
        response = httpx.post(webhook, json=payload, timeout=10)
        response.raise_for_status()
        log.info("HA notificatie verstuurd voor %s", listing.adres)
    except Exception as e:
        log.error("HA webhook fout: %s", e)
        notify_email(listing, travel)  # fallback
def notify_email(listing: RawListing, travel: dict[str,int]) -> None:
    """
    Send an HTML e-mail about a new listing (fallback for the webhook).

    Does nothing when ``config.SMTP_HOST`` is empty. STARTTLS and login are
    only performed when ``config.SMTP_USER`` is set; the password is read
    from the SMTP_PASSWORD environment variable. Send errors are logged,
    not raised.

    Listing fields come from scraped pages, so they are HTML-escaped before
    being placed in the body, and a listing without a price is rendered as
    "onbekend" instead of failing on the number format.
    """
    # Local import: html is not imported at module level.
    from html import escape

    if not config.SMTP_HOST:
        return

    prijs = f"€{listing.prijs:,}" if listing.prijs is not None else "onbekend"
    # Collapse whitespace so scraped text cannot inject extra header lines.
    subject = " ".join(
        f"Nieuwe woning: {listing.adres}, {listing.stad} — {prijs}".split()
    )
    image_tag = (
        f"<img src='{escape(listing.hero_image_url)}' width='600'>"
        if listing.hero_image_url else ""
    )
    body = f"""
<html><body>
<h2>{escape(str(listing.adres))}, {escape(str(listing.stad))}</h2>
<p><strong>Prijs:</strong> {prijs}</p>
<p><strong>Status:</strong> {escape(str(listing.status))}</p>
<p><strong>Fiets P1:</strong> {travel.get('fiets_mark')} min &nbsp;
<strong>OV P1:</strong> {travel.get('ov_mark')} min</p>
<p><strong>Fiets P2:</strong> {travel.get('fiets_michelle')} min &nbsp;
<strong>OV P2:</strong> {travel.get('ov_michelle')} min</p>
{image_tag}
<p><a href="{escape(listing.url)}">Bekijk listing</a></p>
</body></html>
"""
    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["From"] = config.SMTP_FROM
    msg["To"] = config.SMTP_TO
    msg.attach(MIMEText(body, "html"))
    try:
        with smtplib.SMTP(config.SMTP_HOST, config.SMTP_PORT) as s:
            if config.SMTP_USER:
                s.starttls()
                s.login(config.SMTP_USER, os.environ.get("SMTP_PASSWORD", ""))
            s.send_message(msg)
        log.info("Email verstuurd voor %s", listing.adres)
    except Exception as e:
        log.error("Email fout: %s", e)
# ---------------------------------------------------------------------------
# Orchestration
# ---------------------------------------------------------------------------
# A scraper takes no arguments and returns the listings it found.
Scraper = Callable[[], list[RawListing]]
def run(scrapers: list[Scraper] | dict[str, Scraper], db_path: str) -> None:
    """
    Run every scraper, store the results and notify about new listings.

    Args:
        scrapers: the scraper callables, either as an iterable or as a
            name -> callable registry (in which case its values are used).
        db_path: path of the SQLite database.

    A failing scraper or listing is logged and skipped, and a failed
    travel-time lookup no longer prevents the listing from being stored.
    The connection is closed even when an unexpected error escapes.
    """
    # Iterating a dict would yield its string keys instead of the callables.
    if isinstance(scrapers, dict):
        scrapers = list(scrapers.values())
    conn = get_db(db_path)
    total_new = 0
    try:
        for scraper in scrapers:
            name = getattr(scraper, "__name__", repr(scraper))
            log.info("Scraper starten: %s", name)
            try:
                listings = scraper()
            except Exception as e:
                log.error("Scraper %s gefaald: %s", name, e)
                continue
            log.info("Scraper %s: %d listings opgehaald", name, len(listings))
            for listing in listings:
                travel = {}
                try:
                    # Travel times are only computed for listings not stored yet.
                    lid = listing_id(listing.url)
                    is_existing = conn.execute(
                        "SELECT id FROM woningen WHERE id = ?", (lid,)
                    ).fetchone() is not None
                    if not is_existing:
                        try:
                            travel = bereken_reistijden(listing.postcode)
                        except Exception as e:
                            log.error("Reistijden mislukt voor %s: %s", listing.url, e)
                            travel = {}
                    is_new = upsert(conn, listing, travel)
                    if is_new:
                        total_new += 1
                        log.info("Nieuwe woning: %s (%s)", listing.adres, listing.url)
                        notify_ha(listing, travel)
                except Exception as e:
                    log.error("Fout bij verwerken %s: %s", listing.url, e)
        log.info("Run klaar. %d nieuwe woningen gevonden.", total_new)
    finally:
        conn.close()

16
src/main.py Normal file
View File

@@ -0,0 +1,16 @@
import logging
import sys
import config
from adapters import SCRAPERS
from huizenbot import run
# Log to stdout; the space before %(message)s keeps the logger name and the
# message from running together.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S",
)
if __name__ == "__main__":
    # SCRAPERS is a name -> callable registry; run() works on the callables.
    run(list(SCRAPERS.values()), config.DB_PATH)

95
src/nine292.py Normal file
View File

@@ -0,0 +1,95 @@
"""9292 public transport travel time via their web API."""
import hashlib
import hmac
import logging
import time
import urllib.parse
from datetime import date, timedelta
import httpx
log = logging.getLogger(__name__)
# Host of the 9292 web API; request paths are appended to it.
_BASE_URL = "https://web-api.9292.nl"
# Key that _sign uses for the HMAC-SHA256 x-validation-token.
_HMAC_SECRET = "ZVWm_Qytmq.Bo-guenFtRfUPi_vMFq4yrdDA6RYZAijNi4qocHmq6oZ"
# Sent as the User-Agent header and also part of the signed message in
# _sign, so both uses must stay identical.
_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:149.0) Gecko/20100101 Firefox/149.0"
def _encode_params(params: dict) -> str:
"""Replicate 9292's Ye() param serializer: standard urlencode with + for spaces."""
return urllib.parse.urlencode(params).replace("%20", "+")
def _sign(url_path: str, params: dict) -> tuple[str, str]:
    """
    Build the signature headers for a request.

    Returns (x-request-time, x-validation-token): the current time in
    milliseconds as a string, and the hex HMAC-SHA256 of timestamp + path
    (with query string, when there are params) + User-Agent.
    """
    ts = str(int(time.time() * 1000))
    qs = _encode_params(params)
    target = url_path + ("?" + qs if qs else "")
    message = "".join((ts, target, _USER_AGENT))
    mac = hmac.new(_HMAC_SECRET.encode(), message.encode(), hashlib.sha256)
    return ts, mac.hexdigest()
def _next_weekday_morning() -> str:
"""First upcoming weekday at 08:30, as ISO 8601 for 9292."""
d = date.today() + timedelta(days=1)
while d.weekday() >= 5:
d += timedelta(days=1)
return d.strftime("%Y-%m-%dT08:30:00.000Z")
def ov_minuten_9292(from_loc: str, to_loc: str) -> int | None:
    """
    Travel time in minutes via 9292.

    Locations are 9292-style strings, e.g.:
        "delft/2629hg"
        "station-amsterdam-centraal"
        "amsterdam/1011ab"

    Returns:
        The shortest ``durationInMinutes`` among the returned previews
        (cancelled ones are ignored unless all are cancelled), or None when
        the arguments are not strings, no advice is returned, or the request
        fails. Failures are logged, never raised.
    """
    # Non-string input (e.g. coordinate tuples) used to raise on .lower()
    # before the try block; report it like any other failure instead.
    if not isinstance(from_loc, str) or not isinstance(to_loc, str):
        log.error("9292 verwacht locatie-strings, kreeg %r en %r", from_loc, to_loc)
        return None

    url_path = "/api/v1/plans"
    params = {
        "from": from_loc.lower(),
        "to": to_loc.lower(),
        "requestType": "Departure",
        "dateTime": _next_weekday_morning(),
        "planWithAccessibility": "false",
        "extraInterchangeTime": "0",
        "firstMileLessWalking": "false",
        "lastMileLessWalking": "false",
        "firstMileModality": "Walking",
        "lastMileModality": "Walking",
        "previewsBefore": "0",
        "previewsAfter": "3",
    }
    # The token is computed over the same path and params that are sent.
    ts, token = _sign(url_path, params)
    headers = {
        "User-Agent": _USER_AGENT,
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "nl",
        "x-origin": "Plan",
        "x-request-time": ts,
        "x-validation-token": token,
        "Origin": "https://9292.nl",
        "Referer": "https://9292.nl/",
    }
    try:
        r = httpx.get(
            _BASE_URL + url_path,
            params=params,
            headers=headers,
            timeout=15,
        )
        r.raise_for_status()
        previews = r.json().get("previews", [])
        if not previews:
            log.warning("9292 geen reisadvies voor %s → %s", from_loc, to_loc)
            return None
        # Take the shortest non-cancelled journey; fall back to all previews
        # when every one of them is cancelled.
        valid = [p for p in previews if not p.get("cancelled")] or previews
        return min(p["durationInMinutes"] for p in valid)
    except Exception as e:
        log.error("9292 fout voor %s → %s: %s", from_loc, to_loc, e)
        return None