first setup, travel works, bjornd api works
This commit is contained in:
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
6
src/adapters/__init__.py
Normal file
6
src/adapters/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from os import wait
|
||||
from typing import Callable
|
||||
from adapters.api import SCRAPERS as _API
|
||||
from adapters.ssr import SCRAPERS as _SSR
|
||||
|
||||
# Combined registry of every adapter, keyed by scraper name.
# On a duplicate name the SSR entry replaces the API entry.
SCRAPERS: dict[str, Callable] = {**_API, **_SSR}
|
||||
116
src/adapters/api.py
Normal file
116
src/adapters/api.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""
|
||||
adapters/api.py — JSON/API-based makelaars
|
||||
|
||||
Elke scraper is een functie () -> list[RawListing].
|
||||
Voeg nieuwe toe onderaan en registreer in SCRAPERS.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
import config
|
||||
from huizenbot import RawListing
|
||||
|
||||
log = logging.getLogger("huizenbot.api")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gedeelde HTTP helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def fetch_json(url: str, *, params: dict | None = None, headers: dict | None = None) -> dict | list:
    """
    GET ``url`` and return the decoded JSON body.

    The configured User-Agent is always sent; ``headers`` are merged on top of
    it. Every request uses a 15 second timeout. A 429 response is retried, up
    to three attempts in total, after sleeping for the number of seconds
    given in the ``Retry-After`` header (60 when absent or not a number).

    Raises:
        httpx.HTTPError: on transport failures, or on any non-success status
            other than 429.
        RuntimeError: when every attempt was answered with 429.
    """
    request_headers = {"User-Agent": config.USER_AGENT, **(headers or {})}
    max_attempts = 3

    for attempt in range(1, max_attempts + 1):
        r = httpx.get(url, params=params, headers=request_headers, timeout=15)
        if r.status_code != 429:
            r.raise_for_status()
            return r.json()

        if attempt == max_attempts:
            break  # about to give up, so sleeping would only delay the error

        try:
            delay = max(0, int(r.headers.get("Retry-After", 60)))
        except ValueError:
            # Retry-After may also be an HTTP-date; fall back to the default.
            delay = 60
        log.warning("429 op %s, wacht %ds", url, delay)
        time.sleep(delay)

    raise RuntimeError(f"Blijvend 429 op {url}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bjornd
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_BJORND_BASE = "https://www.bjornd.nl"
|
||||
_BJORND_SKIP = {"rented", "rented_ur"}
|
||||
|
||||
_STATUS_MAP = {
|
||||
"available": "beschikbaar",
|
||||
"under_bid": "onder_bod",
|
||||
"under_option": "onder_bod",
|
||||
"sold": "verkocht",
|
||||
"sold_ur": "verkocht",
|
||||
}
|
||||
|
||||
|
||||
def fetch_bjornd() -> list[RawListing]:
    """
    Fetch the for-sale listings from the bjornd.nl realtime JSON endpoint.

    An item is skipped when it is not a sales item, when its ``statusOrig``
    is in ``_BJORND_SKIP``, or when its ``salesPrice`` exceeds
    ``config.MAX_PRICE``. Items without a price are kept: the price is
    unknown, not too high.

    Returns:
        One ``RawListing`` per remaining item. Fields the API leaves empty
        become None.
    """
    data = fetch_json(
        f"{_BJORND_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )

    # Source fields that have no dedicated RawListing attribute.
    extra_keys = (
        "balcony",
        "garden",
        "mainType",
        "buildType",
        "district",
        "lat",
        "lng",
        "isFurnished",
        "hasOpenHouse",
        "description",
        "photos",
    )

    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _BJORND_SKIP:
            continue
        price = item.get("salesPrice")
        # Comparing a missing price (None) with an int raises TypeError.
        if price is not None and price > config.MAX_PRICE:
            continue

        listings.append(RawListing(
            url=_BJORND_BASE + item["url"],
            source_makelaar="bjornd",
            status=_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=item.get("zipcode") or None,
            stad=item.get("city") or None,
            prijs=price or None,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            hero_image_url=item.get("photo") or None,
            # Passed as a plain dict, matching RawListing.extra's declared
            # type. Serialising it here as well led to double-encoded JSON,
            # because upsert() calls json.dumps() on this field.
            extra={key: item.get(key) for key in extra_keys},
        ))

    log.info("bjornd: %d koopwoningen opgehaald", len(listings))
    return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SCRAPERS — exporteer hier alle actieve API adapters
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Registry of the active API adapters: scraper name -> zero-argument fetch function.
SCRAPERS = dict(bjornd=fetch_bjornd)
|
||||
154
src/adapters/ssr.py
Normal file
154
src/adapters/ssr.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""
|
||||
adapters/ssr.py — HTML/SSR-based makelaars
|
||||
|
||||
Elke scraper is een functie () -> list[RawListing].
|
||||
Voeg nieuwe toe onderaan en registreer in SCRAPERS.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
|
||||
import httpx
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import config
|
||||
from huizenbot import RawListing
|
||||
|
||||
log = logging.getLogger("huizenbot.ssr")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gedeelde HTTP helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def fetch_soup(url: str, *, params: dict | None = None) -> BeautifulSoup:
    """
    GET ``url`` and return the response body parsed with ``html.parser``.

    The request sends the configured User-Agent, follows redirects and uses a
    15 second timeout. A 429 response is retried, up to three attempts in
    total, after sleeping for the number of seconds given in the
    ``Retry-After`` header (60 when absent or not a number).

    Raises:
        httpx.HTTPError: on transport failures, or on any non-success status
            other than 429.
        RuntimeError: when every attempt was answered with 429.
    """
    max_attempts = 3

    for attempt in range(1, max_attempts + 1):
        r = httpx.get(
            url,
            params=params,
            headers={"User-Agent": config.USER_AGENT},
            timeout=15,
            follow_redirects=True,
        )
        if r.status_code != 429:
            r.raise_for_status()
            return BeautifulSoup(r.text, "html.parser")

        if attempt == max_attempts:
            break  # about to give up, so sleeping would only delay the error

        try:
            delay = max(0, int(r.headers.get("Retry-After", 60)))
        except ValueError:
            # Retry-After may also be an HTTP-date; fall back to the default.
            delay = 60
        log.warning("429 op %s, wacht %ds", url, delay)
        time.sleep(delay)

    raise RuntimeError(f"Blijvend 429 op {url}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parse helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_prijs(text: str | None) -> int | None:
|
||||
"""'€ 325.000 k.k.' → 325000"""
|
||||
if not text:
|
||||
return None
|
||||
digits = re.sub(r"[^\d]", "", text)
|
||||
return int(digits) if digits else None
|
||||
|
||||
|
||||
def parse_m2(text: str | None) -> int | None:
|
||||
"""'87 m²' → 87"""
|
||||
if not text:
|
||||
return None
|
||||
m = re.search(r"(\d+)", text.replace(".", ""))
|
||||
return int(m.group(1)) if m else None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Björn & Dries adapter (bjornd.nl)
|
||||
# ---------------------------------------------------------------------------
|
||||
# TODO: vul de echte CSS selectors in na inspectie van de pagina.
|
||||
# Dit is een structureel sjabloon — de selectors zijn placeholders.
|
||||
|
||||
# Site root, also used to complete relative listing links.
BJORND_BASE = "https://www.bjornd.nl"
# Overview page that fetch_bjornd_demo() downloads.
BJORND_AANBOD = BJORND_BASE + "/aanbod"
|
||||
|
||||
|
||||
def fetch_bjornd_demo() -> list[RawListing]:
    """
    Template SSR scraper for the bjornd.nl overview page.

    NOTE: the CSS selectors below are placeholders that still have to be
    replaced after inspecting the real page markup.

    Cards without a link are skipped. A card that fails to parse is logged
    and skipped, so one bad card does not discard the others.
    """
    # Local import: urljoin is not part of this module's import header.
    from urllib.parse import urljoin

    soup = fetch_soup(BJORND_AANBOD)
    listings = []

    for card in soup.select(".property-card"):  # placeholder selector
        try:
            link = card.select_one("a[href]")
            if not link:
                continue
            # urljoin leaves absolute URLs alone and resolves every relative
            # form. Plain concatenation produced an invalid URL for hrefs
            # without a leading slash.
            url = urljoin(BJORND_BASE + "/", link["href"])

            adres = _text(card, ".property-address")  # placeholder selector
            postcode = _extract_postcode(_text(card, ".property-location"))
            prijs = parse_prijs(_text(card, ".property-price"))
            opp = parse_m2(_text(card, ".property-area"))
            img = _src(card, "img")

            listings.append(RawListing(
                url=url,
                source_makelaar="bjornd",
                adres=adres,
                postcode=postcode,
                stad=_infer_stad(postcode),
                prijs=prijs,
                woonoppervlak=opp,
                hero_image_url=img,
            ))
        except Exception as e:
            log.warning("Fout bij parsen bjornd card: %s", e)

    return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SSR helper utils
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _text(soup, selector: str) -> str | None:
|
||||
el = soup.select_one(selector)
|
||||
return el.get_text(strip=True) if el else None
|
||||
|
||||
|
||||
def _src(soup, selector: str) -> str | None:
|
||||
el = soup.select_one(selector)
|
||||
if el is None:
|
||||
return None
|
||||
return el.get("src") or el.get("data-src")
|
||||
|
||||
|
||||
def _extract_postcode(text: str | None) -> str | None:
|
||||
if not text:
|
||||
return None
|
||||
m = re.search(r"\b(\d{4}\s?[A-Z]{2})\b", text)
|
||||
return m.group(1).replace(" ", "") if m else None
|
||||
|
||||
|
||||
def _infer_stad(postcode: str | None) -> str | None:
|
||||
"""Simpele mapping op basis van postcode range — uitbreiden naar wens."""
|
||||
if not postcode:
|
||||
return None
|
||||
code = int(postcode[:4])
|
||||
if 2600 <= code <= 2629:
|
||||
return "Delft"
|
||||
if 3100 <= code <= 3135:
|
||||
return "Schiedam"
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Registry of the active SSR adapters: scraper name -> zero-argument fetch function.
SCRAPERS = dict(bjornd_demo=fetch_bjornd_demo)
|
||||
25
src/config.py
Normal file
25
src/config.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""
|
||||
config.py — vul aan met je eigen waarden. Secrets via environment variables.
|
||||
"""
|
||||
import os
|
||||
|
||||
# Work addresses: the bare postcode, and the "city/postcode" form of the same
# address.
MARK_WERK_POSTCODE = "2629HG"
MICHELLE_WERK_POSTCODE = "3133AV"
MARK_WERK_9292 = f"delft/{MARK_WERK_POSTCODE}"
MICHELLE_WERK_9292 = f"vlaardingen/{MICHELLE_WERK_POSTCODE}"

# Home Assistant webhook; empty when the environment variable is not set.
HA_WEBHOOK_URL = os.getenv("HA_WEBHOOK_URL", "")

# Mail settings, all read from the environment.
SMTP_HOST = os.getenv("SMTP_HOST", "")
SMTP_PORT = int(os.getenv("SMTP_PORT", "587"))
SMTP_FROM = os.getenv("SMTP_FROM", "")
SMTP_TO = os.getenv("SMTP_TO", "")
SMTP_USER = os.getenv("SMTP_USER", "")

# User-Agent header value for outgoing requests.
USER_AGENT = "Huizenbot/1.0 (+mark@kalsbeek.dev) persoonlijk gebruik"

# Location of the SQLite database file.
DB_PATH = os.getenv("DB_PATH", "/data/huizenbot.db")

# Divisor applied to bike travel times.
FIETS_SNELHEID_FACTOR = 1.27

# Listings priced above this amount are skipped.
MAX_PRICE = 300_000
|
||||
374
src/huizenbot.py
Normal file
374
src/huizenbot.py
Normal file
@@ -0,0 +1,374 @@
|
||||
"""
|
||||
huizenbot.py — models, db, travel, notify, orchestration
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import smtplib
|
||||
import sqlite3
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, date
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from typing import Callable, Any
|
||||
|
||||
import httpx
|
||||
|
||||
import config
|
||||
from nine292 import ov_minuten_9292
|
||||
|
||||
log = logging.getLogger("huizenbot")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
|
||||
class RawListing:
|
||||
url: str # required
|
||||
|
||||
source_makelaar: str = ""
|
||||
datum_aanmelding: str | None = None
|
||||
status: str = "beschikbaar" # beschikbaar | onder_bod | verkocht
|
||||
|
||||
adres: str | None = None
|
||||
postcode: str | None = None
|
||||
stad: str | None = None
|
||||
|
||||
prijs: int | None = None
|
||||
woningtype: str | None = None
|
||||
woonoppervlak: int | None = None
|
||||
perceeloppervlak: int | None = None
|
||||
kamers: int | None = None
|
||||
slaapkamers: int | None = None
|
||||
bouwjaar: int | None = None
|
||||
energielabel: str | None = None
|
||||
|
||||
hero_image_url: str | None = None
|
||||
|
||||
extra: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def listing_id(url: str) -> str:
    """Return the SHA-256 hex digest of ``url``, used as the stable id of a listing."""
    digest = hashlib.sha256()
    digest.update(url.encode())
    return digest.hexdigest()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Database
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SCHEMA = """
|
||||
CREATE TABLE IF NOT EXISTS woningen (
|
||||
id TEXT PRIMARY KEY,
|
||||
url TEXT UNIQUE NOT NULL,
|
||||
source_makelaar TEXT NOT NULL,
|
||||
first_seen TEXT NOT NULL,
|
||||
last_seen TEXT NOT NULL,
|
||||
datum_aanmelding TEXT,
|
||||
|
||||
status TEXT NOT NULL DEFAULT 'beschikbaar',
|
||||
|
||||
adres TEXT,
|
||||
postcode TEXT,
|
||||
stad TEXT,
|
||||
|
||||
prijs INTEGER,
|
||||
woningtype TEXT,
|
||||
woonoppervlak INTEGER,
|
||||
perceeloppervlak INTEGER,
|
||||
kamers INTEGER,
|
||||
slaapkamers INTEGER,
|
||||
bouwjaar INTEGER,
|
||||
energielabel TEXT,
|
||||
|
||||
hero_image_url TEXT,
|
||||
|
||||
fiets_mark INTEGER,
|
||||
fiets_michelle INTEGER,
|
||||
ov_mark INTEGER,
|
||||
ov_michelle INTEGER,
|
||||
|
||||
extra TEXT
|
||||
);
|
||||
"""
|
||||
|
||||
|
||||
def get_db(path: str) -> sqlite3.Connection:
    """
    Open the SQLite database at ``path`` and make sure the schema exists.

    The returned connection yields ``sqlite3.Row`` rows and has WAL
    journaling switched on.
    """
    connection = sqlite3.connect(path)
    connection.row_factory = sqlite3.Row
    connection.execute("PRAGMA journal_mode=WAL")
    connection.executescript(SCHEMA)
    return connection
|
||||
|
||||
|
||||
def upsert(conn: sqlite3.Connection, listing: RawListing, travel: dict[str, int]) -> bool:
    """
    Insert a new listing, or refresh ``last_seen`` and ``status`` of a known one.

    Rows are keyed by ``listing_id(listing.url)``. Travel times are only
    written on insert; keys missing from ``travel`` are stored as NULL. The
    change is committed before returning.

    Returns:
        True when the listing was not in the table yet, False otherwise.
    """
    # Local import: the module header only imports datetime and date.
    from datetime import timezone

    # Naive UTC ISO timestamp: the same stored format as the deprecated
    # datetime.utcnow().isoformat().
    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    lid = listing_id(listing.url)

    row = conn.execute("SELECT id FROM woningen WHERE id = ?", (lid,)).fetchone()
    is_new = row is None

    if is_new:
        extra = listing.extra
        if not extra:
            extra_json = None
        elif isinstance(extra, str):
            # Already serialised by the adapter; encoding it again would
            # store a JSON string that itself contains JSON.
            extra_json = extra
        else:
            extra_json = json.dumps(extra)

        conn.execute("""
            INSERT INTO woningen (
                id, url, source_makelaar, first_seen, last_seen, datum_aanmelding,
                status, adres, postcode, stad,
                prijs, woningtype, woonoppervlak, perceeloppervlak,
                kamers, slaapkamers, bouwjaar, energielabel,
                hero_image_url,
                fiets_mark, fiets_michelle, ov_mark, ov_michelle,
                extra
            ) VALUES (
                :id, :url, :source_makelaar, :first_seen, :last_seen, :datum_aanmelding,
                :status, :adres, :postcode, :stad,
                :prijs, :woningtype, :woonoppervlak, :perceeloppervlak,
                :kamers, :slaapkamers, :bouwjaar, :energielabel,
                :hero_image_url,
                :fiets_mark, :fiets_michelle, :ov_mark, :ov_michelle,
                :extra
            )
        """, {
            "id": lid,
            "url": listing.url,
            "source_makelaar": listing.source_makelaar,
            "first_seen": now,
            "last_seen": now,
            "datum_aanmelding": listing.datum_aanmelding,
            "status": listing.status,
            "adres": listing.adres,
            "postcode": listing.postcode,
            "stad": listing.stad,
            "prijs": listing.prijs,
            "woningtype": listing.woningtype,
            "woonoppervlak": listing.woonoppervlak,
            "perceeloppervlak": listing.perceeloppervlak,
            "kamers": listing.kamers,
            "slaapkamers": listing.slaapkamers,
            "bouwjaar": listing.bouwjaar,
            "energielabel": listing.energielabel,
            "hero_image_url": listing.hero_image_url,
            "fiets_mark": travel.get("fiets_mark"),
            "fiets_michelle": travel.get("fiets_michelle"),
            "ov_mark": travel.get("ov_mark"),
            "ov_michelle": travel.get("ov_michelle"),
            "extra": extra_json,
        })
    else:
        conn.execute(
            "UPDATE woningen SET last_seen = ?, status = ? WHERE id = ?",
            (now, listing.status, lid),
        )

    conn.commit()
    return is_new
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Travel
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_geocode_cache: dict[str, tuple[float, float]] = {}
|
||||
|
||||
|
||||
def geocode(postcode: str) -> tuple[float, float] | None:
|
||||
"""Postcode → (lat, lon) via Nominatim. Respects 1 req/s."""
|
||||
if postcode in _geocode_cache:
|
||||
return _geocode_cache[postcode]
|
||||
|
||||
time.sleep(1) # Nominatim rate limit
|
||||
try:
|
||||
r = httpx.get(
|
||||
"https://nominatim.openstreetmap.org/search",
|
||||
params={"q": postcode + ", Netherlands", "format": "json", "limit": 1},
|
||||
headers={"User-Agent": config.USER_AGENT},
|
||||
timeout=10,
|
||||
)
|
||||
_response = r.raise_for_status()
|
||||
results = r.json()
|
||||
if not results:
|
||||
log.warning("Geocode geen resultaat voor %s", postcode)
|
||||
return None
|
||||
lat, lon = float(results[0]["lat"]), float(results[0]["lon"])
|
||||
_geocode_cache[postcode] = (lat, lon)
|
||||
return lat, lon
|
||||
except Exception as e:
|
||||
log.error("Geocode fout voor %s: %s", postcode, e)
|
||||
return None
|
||||
|
||||
def fiets_minuten(origin: tuple[float, float], dest: tuple[float, float]) -> int | None:
    """
    Bike travel time in minutes between two ``(lat, lon)`` points.

    The duration of the first route returned by the OSRM server at
    routing.openstreetmap.de is converted to minutes, divided by
    ``config.FIETS_SNELHEID_FACTOR`` and rounded. Returns None when the
    request or the response handling fails; the error is logged.
    """
    try:
        (from_lat, from_lon), (to_lat, to_lon) = origin, dest
        # The URL carries each point as "lon,lat".
        coordinates = f"{from_lon},{from_lat};{to_lon},{to_lat}"
        response = httpx.get(
            "https://routing.openstreetmap.de/routed-bike/route/v1/driving/"
            + coordinates
            + "?overview=false",
            timeout=10,
        )
        response.raise_for_status()
        duration_s = response.json()["routes"][0]["duration"]
        return round(duration_s / 60 / config.FIETS_SNELHEID_FACTOR)
    except Exception as e:
        log.error("OSRM fout: %s", e)
        return None
|
||||
|
||||
def ov_minuten(from_loc: str, to_loc: str) -> int | None:
    """Public-transport travel time in minutes; delegates to ``ov_minuten_9292``."""
    minutes = ov_minuten_9292(from_loc, to_loc)
    return minutes
|
||||
|
||||
|
||||
def _next_weekday_morning() -> str:
|
||||
"""Geeft eerstvolgende doordeweekse dag om 08:30 als Navitia datetime string."""
|
||||
from datetime import timedelta
|
||||
d = date.today()
|
||||
d += timedelta(days=1)
|
||||
while d.weekday() >= 5: # 5=zaterdag, 6=zondag
|
||||
d += timedelta(days=1)
|
||||
return d.strftime("%Y%m%dT083000")
|
||||
|
||||
|
||||
def bereken_reistijden(postcode: str | None, stad: str | None = None) -> dict[str, int | None]:
    """
    Compute bike and public-transport travel times from a listing to both work addresses.

    Args:
        postcode: postcode of the listing; without it nothing is computed.
        stad: city of the listing. The public-transport lookup needs it,
            because ``ov_minuten_9292`` takes "city/postcode" location strings
            and not coordinates. When omitted, only bike times are returned.

    Returns:
        Dict with any of the keys ``fiets_mark``, ``fiets_michelle``,
        ``ov_mark`` and ``ov_michelle``. A value is a number of minutes, or
        None when that lookup failed. The dict is empty when the postcode is
        missing or cannot be geocoded.
    """
    if not postcode:
        return {}

    woning_coords = geocode(postcode)
    if not woning_coords:
        return {}

    ov_from = None
    if stad:
        # assumes 9292 city slugs are lower-case with hyphens for spaces — TODO confirm
        slug = stad.strip().lower().replace(" ", "-")
        ov_from = f"{slug}/{postcode.replace(' ', '')}"

    # (result suffix, postcode for geocoding, 9292 location string)
    werkplekken = (
        ("mark", config.MARK_WERK_POSTCODE, config.MARK_WERK_9292),
        ("michelle", config.MICHELLE_WERK_POSTCODE, config.MICHELLE_WERK_9292),
    )

    result = {}
    for naam, werk_postcode, werk_9292 in werkplekken:
        werk_coords = geocode(werk_postcode)
        if werk_coords:
            result[f"fiets_{naam}"] = fiets_minuten(woning_coords, werk_coords)
        if ov_from:
            # 9292 gets location strings. Passing the coordinate tuples
            # raised AttributeError inside ov_minuten_9292.
            result[f"ov_{naam}"] = ov_minuten(ov_from, werk_9292)

    return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Notify
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def notify_ha(listing: RawListing, travel: dict[str, int]) -> None:
    """
    Announce a listing through the Home Assistant webhook.

    The JSON payload holds the address, city, price, status, URL, image and
    the four travel times (None for any that are missing). When no webhook
    is configured, or posting to it fails, the notification goes to
    ``notify_email`` instead. Errors are logged, never raised.
    """
    if not config.HA_WEBHOOK_URL:
        # Without a webhook, e-mail is the only remaining channel.
        notify_email(listing, travel)
        return

    payload = {
        "adres": listing.adres,
        "stad": listing.stad,
        "prijs": listing.prijs,
        "status": listing.status,
        "url": listing.url,
        "image": listing.hero_image_url,
    }
    for key in ("fiets_mark", "fiets_michelle", "ov_mark", "ov_michelle"):
        payload[key] = travel.get(key)

    try:
        r = httpx.post(config.HA_WEBHOOK_URL, json=payload, timeout=10)
        r.raise_for_status()
        log.info("HA notificatie verstuurd voor %s", listing.adres)
    except Exception as e:
        log.error("HA webhook fout: %s", e)
        notify_email(listing, travel)  # fallback
|
||||
|
||||
|
||||
def notify_email(listing: RawListing, travel: dict[str, int]) -> None:
    """
    Send an HTML e-mail describing a listing.

    Does nothing when ``config.SMTP_HOST`` is empty. When ``config.SMTP_USER``
    is set, the connection is upgraded with STARTTLS and logged in with the
    password from the ``SMTP_PASSWORD`` environment variable. Send errors are
    logged, never raised.
    """
    if not config.SMTP_HOST:
        return

    # Local import: `html` is not part of this module's import header.
    from html import escape

    def _shown(value) -> str:
        # Listing fields come from scraped pages, so escape them before they
        # go into the HTML body.
        return escape(str(value))

    def _minutes(key: str) -> str:
        value = travel.get(key)
        return "onbekend" if value is None else f"{value} min"

    # prijs is optional; formatting None with ":," raises TypeError.
    if listing.prijs is not None:
        prijs_txt = f"€{listing.prijs:,}"
    else:
        prijs_txt = "prijs onbekend"

    subject = f"Nieuwe woning: {listing.adres}, {listing.stad} — {prijs_txt}"

    image_tag = ""
    if listing.hero_image_url:
        image_tag = f"<img src='{_shown(listing.hero_image_url)}' width='600'>"

    body = f"""
    <html><body>
    <h2>{_shown(listing.adres)}, {_shown(listing.stad)}</h2>
    <p><strong>Prijs:</strong> {_shown(prijs_txt)}</p>
    <p><strong>Status:</strong> {_shown(listing.status)}</p>
    <p><strong>Fiets P1:</strong> {_minutes('fiets_mark')}
    <strong>OV P1:</strong> {_minutes('ov_mark')}</p>
    <p><strong>Fiets P2:</strong> {_minutes('fiets_michelle')}
    <strong>OV P2:</strong> {_minutes('ov_michelle')}</p>
    {image_tag}
    <p><a href="{_shown(listing.url)}">Bekijk listing</a></p>
    </body></html>
    """

    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["From"] = config.SMTP_FROM
    msg["To"] = config.SMTP_TO
    msg.attach(MIMEText(body, "html"))

    try:
        with smtplib.SMTP(config.SMTP_HOST, config.SMTP_PORT) as s:
            if config.SMTP_USER:
                s.starttls()
                s.login(config.SMTP_USER, os.environ.get("SMTP_PASSWORD", ""))
            s.send_message(msg)
        log.info("Email verstuurd voor %s", listing.adres)
    except Exception as e:
        log.error("Email fout: %s", e)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Orchestration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# A scraper takes no arguments and returns the listings it found.
Scraper = Callable[[], list[RawListing]]


def run(scrapers: list[Scraper], db_path: str) -> None:
    """
    Run every scraper once, store the results and notify about new listings.

    Args:
        scrapers: the scraper callables. A name -> callable dict, such as an
            adapter registry, is accepted too; its values are used.
        db_path: path of the SQLite database file.

    A failing scraper is logged and skipped. A listing that fails to process
    is logged and skipped. Travel times are only computed for listings not
    yet in the database, and a travel-time failure no longer prevents the
    listing from being stored. The connection is always closed.
    """
    if isinstance(scrapers, dict):
        # Iterating a dict yields its keys (the names), which have no
        # __name__ and cannot be called.
        scrapers = list(scrapers.values())

    conn = get_db(db_path)
    total_new = 0

    try:
        for scraper in scrapers:
            name = getattr(scraper, "__name__", repr(scraper))
            log.info("Scraper starten: %s", name)
            try:
                listings = scraper()
            except Exception as e:
                log.error("Scraper %s gefaald: %s", name, e)
                continue

            log.info("Scraper %s: %d listings opgehaald", name, len(listings))

            for listing in listings:
                travel = {}
                try:
                    # Look the listing up before the upsert, so the travel
                    # lookups only run for listings seen for the first time.
                    lid = listing_id(listing.url)
                    is_existing = conn.execute(
                        "SELECT id FROM woningen WHERE id = ?", (lid,)
                    ).fetchone() is not None

                    if not is_existing:
                        try:
                            travel = bereken_reistijden(listing.postcode)
                        except Exception as e:
                            # Travel times are optional; store the listing without them.
                            log.error("Reistijden fout voor %s: %s", listing.url, e)
                            travel = {}

                    is_new = upsert(conn, listing, travel)

                    if is_new:
                        total_new += 1
                        log.info("Nieuwe woning: %s (%s)", listing.adres, listing.url)
                        notify_ha(listing, travel)

                except Exception as e:
                    log.error("Fout bij verwerken %s: %s", listing.url, e)

        log.info("Run klaar. %d nieuwe woningen gevonden.", total_new)
    finally:
        conn.close()
|
||||
16
src/main.py
Normal file
16
src/main.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import config
|
||||
from adapters import SCRAPERS
|
||||
from huizenbot import run
|
||||
|
||||
# Log to stdout with an ISO-style timestamp.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s — %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S",
)

if __name__ == "__main__":
    # SCRAPERS maps names to functions, while run() iterates over scraper
    # callables. Pass the functions, not the dict, whose iteration yields
    # the names.
    run(list(SCRAPERS.values()), config.DB_PATH)
|
||||
95
src/nine292.py
Normal file
95
src/nine292.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""9292 public transport travel time via their web API."""
|
||||
import hashlib
|
||||
import hmac
|
||||
import logging
|
||||
import time
|
||||
import urllib.parse
|
||||
from datetime import date, timedelta
|
||||
|
||||
import httpx
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_BASE_URL = "https://web-api.9292.nl"
|
||||
_HMAC_SECRET = "ZVWm_Qytmq.Bo-guenFtRfUPi_vMFq4yrdDA6RYZAijNi4qocHmq6oZ"
|
||||
_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:149.0) Gecko/20100101 Firefox/149.0"
|
||||
|
||||
|
||||
def _encode_params(params: dict) -> str:
|
||||
"""Replicate 9292's Ye() param serializer: standard urlencode with + for spaces."""
|
||||
return urllib.parse.urlencode(params).replace("%20", "+")
|
||||
|
||||
|
||||
def _sign(url_path: str, params: dict) -> tuple[str, str]:
    """
    Build the signature header values for one request.

    The signed message is the current time in milliseconds, the path with its
    query string (when there are parameters) and the User-Agent, joined
    without separators. It is signed with HMAC-SHA256.

    Returns:
        ``(x-request-time, x-validation-token)``: the millisecond timestamp
        as a string and the hex digest.
    """
    timestamp_ms = str(int(time.time() * 1000))
    query = _encode_params(params)
    target = url_path + ("?" + query if query else "")
    payload = (timestamp_ms + target + _USER_AGENT).encode()
    mac = hmac.new(_HMAC_SECRET.encode(), payload, hashlib.sha256)
    return timestamp_ms, mac.hexdigest()
|
||||
|
||||
|
||||
def _next_weekday_morning() -> str:
|
||||
"""First upcoming weekday at 08:30, as ISO 8601 for 9292."""
|
||||
d = date.today() + timedelta(days=1)
|
||||
while d.weekday() >= 5:
|
||||
d += timedelta(days=1)
|
||||
return d.strftime("%Y-%m-%dT08:30:00.000Z")
|
||||
|
||||
|
||||
def ov_minuten_9292(from_loc: str, to_loc: str) -> int | None:
    """
    Travel time in minutes via 9292.

    Locations are 9292-style strings, e.g.:
        "delft/2629hg"
        "station-amsterdam-centraal"
        "amsterdam/1011ab"

    The plan is requested as a departure on the next weekday morning. The
    result is the shortest duration among the returned previews that are not
    cancelled, or among all previews when every one is cancelled.

    Returns:
        Minutes as reported by 9292, or None when the arguments are not
        strings, no advice is returned or the request fails. Failures are
        logged, never raised.
    """
    if not (isinstance(from_loc, str) and isinstance(to_loc, str)):
        # Reject non-string input (e.g. coordinate tuples) here; .lower()
        # below would otherwise raise outside the try block.
        log.error(
            "9292 fout voor %s → %s: %s",
            from_loc, to_loc, "locaties moeten strings zijn",
        )
        return None

    url_path = "/api/v1/plans"
    params = {
        "from": from_loc.lower(),
        "to": to_loc.lower(),
        "requestType": "Departure",
        "dateTime": _next_weekday_morning(),
        "planWithAccessibility": "false",
        "extraInterchangeTime": "0",
        "firstMileLessWalking": "false",
        "lastMileLessWalking": "false",
        "firstMileModality": "Walking",
        "lastMileModality": "Walking",
        "previewsBefore": "0",
        "previewsAfter": "3",
    }
    ts, token = _sign(url_path, params)
    headers = {
        "User-Agent": _USER_AGENT,
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "nl",
        "x-origin": "Plan",
        "x-request-time": ts,
        "x-validation-token": token,
        "Origin": "https://9292.nl",
        "Referer": "https://9292.nl/",
    }
    try:
        r = httpx.get(
            _BASE_URL + url_path,
            params=params,
            headers=headers,
            timeout=15,
        )
        r.raise_for_status()
        previews = r.json().get("previews", [])
        if not previews:
            log.warning("9292 geen reisadvies voor %s → %s", from_loc, to_loc)
            return None
        # Prefer journeys that are not cancelled; fall back to all of them.
        valid = [p for p in previews if not p.get("cancelled")] or previews
        return min(p["durationInMinutes"] for p in valid)
    except Exception as e:
        log.error("9292 fout voor %s → %s: %s", from_loc, to_loc, e)
        return None
|
||||
Reference in New Issue
Block a user