tweaks and first real run

This commit is contained in:
2026-04-04 15:23:09 +02:00
parent 423a429f56
commit fbe50790da
5 changed files with 179 additions and 57 deletions

1
.gitignore vendored
View File

@@ -5,3 +5,4 @@
**/__pycache__/
tests/cache/
data/

View File

@@ -1,5 +1,5 @@
"""
config.py — vul aan met je eigen waarden. Secrets via environment variables.
config.py — Secrets via environment variables.
"""
import os
@@ -16,7 +16,40 @@ DB_PATH = os.environ.get("DB_PATH", "/data/huizenbot.db")
FIETS_SNELHEID_FACTOR = 1.27
MAX_PRICE = 300_000
MAX_PRICE = 300_000 # coarse pre-filter in adapters only
MIN_AREA = 65 # Sq meters
# Fine price filter: max mortgage per energy label group * 0.9
# Labels not in this map fall back to the most conservative tier.
_LABEL_DISCOUNT = 0.9
MAX_PRIJS_PER_LABEL: dict[str, int] = {
"EFG": int(286_942 * _LABEL_DISCOUNT),
"CD": int(291_942 * _LABEL_DISCOUNT),
"AB": int(296_942 * _LABEL_DISCOUNT),
"A+": int(306_942 * _LABEL_DISCOUNT),
}
_MAX_PRIJS_ONBEKEND = MAX_PRIJS_PER_LABEL["EFG"] # conservative fallback
def max_prijs_voor_label(label: str | None) -> int:
"""Return the max allowed price for a given energy label (or None/unknown)."""
if not label:
return _MAX_PRIJS_ONBEKEND
l = label.strip().upper()
if l in ("A+++", "A++", "A+"):
return MAX_PRIJS_PER_LABEL["A+"]
if l in ("A", "B"):
return MAX_PRIJS_PER_LABEL["AB"]
if l in ("C", "D"):
return MAX_PRIJS_PER_LABEL["CD"]
if l in ("E", "F", "G"):
return MAX_PRIJS_PER_LABEL["EFG"]
return _MAX_PRIJS_ONBEKEND
# Travel time limits in minutes. A missing (None) travel time passes the filter, with a warning logged.
MAX_OV_MINUTEN_MARK = 50
MAX_OV_MINUTEN_MICHELLE = 50
MAX_FIETS_MINUTEN_MARK = 35
# No cycling-time (fiets) limit for Michelle.
APP_ENV = os.environ.get("APP_ENV", "dev")

View File

@@ -94,6 +94,7 @@ CREATE TABLE IF NOT EXISTS woningen (
def get_db(path: str) -> sqlite3.Connection:
log.info(f"Opening db at path {path}")
conn = sqlite3.connect(path)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA journal_mode=WAL")
@@ -231,7 +232,7 @@ def _next_weekday_morning() -> str:
return d.strftime("%Y%m%dT083000")
def bereken_reistijden(postcode: str | None) -> dict[str, int]:
def bereken_reistijden(postcode: str | None, stad: str | None) -> dict[str, int]:
"""Bereken alle reistijden voor een woning postcode. Geeft lege dict bij falen."""
if not postcode:
return {}
@@ -240,16 +241,20 @@ def bereken_reistijden(postcode: str | None) -> dict[str, int]:
if not woning_coords:
return {}
werk1 = geocode(config.MARK_WERK_POSTCODE)
werk2 = geocode(config.MICHELLE_WERK_POSTCODE)
werk1_coords = geocode(config.MARK_WERK_POSTCODE)
werk2_coords = geocode(config.MICHELLE_WERK_POSTCODE)
# 9292 expects "cityname/postcode" strings (lowercase city)
stad_lower = (stad or "").strip().lower()
woning_9292 = f"{stad_lower}/{postcode}" if stad_lower else postcode
result = {}
if werk1:
result["fiets_mark"] = fiets_minuten(woning_coords, werk1)
result["ov_mark"] = ov_minuten(woning_coords, werk1)
if werk2:
result["fiets_michelle"] = fiets_minuten(woning_coords, werk2)
result["ov_michelle"] = ov_minuten(woning_coords, werk2)
if werk1_coords:
result["fiets_mark"] = fiets_minuten(woning_coords, werk1_coords)
result["ov_mark"] = ov_minuten(woning_9292, config.MARK_WERK_9292)
if werk2_coords:
result["fiets_michelle"] = fiets_minuten(woning_coords, werk2_coords)
result["ov_michelle"] = ov_minuten(woning_9292, config.MICHELLE_WERK_9292)
return result
@@ -283,6 +288,67 @@ def notify_ha(listing: RawListing, travel: dict[str,int]) -> None:
except Exception as e:
log.error("HA webhook fout: %s", e)
# ---------------------------------------------------------------------------
# Filtering
# ---------------------------------------------------------------------------
def _check_filters(listing: RawListing, travel: dict[str, int]) -> bool:
    """
    Return True if the listing passes all filters and should trigger a notification.

    Missing data never filters a listing out: an absent price or living area
    skips that check, and an absent travel time passes with a warning logged.
    Every filter is evaluated even after one has failed, so each reason for
    rejection ends up in the log.

    Args:
        listing: the scraped listing; reads ``prijs``, ``energielabel``,
            ``woonoppervlak`` and ``adres``.
        travel: travel times in minutes, looked up under the keys
            ``"ov_mark"``, ``"ov_michelle"`` and ``"fiets_mark"``.
    """
    passed = True

    # --- Price filter ---
    if listing.prijs is not None:
        max_p = config.max_prijs_voor_label(listing.energielabel)
        if listing.prijs > max_p:
            log.info(
                "Gefilterd op prijs: %s €%d > €%d (label: %s)",
                listing.adres, listing.prijs, max_p, listing.energielabel or "onbekend",
            )
            passed = False

    # --- Area filter ---
    # MIN_AREA is a minimum: a listing of exactly MIN_AREA still passes.
    if listing.woonoppervlak is not None and listing.woonoppervlak < config.MIN_AREA:
        log.info(
            "Gefilterd op oppervlakte: %s < %s",
            listing.woonoppervlak, config.MIN_AREA,
        )
        passed = False

    # --- OV filter ---
    ov_mark = travel.get("ov_mark")
    ov_michelle = travel.get("ov_michelle")

    if ov_mark is None:
        log.warning(
            "OV reistijd mark ONBEKEND voor %s — notificatie wordt toch verstuurd",
            listing.adres,
        )
    elif ov_mark > config.MAX_OV_MINUTEN_MARK:
        log.info(
            "Gefilterd op OV mark: %s %dmin > %dmin",
            listing.adres, ov_mark, config.MAX_OV_MINUTEN_MARK,
        )
        passed = False

    if ov_michelle is None:
        log.warning(
            "OV reistijd michelle ONBEKEND voor %s — notificatie wordt toch verstuurd",
            listing.adres,
        )
    elif ov_michelle > config.MAX_OV_MINUTEN_MICHELLE:
        log.info(
            "Gefilterd op OV michelle: %s %dmin > %dmin",
            listing.adres, ov_michelle, config.MAX_OV_MINUTEN_MICHELLE,
        )
        passed = False

    # --- Fiets filter (mark only) ---
    fiets_mark = travel.get("fiets_mark")
    if fiets_mark is None:
        log.warning(
            "Fiets reistijd mark ONBEKEND voor %s — notificatie wordt toch verstuurd",
            listing.adres,
        )
    elif fiets_mark > config.MAX_FIETS_MINUTEN_MARK:
        log.info(
            "Gefilterd op fiets mark: %s %dmin > %dmin",
            listing.adres, fiets_mark, config.MAX_FIETS_MINUTEN_MARK,
        )
        passed = False

    return passed
# ---------------------------------------------------------------------------
# Orchestration
# ---------------------------------------------------------------------------
@@ -290,42 +356,64 @@ def notify_ha(listing: RawListing, travel: dict[str,int]) -> None:
Scraper = Callable[[], list[RawListing]]
def run(scrapers: list[Scraper], db_path: str) -> None:
conn = get_db(db_path)
total_new = 0
for scraper in scrapers:
name = scraper.__name__
log.info("Scraper starten: %s", name)
try:
listings = scraper()
except Exception as e:
log.error("Scraper %s gefaald: %s", name, e)
continue
def _run_scraper(scraper: Scraper) -> tuple[str, list[RawListing]]:
    """Run a single scraper and return ``(name, listings)``.

    A failing scraper is logged and reported as an empty list instead of
    raising, so one broken scraper cannot abort the others.
    """
    # Not every callable has __name__ (e.g. functools.partial or callable
    # instances); fall back to repr() so the lookup itself cannot raise
    # outside the try block.
    name = getattr(scraper, "__name__", None) or repr(scraper)
    log.info("Scraper starten: %s", name)
    try:
        listings = scraper()
        # len() stays inside the try: a scraper that returns a non-sized
        # value (e.g. None) is treated as a failed scraper as well.
        log.info("Scraper %s: %d listings opgehaald", name, len(listings))
        return name, listings
    except Exception as e:
        log.error("Scraper %s gefaald: %s", name, e)
        return name, []
for listing in listings:
travel = {}
try:
# Check of het een nieuwe woning is vóór upsert
lid = listing_id(listing.url)
is_existing = conn.execute(
"SELECT id FROM woningen WHERE id = ?", (lid,)
).fetchone() is not None
if not is_existing:
travel = bereken_reistijden(listing.postcode)
def run(scrapers: dict[str,Scraper], db_path: str) -> None:
import concurrent.futures
is_new = upsert(conn, listing, travel)
conn = get_db(db_path)
if is_new:
total_new += 1
log.info("Nieuwe woning: %s (%s)", listing.adres, listing.url)
total_new = 0
total_notified = 0
# Phase 1: run all scrapers concurrently (each hits a different domain)
all_listings: list[RawListing] = []
with concurrent.futures.ThreadPoolExecutor(max_workers=len(scrapers)) as pool:
futures = {pool.submit(_run_scraper, s): s for s in scrapers.values()}
for future in concurrent.futures.as_completed(futures):
_name, listings = future.result()
all_listings.extend(listings)
log.info("Alle scrapers klaar. %d listings totaal opgehaald.", len(all_listings))
# Phase 2: sequential travel calculation + upsert + filtered notify
for listing in all_listings:
travel = {}
try:
lid = listing_id(listing.url)
is_existing = conn.execute(
"SELECT id FROM woningen WHERE id = ?", (lid,)
).fetchone() is not None
if not is_existing:
travel = bereken_reistijden(listing.postcode, listing.stad)
is_new = upsert(conn, listing, travel)
if is_new:
total_new += 1
log.info("Nieuwe woning: %s (%s)", listing.adres, listing.url)
if _check_filters(listing, travel):
total_notified += 1
notify_ha(listing, travel)
else:
log.info("Geen notificatie voor %s (gefilterd)", listing.adres)
except Exception as e:
log.error("Fout bij verwerken %s: %s", listing.url, e)
except Exception as e:
log.error("Fout bij verwerken %s: %s", listing.url, e)
log.info("Run klaar. %d nieuwe woningen gevonden.", total_new)
log.info(
"Run klaar. %d nieuwe woningen, %d notificaties verstuurd.",
total_new, total_notified,
)
conn.close()

View File

@@ -8,17 +8,17 @@
<link href="https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700;800&family=DM+Mono:wght@400;500&display=swap" rel="stylesheet">
<style>
:root {
--bg: #0f0f0f;
--surface: #181818;
--surface2: #222222;
--border: #2a2a2a;
--accent: #c8f060;
--accent-dim: #8aaa30;
--text: #e8e8e8;
--text-dim: #888;
--text-dimmer: #555;
--red: #ff5f5f;
--orange: #ffaa44;
--bg: #f5f0eb;
--surface: #fdf9f5;
--surface2: #ede8e2;
--border: #ddd6cc;
--accent: #6a9e78;
--accent-dim: #4f7a5c;
--text: #2e2a25;
--text-dim: #7a7068;
--text-dimmer: #aaa098;
--red: #c0524a;
--orange: #c07c3a;
--radius: 10px;
--font-ui: 'Syne', sans-serif;
--font-mono: 'DM Mono', monospace;
@@ -150,7 +150,7 @@
overflow: hidden;
transition: border-color 0.15s;
}
.card:hover { border-color: #3a3a3a; }
.card:hover { border-color: #c5bdb4; }
.card-compact {
display: grid;
@@ -187,7 +187,7 @@
position: absolute;
bottom: 0.4rem;
left: 0.4rem;
background: rgba(0,0,0,0.7);
background: rgba(255,255,255,0.75);
backdrop-filter: blur(4px);
color: var(--text-dim);
font-family: var(--font-mono);
@@ -250,7 +250,7 @@
.card-link:hover {
color: var(--accent);
border-color: var(--accent-dim);
background: rgba(200,240,96,0.06);
background: rgba(106,158,120,0.08);
}
.card-link svg { flex-shrink: 0; }

View File

@@ -53,7 +53,7 @@ def index():
d["extra"] = {}
listings.append(d)
return render_template("src/views/index.html", listings_json=json.dumps(listings, ensure_ascii=False))
return render_template("index.html", listings_json=json.dumps(listings, ensure_ascii=False))
if __name__ == "__main__":