diff --git a/.gitignore b/.gitignore
index c2f9b05..c16c11e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@
**/__pycache__/
tests/cache/
+data/
diff --git a/src/config.py b/src/config.py
index b5959d0..c783dc0 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,5 +1,5 @@
"""
-config.py — vul aan met je eigen waarden. Secrets via environment variables.
+config.py — Secrets via environment variables.
"""
import os
@@ -16,7 +16,40 @@ DB_PATH = os.environ.get("DB_PATH", "/data/huizenbot.db")
FIETS_SNELHEID_FACTOR = 1.27
-MAX_PRICE = 300_000
+MAX_PRICE = 300_000 # coarse pre-filter in adapters only
+
+MIN_AREA = 65 # minimum living area (woonoppervlak) in square metres
+
+# Fine price filter: maximum mortgage per energy-label group, scaled by _LABEL_DISCOUNT.
+# The dict is keyed by label group; max_prijs_voor_label maps a single label to its group.
+_LABEL_DISCOUNT = 0.9
+MAX_PRIJS_PER_LABEL: dict[str, int] = {
+ "EFG": int(286_942 * _LABEL_DISCOUNT),
+ "CD": int(291_942 * _LABEL_DISCOUNT),
+ "AB": int(296_942 * _LABEL_DISCOUNT),
+ "A+": int(306_942 * _LABEL_DISCOUNT),
+}
+_MAX_PRIJS_ONBEKEND = MAX_PRIJS_PER_LABEL["EFG"] # conservative fallback for unknown/missing labels
+
+def max_prijs_voor_label(label: str | None) -> int:
+    """Return the max allowed price for an energy label.
+
+    Unknown, empty or None labels get the most conservative tier.
+    """
+    if not label:
+        return _MAX_PRIJS_ONBEKEND
+    norm = label.strip().upper()
+    # Any "A" followed only by pluses (A+ .. A+++++) shares the top tier.
+    if norm.startswith("A+") and set(norm[1:]) == {"+"}:
+        return MAX_PRIJS_PER_LABEL["A+"]
+    tiers = {"A": "AB", "B": "AB", "C": "CD", "D": "CD", "E": "EFG", "F": "EFG", "G": "EFG"}
+    tier = tiers.get(norm)
+    return MAX_PRIJS_PER_LABEL[tier] if tier else _MAX_PRIJS_ONBEKEND
+
+# Travel time limits in minutes; a missing (None) travel time passes the filter with a logged warning.
+MAX_OV_MINUTEN_MARK = 50
+MAX_OV_MINUTEN_MICHELLE = 50
+MAX_FIETS_MINUTEN_MARK = 35
+# No fiets limit for michelle
APP_ENV = os.environ.get("APP_ENV", "dev")
-
diff --git a/src/huizenbot.py b/src/huizenbot.py
index 1165549..5ff2c7e 100644
--- a/src/huizenbot.py
+++ b/src/huizenbot.py
@@ -94,6 +94,7 @@ CREATE TABLE IF NOT EXISTS woningen (
def get_db(path: str) -> sqlite3.Connection:
+ log.info(f"Opening db at path {path}")
conn = sqlite3.connect(path)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA journal_mode=WAL")
@@ -231,7 +232,7 @@ def _next_weekday_morning() -> str:
return d.strftime("%Y%m%dT083000")
-def bereken_reistijden(postcode: str | None) -> dict[str, int]:
+def bereken_reistijden(postcode: str | None, stad: str | None) -> dict[str, int]:
"""Bereken alle reistijden voor een woning postcode. Geeft lege dict bij falen."""
if not postcode:
return {}
@@ -240,16 +241,20 @@ def bereken_reistijden(postcode: str | None) -> dict[str, int]:
if not woning_coords:
return {}
- werk1 = geocode(config.MARK_WERK_POSTCODE)
- werk2 = geocode(config.MICHELLE_WERK_POSTCODE)
+ werk1_coords = geocode(config.MARK_WERK_POSTCODE)
+ werk2_coords = geocode(config.MICHELLE_WERK_POSTCODE)
+
+ # 9292 expects "cityname/postcode" strings (lowercase city)
+ stad_lower = (stad or "").strip().lower()
+ woning_9292 = f"{stad_lower}/{postcode}" if stad_lower else postcode
result = {}
- if werk1:
- result["fiets_mark"] = fiets_minuten(woning_coords, werk1)
- result["ov_mark"] = ov_minuten(woning_coords, werk1)
- if werk2:
- result["fiets_michelle"] = fiets_minuten(woning_coords, werk2)
- result["ov_michelle"] = ov_minuten(woning_coords, werk2)
+ if werk1_coords:
+ result["fiets_mark"] = fiets_minuten(woning_coords, werk1_coords)
+ result["ov_mark"] = ov_minuten(woning_9292, config.MARK_WERK_9292)
+ if werk2_coords:
+ result["fiets_michelle"] = fiets_minuten(woning_coords, werk2_coords)
+ result["ov_michelle"] = ov_minuten(woning_9292, config.MICHELLE_WERK_9292)
return result
@@ -283,6 +288,67 @@ def notify_ha(listing: RawListing, travel: dict[str,int]) -> None:
except Exception as e:
log.error("HA webhook fout: %s", e)
+# ---------------------------------------------------------------------------
+# Filtering
+# ---------------------------------------------------------------------------
+
+def _check_filters(listing: RawListing, travel: dict[str, int]) -> bool:
+    """
+    Return True if the listing passes all filters and should trigger a notification.
+    Missing data never rejects: an absent price/area is skipped, an absent travel time is logged.
+    """
+    passed = True
+
+    # --- Price filter (cap depends on the energy label) ---
+    if listing.prijs is not None:
+        max_p = config.max_prijs_voor_label(listing.energielabel)
+        if listing.prijs > max_p:
+            log.info(
+                "Gefilterd op prijs: %s €%d > €%d (label: %s)",
+                listing.adres, listing.prijs, max_p, listing.energielabel or "onbekend",
+            )
+            passed = False
+    # --- Area filter (reject only when below config.MIN_AREA) ---
+    if listing.woonoppervlak is not None and listing.woonoppervlak < config.MIN_AREA:
+        log.info(f"Gefilterd op oppervlakte: {listing.woonoppervlak} < {config.MIN_AREA}")
+        passed = False
+
+    # --- OV filter ---
+    ov_mark = travel.get("ov_mark")
+    ov_michelle = travel.get("ov_michelle")
+
+    if ov_mark is None:
+        log.warning(
+            "OV reistijd mark ONBEKEND voor %s — notificatie wordt toch verstuurd",
+            listing.adres,
+        )
+    elif ov_mark > config.MAX_OV_MINUTEN_MARK:
+        log.info("Gefilterd op OV mark: %s %dmin > %dmin", listing.adres, ov_mark, config.MAX_OV_MINUTEN_MARK)
+        passed = False
+
+    if ov_michelle is None:
+        log.warning(
+            "OV reistijd michelle ONBEKEND voor %s — notificatie wordt toch verstuurd",
+            listing.adres,
+        )
+    elif ov_michelle > config.MAX_OV_MINUTEN_MICHELLE:
+        log.info("Gefilterd op OV michelle: %s %dmin > %dmin", listing.adres, ov_michelle, config.MAX_OV_MINUTEN_MICHELLE)
+        passed = False
+
+    # --- Fiets filter (mark only) ---
+    fiets_mark = travel.get("fiets_mark")
+    if fiets_mark is None:
+        log.warning(
+            "Fiets reistijd mark ONBEKEND voor %s — notificatie wordt toch verstuurd",
+            listing.adres,
+        )
+    elif fiets_mark > config.MAX_FIETS_MINUTEN_MARK:
+        log.info("Gefilterd op fiets mark: %s %dmin > %dmin", listing.adres, fiets_mark, config.MAX_FIETS_MINUTEN_MARK)
+        passed = False
+
+    return passed
+
+
# ---------------------------------------------------------------------------
# Orchestration
# ---------------------------------------------------------------------------
@@ -290,42 +356,64 @@ def notify_ha(listing: RawListing, travel: dict[str,int]) -> None:
Scraper = Callable[[], list[RawListing]]
-def run(scrapers: list[Scraper], db_path: str) -> None:
- conn = get_db(db_path)
- total_new = 0
-
- for scraper in scrapers:
- name = scraper.__name__
- log.info("Scraper starten: %s", name)
- try:
- listings = scraper()
- except Exception as e:
- log.error("Scraper %s gefaald: %s", name, e)
- continue
-
+def _run_scraper(scraper: Scraper) -> tuple[str, list[RawListing]]:  # returns (scraper name, listings)
+ name = scraper.__name__
+ log.info("Scraper starten: %s", name)
+ try:
+ listings = scraper()
log.info("Scraper %s: %d listings opgehaald", name, len(listings))
+ return name, listings
+ except Exception as e:  # any scraper error is logged and swallowed here
+ log.error("Scraper %s gefaald: %s", name, e)
+ return name, []  # a failed scraper contributes an empty list
- for listing in listings:
- travel = {}
- try:
- # Check of het een nieuwe woning is vóór upsert
- lid = listing_id(listing.url)
- is_existing = conn.execute(
- "SELECT id FROM woningen WHERE id = ?", (lid,)
- ).fetchone() is not None
- if not is_existing:
- travel = bereken_reistijden(listing.postcode)
+def run(scrapers: dict[str,Scraper], db_path: str) -> None:
+ import concurrent.futures
- is_new = upsert(conn, listing, travel)
+ conn = get_db(db_path)
- if is_new:
- total_new += 1
- log.info("Nieuwe woning: %s (%s)", listing.adres, listing.url)
+ total_new = 0
+ total_notified = 0
+
+ # Phase 1: run all scrapers concurrently (each hits a different domain).
+ # max(1, ...): ThreadPoolExecutor raises ValueError for max_workers=0 (empty scrapers).
+ all_listings: list[RawListing] = []
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, len(scrapers))) as pool:
+     futures = [pool.submit(_run_scraper, s) for s in scrapers.values()]
+     for future in concurrent.futures.as_completed(futures):
+         all_listings.extend(future.result()[1])  # (name, listings) -> listings
+
+ log.info("Alle scrapers klaar. %d listings totaal opgehaald.", len(all_listings))
+
+ # Phase 2: sequential travel calculation + upsert + filtered notify
+ for listing in all_listings:
+ travel = {}
+ try:
+ lid = listing_id(listing.url)
+ is_existing = conn.execute(
+ "SELECT id FROM woningen WHERE id = ?", (lid,)
+ ).fetchone() is not None
+
+ if not is_existing:
+ travel = bereken_reistijden(listing.postcode, listing.stad)
+
+ is_new = upsert(conn, listing, travel)
+
+ if is_new:
+ total_new += 1
+ log.info("Nieuwe woning: %s (%s)", listing.adres, listing.url)
+ if _check_filters(listing, travel):
+ total_notified += 1
notify_ha(listing, travel)
+ else:
+ log.info("Geen notificatie voor %s (gefilterd)", listing.adres)
- except Exception as e:
- log.error("Fout bij verwerken %s: %s", listing.url, e)
+ except Exception as e:
+ log.error("Fout bij verwerken %s: %s", listing.url, e)
- log.info("Run klaar. %d nieuwe woningen gevonden.", total_new)
+ log.info(
+ "Run klaar. %d nieuwe woningen, %d notificaties verstuurd.",
+ total_new, total_notified,
+ )
conn.close()
diff --git a/src/views/index.html b/src/templates/index.html
similarity index 97%
rename from src/views/index.html
rename to src/templates/index.html
index 4f01130..d610f40 100644
--- a/src/views/index.html
+++ b/src/templates/index.html
@@ -8,17 +8,17 @@