diff --git a/.gitignore b/.gitignore
index c2f9b05..c16c11e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@
 **/__pycache__/
 
 tests/cache/
+data/
diff --git a/src/config.py b/src/config.py
index b5959d0..c783dc0 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,5 +1,5 @@
 """
-config.py — vul aan met je eigen waarden. Secrets via environment variables.
+config.py — Secrets via environment variables.
 """
 
 import os
@@ -16,7 +16,40 @@ DB_PATH = os.environ.get("DB_PATH", "/data/huizenbot.db")
 
 FIETS_SNELHEID_FACTOR = 1.27
 
-MAX_PRICE = 300_000
+MAX_PRICE = 300_000  # coarse pre-filter in adapters only
+
+MIN_AREA = 65  # minimum living area in square meters
+
+# Fine price filter: max mortgage per energy label group * 0.9
+# Labels not in this map fall back to the most conservative tier.
+_LABEL_DISCOUNT = 0.9
+MAX_PRIJS_PER_LABEL: dict[str, int] = {
+    "EFG": int(286_942 * _LABEL_DISCOUNT),
+    "CD": int(291_942 * _LABEL_DISCOUNT),
+    "AB": int(296_942 * _LABEL_DISCOUNT),
+    "A+": int(306_942 * _LABEL_DISCOUNT),
+}
+_MAX_PRIJS_ONBEKEND = MAX_PRIJS_PER_LABEL["EFG"]  # conservative fallback
+
+def max_prijs_voor_label(label: str | None) -> int:
+    """Return the max allowed price for a given energy label (or None/unknown)."""
+    if not label:
+        return _MAX_PRIJS_ONBEKEND
+    key = label.strip().upper()
+    if key in ("A+++", "A++", "A+"):
+        return MAX_PRIJS_PER_LABEL["A+"]
+    if key in ("A", "B"):
+        return MAX_PRIJS_PER_LABEL["AB"]
+    if key in ("C", "D"):
+        return MAX_PRIJS_PER_LABEL["CD"]
+    if key in ("E", "F", "G"):
+        return MAX_PRIJS_PER_LABEL["EFG"]
+    return _MAX_PRIJS_ONBEKEND
+
+# Travel time limits (None travel time → pass, with warning)
+MAX_OV_MINUTEN_MARK = 50
+MAX_OV_MINUTEN_MICHELLE = 50
+MAX_FIETS_MINUTEN_MARK = 35
+# No fiets limit for michelle
 
 APP_ENV = os.environ.get("APP_ENV", "dev")
-
diff --git a/src/huizenbot.py b/src/huizenbot.py
index 1165549..5ff2c7e 100644
--- a/src/huizenbot.py
+++ b/src/huizenbot.py
@@ -94,6 +94,7 @@ CREATE TABLE IF NOT EXISTS woningen (
 
 
 def get_db(path: str) -> sqlite3.Connection:
+    log.info("Opening db at path %s", path)
     conn = sqlite3.connect(path)
     conn.row_factory = sqlite3.Row
     conn.execute("PRAGMA journal_mode=WAL")
@@ -231,7 +232,7 @@ def _next_weekday_morning() -> str:
     return d.strftime("%Y%m%dT083000")
 
 
-def bereken_reistijden(postcode: str | None) -> dict[str, int]:
+def bereken_reistijden(postcode: str | None, stad: str | None = None) -> dict[str, int]:
     """Bereken alle reistijden voor een woning postcode. Geeft lege dict bij falen."""
     if not postcode:
         return {}
@@ -240,16 +241,20 @@ def bereken_reistijden(postcode: str | None) -> dict[str, int]:
     if not woning_coords:
         return {}
 
-    werk1 = geocode(config.MARK_WERK_POSTCODE)
-    werk2 = geocode(config.MICHELLE_WERK_POSTCODE)
+    werk1_coords = geocode(config.MARK_WERK_POSTCODE)
+    werk2_coords = geocode(config.MICHELLE_WERK_POSTCODE)
+
+    # 9292 expects "cityname/postcode" strings (lowercase city)
+    stad_lower = (stad or "").strip().lower()
+    woning_9292 = f"{stad_lower}/{postcode}" if stad_lower else postcode
 
     result = {}
-    if werk1:
-        result["fiets_mark"] = fiets_minuten(woning_coords, werk1)
-        result["ov_mark"] = ov_minuten(woning_coords, werk1)
-    if werk2:
-        result["fiets_michelle"] = fiets_minuten(woning_coords, werk2)
-        result["ov_michelle"] = ov_minuten(woning_coords, werk2)
+    if werk1_coords:
+        result["fiets_mark"] = fiets_minuten(woning_coords, werk1_coords)
+        result["ov_mark"] = ov_minuten(woning_9292, config.MARK_WERK_9292)
+    if werk2_coords:
+        result["fiets_michelle"] = fiets_minuten(woning_coords, werk2_coords)
+        result["ov_michelle"] = ov_minuten(woning_9292, config.MICHELLE_WERK_9292)
     return result
 
 
@@ -283,6 +288,67 @@ def notify_ha(listing: RawListing, travel: dict[str,int]) -> None:
     except Exception as e:
         log.error("HA webhook fout: %s", e)
 
+# ---------------------------------------------------------------------------
+# Filtering
+# ---------------------------------------------------------------------------
+
+def _check_filters(listing: RawListing, travel: dict[str, int]) -> bool:
+    """
+    Returns True if the listing passes all filters and should trigger a notification.
+    Always errs on the side of notifying when data is missing (logs a warning).
+    """
+    passed = True
+
+    # --- Price filter ---
+    if listing.prijs is not None:
+        max_p = config.max_prijs_voor_label(listing.energielabel)
+        if listing.prijs > max_p:
+            log.info(
+                "Gefilterd op prijs: %s €%d > €%d (label: %s)",
+                listing.adres, listing.prijs, max_p, listing.energielabel or "onbekend",
+            )
+            passed = False
+    # --- Area filter ---
+    if listing.woonoppervlak is not None and listing.woonoppervlak < config.MIN_AREA:
+        log.info("Gefilterd op oppervlakte: %s < %s", listing.woonoppervlak, config.MIN_AREA)
+        passed = False
+
+    # --- OV filter ---
+    ov_mark = travel.get("ov_mark")
+    ov_michelle = travel.get("ov_michelle")
+
+    if ov_mark is None:
+        log.warning(
+            "OV reistijd mark ONBEKEND voor %s — notificatie wordt toch verstuurd",
+            listing.adres,
+        )
+    elif ov_mark > config.MAX_OV_MINUTEN_MARK:
+        log.info("Gefilterd op OV mark: %s %dmin > %dmin", listing.adres, ov_mark, config.MAX_OV_MINUTEN_MARK)
+        passed = False
+
+    if ov_michelle is None:
+        log.warning(
+            "OV reistijd michelle ONBEKEND voor %s — notificatie wordt toch verstuurd",
+            listing.adres,
+        )
+    elif ov_michelle > config.MAX_OV_MINUTEN_MICHELLE:
+        log.info("Gefilterd op OV michelle: %s %dmin > %dmin", listing.adres, ov_michelle, config.MAX_OV_MINUTEN_MICHELLE)
+        passed = False
+
+    # --- Fiets filter (mark only) ---
+    fiets_mark = travel.get("fiets_mark")
+    if fiets_mark is None:
+        log.warning(
+            "Fiets reistijd mark ONBEKEND voor %s — notificatie wordt toch verstuurd",
+            listing.adres,
+        )
+    elif fiets_mark > config.MAX_FIETS_MINUTEN_MARK:
+        log.info("Gefilterd op fiets mark: %s %dmin > %dmin", listing.adres, fiets_mark, config.MAX_FIETS_MINUTEN_MARK)
+        passed = False
+
+    return passed
+
+
 # ---------------------------------------------------------------------------
 # Orchestration
 # ---------------------------------------------------------------------------
@@ -290,42 +356,67 @@ def notify_ha(listing: RawListing, travel: dict[str,int]) -> None:
 Scraper = Callable[[], list[RawListing]]
 
 
-def run(scrapers: list[Scraper], db_path: str) -> None:
-    conn = get_db(db_path)
-    total_new = 0
-
-    for scraper in scrapers:
-        name = scraper.__name__
-        log.info("Scraper starten: %s", name)
-        try:
-            listings = scraper()
-        except Exception as e:
-            log.error("Scraper %s gefaald: %s", name, e)
-            continue
-
+def _run_scraper(scraper: Scraper) -> tuple[str, list[RawListing]]:
+    """Run one scraper and return (name, listings); a failing scraper yields an empty list."""
+    name = scraper.__name__
+    log.info("Scraper starten: %s", name)
+    try:
+        listings = scraper()
         log.info("Scraper %s: %d listings opgehaald", name, len(listings))
+        return name, listings
+    except Exception as e:
+        log.error("Scraper %s gefaald: %s", name, e)
+        return name, []
 
-        for listing in listings:
-            travel = {}
-            try:
-                # Check of het een nieuwe woning is vóór upsert
-                lid = listing_id(listing.url)
-                is_existing = conn.execute(
-                    "SELECT id FROM woningen WHERE id = ?", (lid,)
-                ).fetchone() is not None
 
-                if not is_existing:
-                    travel = bereken_reistijden(listing.postcode)
+def run(scrapers: dict[str, Scraper], db_path: str) -> None:
+    """Scrape all sources concurrently, then store, filter and notify sequentially."""
+    import concurrent.futures
 
-                is_new = upsert(conn, listing, travel)
+    conn = get_db(db_path)
 
-                if is_new:
-                    total_new += 1
-                    log.info("Nieuwe woning: %s (%s)", listing.adres, listing.url)
+    total_new = 0
+    total_notified = 0
+
+    # Phase 1: run all scrapers concurrently (each hits a different domain)
+    # max_workers must be >= 1, even when no scrapers are configured
+    all_listings: list[RawListing] = []
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, len(scrapers))) as pool:
+        futures = {pool.submit(_run_scraper, s): s for s in scrapers.values()}
+        for future in concurrent.futures.as_completed(futures):
+            _name, listings = future.result()
+            all_listings.extend(listings)
+
+    log.info("Alle scrapers klaar. %d listings totaal opgehaald.", len(all_listings))
+
+    # Phase 2: sequential travel calculation + upsert + filtered notify
+    for listing in all_listings:
+        travel = {}
+        try:
+            lid = listing_id(listing.url)
+            is_existing = conn.execute(
+                "SELECT id FROM woningen WHERE id = ?", (lid,)
+            ).fetchone() is not None
+
+            if not is_existing:
+                travel = bereken_reistijden(listing.postcode, listing.stad)
+
+            is_new = upsert(conn, listing, travel)
+
+            if is_new:
+                total_new += 1
+                log.info("Nieuwe woning: %s (%s)", listing.adres, listing.url)
+                if _check_filters(listing, travel):
+                    total_notified += 1
                     notify_ha(listing, travel)
+                else:
+                    log.info("Geen notificatie voor %s (gefilterd)", listing.adres)
 
-            except Exception as e:
-                log.error("Fout bij verwerken %s: %s", listing.url, e)
+        except Exception as e:
+            log.error("Fout bij verwerken %s: %s", listing.url, e)
 
-    log.info("Run klaar. %d nieuwe woningen gevonden.", total_new)
+    log.info(
+        "Run klaar. %d nieuwe woningen, %d notificaties verstuurd.",
+        total_new, total_notified,
+    )
     conn.close()
diff --git a/src/views/index.html b/src/templates/index.html
similarity index 97%
rename from src/views/index.html
rename to src/templates/index.html
index 4f01130..d610f40 100644
--- a/src/views/index.html
+++ b/src/templates/index.html
@@ -8,17 +8,17 @@