tweaks and first real run
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -5,3 +5,4 @@
|
||||
**/__pycache__/
|
||||
|
||||
tests/cache/
|
||||
data/
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
config.py — vul aan met je eigen waarden. Secrets via environment variables.
|
||||
config.py — Secrets via environment variables.
|
||||
"""
|
||||
import os
|
||||
|
||||
@@ -16,7 +16,40 @@ DB_PATH = os.environ.get("DB_PATH", "/data/huizenbot.db")
|
||||
|
||||
FIETS_SNELHEID_FACTOR = 1.27
|
||||
|
||||
MAX_PRICE = 300_000
|
||||
MAX_PRICE = 300_000 # coarse pre-filter in adapters only
|
||||
|
||||
MIN_AREA = 65 # Sq meters
|
||||
|
||||
# Fine price filter: maximum mortgage per energy-label group, scaled by
# _LABEL_DISCOUNT. Labels that match no group fall back to the most
# conservative (lowest) tier.
_LABEL_DISCOUNT = 0.9
MAX_PRIJS_PER_LABEL: dict[str, int] = {
    "EFG": int(286_942 * _LABEL_DISCOUNT),
    "CD": int(291_942 * _LABEL_DISCOUNT),
    "AB": int(296_942 * _LABEL_DISCOUNT),
    "A+": int(306_942 * _LABEL_DISCOUNT),
}
_MAX_PRIJS_ONBEKEND = MAX_PRIJS_PER_LABEL["EFG"]  # conservative fallback

# Single-letter energy label -> key of its tier in MAX_PRIJS_PER_LABEL.
_LABEL_TIER: dict[str, str] = {
    "A": "AB",
    "B": "AB",
    "C": "CD",
    "D": "CD",
    "E": "EFG",
    "F": "EFG",
    "G": "EFG",
}


def max_prijs_voor_label(label: str | None) -> int:
    """Return the maximum allowed price for an energy label.

    The label is matched case-insensitively after stripping surrounding
    whitespace.

    Args:
        label: Energy label such as ``"C"`` or ``"A++"``; ``None`` or an
            empty string means the label is unknown.

    Returns:
        The price cap of the tier the label belongs to. Missing or
        unrecognised labels get the most conservative cap (the "EFG" tier).
    """
    if not label:
        return _MAX_PRIJS_ONBEKEND

    code = label.strip().upper()

    # "A" followed by one or more "+" signs. This also covers labels with
    # more than three plus signs (e.g. "A++++"), which would otherwise drop
    # to the lowest tier even though they are better than "A+".
    if len(code) > 1 and code.rstrip("+") == "A":
        return MAX_PRIJS_PER_LABEL["A+"]

    tier = _LABEL_TIER.get(code)
    if tier is None:
        return _MAX_PRIJS_ONBEKEND
    return MAX_PRIJS_PER_LABEL[tier]
|
||||
|
||||
# Travel time limits (None travel time → pass, with warning)
|
||||
MAX_OV_MINUTEN_MARK = 50
|
||||
MAX_OV_MINUTEN_MICHELLE = 50
|
||||
MAX_FIETS_MINUTEN_MARK = 35
|
||||
# No fiets limit for michelle
|
||||
|
||||
APP_ENV = os.environ.get("APP_ENV", "dev")
|
||||
|
||||
|
||||
166
src/huizenbot.py
166
src/huizenbot.py
@@ -94,6 +94,7 @@ CREATE TABLE IF NOT EXISTS woningen (
|
||||
|
||||
|
||||
def get_db(path: str) -> sqlite3.Connection:
|
||||
log.info(f"Opening db at path {path}")
|
||||
conn = sqlite3.connect(path)
|
||||
conn.row_factory = sqlite3.Row
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
@@ -231,7 +232,7 @@ def _next_weekday_morning() -> str:
|
||||
return d.strftime("%Y%m%dT083000")
|
||||
|
||||
|
||||
def bereken_reistijden(postcode: str | None) -> dict[str, int]:
|
||||
def bereken_reistijden(postcode: str | None, stad: str | None) -> dict[str, int]:
|
||||
"""Bereken alle reistijden voor een woning postcode. Geeft lege dict bij falen."""
|
||||
if not postcode:
|
||||
return {}
|
||||
@@ -240,16 +241,20 @@ def bereken_reistijden(postcode: str | None) -> dict[str, int]:
|
||||
if not woning_coords:
|
||||
return {}
|
||||
|
||||
werk1 = geocode(config.MARK_WERK_POSTCODE)
|
||||
werk2 = geocode(config.MICHELLE_WERK_POSTCODE)
|
||||
werk1_coords = geocode(config.MARK_WERK_POSTCODE)
|
||||
werk2_coords = geocode(config.MICHELLE_WERK_POSTCODE)
|
||||
|
||||
# 9292 expects "cityname/postcode" strings (lowercase city)
|
||||
stad_lower = (stad or "").strip().lower()
|
||||
woning_9292 = f"{stad_lower}/{postcode}" if stad_lower else postcode
|
||||
|
||||
result = {}
|
||||
if werk1:
|
||||
result["fiets_mark"] = fiets_minuten(woning_coords, werk1)
|
||||
result["ov_mark"] = ov_minuten(woning_coords, werk1)
|
||||
if werk2:
|
||||
result["fiets_michelle"] = fiets_minuten(woning_coords, werk2)
|
||||
result["ov_michelle"] = ov_minuten(woning_coords, werk2)
|
||||
if werk1_coords:
|
||||
result["fiets_mark"] = fiets_minuten(woning_coords, werk1_coords)
|
||||
result["ov_mark"] = ov_minuten(woning_9292, config.MARK_WERK_9292)
|
||||
if werk2_coords:
|
||||
result["fiets_michelle"] = fiets_minuten(woning_coords, werk2_coords)
|
||||
result["ov_michelle"] = ov_minuten(woning_9292, config.MICHELLE_WERK_9292)
|
||||
|
||||
return result
|
||||
|
||||
@@ -283,6 +288,67 @@ def notify_ha(listing: RawListing, travel: dict[str,int]) -> None:
|
||||
except Exception as e:
|
||||
log.error("HA webhook fout: %s", e)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Filtering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _check_filters(listing: RawListing, travel: dict[str, int]) -> bool:
    """Decide whether a listing should trigger a notification.

    Every filter is evaluated (no early return) so that each failing
    criterion gets its own log line. Missing data never filters a listing
    out: an absent price or living area is simply skipped, and an absent
    travel time is logged as a warning.

    Args:
        listing: The listing to evaluate; reads ``prijs``, ``energielabel``,
            ``woonoppervlak`` and ``adres``.
        travel: Travel times keyed by ``"ov_mark"``, ``"ov_michelle"`` and
            ``"fiets_mark"``; missing keys are treated as unknown.

    Returns:
        True if no filter rejected the listing, False otherwise.
    """
    passed = True

    # --- Price filter: the cap depends on the energy label ---
    if listing.prijs is not None:
        max_p = config.max_prijs_voor_label(listing.energielabel)
        if listing.prijs > max_p:
            log.info(
                "Gefilterd op prijs: %s €%d > €%d (label: %s)",
                listing.adres, listing.prijs, max_p, listing.energielabel or "onbekend",
            )
            passed = False

    # --- Area filter ---
    # Reject only listings strictly below the minimum, matching the "<" in
    # the log message; a listing of exactly MIN_AREA passes.
    if listing.woonoppervlak is not None and listing.woonoppervlak < config.MIN_AREA:
        log.info("Gefilterd op oppervlakte: %s < %s", listing.woonoppervlak, config.MIN_AREA)
        passed = False

    # --- Public transport (OV) filter ---
    ov_mark = travel.get("ov_mark")
    ov_michelle = travel.get("ov_michelle")

    if ov_mark is None:
        log.warning(
            "OV reistijd mark ONBEKEND voor %s — notificatie wordt toch verstuurd",
            listing.adres,
        )
    elif ov_mark > config.MAX_OV_MINUTEN_MARK:
        log.info("Gefilterd op OV mark: %s %dmin > %dmin", listing.adres, ov_mark, config.MAX_OV_MINUTEN_MARK)
        passed = False

    if ov_michelle is None:
        log.warning(
            "OV reistijd michelle ONBEKEND voor %s — notificatie wordt toch verstuurd",
            listing.adres,
        )
    elif ov_michelle > config.MAX_OV_MINUTEN_MICHELLE:
        log.info("Gefilterd op OV michelle: %s %dmin > %dmin", listing.adres, ov_michelle, config.MAX_OV_MINUTEN_MICHELLE)
        passed = False

    # --- Bike filter (mark only) ---
    fiets_mark = travel.get("fiets_mark")
    if fiets_mark is None:
        log.warning(
            "Fiets reistijd mark ONBEKEND voor %s — notificatie wordt toch verstuurd",
            listing.adres,
        )
    elif fiets_mark > config.MAX_FIETS_MINUTEN_MARK:
        log.info("Gefilterd op fiets mark: %s %dmin > %dmin", listing.adres, fiets_mark, config.MAX_FIETS_MINUTEN_MARK)
        passed = False

    return passed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Orchestration
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -290,42 +356,64 @@ def notify_ha(listing: RawListing, travel: dict[str,int]) -> None:
|
||||
Scraper = Callable[[], list[RawListing]]
|
||||
|
||||
|
||||
def run(scrapers: list[Scraper], db_path: str) -> None:
|
||||
conn = get_db(db_path)
|
||||
total_new = 0
|
||||
|
||||
for scraper in scrapers:
|
||||
name = scraper.__name__
|
||||
log.info("Scraper starten: %s", name)
|
||||
try:
|
||||
listings = scraper()
|
||||
except Exception as e:
|
||||
log.error("Scraper %s gefaald: %s", name, e)
|
||||
continue
|
||||
|
||||
def _run_scraper(scraper: Scraper) -> tuple[str, list[RawListing]]:
    """Run a single scraper and return ``(name, listings)``.

    Any ``Exception`` raised by the scraper is logged and converted into an
    empty result, so one failing scraper does not abort the others.

    Args:
        scraper: Zero-argument callable that returns the scraped listings.

    Returns:
        The scraper's display name and its listings; the list is empty when
        the scraper failed.
    """
    # functools.partial objects and callable instances have no __name__.
    # Fall back to repr() so the lookup cannot raise outside the try block.
    name = getattr(scraper, "__name__", None) or repr(scraper)
    log.info("Scraper starten: %s", name)
    try:
        listings = scraper()
        # len() stays inside the try: a scraper returning a non-sized value
        # is treated as a failure as well.
        log.info("Scraper %s: %d listings opgehaald", name, len(listings))
    except Exception as e:
        log.error("Scraper %s gefaald: %s", name, e)
        return name, []
    return name, listings
|
||||
|
||||
for listing in listings:
|
||||
travel = {}
|
||||
try:
|
||||
# Check of het een nieuwe woning is vóór upsert
|
||||
lid = listing_id(listing.url)
|
||||
is_existing = conn.execute(
|
||||
"SELECT id FROM woningen WHERE id = ?", (lid,)
|
||||
).fetchone() is not None
|
||||
|
||||
if not is_existing:
|
||||
travel = bereken_reistijden(listing.postcode)
|
||||
def run(scrapers: dict[str, Scraper], db_path: str) -> None:
    """Scrape all sources, store the listings and notify about new matches.

    Phase 1 runs every scraper concurrently in a thread pool. Phase 2
    processes the collected listings sequentially: travel times are computed
    only for listings not yet in the database, each listing is upserted, and
    listings that `upsert` reports as new are notified when they pass
    `_check_filters`. A failure on one listing is logged and does not stop
    the others.

    Args:
        scrapers: Scraper callables; only the dict's values are used here.
        db_path: Path of the SQLite database, passed to `get_db`.
    """
    import concurrent.futures

    conn = get_db(db_path)
    try:
        total_new = 0
        total_notified = 0

        # Phase 1: run all scrapers concurrently (each hits a different domain)
        all_listings: list[RawListing] = []
        # ThreadPoolExecutor raises ValueError for max_workers=0, so skip the
        # pool entirely when there is nothing to run.
        if scrapers:
            with concurrent.futures.ThreadPoolExecutor(max_workers=len(scrapers)) as pool:
                futures = [pool.submit(_run_scraper, s) for s in scrapers.values()]
                for future in concurrent.futures.as_completed(futures):
                    _name, listings = future.result()
                    all_listings.extend(listings)

        log.info("Alle scrapers klaar. %d listings totaal opgehaald.", len(all_listings))

        # Phase 2: sequential travel calculation + upsert + filtered notify
        for listing in all_listings:
            travel = {}
            try:
                # Check for an existing row before the upsert, so travel times
                # are only computed for listings we have not seen yet.
                lid = listing_id(listing.url)
                is_existing = conn.execute(
                    "SELECT id FROM woningen WHERE id = ?", (lid,)
                ).fetchone() is not None

                if not is_existing:
                    travel = bereken_reistijden(listing.postcode, listing.stad)

                is_new = upsert(conn, listing, travel)

                if is_new:
                    total_new += 1
                    log.info("Nieuwe woning: %s (%s)", listing.adres, listing.url)
                    if _check_filters(listing, travel):
                        total_notified += 1
                        notify_ha(listing, travel)
                    else:
                        log.info("Geen notificatie voor %s (gefilterd)", listing.adres)

            except Exception as e:
                log.error("Fout bij verwerken %s: %s", listing.url, e)

        log.info(
            "Run klaar. %d nieuwe woningen, %d notificaties verstuurd.",
            total_new, total_notified,
        )
    finally:
        # Close the connection even if something above raised unexpectedly.
        conn.close()
|
||||
|
||||
@@ -8,17 +8,17 @@
|
||||
<link href="https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700;800&family=DM+Mono:wght@400;500&display=swap" rel="stylesheet">
|
||||
<style>
|
||||
:root {
|
||||
--bg: #0f0f0f;
|
||||
--surface: #181818;
|
||||
--surface2: #222222;
|
||||
--border: #2a2a2a;
|
||||
--accent: #c8f060;
|
||||
--accent-dim: #8aaa30;
|
||||
--text: #e8e8e8;
|
||||
--text-dim: #888;
|
||||
--text-dimmer: #555;
|
||||
--red: #ff5f5f;
|
||||
--orange: #ffaa44;
|
||||
--bg: #f5f0eb;
|
||||
--surface: #fdf9f5;
|
||||
--surface2: #ede8e2;
|
||||
--border: #ddd6cc;
|
||||
--accent: #6a9e78;
|
||||
--accent-dim: #4f7a5c;
|
||||
--text: #2e2a25;
|
||||
--text-dim: #7a7068;
|
||||
--text-dimmer: #aaa098;
|
||||
--red: #c0524a;
|
||||
--orange: #c07c3a;
|
||||
--radius: 10px;
|
||||
--font-ui: 'Syne', sans-serif;
|
||||
--font-mono: 'DM Mono', monospace;
|
||||
@@ -150,7 +150,7 @@
|
||||
overflow: hidden;
|
||||
transition: border-color 0.15s;
|
||||
}
|
||||
.card:hover { border-color: #3a3a3a; }
|
||||
.card:hover { border-color: #c5bdb4; }
|
||||
|
||||
.card-compact {
|
||||
display: grid;
|
||||
@@ -187,7 +187,7 @@
|
||||
position: absolute;
|
||||
bottom: 0.4rem;
|
||||
left: 0.4rem;
|
||||
background: rgba(0,0,0,0.7);
|
||||
background: rgba(255,255,255,0.75);
|
||||
backdrop-filter: blur(4px);
|
||||
color: var(--text-dim);
|
||||
font-family: var(--font-mono);
|
||||
@@ -250,7 +250,7 @@
|
||||
.card-link:hover {
|
||||
color: var(--accent);
|
||||
border-color: var(--accent-dim);
|
||||
background: rgba(200,240,96,0.06);
|
||||
background: rgba(106,158,120,0.08);
|
||||
}
|
||||
.card-link svg { flex-shrink: 0; }
|
||||
|
||||
@@ -53,7 +53,7 @@ def index():
|
||||
d["extra"] = {}
|
||||
listings.append(d)
|
||||
|
||||
return render_template("src/views/index.html", listings_json=json.dumps(listings, ensure_ascii=False))
|
||||
return render_template("index.html", listings_json=json.dumps(listings, ensure_ascii=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user