tweaks and first real run
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -5,3 +5,4 @@
|
|||||||
**/__pycache__/
|
**/__pycache__/
|
||||||
|
|
||||||
tests/cache/
|
tests/cache/
|
||||||
|
data/
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
config.py — vul aan met je eigen waarden. Secrets via environment variables.
|
config.py — Secrets via environment variables.
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
|
|
||||||
@@ -16,7 +16,40 @@ DB_PATH = os.environ.get("DB_PATH", "/data/huizenbot.db")
|
|||||||
|
|
||||||
FIETS_SNELHEID_FACTOR = 1.27
|
FIETS_SNELHEID_FACTOR = 1.27
|
||||||
|
|
||||||
MAX_PRICE = 300_000
|
MAX_PRICE = 300_000 # coarse pre-filter in adapters only
|
||||||
|
|
||||||
|
MIN_AREA = 65 # Sq meters
|
||||||
|
|
||||||
|
# Fine price filter: max mortgage per energy-label group, scaled by 0.9.
# Labels that match no group fall back to the most conservative tier.
_LABEL_DISCOUNT = 0.9
MAX_PRIJS_PER_LABEL: dict[str, int] = {
    "EFG": int(286_942 * _LABEL_DISCOUNT),
    "CD": int(291_942 * _LABEL_DISCOUNT),
    "AB": int(296_942 * _LABEL_DISCOUNT),
    "A+": int(306_942 * _LABEL_DISCOUNT),
}
_MAX_PRIJS_ONBEKEND = MAX_PRIJS_PER_LABEL["EFG"]  # conservative fallback


def max_prijs_voor_label(label: str | None) -> int:
    """Return the maximum allowed price for an energy label.

    The label is matched case-insensitively and with surrounding whitespace
    ignored. Any "A" followed by one or more plus signs (A+, A++, A+++,
    A++++, ...) maps to the "A+" tier, so newer top classes are not
    silently treated as unknown.

    Args:
        label: Energy label as scraped, or None when it is not known.

    Returns:
        The price cap of the matching group in MAX_PRIJS_PER_LABEL. A
        missing, empty or unrecognised label yields the most conservative
        cap (the "EFG" tier).
    """
    if not label:
        return _MAX_PRIJS_ONBEKEND

    normalized = label.strip().upper()

    # "A" plus at least one "+": rstrip removes the pluses, leaving bare "A".
    if normalized != "A" and normalized.rstrip("+") == "A":
        return MAX_PRIJS_PER_LABEL["A+"]
    if normalized in ("A", "B"):
        return MAX_PRIJS_PER_LABEL["AB"]
    if normalized in ("C", "D"):
        return MAX_PRIJS_PER_LABEL["CD"]
    if normalized in ("E", "F", "G"):
        return MAX_PRIJS_PER_LABEL["EFG"]
    return _MAX_PRIJS_ONBEKEND
|
||||||
|
|
||||||
|
# Travel time limits (None travel time → pass, with warning)
|
||||||
|
MAX_OV_MINUTEN_MARK = 50
|
||||||
|
MAX_OV_MINUTEN_MICHELLE = 50
|
||||||
|
MAX_FIETS_MINUTEN_MARK = 35
|
||||||
|
# No fiets limit for michelle
|
||||||
|
|
||||||
APP_ENV = os.environ.get("APP_ENV", "dev")
|
APP_ENV = os.environ.get("APP_ENV", "dev")
|
||||||
|
|
||||||
|
|||||||
166
src/huizenbot.py
166
src/huizenbot.py
@@ -94,6 +94,7 @@ CREATE TABLE IF NOT EXISTS woningen (
|
|||||||
|
|
||||||
|
|
||||||
def get_db(path: str) -> sqlite3.Connection:
|
def get_db(path: str) -> sqlite3.Connection:
|
||||||
|
log.info(f"Opening db at path {path}")
|
||||||
conn = sqlite3.connect(path)
|
conn = sqlite3.connect(path)
|
||||||
conn.row_factory = sqlite3.Row
|
conn.row_factory = sqlite3.Row
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
conn.execute("PRAGMA journal_mode=WAL")
|
||||||
@@ -231,7 +232,7 @@ def _next_weekday_morning() -> str:
|
|||||||
return d.strftime("%Y%m%dT083000")
|
return d.strftime("%Y%m%dT083000")
|
||||||
|
|
||||||
|
|
||||||
def bereken_reistijden(postcode: str | None) -> dict[str, int]:
|
def bereken_reistijden(postcode: str | None, stad: str | None) -> dict[str, int]:
|
||||||
"""Bereken alle reistijden voor een woning postcode. Geeft lege dict bij falen."""
|
"""Bereken alle reistijden voor een woning postcode. Geeft lege dict bij falen."""
|
||||||
if not postcode:
|
if not postcode:
|
||||||
return {}
|
return {}
|
||||||
@@ -240,16 +241,20 @@ def bereken_reistijden(postcode: str | None) -> dict[str, int]:
|
|||||||
if not woning_coords:
|
if not woning_coords:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
werk1 = geocode(config.MARK_WERK_POSTCODE)
|
werk1_coords = geocode(config.MARK_WERK_POSTCODE)
|
||||||
werk2 = geocode(config.MICHELLE_WERK_POSTCODE)
|
werk2_coords = geocode(config.MICHELLE_WERK_POSTCODE)
|
||||||
|
|
||||||
|
# 9292 expects "cityname/postcode" strings (lowercase city)
|
||||||
|
stad_lower = (stad or "").strip().lower()
|
||||||
|
woning_9292 = f"{stad_lower}/{postcode}" if stad_lower else postcode
|
||||||
|
|
||||||
result = {}
|
result = {}
|
||||||
if werk1:
|
if werk1_coords:
|
||||||
result["fiets_mark"] = fiets_minuten(woning_coords, werk1)
|
result["fiets_mark"] = fiets_minuten(woning_coords, werk1_coords)
|
||||||
result["ov_mark"] = ov_minuten(woning_coords, werk1)
|
result["ov_mark"] = ov_minuten(woning_9292, config.MARK_WERK_9292)
|
||||||
if werk2:
|
if werk2_coords:
|
||||||
result["fiets_michelle"] = fiets_minuten(woning_coords, werk2)
|
result["fiets_michelle"] = fiets_minuten(woning_coords, werk2_coords)
|
||||||
result["ov_michelle"] = ov_minuten(woning_coords, werk2)
|
result["ov_michelle"] = ov_minuten(woning_9292, config.MICHELLE_WERK_9292)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -283,6 +288,67 @@ def notify_ha(listing: RawListing, travel: dict[str,int]) -> None:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error("HA webhook fout: %s", e)
|
log.error("HA webhook fout: %s", e)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Filtering
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _check_filters(listing: RawListing, travel: dict[str, int]) -> bool:
    """Decide whether a listing should trigger a notification.

    Every filter is evaluated (no short-circuit) so that each reason for
    rejecting a listing is logged. Missing data never rejects a listing:
    an unknown price or area is skipped, and an unknown travel time only
    logs a warning.

    Args:
        listing: The scraped listing; reads ``prijs``, ``energielabel``,
            ``woonoppervlak`` and ``adres``.
        travel: Travel times in minutes; reads the keys ``ov_mark``,
            ``ov_michelle`` and ``fiets_mark``. Absent keys count as unknown.

    Returns:
        True when no filter rejected the listing, False otherwise.
    """
    passed = True

    # --- Price filter ---
    if listing.prijs is not None:
        max_p = config.max_prijs_voor_label(listing.energielabel)
        if listing.prijs > max_p:
            log.info(
                "Gefilterd op prijs: %s €%d > €%d (label: %s)",
                listing.adres, listing.prijs, max_p, listing.energielabel or "onbekend",
            )
            passed = False

    # --- Area filter ---
    # Reject only listings strictly below the minimum, matching the "<" in
    # the log message; a listing of exactly MIN_AREA passes.
    if listing.woonoppervlak is not None and listing.woonoppervlak < config.MIN_AREA:
        log.info("Gefilterd op oppervlakte: %s < %s", listing.woonoppervlak, config.MIN_AREA)
        passed = False

    # --- Travel filters: OV for both, fiets for mark only ---
    travel_checks = (
        (
            travel.get("ov_mark"),
            config.MAX_OV_MINUTEN_MARK,
            "OV reistijd mark ONBEKEND voor %s — notificatie wordt toch verstuurd",
            "Gefilterd op OV mark: %s %dmin > %dmin",
        ),
        (
            travel.get("ov_michelle"),
            config.MAX_OV_MINUTEN_MICHELLE,
            "OV reistijd michelle ONBEKEND voor %s — notificatie wordt toch verstuurd",
            "Gefilterd op OV michelle: %s %dmin > %dmin",
        ),
        (
            travel.get("fiets_mark"),
            config.MAX_FIETS_MINUTEN_MARK,
            "Fiets reistijd mark ONBEKEND voor %s — notificatie wordt toch verstuurd",
            "Gefilterd op fiets mark: %s %dmin > %dmin",
        ),
    )
    for minutes, limit, unknown_msg, filtered_msg in travel_checks:
        if not _travel_within_limit(listing.adres, minutes, limit, unknown_msg, filtered_msg):
            passed = False

    return passed


def _travel_within_limit(
    adres: str,
    minutes: int | None,
    limit: int,
    unknown_msg: str,
    filtered_msg: str,
) -> bool:
    """Check one travel time against its limit, logging the outcome.

    Returns True when the time is unknown (after logging a warning with
    ``unknown_msg``) or does not exceed ``limit``; returns False, after
    logging ``filtered_msg``, when it exceeds the limit.
    """
    if minutes is None:
        log.warning(unknown_msg, adres)
        return True
    if minutes > limit:
        log.info(filtered_msg, adres, minutes, limit)
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Orchestration
|
# Orchestration
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -290,42 +356,64 @@ def notify_ha(listing: RawListing, travel: dict[str,int]) -> None:
|
|||||||
Scraper = Callable[[], list[RawListing]]
|
Scraper = Callable[[], list[RawListing]]
|
||||||
|
|
||||||
|
|
||||||
def _run_scraper(scraper: Scraper) -> tuple[str, list[RawListing]]:
    """Run a single scraper and return ``(name, listings)``.

    Any exception raised by the scraper is logged and turned into an empty
    result, so one failing scraper does not abort the others.
    """
    # Callables such as functools.partial have no __name__; fall back to
    # repr() instead of raising before the try block is entered.
    name = getattr(scraper, "__name__", None) or repr(scraper)
    log.info("Scraper starten: %s", name)
    try:
        listings = scraper()
        log.info("Scraper %s: %d listings opgehaald", name, len(listings))
        return name, listings
    except Exception as e:
        log.error("Scraper %s gefaald: %s", name, e)
        return name, []


def run(scrapers: dict[str, Scraper] | list[Scraper], db_path: str) -> None:
    """Run all scrapers, store their listings and notify for new matches.

    Phase 1 runs the scrapers concurrently in a thread pool. Phase 2 then
    processes the collected listings one by one in the calling thread:
    travel times are computed only for listings not yet in the database,
    the listing is upserted, and a notification is sent for new listings
    that pass ``_check_filters``.

    Args:
        scrapers: Scraper callables, either as a mapping (only the values
            are used) or as a plain sequence.
        db_path: Path passed to ``get_db``.
    """
    import concurrent.futures

    if isinstance(scrapers, dict):
        scraper_funcs = list(scrapers.values())
    else:
        scraper_funcs = list(scrapers)

    conn = get_db(db_path)
    total_new = 0
    total_notified = 0

    try:
        # Phase 1: run all scrapers concurrently (each hits a different domain)
        all_listings: list[RawListing] = []
        # ThreadPoolExecutor rejects max_workers=0, so skip the pool entirely
        # when there is nothing to run.
        if scraper_funcs:
            with concurrent.futures.ThreadPoolExecutor(max_workers=len(scraper_funcs)) as pool:
                futures = [pool.submit(_run_scraper, s) for s in scraper_funcs]
                for future in concurrent.futures.as_completed(futures):
                    _name, listings = future.result()
                    all_listings.extend(listings)

        log.info("Alle scrapers klaar. %d listings totaal opgehaald.", len(all_listings))

        # Phase 2: sequential travel calculation + upsert + filtered notify
        for listing in all_listings:
            travel = {}
            try:
                # Check whether the listing is already known before the
                # upsert, so travel times are only computed for new ones.
                lid = listing_id(listing.url)
                is_existing = conn.execute(
                    "SELECT id FROM woningen WHERE id = ?", (lid,)
                ).fetchone() is not None

                if not is_existing:
                    travel = bereken_reistijden(listing.postcode, listing.stad)

                is_new = upsert(conn, listing, travel)

                if is_new:
                    total_new += 1
                    log.info("Nieuwe woning: %s (%s)", listing.adres, listing.url)
                    if _check_filters(listing, travel):
                        total_notified += 1
                        notify_ha(listing, travel)
                    else:
                        log.info("Geen notificatie voor %s (gefilterd)", listing.adres)

            except Exception as e:
                # Best effort: one bad listing must not stop the run.
                log.error("Fout bij verwerken %s: %s", listing.url, e)

        log.info(
            "Run klaar. %d nieuwe woningen, %d notificaties verstuurd.",
            total_new, total_notified,
        )
    finally:
        # Close the connection even if something unexpected escapes above.
        conn.close()
|
||||||
|
|||||||
@@ -8,17 +8,17 @@
|
|||||||
<link href="https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700;800&family=DM+Mono:wght@400;500&display=swap" rel="stylesheet">
|
<link href="https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700;800&family=DM+Mono:wght@400;500&display=swap" rel="stylesheet">
|
||||||
<style>
|
<style>
|
||||||
:root {
|
:root {
|
||||||
--bg: #0f0f0f;
|
--bg: #f5f0eb;
|
||||||
--surface: #181818;
|
--surface: #fdf9f5;
|
||||||
--surface2: #222222;
|
--surface2: #ede8e2;
|
||||||
--border: #2a2a2a;
|
--border: #ddd6cc;
|
||||||
--accent: #c8f060;
|
--accent: #6a9e78;
|
||||||
--accent-dim: #8aaa30;
|
--accent-dim: #4f7a5c;
|
||||||
--text: #e8e8e8;
|
--text: #2e2a25;
|
||||||
--text-dim: #888;
|
--text-dim: #7a7068;
|
||||||
--text-dimmer: #555;
|
--text-dimmer: #aaa098;
|
||||||
--red: #ff5f5f;
|
--red: #c0524a;
|
||||||
--orange: #ffaa44;
|
--orange: #c07c3a;
|
||||||
--radius: 10px;
|
--radius: 10px;
|
||||||
--font-ui: 'Syne', sans-serif;
|
--font-ui: 'Syne', sans-serif;
|
||||||
--font-mono: 'DM Mono', monospace;
|
--font-mono: 'DM Mono', monospace;
|
||||||
@@ -150,7 +150,7 @@
|
|||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
transition: border-color 0.15s;
|
transition: border-color 0.15s;
|
||||||
}
|
}
|
||||||
.card:hover { border-color: #3a3a3a; }
|
.card:hover { border-color: #c5bdb4; }
|
||||||
|
|
||||||
.card-compact {
|
.card-compact {
|
||||||
display: grid;
|
display: grid;
|
||||||
@@ -187,7 +187,7 @@
|
|||||||
position: absolute;
|
position: absolute;
|
||||||
bottom: 0.4rem;
|
bottom: 0.4rem;
|
||||||
left: 0.4rem;
|
left: 0.4rem;
|
||||||
background: rgba(0,0,0,0.7);
|
background: rgba(255,255,255,0.75);
|
||||||
backdrop-filter: blur(4px);
|
backdrop-filter: blur(4px);
|
||||||
color: var(--text-dim);
|
color: var(--text-dim);
|
||||||
font-family: var(--font-mono);
|
font-family: var(--font-mono);
|
||||||
@@ -250,7 +250,7 @@
|
|||||||
.card-link:hover {
|
.card-link:hover {
|
||||||
color: var(--accent);
|
color: var(--accent);
|
||||||
border-color: var(--accent-dim);
|
border-color: var(--accent-dim);
|
||||||
background: rgba(200,240,96,0.06);
|
background: rgba(106,158,120,0.08);
|
||||||
}
|
}
|
||||||
.card-link svg { flex-shrink: 0; }
|
.card-link svg { flex-shrink: 0; }
|
||||||
|
|
||||||
@@ -53,7 +53,7 @@ def index():
|
|||||||
d["extra"] = {}
|
d["extra"] = {}
|
||||||
listings.append(d)
|
listings.append(d)
|
||||||
|
|
||||||
return render_template("src/views/index.html", listings_json=json.dumps(listings, ensure_ascii=False))
|
return render_template("index.html", listings_json=json.dumps(listings, ensure_ascii=False))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user