refactor: split ssr.py into package, enrich OG Online detail pages, fix travel upsert

- Split src/adapters/ssr.py (2160 LOC) into ssr/ package grouped by CMS:
  realworks.py, sure.py, schiedam.py, denhaag.py, overige.py
- Add _og_detail() to api.py; all OG Online scrapers now fall back to a
  detail-page fetch when energielabel/bouwjaar are missing from the API response
- Fix run() to recalculate travel times for existing listings where
  fiets_mark IS NULL; upsert() now also writes the travel columns on existing rows
- Update tests/cache.py to patch fetch_soup in every ssr submodule
- Update docs to reflect new package structure and mark API enrichment TODO done

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-11 23:39:35 +02:00
parent 1011d9cf87
commit f74e9bcfb0
14 changed files with 2478 additions and 2199 deletions

View File

@@ -22,10 +22,10 @@ def _key(url: str, params: dict[str,str] | None) -> str:
def _patch():
import adapters.api as api_mod
import adapters.ssr as ssr_mod
import adapters.ssr._shared as ssr_shared
_orig_fetch_json = api_mod.fetch_json
_orig_fetch_soup = ssr_mod.fetch_soup
_orig_fetch_soup = ssr_shared.fetch_soup
def cached_fetch_json(url, *, params: dict[str,str]|None=None, headers=None):
path = CACHE_DIR / (_key(url, params) + ".json")
@@ -46,7 +46,15 @@ def _patch():
return result
api_mod.fetch_json = cached_fetch_json
ssr_mod.fetch_soup = cached_fetch_soup
# fetch_soup is imported directly in each submodule via `from ._shared import fetch_soup`,
# so we must patch the name in every submodule that uses it.
import adapters.ssr.realworks as _rw
import adapters.ssr.sure as _sure
import adapters.ssr.schiedam as _sch
import adapters.ssr.denhaag as _dh
import adapters.ssr.overige as _ov
for _mod in [ssr_shared, _rw, _sure, _sch, _dh, _ov]:
_mod.fetch_soup = cached_fetch_soup
print("[cache] fetch_json and fetch_soup patched")