refactor: split ssr.py into package, enrich OG Online detail pages, fix travel upsert
- Split src/adapters/ssr.py (2160 LOC) into ssr/ package grouped by CMS: realworks.py, sure.py, schiedam.py, denhaag.py, overige.py
- Add _og_detail() to api.py; all OG Online scrapers now fall back to a detail page fetch when energielabel/bouwjaar are missing from the API
- Fix run() to recalculate travel times for existing listings where fiets_mark IS NULL; upsert() now writes travel cols on existing rows too
- Update tests/cache.py to patch fetch_soup in every ssr submodule
- Update docs to reflect new package structure and mark API enrichment TODO done

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -22,10 +22,10 @@ def _key(url: str, params: dict[str,str] | None) -> str:
|
||||
|
||||
def _patch():
|
||||
import adapters.api as api_mod
|
||||
import adapters.ssr as ssr_mod
|
||||
import adapters.ssr._shared as ssr_shared
|
||||
|
||||
_orig_fetch_json = api_mod.fetch_json
|
||||
_orig_fetch_soup = ssr_mod.fetch_soup
|
||||
_orig_fetch_soup = ssr_shared.fetch_soup
|
||||
|
||||
def cached_fetch_json(url, *, params: dict[str,str]|None=None, headers=None):
|
||||
path = CACHE_DIR / (_key(url, params) + ".json")
|
||||
@@ -46,7 +46,15 @@ def _patch():
|
||||
return result
|
||||
|
||||
api_mod.fetch_json = cached_fetch_json
|
||||
ssr_mod.fetch_soup = cached_fetch_soup
|
||||
# fetch_soup is imported directly in each submodule via `from ._shared import fetch_soup`,
|
||||
# so we must patch the name in every submodule that uses it.
|
||||
import adapters.ssr.realworks as _rw
|
||||
import adapters.ssr.sure as _sure
|
||||
import adapters.ssr.schiedam as _sch
|
||||
import adapters.ssr.denhaag as _dh
|
||||
import adapters.ssr.overige as _ov
|
||||
for _mod in [ssr_shared, _rw, _sure, _sch, _dh, _ov]:
|
||||
_mod.fetch_soup = cached_fetch_soup
|
||||
print("[cache] fetch_json and fetch_soup patched")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user