Files
huizenbot/tests/cache.py
Mark Kalsbeek f74e9bcfb0 refactor: split ssr.py into package, enrich OG Online detail pages, fix travel upsert
- Split src/adapters/ssr.py (2160 LOC) into ssr/ package grouped by CMS:
  realworks.py, sure.py, schiedam.py, denhaag.py, overige.py
- Add _og_detail() to api.py; all OG Online scrapers now fall back to
  detail page fetch when energielabel/bouwjaar are missing from the API
- Fix run() to recalculate travel times for existing listings where
  fiets_mark IS NULL; upsert() now writes travel cols on existing rows too
- Update tests/cache.py to patch fetch_soup in every ssr submodule
- Update docs to reflect new package structure and mark API enrichment TODO done

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-11 23:39:35 +02:00

62 lines
2.0 KiB
Python

"""
cache.py — import this before anything else in a test file to enable
file-based caching of fetch_json and fetch_soup calls.
Cache is stored in tests/cache/ keyed by a hash of the URL + params.
Delete the cache directory to bust it.
"""
import hashlib
import json
import pickle
from pathlib import Path
# Cached responses are stored in a "cache" directory that sits next to this
# file; it is created on import if it does not exist yet.
CACHE_DIR = Path(__file__).parent.joinpath("cache")
CACHE_DIR.mkdir(parents=False, exist_ok=True)
def _key(url: str, params: dict[str,str] | None) -> str:
raw = json.dumps({"url": url, "params": params or {}}, sort_keys=True)
return hashlib.sha256(raw.encode()).hexdigest()
def _patch():
    """Rebind the adapters' fetch helpers to wrappers that cache on disk.

    ``adapters.api.fetch_json`` and ``fetch_soup`` (in ``adapters.ssr._shared``
    and in each ssr submodule listed below) are replaced by wrappers that
    store every result under ``CACHE_DIR``: JSON results as ``<key>.json``
    and soup results as pickled ``<key>.pickle``, where ``<key>`` is
    ``_key(url, params)``. When the file for a request already exists, it is
    read back and the real fetcher is not called.

    ``headers`` are forwarded to the real ``fetch_json`` but are not part of
    the cache key.
    """
    import adapters.api as api_mod
    import adapters.ssr._shared as ssr_shared

    # Keep references to the real implementations before any name is rebound.
    real_fetch_json = api_mod.fetch_json
    real_fetch_soup = ssr_shared.fetch_soup

    def cached_fetch_json(url, *, params: dict[str, str] | None = None, headers=None):
        """Return the JSON result for the request, fetching only on a miss."""
        cache_file = CACHE_DIR / f"{_key(url, params)}.json"
        if not cache_file.exists():
            fresh = real_fetch_json(url, params=params, headers=headers)
            cache_file.write_text(json.dumps(fresh))
            return fresh
        print(f"[cache hit] {url}")
        return json.loads(cache_file.read_text())

    def cached_fetch_soup(url, *, params=None):
        """Return the soup for the request, fetching only on a miss."""
        cache_file = CACHE_DIR / f"{_key(url, params)}.pickle"
        if not cache_file.exists():
            fresh = real_fetch_soup(url, params=params)
            cache_file.write_bytes(pickle.dumps(fresh))
            return fresh
        print(f"[cache hit] {url}")
        # The file is unpickled as-is, so CACHE_DIR should only ever hold
        # files written by this module.
        return pickle.loads(cache_file.read_bytes())

    api_mod.fetch_json = cached_fetch_json

    # Each ssr submodule does `from ._shared import fetch_soup`, which gives
    # it its own binding of the name; rebinding it in _shared alone would not
    # reach them, so every module that uses it is patched.
    import adapters.ssr.realworks as realworks_mod
    import adapters.ssr.sure as sure_mod
    import adapters.ssr.schiedam as schiedam_mod
    import adapters.ssr.denhaag as denhaag_mod
    import adapters.ssr.overige as overige_mod

    soup_users = (
        ssr_shared,
        realworks_mod,
        sure_mod,
        schiedam_mod,
        denhaag_mod,
        overige_mod,
    )
    for target in soup_users:
        target.fetch_soup = cached_fetch_soup

    print("[cache] fetch_json and fetch_soup patched")
# Install the cached wrappers as a side effect of importing this module, so a
# test file only needs to import it (before anything else) to enable caching.
_patch()