add some more makelaars, and some more infra

2026-04-03 15:49:42 +02:00
parent 26d9d936f4
commit 17b35d1997
9 changed files with 928 additions and 70 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,5 +1,3 @@
-NAVITIA_API_KEY=
-
 HA_WEBHOOK_URL=

 SMTP_HOST=
--- a/add_scraper_context.md
+++ b/add_scraper_context.md
@@ -0,0 +1,358 @@
+# Huizenbot — Agent Context for Adding Routes
+
+## Project Overview
+
+**Huizenbot** is a periodic scraper of real estate broker websites in Delft and Schiedam (Netherlands). It:
+- Fetches property listings from broker websites
+- Saves new ones to SQLite with `RawListing` schema
+- Calculates travel times (bike + public transit) to two work locations
+- Sends push notifications via Home Assistant webhook (with email fallback)
+
+**Your role:** You will add new broker adapters (scrapers) to the `src/adapters/` directory. A human will:
+1. Select a broker from the list
+2. Help you investigate the broker's website
+3. For API-based brokers: develop curl requests to test
+4. For HTML scrapers: develop parsing logic using BeautifulSoup
+5. Run `tests/test_adapters.py` to validate
+6. Merge your code snippets into the codebase
+
+---
+
+## Key Schema: RawListing
+
+**Location:** `src/huizenbot.py` (lines 29–52)
+
+This is the data model you must populate. All fields except `url` are optional:
+
+```python
+@dataclass
+class RawListing:
+    url: str                          # REQUIRED — the listing URL
+    
+    source_makelaar: str = ""         # Name of the broker (e.g., "bjornd", "vdaal")
+    datum_aanmelding: str | None = None  # ISO 8601 date if available
+    status: str = "beschikbaar"       # enum: beschikbaar | onder_bod | verkocht
+    
+    # Location
+    adres: str | None = None          # Street address (e.g., "Binnenwatersloot 3")
+    postcode: str | None = None       # Dutch postcode (e.g., "2611CA")
+    stad: str | None = None           # City (e.g., "Delft")
+    
+    # Property details
+    prijs: int | None = None          # Price in euros (integer, no float)
+    woningtype: str | None = None     # Type (e.g., "appartement", "tussenwoning")
+    woonoppervlak: int | None = None  # Living space in m²
+    perceeloppervlak: int | None = None  # Plot size in m² (NULL for apartments)
+    kamers: int | None = None         # Number of rooms
+    slaapkamers: int | None = None    # Number of bedrooms
+    bouwjaar: int | None = None       # Build year
+    energielabel: str | None = None   # Energy label (e.g., "A", "B")
+    
+    # Media
+    hero_image_url: str | None = None # Main photo URL
+    
+    # Extra data (broker-specific fields)
+    extra: dict[str, Any] = field(default_factory=dict)  # Arbitrary JSON data
+```
+
+**DB Upsert:** A listing is inserted the first time it is seen (with `id = sha256(url)`). On subsequent runs only `last_seen` and `status` are updated. Travel times are calculated only on first insert.
+
+---
+
+## Adapter Structure
+
+Adapters live in `src/adapters/` and are organized by type:
+
+### Two Adapter Types
+
+#### 1. **API-based** (`src/adapters/api.py`)
+For brokers with REST/JSON endpoints.
+
+**Pattern:**
+```python
+def fetch_bjornd() -> list[RawListing]:
+    data = fetch_json("https://...", params={...}, headers={...})
+    listings = []
+    for item in data:
+        # Filter / validate
+        if item.get("status") in _SKIP:
+            continue
+        if item.get("price", 0) > config.MAX_PRICE:
+            continue
+        
+        listings.append(RawListing(
+            url=item["url"],
+            source_makelaar="bjornd",
+            adres=item.get("address"),
+            postcode=item.get("zipcode"),
+            # ... etc
+        ))
+    
+    log.info("bjornd: %d listings", len(listings))
+    return listings
+```
+
+**Helpers available:**
+- `fetch_json(url, *, params=None, headers=None)` — GET with User-Agent, timeout, Retry-After handling
+- Built-in logging via `log = logging.getLogger("huizenbot.api")`
+
+#### 2. **SSR/HTML-based** (`src/adapters/ssr.py`)
+For brokers with server-side rendered HTML.
+
+**Pattern:**
+```python
+def fetch_vdaal() -> list[RawListing]:
+    soup = fetch_soup("https://vdaalmakelaardij.nl/aanbod")
+    listings = []
+    
+    for card in soup.select(".property-card"):
+        try:
+            url = card.select_one("a[href]")["href"]
+            if not url.startswith("http"):
+                url = VDAAL_BASE + url
+            
+            adres = _text(card, ".address-selector")
+            postcode = _extract_postcode(adres)
+            prijs = parse_prijs(_text(card, ".price"))
+            
+            listings.append(RawListing(
+                url=url,
+                source_makelaar="vdaal",
+                adres=adres,
+                postcode=postcode,
+                stad=_infer_stad(postcode),
+                prijs=prijs,
+                # ... etc
+            ))
+        except Exception as e:
+            log.warning("Parse error: %s", e)
+    
+    log.info("vdaal: %d listings", len(listings))
+    return listings
+```
+
+**Helpers available:**
+- `fetch_soup(url, *, params=None)` — GET with BeautifulSoup, Retry-After handling
+- `parse_prijs(text)` — Extract price from strings like "€ 325.000 k.k." → 325000
+- `parse_m2(text)` — Extract area from "87 m²" → 87
+- `_text(soup, selector)` — Get inner text from element
+- `_src(soup, selector)` — Get src or data-src attribute
+- `_extract_postcode(text)` — Regex postcode from any text
+- `_infer_stad(postcode)` — Simple lookup: 2600–2629 → Delft, 3100–3135 → Schiedam
+
+---
+
+## Registration
+
+Both `api.py` and `ssr.py` have a `SCRAPERS` dict at the bottom:
+
+```python
+# api.py
+SCRAPERS = {
+    'bjornd': fetch_bjornd,
+    'your_broker': fetch_your_broker,  # ← Add here
+}
+
+# ssr.py
+SCRAPERS = {
+    'bjornd_demo': fetch_bjornd_demo,
+    'your_broker': fetch_your_broker,  # ← Add here
+}
+```
+
+The `src/adapters/__init__.py` merges both dicts, so the runner picks up all registered adapters automatically.
+
+---
+
+## Testing Workflow
+
+### 1. Understand the Website
+The human will help you:
+- Identify the broker's API endpoint (or the HTML structure)
+- Check for a `robots.txt` or rate limit headers
+- Write exploratory curl requests (for APIs) or BeautifulSoup inspections
+
+### 2. Develop & Test Locally
+- Add your scraper function to the appropriate file (`api.py` or `ssr.py`)
+- Register it in the `SCRAPERS` dict
+- The human updates `tests/test_adapters.py` to point to your adapter:
+  ```python
+  ADAPTER = SCRAPERS['your_broker_name']
+  ```
+- Run the test:
+  ```bash
+  cd tests && python test_adapters.py
+  ```
+- The test prints listings in a simple format so you can validate output
+
+### 3. Merge Code
+Once validated, the human will **copy your inline code snippets** into the main codebase. You produce **easily pasteable functions**, not entire files.
+
+---
+
+## Config & Constants
+
+**Location:** `src/config.py`
+
+Key values you may reference:
+- `MAX_PRICE = 300_000` — Price filter (your scraper can skip listings above this)
+- `USER_AGENT = "Huizenbot/1.0 (+mark@kalsbeek.dev) persoonlijk gebruik"` — Used in all HTTP headers
+- `MARK_WERK_POSTCODE`, `MICHELLE_WERK_POSTCODE` — Work postcodes for travel time calculation
+
+Secrets (API keys, webhook URLs) are **environment variables**, not in config.
+
+---
+
+## CMS Detection Tool
+
+Before investigating a broker's HTML manually, prod the human in the loop to run `autoscraper.py` from the project root:
+```bash
+python autoscraper.py listings <listings-url>
+python autoscraper.py details <detail-page-url>
+```
+
+If the broker uses a known CMS, the tool prints the exact code to add — no further investigation needed. Currently detected CMSes:
+
+- **Realworks** → prints a ready-to-paste `fetch_realworks(...)` one-liner for `ssr.py`
+
+If the CMS is unknown, the tool prints structural diagnostics (card selectors, field patterns, pagination) to guide manual adapter development.
+
+## Important Notes
+
+### Status Mapping
+Brokers use different status strings. Always map to one of:
+- `"beschikbaar"` — Available for sale
+- `"onder_bod"` — Under offer
+- `"verkocht"` — Sold
+
+Example from api.py:
+```python
+_STATUS_MAP = {
+    "available": "beschikbaar",
+    "under_bid": "onder_bod",
+    "sold": "verkocht",
+}
+status = _STATUS_MAP.get(item.get("status"), "beschikbaar")
+```
+
+### Postcode Extraction
+Always aim for the **Dutch postcode format** (4 digits + 2 letters, e.g., `"2611CA"`). The travel time calculation depends on it. If a broker only provides the address string, use `_extract_postcode(address)`.
+
+### Price Handling
+Prices are **integers** (euros), never floats. Use `parse_prijs()` for HTML.
+
+### Image URLs
+Store the hero/main image URL in `hero_image_url`. This appears in Home Assistant notifications.
+
+### Extra Data
+If a broker provides extra fields that don't fit the schema (e.g., balcony, garden, orientation), store them in the `extra` dict:
+```python
+listings.append(RawListing(
+    url=...,
+    ...
+    extra={
+        "balcony": item.get("has_balcony"),
+        "garden": item.get("has_garden"),
+        "custom_field": item.get("something_else"),
+    }
+))
+```
+
+The database stores this as JSON in the `extra` column.
+
+### Error Handling
+- Wrap individual listing parsing in try/except to continue on one bad listing
+- Log parse failures as warnings, not errors (broker HTML changes often, so a failed parse is expected from time to time)
+- Let HTTP errors bubble up (the runner catches them at the adapter level)
+
+### Rate Limiting & Ethics
+- Both `fetch_json()` and `fetch_soup()` handle 429 Retry-After automatically
+- Nominatim (geocoding) has a 1 req/s limiter built into `huizenbot.py`
+- Never spawn parallel requests without the human's approval
+- Always use the `USER_AGENT` header (includes contact info for respectful scraping)
+
+---
+
+## Example: Adding "Van Daal" (API-based)
+
+### Scenario
+The human finds that Van Daal (vandaalmakelaardij.nl) has a JSON API at:
+```
+https://api.vandaal.nl/listings?city=delft&status=available
+```
+
+### Your Code (add to api.py)
+
+```python
+# Van Daal
+# --------
+_VANDAAL_BASE = "https://www.vandaalmakelaardij.nl"
+_VANDAAL_API = "https://api.vandaal.nl/listings"
+
+_VANDAAL_STATUS_MAP = {
+    "available": "beschikbaar",
+    "under_offer": "onder_bod",
+    "sold": "verkocht",
+}
+
+def fetch_vandaal() -> list[RawListing]:
+    listings = []
+    for city in ["delft", "schiedam"]:
+        data = fetch_json(
+            _VANDAAL_API,
+            params={"city": city, "status": "available"}
+        )
+        
+        for item in data.get("listings", []):
+            if item.get("price", 0) > config.MAX_PRICE:
+                continue
+            
+            listings.append(RawListing(
+                url=item["url"],
+                source_makelaar="vandaal",
+                adres=item.get("address"),
+                postcode=item.get("postcode"),
+                stad=item.get("city"),
+                prijs=item.get("price"),
+                woningtype=item.get("type"),
+                woonoppervlak=item.get("living_area"),
+                slaapkamers=item.get("bedrooms"),
+                hero_image_url=item.get("image_url"),
+            ))
+    
+    log.info("vandaal: %d listings", len(listings))
+    return listings
+```
+
+### Register in SCRAPERS (in api.py)
+```python
+SCRAPERS = {
+    'bjornd': fetch_bjornd,
+    'vandaal': fetch_vandaal,  # ← Add this
+}
+```
+
+### Test
+Human updates `test_adapters.py`:
+```python
+ADAPTER = SCRAPERS['vandaal']
+```
+
+Then runs:
+```bash
+cd tests && python test_adapters.py
+```
+
+If all looks good, the human copies the `fetch_vandaal()` function into the real `api.py` and adds it to `SCRAPERS`.
+
+---
+
+## Summary
+
+1. **You receive** an adapter request + investigation results (API endpoint or HTML structure)
+2. **You write** a clean, self-contained scraper function that returns `list[RawListing]`
+3. **You register** it in the appropriate `SCRAPERS` dict
+4. **The human tests** it with `test_adapters.py` and validates output
+5. **The human merges** your code into the production files
+
+Keep code simple, use the provided helpers, populate `RawListing` fields as best you can, and always set `source_makelaar` and `url` correctly.
--- a/autoscraper.py
+++ b/autoscraper.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+"""
+autoscraper.py — detect CMS and extract patterns from broker pages
+
+Usage:
+    python autoscraper.py listings <url>   — detect CMS + card structure
+    python autoscraper.py details <url>    — detect CMS + kenmerk patterns
+"""
+
+import re
+import sys
+
+import httpx
+from bs4 import BeautifulSoup, Tag
+
+UA = "Huizenbot/1.0 (+mark@kalsbeek.dev) persoonlijk gebruik"
+
+# ---------------------------------------------------------------------------
+# CMS fingerprints
+# ---------------------------------------------------------------------------
+
+# Each entry is a dict with the keys:
+#   name     — display name of the CMS
+#   listings — signals checked against a listings (overview) page
+#   details  — signals checked against a detail page
+#   hint     — adapter call template; cmd_listings fills in {base_url} and {makelaar}
+# Signals are (selector, min_count) tuples — all must match.
+CMS_FINGERPRINTS = [
+    {
+        "name": "Realworks",
+        "listings": [("li.aanbodEntry", 1), ("span.kenmerkValue", 1)],
+        "details":  [("span.kenmerkName", 3), ("span.kenmerkValue", 3)],
+        "hint": "fetch_realworks('{base_url}', '{makelaar}')",
+    },
+]
+
+# ---------------------------------------------------------------------------
+# Candidate card selectors (tried in order for unknown CMS)
+# ---------------------------------------------------------------------------
+
+# _find_cards uses the first selector in this list that matches at least two elements.
+CARD_CANDIDATES = [
+    "li.aanbodEntry",
+    "article",
+    "li[class*=object]",
+    "li[class*=woning]",
+    "li[class*=listing]",
+    "div[class*=object-item]",
+    "div[class*=property-item]",
+    "div[class*=aanbod]",
+    ".listing-item",
+]
+
+# ---------------------------------------------------------------------------
+# Regex patterns for field detection
+# ---------------------------------------------------------------------------
+
+# Dutch postcode: four digits, one optional whitespace character, two uppercase letters
+RE_POSTCODE = re.compile(r"\b\d{4}\s?[A-Z]{2}\b")
+# Euro sign, optional whitespace, then a run of digits / "." / ","
+RE_PRICE    = re.compile(r"€\s*[\d.,]+")
+# Digits, optional whitespace, then "m²" or "m2"
+RE_M2       = re.compile(r"\d+\s*m[²2]")
+# Pagination hrefs: "pagina" + optional "-" or "/" + digits, a "p=" / "page=" query parameter, or a trailing numeric path segment
+RE_PAGE_URL = re.compile(r"pagina[-/]?\d+|[?&]p(?:age)?=\d+|/\d+/?$")
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def fetch(url: str) -> BeautifulSoup:
+    """Download *url* and return its body parsed with ``html.parser``.
+
+    The request sends the bot User-Agent, follows redirects and uses a
+    15 second timeout. ``raise_for_status()`` is called on the response, so
+    HTTP error statuses raise instead of being parsed.
+    """
+    response = httpx.get(
+        url,
+        headers={"User-Agent": UA},
+        timeout=15,
+        follow_redirects=True,
+    )
+    response.raise_for_status()
+    markup = response.text
+    return BeautifulSoup(markup, "html.parser")
+
+
+def _selector_path(el: Tag) -> str:
+    """Return a short CSS-like path for *el*: up to two nearest ancestors, then *el*.
+
+    Each step is rendered as ``tag.class1.class2`` (or just ``tag`` when the
+    element has no classes) and steps are joined with `` > ``. The document
+    root, ``html`` and ``body`` are never included.
+    """
+    def _label(node: Tag) -> str:
+        cls = ".".join(node.get("class", []))
+        return f"{node.name}.{cls}" if cls else node.name
+
+    # .parents yields the closest ancestor first. Collecting from that end keeps
+    # the path anchored to the element's real parents instead of the outermost
+    # page containers.
+    nearest = []
+    for ancestor in el.parents:
+        if ancestor.name in (None, "[document]", "html", "body"):
+            continue
+        nearest.append(_label(ancestor))
+        if len(nearest) >= 2:
+            break
+    return " > ".join([*reversed(nearest), _label(el)])
+
+
+def _detect_cms(soup: BeautifulSoup, mode: str) -> dict | None:
+    """Return the first CMS fingerprint whose signals all match *soup*, else None.
+
+    *mode* ``"listings"`` checks the fingerprint's listings signals; any other
+    value checks its details signals.
+    """
+    signal_key = "details" if mode != "listings" else "listings"
+
+    def _matches(fingerprint: dict) -> bool:
+        # Every (selector, min_count) signal must be satisfied
+        for selector, min_count in fingerprint[signal_key]:
+            if len(soup.select(selector)) < min_count:
+                return False
+        return True
+
+    return next((fp for fp in CMS_FINGERPRINTS if _matches(fp)), None)
+
+
+def _find_cards(soup: BeautifulSoup) -> tuple[list, str | None]:
+    """Locate the repeated "card" elements on a listings page.
+
+    Returns ``(elements, description)``. The known selectors in
+    CARD_CANDIDATES are tried in order and the first one matching at least two
+    elements wins. Otherwise the most frequently repeated class combination is
+    used, provided it occurs at least twice. Returns ``([], None)`` when
+    nothing repeats.
+    """
+    for sel in CARD_CANDIDATES:
+        found = soup.select(sel)
+        if len(found) >= 2:
+            return found, sel
+
+    # Fallback: group elements by their exact class tuple. The grouped elements
+    # are returned directly rather than re-selected through a generated CSS
+    # selector, because class names such as "md:flex" or "w-1/2" are not valid
+    # unescaped selectors. This also keeps the reported count equal to the
+    # number of elements returned.
+    from collections import defaultdict
+    by_class: defaultdict = defaultdict(list)
+    for el in soup.find_all(True):
+        cls = tuple(el.get("class", []))
+        if cls:
+            by_class[cls].append(el)
+    if by_class:
+        # max() keeps the first-seen group on ties
+        top_cls, members = max(by_class.items(), key=lambda kv: len(kv[1]))
+        count = len(members)
+        if count >= 2:
+            sel = "." + ".".join(top_cls)
+            return members, f"{sel} (auto-detected, count={count})"
+    return [], None
+
+
+def _pattern_hits(soup: BeautifulSoup, pattern: re.Pattern, label: str):
+    """Print up to four elements whose text matches *pattern*, under a *label* heading.
+
+    For every matching string node with a truthy parent, the parent's stripped
+    text (cut to 80 characters) and its selector path are recorded. Nothing is
+    printed when there are no hits.
+    """
+    matches = [
+        (node.parent.get_text(strip=True)[:80], _selector_path(node.parent))
+        for node in soup.find_all(string=pattern)
+        if node.parent
+    ]
+    if not matches:
+        return
+
+    print(f"\n  [{label}] — {len(matches)} hit(s)")
+    for snippet, selector in matches[:4]:
+        print(f"    {selector}")
+        print(f"    → {snippet!r}")
+
+
+# ---------------------------------------------------------------------------
+# Commands
+# ---------------------------------------------------------------------------
+
+def cmd_listings(url: str):
+    """Fetch a listings page and print either a CMS adapter hint or diagnostics.
+
+    When a known CMS is detected, the ready-to-paste adapter snippet is printed
+    and the function returns. Otherwise it prints the detected card elements,
+    field pattern hits and pagination links to guide manual adapter work.
+    """
+    print(f"Fetching: {url}\n")
+    soup = fetch(url)
+    # First three "/"-separated pieces of the URL, i.e. "scheme://host"
+    base_url = "/".join(url.split("/")[:3])
+
+    cms = _detect_cms(soup, "listings")
+
+    if cms:
+        print(f"✓ CMS DETECTED: {cms['name']}")
+        # The broker name is left as a "<name>" placeholder for the user to fill in
+        hint = cms["hint"].format(base_url=base_url, makelaar="<name>")
+        print(f"\n  Add to ssr.py:\n")
+        print(f"    def fetch_<name>() -> list[RawListing]:")
+        print(f"        return {hint}\n")
+        print(f"  Register in SCRAPERS dict:")
+        print(f"    '<name>': fetch_<name>,")
+        return
+
+    print("✗ CMS unknown — structural diagnostics:\n")
+
+    # Cards
+    cards, matched_sel = _find_cards(soup)
+    print(f"=== CARDS ({matched_sel or 'none found'}: {len(cards)}) ===")
+    if cards:
+        print("\n--- FIRST CARD ---")
+        # Cap the dump at 2500 characters
+        print(cards[0].prettify()[:2500])
+        print("\n--- CHILD ELEMENTS & CLASSES ---")
+        for el in cards[0].find_all(True):
+            cls = el.get("class")
+            text = el.get_text(strip=True)[:50]
+            # Only descendants that carry a class attribute are listed
+            if cls:
+                print(f"  <{el.name}> .{' .'.join(cls)}   {text!r}")
+
+    # Pattern hits within the first card (or the full page if no cards)
+    search_area = cards[0] if cards else soup
+    print("\n=== FIELD PATTERNS ===")
+    _pattern_hits(search_area, RE_POSTCODE, "postcode")
+    _pattern_hits(search_area, RE_PRICE,    "prijs")
+    _pattern_hits(search_area, RE_M2,       "m²")
+
+    # Pagination
+    print("\n=== PAGINATION ===")
+    page_links = soup.find_all("a", href=RE_PAGE_URL)
+    if page_links:
+        # Print each distinct href once, in document order
+        seen = set()
+        for a in page_links:
+            href = a.get("href", "")
+            if href not in seen:
+                seen.add(href)
+                print(f"  {href!r}  — {a.get_text(strip=True)!r}")
+    else:
+        print("  No pagination links found")
+
+
+def cmd_details(url: str):
+    """Fetch a detail page and print either the CMS field mapping or diagnostics.
+
+    When a known CMS is detected, each target RawListing field is printed with
+    the kenmerk label it is read from and whether that label was found on the
+    page. Otherwise headings, key-value structures, field pattern hits and the
+    first images are printed to guide manual adapter work.
+    """
+    print(f"Fetching: {url}\n")
+    soup = fetch(url)
+
+    cms = _detect_cms(soup, "details")
+
+    if cms:
+        print(f"✓ CMS DETECTED: {cms['name']}")
+        print("\n  _realworks_detail() will extract:")
+        # Lower-cased kenmerk label -> value, read from span.kenmerk elements
+        kv: dict[str, str] = {}
+        for kenmerk in soup.select("span.kenmerk"):
+            label_el = kenmerk.select_one("span.kenmerkName")
+            value_el = kenmerk.select_one("span.kenmerkValue")
+            if label_el and value_el:
+                label = label_el.get_text(strip=True).lower()
+                value = value_el.get_text(strip=True)
+                kv[label] = value
+
+        # Page label -> RawListing field name
+        target_fields = {
+            "type woning":        "woningtype",
+            "bouwjaar":           "bouwjaar",
+            "woonoppervlakte":    "woonoppervlak",
+            "perceeloppervlakte": "perceeloppervlak",
+            "aantal kamers":      "kamers",
+            "aantal slaapkamers": "slaapkamers",
+            "energieklasse":      "energielabel",
+        }
+        for key, field in target_fields.items():
+            val = kv.get(key, "NOT FOUND")
+            status = "✓" if key in kv else "✗"
+            print(f"    {status} {field:<20} ← {key!r}: {val!r}")
+        return
+
+    print("✗ CMS unknown — structural diagnostics:\n")
+
+    # Address
+    print("=== ADDRESS ===")
+    for tag in ["h1", "h2"]:
+        for el in soup.select(tag):
+            t = el.get_text(strip=True)
+            if t:
+                print(f"  <{tag}> {t!r}")
+
+    # Key-value patterns
+    print("\n=== KEY-VALUE STRUCTURES ===")
+    # (container selector, label selector, value selector) triples to probe
+    kv_selectors = [
+        ("dl", "dt", "dd"),
+        ("table", "th", "td"),
+        (".kenmerk", ".kenmerkName", ".kenmerkValue"),
+        (".spec", ".spec-label", ".spec-value"),
+        (".feature", ".feature-label", ".feature-value"),
+    ]
+    found_any = False
+    for container_sel, label_sel, value_sel in kv_selectors:
+        pairs = []
+        # At most 50 containers are inspected; select_one takes only the first
+        # label/value match inside each container
+        for container in soup.select(container_sel)[:50]:
+            label_el = container.select_one(label_sel)
+            value_el = container.select_one(value_sel)
+            if label_el and value_el:
+                l = label_el.get_text(strip=True)
+                v = value_el.get_text(strip=True)
+                if l and v:
+                    pairs.append((l, v))
+        if pairs:
+            found_any = True
+            print(f"\n  [{container_sel} > {label_sel} / {value_sel}] — {len(pairs)} pairs")
+            # Show only the first ten pairs per structure
+            for l, v in pairs[:10]:
+                print(f"    {l:<30} {v}")
+
+    if not found_any:
+        print("  No key-value structures detected")
+
+    # Field pattern hits
+    print("\n=== FIELD PATTERNS ===")
+    _pattern_hits(soup, RE_POSTCODE, "postcode")
+    _pattern_hits(soup, RE_PRICE,    "prijs")
+    _pattern_hits(soup, RE_M2,       "m²")
+
+    # Images
+    print("\n=== IMAGES (first 5) ===")
+    for img in soup.select("img")[:5]:
+        # Fall back to data-src when src is missing or empty
+        src = img.get("src") or img.get("data-src")
+        alt = img.get("alt", "")
+        print(f"  {src}  [{alt}]")
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+def main():
+    """Dispatch ``sys.argv[1]`` ("listings" or "details") with the URL in ``sys.argv[2]``.
+
+    Prints the module usage text and exits with status 1 when fewer than two
+    arguments are given. Prints an error and exits with status 1 for an
+    unknown command.
+    """
+    args = sys.argv
+    if len(args) < 3:
+        print(__doc__)
+        sys.exit(1)
+
+    cmd, url = args[1], args[2]
+    handlers = {"listings": cmd_listings, "details": cmd_details}
+    handler = handlers.get(cmd)
+    if handler is None:
+        print(f"Unknown command: {cmd}")
+        sys.exit(1)
+    handler(url)
+
+
+if __name__ == "__main__":
+    main()
--- a/bsprettify.py
+++ b/bsprettify.py
@@ -0,0 +1,3 @@
+import sys
+from bs4 import BeautifulSoup
+# Read HTML from stdin and write it back to stdout pretty-printed.
+markup = sys.stdin.read()
+pretty = BeautifulSoup(markup, 'html.parser').prettify()
+print(pretty)
--- a/makelaars.md
+++ b/makelaars.md
@@ -2,37 +2,57 @@

 ## Delft

-| Naam | Website | Adres |
-|------|---------|-------|
-| Van Silfhout & Hogetoorn Wereldmakelaars | vansilfhout.nl | Ireneboulevard 2 |
-| Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 |
-| Björnd Makelaardij | bjornd.nl | Oude Delft 103 |
-| Hof van Delft Makelaardij | hofvandelftmakelaardij.nl | Wateringsevest 26 |
-| V&W Makelaars Delft | vwmakelaars.nl | Coenderstraat 31 |
-| Roepman Makelaardij NVM | roepman.nl | Molslaan 43 |
-| ZO makelaars | zomakelaars.nl | Van Foreestweg 4 |
-| Marloes Makelaars | — | Maerten Trompstraat 28 |
-| Makelaarskantoor J.E. Mouthaan | — | Julianalaan 43 |
-| Olsthoorn Makelaars Delft | olsthoornmakelaars.nl | Noordeinde 51 |
-| Post Makelaardij (v/h Bayense) | postmakelaardij.nl | Spoorsingel 1a |
-| Morris NVM Makelaars | morrismakelaardij.nl | — |
-| Prinsenstad Makelaardij | — | — |
-| Oude Delft Makelaardij | — | — |
-| Dijksman Woningmakelaars | — | — |
-| CORPOwonen | — | — |
+| Done | Naam | Website | Adres |
+|------|------|---------|-------|
+| [ ] | Van Silfhout & Hogetoorn Wereldmakelaars | vansilfhout.nl | Ireneboulevard 2 |
+| [ ] | Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 |
+| [x] | Björnd Makelaardij | bjornd.nl | Oude Delft 103 |
+| [ ] | Hof van Delft Makelaardij | hofvandelftmakelaardij.nl | Wateringsevest 26 |
+| [ ] | V&W Makelaars Delft | vwmakelaars.nl | Coenderstraat 31 |
+| [ ] | Roepman Makelaardij NVM | roepman.nl | Molslaan 43 |
+| [ ] | ZO makelaars | zomakelaars.nl | Van Foreestweg 4 |
+| [ ] | Marloes Makelaars | — | Maerten Trompstraat 28 |
+| [ ] | Makelaarskantoor J.E. Mouthaan | — | Julianalaan 43 |
+| [ ] | Olsthoorn Makelaars Delft | olsthoornmakelaars.nl | Noordeinde 51 |
+| [ ] | Post Makelaardij (v/h Bayense) | postmakelaardij.nl | Spoorsingel 1a |
+| [ ] | Morris NVM Makelaars | morrismakelaardij.nl | — |
+| [ ] | Prinsenstad Makelaardij | — | — |
+| [ ] | Oude Delft Makelaardij | — | — |
+| [ ] | Dijksman Woningmakelaars | — | — |
+| [ ] | CORPOwonen | — | — |

 ## Schiedam

-| Naam | Website | Adres |
-|------|---------|-------|
-| Anke Bodewes Makelaardij | ankebodewes.nl | Hargplein 118 |
-| Woongoed Makelaars Schiedam | woongoedmakelaars.nl | Oranjestraat 93 |
-| Ooms Makelaars Schiedam | ooms.com | Gerrit Verboonstraat 2 |
-| De Witte Garantiemakelaars | dewittegarantiemakelaars.nl | Philippusweg 2 |
-| Makelaardij Wassenaar | makelaardijwassenaar.nl | Gerrit Verboonstraat 12 |
-| 3D Makelaars | 3dmakelaars.nl | Gerrit Verboonstraat 17 |
-| Dupont Makelaars | dupont.nl | Rotterdamsedijk 437 |
-| D&S Makelaardij | densmakelaars.nl | Land van Belofte 50 |
-| Moerman & De Jong Makelaars | moerman-dejong.nl | Lange Kerkstraat 80B |
-| Hagestein Makelaardij | — | Degerfors 54 |
-| Schieland Borsboom NVM Makelaars | schielandborsboom.nl | (Rotterdam, actief in Schiedam) |
+| Done | Naam | Website | Adres |
+|------|------|---------|-------|
+| [x] | Anke Bodewes Makelaardij | ankebodewes.nl | Hargplein 118 |
+| [x] | Woongoed Makelaars Schiedam | woongoedmakelaars.nl | Oranjestraat 93 |
+| [x] | Ooms Makelaars Schiedam | ooms.com | Gerrit Verboonstraat 2 |
+| [ ] | De Witte Garantiemakelaars | dewittegarantiemakelaars.nl | Philippusweg 2 |
+| [ ] | Makelaardij Wassenaar | makelaardijwassenaar.nl | Gerrit Verboonstraat 12 |
+| [ ] | 3D Makelaars | 3dmakelaars.nl | Gerrit Verboonstraat 17 |
+| [ ] | Dupont Makelaars | dupont.nl | Rotterdamsedijk 437 |
+| [ ] | D&S Makelaardij | densmakelaars.nl | Land van Belofte 50 |
+| [ ] | Moerman & De Jong Makelaars | moerman-dejong.nl | Lange Kerkstraat 80B |
+| [ ] | Hagestein Makelaardij | — | Degerfors 54 |
+| [ ] | Schieland Borsboom NVM Makelaars | schielandborsboom.nl | (Rotterdam, actief in Schiedam) |
+
+
+## Leiden
+
+| Done | Naam | Website | Adres |
+|------|------|---------|-------|
+| [ ] | RE/MAX Makelaarsgilde | makelaars-in-leiden.nl | Levendaal 73-75 |
+| [ ] | Hypodomus Leiden | hypodomusleiden.nl | Haarlemmerstraat 268 |
+| [ ] | Alpina Leiden (v/h De Leeuw) | advies.alpina.nl | Molenwerf 4 |
+| [ ] | Fides makelaars (ERA/NVM) | fidesmakelaarsleiden.nl | Lammenschansweg 76 |
+| [ ] | Werk Makelaardij | werkmakelaardij.nl | Stevenshof (Leiden) |
+| [ ] | Kerkvliet Makelaars | kerkvlietmakelaars.nl | Hoge Rijndijk 271A |
+| [ ] | Kompas Makelaars & Taxateurs | kompasmakelaardij.nl | Maresingel 75-76 |
+| [ ] | Hoekstra en Van Eck Leiden | hoekstraenvaneck.nl | Schipholweg 55-75 |
+| [ ] | DOEN NVM Makelaars | doenmakelaars.com | Doezastraat 30 |
+| [ ] | Oudshoorn Makelaardij | oudshoornmakelaardij.nl | — |
+| [ ] | April Makelaars Leiden | aprilmakelaars.nl | Haagweg 55 |
+| [ ] | Emil NVM Makelaars | emilmakelaars.nl | — |
+| [ ] | Goedhart Makelaars | — | Oude Singel 14 |
+| [ ] | Graal Makelaardij & Taxaties | — | Rapenburg 5 |
--- a/new_scraper_prompt.md
+++ b/new_scraper_prompt.md
@@ -0,0 +1,36 @@
+# SSR
+Check out the add_scraper_context.md, let's add a new scraper.
+
+**Broker:** [name]
+**Website:** [base url]
+**Listing page URL:** [url with any price/city filters applied]
+**Detail page kenmerken:** yes / no
+
+**Listing page HTML** (one card):
+[paste]
+
+**Detail page dump:** [attached / n.a.]
+
+**Pagination:** [e.g. 10 per page, pagina-N in URL / no pagination]
+
+**Notes:** [auth, JS rendering, price filter in URL, etc.]
+
+
+# API 
+
+Check out the add_scraper_context.md, let's add a new scraper.
+
+**Broker:** [name]
+**Website:** [base url]
+**API endpoint:** [full url]
+**Auth:** [none / header: X-Foo: bar / query param]
+
+**Example curl:**
+[paste]
+
+**Example response (one item):**
+[paste]
+
+**Pagination:** [e.g. page param / offset / single response]
+
+**Notes:** [price filter, city filter, status field values, etc.]
--- a/src/adapters/api.py
+++ b/src/adapters/api.py
@@ -106,11 +106,87 @@ def fetch_bjornd() -> list[RawListing]:
    log.info("bjornd: %d koopwoningen opgehaald", len(listings))
    return listings
 
- 
+# ---------------------------------------------------------------------------
+# Ooms
+# ---------------------------------------------------------------------------
+
+_OOMS_BASE = "https://ooms.com"
+# fetch_ooms keeps only items whose "place" is one of these
+_OOMS_CITIES = {"Delft", "Schiedam", "Rotterdam", "Leiden", "Voorburg", "Pijnacker"}
+# availability_status values for which fetch_ooms skips the item
+_OOMS_SKIP_STATUS = {"verhuurd", "verhuurd onder voorbehoud"}
+# availability_status -> internal status; fetch_ooms falls back to "beschikbaar" for unlisted values
+_OOMS_STATUS_MAP = {
+    "beschikbaar":              "beschikbaar",
+    "onder bod":                "onder_bod",
+    "onder optie":              "onder_bod",
+    "verkocht":                 "verkocht",
+    "verkocht onder voorbehoud":"verkocht",
+}
+
+
+def _ooms_hero_image(item: dict) -> str | None:
+    """Return the absolute URL of the widest rendition of the item's first image, or None."""
+    images = item.get("realworks_main_images") or item.get("realworks_images") or []
+    if not images:
+        return None
+    sizes = images[0].get("sizes") or []
+    # A null width counts as 0 so the comparison never mixes None and int
+    best = max(sizes, key=lambda s: s.get("width") or 0, default=None)
+    if not best or not best.get("imageUrl"):
+        return None
+    return _OOMS_BASE + best["imageUrl"]
+
+
+def _ooms_listing(item: dict) -> RawListing | None:
+    """Convert one Ooms API item to a RawListing, or return None when it is filtered out.
+
+    Filtered out are rentals, places outside _OOMS_CITIES, prices above
+    config.MAX_PRICE and statuses in _OOMS_SKIP_STATUS. Fields that are
+    missing, null or empty in the JSON become None on the listing.
+
+    Raises:
+        KeyError: when the item has no "url".
+    """
+    if item.get("buy_or_rent") != "buy":
+        return None
+    if item.get("place") not in _OOMS_CITIES:
+        return None
+    # A null price is treated like a missing one (0), so the item is kept
+    if (item.get("buy_price") or 0) > config.MAX_PRICE:
+        return None
+
+    status_raw = item.get("availability_status") or ""
+    if status_raw in _OOMS_SKIP_STATUS:
+        return None
+
+    # Join only the address parts that are present, so null values never
+    # render as the text "None"
+    address_parts = (
+        item.get("street_name"),
+        item.get("house_number"),
+        item.get("house_number_addition"),
+    )
+    adres = " ".join(str(p) for p in address_parts if p not in (None, "")).strip()
+
+    return RawListing(
+        url=item["url"],
+        source_makelaar="ooms",
+        # Keep only the YYYY-MM-DD prefix of the publish date
+        datum_aanmelding=(item.get("publish_date") or "")[:10] or None,
+        status=_OOMS_STATUS_MAP.get(status_raw, "beschikbaar"),
+        adres=adres or None,
+        postcode=(item.get("zip_code") or "").replace(" ", "") or None,
+        stad=item.get("place") or None,
+        prijs=item.get("buy_price") or None,
+        woningtype=item.get("appartment_characteristic") or item.get("residential_building_type") or None,
+        woonoppervlak=item.get("usable_area_living_function") or None,
+        # "or None" already maps a plot size of 0 to None
+        perceeloppervlak=item.get("parcel_surface") or None,
+        kamers=item.get("amount_of_rooms") or None,
+        slaapkamers=item.get("amount_of_bedrooms") or None,
+        hero_image_url=_ooms_hero_image(item),
+        extra={
+            "office": (item.get("office") or {}).get("name"),
+            "locations": item.get("locations"),
+            "garden_types": item.get("garden_types"),
+            "lat": item.get("lat"),
+            "lng": item.get("lng"),
+            "object_code": item.get("object_code"),
+        },
+    )
+
+
+def fetch_ooms() -> list[RawListing]:
+    """Fetch the Ooms property feed and return the matching listings for sale.
+
+    Each item is parsed independently: a malformed item is logged as a warning
+    and skipped, so it cannot abort the whole run. Errors from fetch_json
+    itself are not caught here.
+    """
+    data = fetch_json(f"{_OOMS_BASE}/api/properties/available.json")
+    listings = []
+
+    for item in data.get("objects") or []:
+        try:
+            listing = _ooms_listing(item)
+        except (AttributeError, KeyError, TypeError, ValueError) as e:
+            log.warning("ooms: parse fout, item overgeslagen: %r", e)
+            continue
+        if listing is not None:
+            listings.append(listing)
+
+    log.info("ooms: %d listings opgehaald", len(listings))
+    return listings
+
 # ---------------------------------------------------------------------------
 # SCRAPERS — exporteer hier alle actieve API adapters
 # ---------------------------------------------------------------------------
 
 SCRAPERS = {
    'bjornd': fetch_bjornd,
+    'ooms': fetch_ooms,
 }
--- a/src/adapters/ssr.py
+++ b/src/adapters/ssr.py
@@ -65,51 +65,127 @@ def parse_m2(text: str | None) -> int | None:


 # ---------------------------------------------------------------------------
-# Björn & Dries adapter (bjornd.nl)
+# Realworks CMS (shared)
 # ---------------------------------------------------------------------------
-# TODO: vul de echte CSS selectors in na inspectie van de pagina.
-# Dit is een structureel sjabloon — de selectors zijn placeholders.

-BJORND_BASE = "https://www.bjornd.nl"
-BJORND_AANBOD = f"{BJORND_BASE}/aanbod"
+_REALWORKS_STATUS_MAP = {  # lowercased status-banner text -> RawListing.status value
+    "te koop":       "beschikbaar",
+    "nieuw":         "beschikbaar",
+    "onder bod":     "onder_bod",
+    "onder optie":   "onder_bod",
+    "verkocht o.v.": "verkocht",
+    "verkocht":      "verkocht",
+}  # fetch_realworks falls back to "beschikbaar" for any banner text not listed here


-def fetch_bjornd_demo() -> list[RawListing]:
-    soup = fetch_soup(BJORND_AANBOD)
+def _realworks_detail(detail_url: str, makelaar: str) -> dict:
+    """Scrape the kenmerken of one Realworks detail page; any failure is logged and yields {}."""
+    # Result key -> lowercased label text as matched on the detail page.
+    wanted = {
+        "woningtype":       "type woning",
+        "bouwjaar":         "bouwjaar",
+        "woonoppervlak":    "woonoppervlakte",
+        "perceeloppervlak": "perceeloppervlakte",
+        "kamers":           "aantal kamers",
+        "slaapkamers":      "aantal slaapkamers",
+        "energielabel":     "energieklasse",
+    }
+    try:
+        page = fetch_soup(detail_url)
+        found: dict[str, str] = {}
+        for row in page.select("span.kenmerk"):
+            name_el = row.select_one("span.kenmerkName")
+            val_el = row.select_one("span.kenmerkValue")
+            if name_el is None or val_el is None:
+                continue  # row lacks a label or a value
+            # A label seen twice keeps its last value.
+            found[name_el.get_text(strip=True).lower()] = val_el.get_text(strip=True)
+        # Labels absent from the page map to None.
+        return {field: found.get(label) for field, label in wanted.items()}
+    except Exception as e:
+        log.warning("%s: detail fetch fout %s: %s", makelaar, detail_url, e)
+        return {}
+
+
+def fetch_realworks(base_url: str, makelaar: str) -> list[RawListing]:
+    """
+    Generic fetcher for Realworks CMS brokers.
+
+    Walks {base_url}/aanbod/.../pagina-{n}/ until a page is empty or short and fetches the
+    detail page of every card. A card that fails to parse is logged under `makelaar` and skipped.
+    """
+    listings_path = f"/aanbod/woningaanbod/-{config.MAX_PRICE}/koop"
    listings = []
+    page = 1

-    # Pas de selector aan op de echte HTML structuur
-    for card in soup.select(".property-card"):          # ← aanpassen
-        try:
-            a_tag = card.select_one("a[href]")
-            if not a_tag:
-                continue
-            url = a_tag["href"]
-            if not url.startswith("http"):
-                url = BJORND_BASE + url
+    while True:
+        url = f"{base_url}{listings_path}/pagina-{page}/"
+        soup = fetch_soup(url)
+        cards = soup.select("li.aanbodEntry")
+        if not cards:
+            break

-            adres    = _text(card, ".property-address")  # ← aanpassen
-            postcode = _extract_postcode(_text(card, ".property-location"))
-            prijs    = parse_prijs(_text(card, ".property-price"))
-            opp      = parse_m2(_text(card, ".property-area"))
-            img      = _src(card, "img")
+        for card in cards:
+            try:
+                a_tag = card.select_one("a.aanbodEntryLink")
+                if not a_tag:
+                    continue
+                href = a_tag["href"]
+                listing_url = href if href.startswith("http") else base_url + href  # keep absolute hrefs as-is

-            listings.append(RawListing(
-                url=url,
-                source_makelaar="bjornd",
-                adres=adres,
-                postcode=postcode,
-                stad=_infer_stad(postcode),
-                prijs=prijs,
-                woonoppervlak=opp,
-                hero_image_url=img,
-            ))
-        except Exception as e:
-            log.warning("Fout bij parsen bjornd card: %s", e)
+                adres    = _text(card, ".street-address")
+                postcode = (_text(card, ".postal-code") or "").replace(" ", "") or None
+                stad     = _text(card, ".locality")
+                prijs    = parse_prijs(_text(card, ".koopprijs .kenmerkValue"))

+                status_text = (_text(card, ".objectstatusbanner") or "").lower()
+                status = _REALWORKS_STATUS_MAP.get(status_text, "beschikbaar")
+                img_tag = card.select_one(".hoofdfoto img")
+                hero = img_tag.get("src") if img_tag else None  # missing src -> None, not KeyError
+                kk = _realworks_detail(listing_url, makelaar)
+                # Free-text kenmerken: a value that is not all digits becomes None instead of dropping the listing.
+                listings.append(RawListing(
+                    url=listing_url,
+                    source_makelaar=makelaar,
+                    adres=adres,
+                    postcode=postcode,
+                    stad=stad,
+                    prijs=prijs,
+                    status=status,
+                    hero_image_url=hero,
+                    woningtype=kk.get("woningtype"),
+                    bouwjaar=int(kk["bouwjaar"]) if (kk.get("bouwjaar") or "").isdecimal() else None,
+                    woonoppervlak=parse_m2(kk.get("woonoppervlak")),
+                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
+                    kamers=int(kk["kamers"]) if (kk.get("kamers") or "").isdecimal() else None,
+                    slaapkamers=int(kk["slaapkamers"]) if (kk.get("slaapkamers") or "").isdecimal() else None,
+                    energielabel=kk.get("energielabel"),
+                ))
+            except Exception as e:
+                log.warning("%s: parse fout: %s", makelaar, e)
+
+        if len(cards) < 10:  # a short page is taken to be the last one
+            break
+        page += 1
+    log.info("%s: %d listings opgehaald", makelaar, len(listings))
    return listings


+# ---------------------------------------------------------------------------
+# Anke Bodewes Makelaardij
+# ---------------------------------------------------------------------------
+
+def fetch_ankebodewes() -> list[RawListing]:  # zero-argument adapter registered in SCRAPERS
+    return fetch_realworks("https://www.ankebodewes.nl", "ankebodewes")
+
+
+# ---------------------------------------------------------------------------
+# Woongoed Makelaars Schiedam
+# ---------------------------------------------------------------------------
+
+def fetch_woongoed() -> list[RawListing]:  # zero-argument adapter registered in SCRAPERS
+    return fetch_realworks("https://www.woongoedmakelaars.nl", "woongoed")
+
 # ---------------------------------------------------------------------------
 # SSR helper utils
 # ---------------------------------------------------------------------------
@@ -150,5 +226,6 @@ def _infer_stad(postcode: str | None) -> str | None:
 # ---------------------------------------------------------------------------

 SCRAPERS = {
-    'bjornd_demo': fetch_bjornd_demo,
+    'ankebodewes': fetch_ankebodewes,
+    'woongoed':    fetch_woongoed,
 }
--- a/tests/test_adapters.py
+++ b/tests/test_adapters.py
@@ -7,11 +7,11 @@ from adapters import SCRAPERS


 # --- change this to test a different adapter ---
-ADAPTER = SCRAPERS['bjornd']
+ADAPTER = SCRAPERS['ooms']

 if __name__ == "__main__":
    print(f"Testing adapter: {ADAPTER.__name__}")
    listings = ADAPTER()
    print(f"Got {len(listings)} listings\n")
    for l in listings:
-        print(f"  {l.adres}, {l.stad} — €{l.prijs} — {l.url}")
+        print(f"  {l.adres}, {l.postcode}, {l.stad} — €{l.prijs} — {l.kamers} rooms — {l.url}")