add scrapers: V&W, ZO Makelaars (Realworks), Roepman (JSON-LD) for Delft

- fetch_vwmakelaars, fetch_zomakelaars: one-liner Realworks wrappers
- fetch_roepman: custom JSON-LD scraper (Realworks CMS uses div.aanbodEntry
  instead of li.aanbodEntry; price from potentialAction priceSpecification)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-04 21:43:43 +02:00
parent d310a7a560
commit bfd69e3542
3 changed files with 120 additions and 5 deletions

View File

@@ -8,9 +8,9 @@
| [x] | Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 |
| [x] | Björnd Makelaardij | bjornd.nl | Oude Delft 103 |
| [ ] | Hof van Delft Makelaardij | hofvandelftmakelaardij.nl | Wateringsevest 26 |
| [ ] | V&W Makelaars Delft | vwmakelaars.nl | Coenderstraat 31 |
| [ ] | Roepman Makelaardij NVM | roepman.nl | Molslaan 43 |
| [ ] | ZO makelaars | zomakelaars.nl | Van Foreestweg 4 |
| [x] | V&W Makelaars Delft | vwmakelaars.nl | Coenderstraat 31 |
| [x] | Roepman Makelaardij NVM | roepman.nl | Molslaan 43 |
| [x] | ZO makelaars | zomakelaars.nl | Van Foreestweg 4 |
| [ ] | Marloes Makelaars | — | Maerten Trompstraat 28 |
| [ ] | Makelaarskantoor J.E. Mouthaan | — | Julianalaan 43 |
| [ ] | Olsthoorn Makelaars Delft | olsthoornmakelaars.nl | Noordeinde 51 |

View File

@@ -1180,6 +1180,118 @@ def fetch_vansilfhout() -> list[RawListing]:
return listings
# ---------------------------------------------------------------------------
# V&W Makelaars Delft / ZO Makelaars (Delft) — Realworks CMS
# ---------------------------------------------------------------------------
def fetch_vwmakelaars() -> list[RawListing]:
    """Return the V&W Makelaars Delft listings via ``fetch_realworks``."""
    site_root = "https://www.vwmakelaars.nl"
    source_key = "vwmakelaars"
    return fetch_realworks(site_root, source_key)
def fetch_zomakelaars() -> list[RawListing]:
    """Return the ZO Makelaars listings via ``fetch_realworks``."""
    site_root = "https://www.zomakelaars.nl"
    source_key = "zomakelaars"
    return fetch_realworks(site_root, source_key)
# ---------------------------------------------------------------------------
# Roepman Makelaardij NVM (Delft)
# ---------------------------------------------------------------------------
# Realworks CMS, but with div.aanbodEntry instead of li.aanbodEntry.
# The price is in the JSON-LD (same structure as Wassenaar).
# Site root; the overview path and the relative listing URLs are appended to it.
_ROEPMAN_BASE = "https://www.roepman.nl"
def fetch_roepman() -> list[RawListing]:
    """Scrape the Roepman for-sale listings, capped at ``config.MAX_PRICE``.

    Walks the paginated overview (``pagina-1``, ``pagina-2``, ...). Per page,
    the status and a fallback photo are read from the ``div.aanbodEntry``
    cards, while URL, address and price come from the JSON-LD ``Residence``
    blocks. Extra characteristics are fetched per listing through
    ``_realworks_detail``.

    Pagination stops on a page without cards, on a page with fewer than 10
    cards, or on a page that contributes no listing URL that was not already
    seen (a guard against a site that keeps serving the same page).

    Returns:
        One ``RawListing`` per unique listing URL whose price is unknown or
        at most ``config.MAX_PRICE``. A listing whose JSON-LD or detail page
        cannot be processed is logged and skipped.
    """
    import json as _json

    listings_path = f"/aanbod/woningaanbod/-{config.MAX_PRICE}/koop"
    listings = []
    # Listing URLs handled so far, shared by all pages so that a listing
    # repeated on a later page is not emitted twice.
    seen: set[str] = set()
    page = 1
    while True:
        url = f"{_ROEPMAN_BASE}{listings_path}/pagina-{page}/"
        soup = fetch_soup(url)
        cards = soup.select("div.aanbodEntry")
        if not cards:
            break

        # Collect status + photo per relative url (first card for a url wins).
        status_by_url: dict[str, str] = {}
        photo_by_url: dict[str, str] = {}
        for card in cards:
            a_tag = card.select_one("a.aanbodEntryLink[href]")
            if not a_tag:
                continue
            href = a_tag["href"]
            if href in status_by_url:
                continue
            banner = card.select_one(".objectstatusbanner")
            status_text = banner.get_text(strip=True).lower() if banner else ""
            # Unknown or missing banner text falls back to "beschikbaar".
            status_by_url[href] = _REALWORKS_STATUS_MAP.get(status_text, "beschikbaar")
            img = card.select_one("img")
            if img:
                src = img.get("src", "")
                # Skip the "no photo available" placeholder image.
                if "geenfotobeschikbaar" not in src:
                    photo_by_url[href] = src

        # Parse JSON-LD Residence blocks (one per listing).
        seen_before_page = len(seen)
        for tag in soup.select('script[type="application/ld+json"]'):
            raw_json = tag.string
            if not raw_json:
                # Empty script tag: nothing to parse, not worth a warning.
                continue
            try:
                ld = _json.loads(raw_json)
                # Only a single JSON object can be a Residence block; other
                # payload shapes (e.g. a top-level array) are ignored.
                if not isinstance(ld, dict) or ld.get("@type") != "Residence":
                    continue
                rel_url = ld.get("url", "")
                if not rel_url or rel_url in seen:
                    continue
                seen.add(rel_url)
                detail_url = _ROEPMAN_BASE + rel_url
                address = ld.get("address", {})
                postcode = address.get("postalCode", "").replace(" ", "") or None

                # JSON-LD allows a property to hold one object or a list.
                actions = ld.get("potentialAction") or []
                if isinstance(actions, dict):
                    actions = [actions]
                price_spec = next(
                    (
                        action.get("priceSpecification", {})
                        for action in actions
                        if action.get("priceSpecification")
                    ),
                    {},
                )
                prijs = int(price_spec["price"]) if price_spec.get("price") else None
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Prefer the JSON-LD photo; fall back to the card image.
                hero = ld.get("photo") or photo_by_url.get(rel_url)
                status = status_by_url.get(rel_url, "beschikbaar")
                kk = _realworks_detail(detail_url, "roepman")
                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="roepman",
                    status=status,
                    adres=address.get("streetAddress") or None,
                    postcode=postcode,
                    stad=address.get("addressLocality") or None,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=int(kk["bouwjaar"]) if kk.get("bouwjaar") else None,
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=int(kk["kamers"]) if kk.get("kamers") else None,
                    slaapkamers=int(kk["slaapkamers"]) if kk.get("slaapkamers") else None,
                    energielabel=kk.get("energielabel"),
                ))
                # NOTE(review): in dev this only ends the current page after
                # the first kept listing; later pages are still fetched.
                # Confirm whether the whole scrape should stop instead.
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                # Best effort: one bad listing must not abort the whole run.
                log.warning("roepman: parse fout: %s", e)

        if len(seen) == seen_before_page:
            # Nothing new on this page; stop instead of paging forever.
            break
        # A short page is treated as the last one (presumably a full page
        # holds 10 cards — TODO confirm against the site).
        if len(cards) < 10:
            break
        page += 1

    log.info("roepman: %d listings opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — exporteer hier alle actieve SSR adapters
# ---------------------------------------------------------------------------
@@ -1193,5 +1305,8 @@ SCRAPERS = {
'3dmakelaars': fetch_3dmakelaars,
'dupont': fetch_dupont,
'schielandborsboom': fetch_schielandborsboom,
'vansilfhout': fetch_vansilfhout,
'vansilfhout': fetch_vansilfhout,
'vwmakelaars': fetch_vwmakelaars,
'roepman': fetch_roepman,
'zomakelaars': fetch_zomakelaars,
}

View File

@@ -16,7 +16,7 @@ logging.basicConfig(
)
# --- change this to test a different adapter ---
ADAPTER = SCRAPERS['vansilfhout']
ADAPTER = SCRAPERS['zomakelaars']
if __name__ == "__main__":
print(f"Testing adapter: {ADAPTER.__name__}")