add scrapers: V&W, ZO Makelaars (Realworks), Roepman (JSON-LD) for Delft
- fetch_vwmakelaars, fetch_zomakelaars: one-liner Realworks wrappers
- fetch_roepman: custom JSON-LD scraper (Roepman's Realworks CMS uses div.aanbodEntry instead of li.aanbodEntry; the price is read from potentialAction priceSpecification)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,9 +8,9 @@
|
||||
| [x] | Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 |
|
||||
| [x] | Björnd Makelaardij | bjornd.nl | Oude Delft 103 |
|
||||
| [ ] | Hof van Delft Makelaardij | hofvandelftmakelaardij.nl | Wateringsevest 26 |
|
||||
| [ ] | V&W Makelaars Delft | vwmakelaars.nl | Coenderstraat 31 |
|
||||
| [ ] | Roepman Makelaardij NVM | roepman.nl | Molslaan 43 |
|
||||
| [ ] | ZO makelaars | zomakelaars.nl | Van Foreestweg 4 |
|
||||
| [x] | V&W Makelaars Delft | vwmakelaars.nl | Coenderstraat 31 |
|
||||
| [x] | Roepman Makelaardij NVM | roepman.nl | Molslaan 43 |
|
||||
| [x] | ZO makelaars | zomakelaars.nl | Van Foreestweg 4 |
|
||||
| [ ] | Marloes Makelaars | — | Maerten Trompstraat 28 |
|
||||
| [ ] | Makelaarskantoor J.E. Mouthaan | — | Julianalaan 43 |
|
||||
| [ ] | Olsthoorn Makelaars Delft | olsthoornmakelaars.nl | Noordeinde 51 |
|
||||
|
||||
@@ -1180,6 +1180,118 @@ def fetch_vansilfhout() -> list[RawListing]:
|
||||
return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# V&W Makelaars Delft / ZO Makelaars (Delft) — Realworks CMS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def fetch_vwmakelaars() -> list[RawListing]:
    """Fetch V&W Makelaars Delft listings through the shared Realworks scraper."""
    base_url = "https://www.vwmakelaars.nl"
    source_key = "vwmakelaars"
    return fetch_realworks(base_url, source_key)
|
||||
|
||||
|
||||
def fetch_zomakelaars() -> list[RawListing]:
    """Fetch ZO Makelaars listings through the shared Realworks scraper."""
    base_url = "https://www.zomakelaars.nl"
    source_key = "zomakelaars"
    return fetch_realworks(base_url, source_key)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Roepman Makelaardij NVM (Delft)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Realworks CMS maar met div.aanbodEntry i.p.v. li.aanbodEntry.
|
||||
# Prijs zit in JSON-LD (zelfde structuur als Wassenaar).
|
||||
|
||||
# Site root for Roepman; the listing-page path and the relative detail URLs
# taken from the JSON-LD blocks are appended to this.
_ROEPMAN_BASE = "https://www.roepman.nl"
|
||||
|
||||
|
||||
def _roepman_card_meta(cards):
    """Return ``(status_by_url, photo_by_url)`` keyed by each card's link href.

    Only the first card seen for a given href is used. Placeholder images
    (``geenfotobeschikbaar``) and empty ``src`` attributes are not recorded.
    """
    status_by_url: dict[str, str] = {}
    photo_by_url: dict[str, str] = {}
    for card in cards:
        a_tag = card.select_one("a.aanbodEntryLink[href]")
        if not a_tag:
            continue
        href = a_tag["href"]
        if href in status_by_url:
            continue
        banner = card.select_one(".objectstatusbanner")
        status_text = banner.get_text(strip=True).lower() if banner else ""
        # Unknown or missing banner text falls back to "beschikbaar".
        status_by_url[href] = _REALWORKS_STATUS_MAP.get(status_text, "beschikbaar")
        img = card.select_one("img")
        src = img.get("src", "") if img else ""
        if src and "geenfotobeschikbaar" not in src:
            photo_by_url[href] = src
    return status_by_url, photo_by_url


def _roepman_price(ld):
    """Return the price from the first ``potentialAction`` carrying a
    ``priceSpecification``, as an int, or ``None`` when there is none.

    Raises whatever ``int()`` raises for a non-integer price value; the caller
    treats that as a parse failure for the listing.
    """
    actions = ld.get("potentialAction") or []
    if isinstance(actions, dict):
        # JSON-LD allows a single object where a list is expected.
        actions = [actions]
    for action in actions:
        spec = action.get("priceSpecification") if isinstance(action, dict) else None
        if spec:
            price = spec.get("price")
            return int(price) if price else None
    return None


def fetch_roepman() -> list[RawListing]:
    """Scrape the Roepman sale listings up to ``config.MAX_PRICE``.

    Each overview page is read twice: the ``div.aanbodEntry`` cards provide
    the status banner and a fallback photo, and the JSON-LD ``Residence``
    blocks provide URL, address and price. Remaining fields come from
    ``_realworks_detail`` for every listing.

    Returns:
        The collected ``RawListing`` objects. A listing whose JSON-LD or
        detail data cannot be processed is logged and skipped.
    """
    import json as _json

    listings_path = f"/aanbod/woningaanbod/-{config.MAX_PRICE}/koop"
    listings = []
    # Both sets span all pages so a listing repeated on a later page is not
    # fetched twice and a repeating page terminates the pagination.
    seen_cards: set[str] = set()
    seen: set[str] = set()
    page = 1

    while True:
        url = f"{_ROEPMAN_BASE}{listings_path}/pagina-{page}/"
        soup = fetch_soup(url)
        cards = soup.select("div.aanbodEntry")
        if not cards:
            break

        status_by_url, photo_by_url = _roepman_card_meta(cards)

        # Guard against a site that keeps serving an already-seen page for
        # out-of-range page numbers, which would otherwise loop forever.
        if status_by_url and status_by_url.keys() <= seen_cards:
            log.info("roepman: pagina %d bevat geen nieuwe woningen, stop", page)
            break
        seen_cards.update(status_by_url)

        # Parse JSON-LD Residence blocks (one per listing).
        for tag in soup.select('script[type="application/ld+json"]'):
            raw = tag.string
            if not raw:
                continue
            try:
                ld = _json.loads(raw)
                if not isinstance(ld, dict) or ld.get("@type") != "Residence":
                    continue
                rel_url = ld.get("url", "")
                if not rel_url or rel_url in seen:
                    continue
                seen.add(rel_url)

                detail_url = _ROEPMAN_BASE + rel_url
                address = ld.get("address") or {}
                postcode = address.get("postalCode", "").replace(" ", "") or None

                prijs = _roepman_price(ld)
                if prijs and prijs > config.MAX_PRICE:
                    continue

                hero = ld.get("photo") or photo_by_url.get(rel_url)
                status = status_by_url.get(rel_url, "beschikbaar")
                kk = _realworks_detail(detail_url, "roepman")

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="roepman",
                    status=status,
                    adres=address.get("streetAddress") or None,
                    postcode=postcode,
                    stad=address.get("addressLocality") or None,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=int(kk["bouwjaar"]) if kk.get("bouwjaar") else None,
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=int(kk["kamers"]) if kk.get("kamers") else None,
                    slaapkamers=int(kk["slaapkamers"]) if kk.get("slaapkamers") else None,
                    energielabel=kk.get("energielabel"),
                ))
                # NOTE(review): in dev this only leaves the JSON-LD loop, so
                # later pages are still requested (one listing each) - confirm
                # that this is the intended limit.
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                # Best-effort per listing: one bad block must not abort the run.
                log.warning("roepman: parse fout: %s", e)

        # Presumably a full overview page holds at least 10 cards, so fewer
        # means this was the last page - TODO confirm the page size.
        if len(cards) < 10:
            break
        page += 1

    log.info("roepman: %d listings opgehaald", len(listings))
    return listings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1193,5 +1305,8 @@ SCRAPERS = {
|
||||
'3dmakelaars': fetch_3dmakelaars,
|
||||
'dupont': fetch_dupont,
|
||||
'schielandborsboom': fetch_schielandborsboom,
|
||||
'vansilfhout': fetch_vansilfhout,
|
||||
'vansilfhout': fetch_vansilfhout,
|
||||
'vwmakelaars': fetch_vwmakelaars,
|
||||
'roepman': fetch_roepman,
|
||||
'zomakelaars': fetch_zomakelaars,
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ logging.basicConfig(
|
||||
)
|
||||
|
||||
# --- change this to test a different adapter ---
|
||||
ADAPTER = SCRAPERS['vansilfhout']
|
||||
ADAPTER = SCRAPERS['zomakelaars']
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(f"Testing adapter: {ADAPTER.__name__}")
|
||||
|
||||
Reference in New Issue
Block a user