add scrapers: V&W, ZO Makelaars (Realworks), Roepman (JSON-LD) for Delft
- fetch_vwmakelaars, fetch_zomakelaars: one-liner Realworks wrappers - fetch_roepman: custom JSON-LD scraper (Realworks CMS uses div.aanbodEntry instead of li.aanbodEntry; price from potentialAction priceSpecification) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,9 +8,9 @@
|
|||||||
| [x] | Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 |
|
| [x] | Van Daal Makelaardij | vandaalmakelaardij.nl | Voldersgracht 33 |
|
||||||
| [x] | Björnd Makelaardij | bjornd.nl | Oude Delft 103 |
|
| [x] | Björnd Makelaardij | bjornd.nl | Oude Delft 103 |
|
||||||
| [ ] | Hof van Delft Makelaardij | hofvandelftmakelaardij.nl | Wateringsevest 26 |
|
| [ ] | Hof van Delft Makelaardij | hofvandelftmakelaardij.nl | Wateringsevest 26 |
|
||||||
| [ ] | V&W Makelaars Delft | vwmakelaars.nl | Coenderstraat 31 |
|
| [x] | V&W Makelaars Delft | vwmakelaars.nl | Coenderstraat 31 |
|
||||||
| [ ] | Roepman Makelaardij NVM | roepman.nl | Molslaan 43 |
|
| [x] | Roepman Makelaardij NVM | roepman.nl | Molslaan 43 |
|
||||||
| [ ] | ZO makelaars | zomakelaars.nl | Van Foreestweg 4 |
|
| [x] | ZO makelaars | zomakelaars.nl | Van Foreestweg 4 |
|
||||||
| [ ] | Marloes Makelaars | — | Maerten Trompstraat 28 |
|
| [ ] | Marloes Makelaars | — | Maerten Trompstraat 28 |
|
||||||
| [ ] | Makelaarskantoor J.E. Mouthaan | — | Julianalaan 43 |
|
| [ ] | Makelaarskantoor J.E. Mouthaan | — | Julianalaan 43 |
|
||||||
| [ ] | Olsthoorn Makelaars Delft | olsthoornmakelaars.nl | Noordeinde 51 |
|
| [ ] | Olsthoorn Makelaars Delft | olsthoornmakelaars.nl | Noordeinde 51 |
|
||||||
|
|||||||
@@ -1180,6 +1180,118 @@ def fetch_vansilfhout() -> list[RawListing]:
|
|||||||
return listings
|
return listings
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# V&W Makelaars Delft / ZO Makelaars (Delft) — Realworks CMS
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def fetch_vwmakelaars() -> list[RawListing]:
    """Return the listings of V&W Makelaars Delft by delegating to fetch_realworks."""
    base_url = "https://www.vwmakelaars.nl"
    source_name = "vwmakelaars"
    return fetch_realworks(base_url, source_name)
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_zomakelaars() -> list[RawListing]:
    """Return the listings of ZO Makelaars by delegating to fetch_realworks."""
    base_url = "https://www.zomakelaars.nl"
    source_name = "zomakelaars"
    return fetch_realworks(base_url, source_name)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Roepman Makelaardij NVM (Delft)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Realworks CMS, but with div.aanbodEntry instead of li.aanbodEntry.
|
||||||
|
# The price is in the JSON-LD (same structure as Wassenaar).
|
||||||
|
|
||||||
|
# Site root of roepman.nl; the overview path and the listing URLs taken from
# the JSON-LD blocks are appended to it.
_ROEPMAN_BASE = "https://www.roepman.nl"
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_roepman() -> list[RawListing]:
    """Scrape the for-sale listings of Roepman Makelaardij (roepman.nl).

    Walks the paginated overview at
    ``/aanbod/woningaanbod/-{config.MAX_PRICE}/koop/pagina-N/``. For every page
    it reads two things:

    * the ``div.aanbodEntry`` cards, for the status banner and a fallback photo;
    * the ``application/ld+json`` script blocks of ``@type`` ``Residence``,
      for the url, address and price of each listing.

    Each listing's detail page is then passed to ``_realworks_detail`` to fill
    in the remaining attributes.

    Returns:
        One ``RawListing`` per unique ``Residence`` url whose price, when
        present, does not exceed ``config.MAX_PRICE``. A listing that fails to
        parse is logged as a warning and skipped.
    """
    import json as _json

    listings_path = f"/aanbod/woningaanbod/-{config.MAX_PRICE}/koop"
    listings = []
    # Both sets span all pages so a listing repeated on a later page is only
    # emitted once and a repeated page can be detected.
    seen: set[str] = set()
    seen_hrefs: set[str] = set()
    page = 1

    while True:
        url = f"{_ROEPMAN_BASE}{listings_path}/pagina-{page}/"
        soup = fetch_soup(url)
        cards = soup.select("div.aanbodEntry")
        if not cards:
            break

        # Collect status + photo per card link (first card wins per link).
        status_by_url: dict[str, str] = {}
        photo_by_url: dict[str, str] = {}
        for card in cards:
            a_tag = card.select_one("a.aanbodEntryLink[href]")
            if not a_tag:
                continue
            href = a_tag["href"]
            if href in status_by_url:
                continue
            banner = card.select_one(".objectstatusbanner")
            status_text = banner.get_text(strip=True).lower() if banner else ""
            status_by_url[href] = _REALWORKS_STATUS_MAP.get(status_text, "beschikbaar")
            img = card.select_one("img")
            if img:
                src = img.get("src", "")
                if "geenfotobeschikbaar" not in src:
                    photo_by_url[href] = src

        # Termination guard: if every card link on this page was already seen
        # on an earlier page, the site is repeating itself (e.g. serving the
        # last page for out-of-range page numbers) and paging must stop.
        new_hrefs = set(status_by_url) - seen_hrefs
        if status_by_url and not new_hrefs:
            break
        seen_hrefs |= new_hrefs

        # Parse JSON-LD Residence blocks (one per listing).
        for tag in soup.select('script[type="application/ld+json"]'):
            try:
                # .string is None for empty or multi-node script tags.
                raw = tag.string or tag.get_text()
                if not raw or not raw.strip():
                    continue
                ld = _json.loads(raw)
                # JSON-LD may also be a top-level array; only single objects
                # of type Residence describe a listing here.
                if not isinstance(ld, dict) or ld.get("@type") != "Residence":
                    continue
                rel_url = ld.get("url", "")
                if not rel_url or rel_url in seen:
                    continue
                seen.add(rel_url)

                detail_url = _ROEPMAN_BASE + rel_url
                address = ld.get("address") or {}
                if not isinstance(address, dict):
                    address = {}
                postcode = address.get("postalCode", "").replace(" ", "") or None

                # potentialAction may be a single object instead of a list.
                actions = ld.get("potentialAction") or []
                if isinstance(actions, dict):
                    actions = [actions]
                price_spec = next(
                    (
                        action["priceSpecification"]
                        for action in actions
                        if isinstance(action, dict) and action.get("priceSpecification")
                    ),
                    {},
                )
                prijs = int(price_spec["price"]) if price_spec.get("price") else None
                if prijs and prijs > config.MAX_PRICE:
                    continue

                hero = ld.get("photo") or photo_by_url.get(rel_url)
                status = status_by_url.get(rel_url, "beschikbaar")
                kk = _realworks_detail(detail_url, "roepman")

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="roepman",
                    status=status,
                    adres=address.get("streetAddress") or None,
                    postcode=postcode,
                    stad=address.get("addressLocality") or None,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=int(kk["bouwjaar"]) if kk.get("bouwjaar") else None,
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=int(kk["kamers"]) if kk.get("kamers") else None,
                    slaapkamers=int(kk["slaapkamers"]) if kk.get("slaapkamers") else None,
                    energielabel=kk.get("energielabel"),
                ))
                # In dev, stop after the first listing of this page.
                # NOTE(review): this only leaves the JSON-LD loop; pagination
                # below still continues -- confirm that is intended.
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                # Best effort: one broken block must not abort the whole scrape.
                log.warning("roepman: parse fout: %s", e)

        # Fewer than 10 cards is treated as the last page.
        if len(cards) < 10:
            break
        page += 1

    log.info("roepman: %d listings opgehaald", len(listings))
    return listings
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -1194,4 +1306,7 @@ SCRAPERS = {
|
|||||||
'dupont': fetch_dupont,
|
'dupont': fetch_dupont,
|
||||||
'schielandborsboom': fetch_schielandborsboom,
|
'schielandborsboom': fetch_schielandborsboom,
|
||||||
'vansilfhout': fetch_vansilfhout,
|
'vansilfhout': fetch_vansilfhout,
|
||||||
|
'vwmakelaars': fetch_vwmakelaars,
|
||||||
|
'roepman': fetch_roepman,
|
||||||
|
'zomakelaars': fetch_zomakelaars,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ logging.basicConfig(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# --- change this to test a different adapter ---
|
# --- change this to test a different adapter ---
|
||||||
ADAPTER = SCRAPERS['vansilfhout']
|
ADAPTER = SCRAPERS['zomakelaars']
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print(f"Testing adapter: {ADAPTER.__name__}")
|
print(f"Testing adapter: {ADAPTER.__name__}")
|
||||||
|
|||||||
Reference in New Issue
Block a user