update autoscraper, add another makelaar

This commit is contained in:
2026-04-03 16:09:38 +02:00
parent 17b35d1997
commit efd31686be
3 changed files with 145 additions and 1 deletions

View File

@@ -9,6 +9,7 @@ Usage:
import re
import sys
import json
import httpx
from bs4 import BeautifulSoup, Tag
@@ -264,6 +265,31 @@ def cmd_details(url: str):
alt = img.get("alt", "")
print(f" {src} [{alt}]")
# JSON-LD
print("\n=== JSON-LD (schema.org) ===")
for tag in soup.select('script[type="application/ld+json"]'):
try:
ld = json.loads(tag.string)
offered = ld.get("itemOffered", {})
address = offered.get("address", {})
floor_size = offered.get("floorSize", {})
fields = {
"woningtype": offered.get("@type"),
"adres": address.get("streetAddress"),
"postcode": address.get("postalCode"),
"stad": address.get("addressLocality"),
"prijs": ld.get("price"),
"woonoppervlak": floor_size.get("value"),
"kamers": offered.get("numberOfRooms"),
"bouwjaar": offered.get("yearBuilt"),
"availability": ld.get("availability"),
"image": ld.get("image"),
}
for k, v in fields.items():
mark = "" if v is not None else ""
print(f" {mark} {k:<16} {v!r}")
except Exception as e:
print(f" parse fout: {e}")
# ---------------------------------------------------------------------------
# Entry point

View File

@@ -28,7 +28,7 @@
| [x] | Anke Bodewes Makelaardij | ankebodewes.nl | Hargplein 118 |
| [x] | Woongoed Makelaars Schiedam | woongoedmakelaars.nl | Oranjestraat 93 |
| [x] | Ooms Makelaars Schiedam | ooms.com | Gerrit Verboonstraat 2 |
| [ ] | De Witte Garantiemakelaars | dewittegarantiemakelaars.nl | Philippusweg 2 |
| [x] | De Witte Garantiemakelaars | dewittegarantiemakelaars.nl | Philippusweg 2 |
| [ ] | Makelaardij Wassenaar | makelaardijwassenaar.nl | Gerrit Verboonstraat 12 |
| [ ] | 3D Makelaars | 3dmakelaars.nl | Gerrit Verboonstraat 17 |
| [ ] | Dupont Makelaars | dupont.nl | Rotterdamsedijk 437 |

View File

@@ -186,6 +186,123 @@ def fetch_ankebodewes() -> list[RawListing]:
def fetch_woongoed() -> list[RawListing]:
    """Return the Woongoed Makelaars listings by delegating to ``fetch_realworks``."""
    site_root = "https://www.woongoedmakelaars.nl"
    source_key = "woongoed"
    return fetch_realworks(site_root, source_key)
# ---------------------------------------------------------------------------
# De Witte Garantiemakelaars
# ---------------------------------------------------------------------------
# Site root: used to build the overview URL and to prefix relative detail links.
_DEWITTE_BASE = "https://dewittegarantiemakelaars.nl"

# "bg-*" CSS class found on a card's status pill -> listing status.
_DEWITTE_PILL_MAP = {"bg-fun-green": "beschikbaar", "bg-sold": "verkocht"}

# "@type" of the JSON-LD ``itemOffered`` object -> woningtype.
_DEWITTE_TYPE_MAP = {"Apartment": "appartement"} | dict.fromkeys(
    ("House", "SingleFamilyResidence", "Residence"),
    "woning",
)
def _dewitte_jsonld(detail_url: str) -> dict:
    """Fetch a detail page and return its parsed JSON-LD object.

    All ``application/ld+json`` scripts on the page are inspected, since a page
    may carry more than one JSON-LD block. Top-level JSON arrays and ``@graph``
    wrappers are flattened. The first object that has an ``itemOffered`` key is
    preferred; when none has one, the first JSON object found is returned.

    Args:
        detail_url: Absolute URL of the listing detail page.

    Returns:
        The selected JSON-LD object, or ``{}`` when the page cannot be fetched
        or contains no usable JSON-LD object.
    """
    import json

    try:
        soup = fetch_soup(detail_url)
        candidates: list[dict] = []
        for tag in soup.select('script[type="application/ld+json"]'):
            raw = tag.string
            if not raw or not raw.strip():
                # Empty script tag, or one whose content is not a single string.
                continue
            try:
                data = json.loads(raw)
            except ValueError as e:
                # One broken block must not hide a valid one further down.
                log.warning("dewitte: ongeldige JSON-LD op %s: %s", detail_url, e)
                continue
            nodes = data if isinstance(data, list) else [data]
            for node in nodes:
                if not isinstance(node, dict):
                    continue
                candidates.append(node)
                graph = node.get("@graph")
                if isinstance(graph, list):
                    candidates.extend(g for g in graph if isinstance(g, dict))
    except Exception as e:
        log.warning("dewitte: JSON-LD fout %s: %s", detail_url, e)
        return {}

    if not candidates:
        log.warning("dewitte: geen JSON-LD op %s", detail_url)
        return {}
    return next((c for c in candidates if "itemOffered" in c), candidates[0])
def _dewitte_dict(value) -> dict:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _dewitte_int(value) -> int | None:
    """Coerce a JSON-LD scalar to ``int``.

    Returns None for missing/falsy values and for values that are not numeric,
    so a single malformed field does not discard the whole listing.
    """
    if not value:
        return None
    try:
        return int(value)
    except (TypeError, ValueError):
        pass
    try:
        # Handles decimal strings such as "325000.00".
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return None


def _dewitte_image_url(image) -> str | None:
    """Reduce a JSON-LD ``image`` value (URL string, object or list) to one URL."""
    if isinstance(image, list):
        image = image[0] if image else None
    if isinstance(image, dict):
        image = image.get("url") or image.get("contentUrl")
    return image if isinstance(image, str) and image else None


def _dewitte_detail_url(card) -> str | None:
    """Return the absolute detail-page URL of an overview card, or None."""
    from urllib.parse import urljoin

    anchor = card.select_one("a.card__anchor")
    href = anchor.get("href") if anchor else None
    if not href:
        return None
    # urljoin leaves absolute URLs untouched and also copes with relative
    # paths that lack a leading slash.
    return urljoin(_DEWITTE_BASE + "/", href)


def _dewitte_parse_card(card, detail_url: str) -> RawListing | None:
    """Build a RawListing from an overview card plus its detail-page JSON-LD.

    Returns None when the detail page yields no JSON-LD or when the price
    exceeds ``config.MAX_PRICE``.
    """
    pill = card.select_one("span.pill")
    pill_classes = pill.get("class", []) if pill else []
    status_key = next((c for c in pill_classes if c.startswith("bg-")), None)
    # NOTE(review): a card without a recognised pill class is reported as
    # "onder_bod" — confirm that this default is intended.
    status = _DEWITTE_PILL_MAP.get(status_key, "onder_bod")

    ld = _dewitte_jsonld(detail_url)
    if not ld or not isinstance(ld, dict):
        return None

    # Any of these may be null or absent in the JSON-LD.
    offered = _dewitte_dict(ld.get("itemOffered"))
    address = _dewitte_dict(offered.get("address"))
    floor_size = _dewitte_dict(offered.get("floorSize"))

    prijs = _dewitte_int(ld.get("price"))
    if prijs is not None and prijs > config.MAX_PRICE:
        return None

    # "@type" may be a single name or a list of names.
    raw_type = offered.get("@type")
    type_names = raw_type if isinstance(raw_type, list) else [raw_type]
    woningtype = next(
        (
            _DEWITTE_TYPE_MAP[t]
            for t in type_names
            if isinstance(t, str) and t in _DEWITTE_TYPE_MAP
        ),
        None,
    )

    # Full-res image from JSON-LD, fall back to card thumbnail.
    hero = _dewitte_image_url(ld.get("image"))
    if not hero:
        img = card.select_one("picture img")
        hero = (img.get("src") or None) if img else None

    return RawListing(
        url=detail_url,
        source_makelaar="dewittegarantiemakelaars",
        status=status,
        adres=address.get("streetAddress") or None,
        postcode=str(address.get("postalCode") or "").replace(" ", "") or None,
        stad=address.get("addressLocality") or None,
        prijs=prijs,
        woningtype=woningtype,
        woonoppervlak=_dewitte_int(floor_size.get("value")),
        kamers=_dewitte_int(offered.get("numberOfRooms")),
        bouwjaar=_dewitte_int(offered.get("yearBuilt")),
        hero_image_url=hero,
    )


def fetch_dewittegarantiemakelaars() -> list[RawListing]:
    """Scrape the De Witte Garantiemakelaars for-sale listings.

    Walks the paginated ``/woningaanbod`` overview (filtered on
    ``config.MAX_PRICE``) and fetches each card's detail page for its JSON-LD
    data. A card that fails to parse is logged and skipped.

    Returns:
        The collected listings; each detail URL appears at most once.
    """
    listings = []
    seen_urls: set[str] = set()
    page = 1
    while True:
        url = (
            f"{_DEWITTE_BASE}/woningaanbod"
            f"?buy_rent=buy&buy_price=1-{config.MAX_PRICE}&page={page}"
        )
        soup = fetch_soup(url)
        cards = soup.select("div.card.card--property")
        if not cards:
            break

        new_on_page = 0
        for card in cards:
            try:
                detail_url = _dewitte_detail_url(card)
                if detail_url is None or detail_url in seen_urls:
                    continue
                seen_urls.add(detail_url)
                new_on_page += 1
                listing = _dewitte_parse_card(card, detail_url)
                if listing is not None:
                    listings.append(listing)
            except Exception as e:
                log.warning("dewitte: parse fout: %s", e)

        # Stop on a short page (presumably the site serves 10 cards per page —
        # TODO confirm) or when a page brings nothing new, which guards against
        # looping forever if the site repeats a page for out-of-range numbers.
        if new_on_page == 0 or len(cards) < 10:
            break
        page += 1

    log.info("dewittegarantiemakelaars: %d listings opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SSR helper utils
# ---------------------------------------------------------------------------
@@ -228,4 +345,5 @@ def _infer_stad(postcode: str | None) -> str | None:
# Scraper key -> zero-argument fetch function for that makelaar.
SCRAPERS = {
    "ankebodewes": fetch_ankebodewes,
    "woongoed": fetch_woongoed,
    "dewittegarantiemakelaars": fetch_dewittegarantiemakelaars,
}