Add D&S Makelaars scraper (Schiedam)

Fetches 51+ listings from D&S with full details:
- Paginates through /aanbod/koopwoningen
- Extracts property postcode from Google Maps iframe URL
- Parses all kenmerken (features) from detail pages
- Includes price, address, rooms, area, build year, energy label

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-04-03 16:42:52 +02:00
parent 18c01139c2
commit 918042d27e
2 changed files with 154 additions and 1 deletions

View File

@@ -456,6 +456,158 @@ def _infer_stad(postcode: str | None) -> str | None:
return None
# ---------------------------------------------------------------------------
# D&S Makelaars (Schiedam)
# ---------------------------------------------------------------------------
# Site root; overview and detail paths are appended to this.
_DS_BASE = "https://www.densmakelaars.nl"
# Lower-cased status label as shown on the site -> status value stored on the
# listing. Several site labels collapse onto "beschikbaar". Lookups go through
# .get() with a fallback, so labels missing here do not raise.
_DS_STATUS_MAP = {
    "onder bod": "onder_bod",
    "te koop": "beschikbaar",
    "nieuw": "beschikbaar",
    "beschikbaar": "beschikbaar",
    "verkocht": "verkocht",
}
# Postcode inside the Google Maps iframe URL: "q=<street>,<1234 AB>,<city>".
# The gap between the digits and the letters may be literal whitespace,
# URL-encoded ("%20" or "+"), or absent.
_DS_POSTCODE_RE = re.compile(
    r"q=.+?,(?:\s|%20|\+)*(\d{4})(?:\s|%20|\+)*([A-Z]{2}),"
)


def _ds_detail(detail_url: str, html_text: str | None = None) -> dict:
    """Extract listing attributes from a D&S detail page.

    Reads the <dt>/<dd> "kenmerken" pairs and the postcode embedded in the
    Google Maps iframe URL.

    Args:
        detail_url: Absolute URL of the detail page. Used for fetching and
            for log messages.
        html_text: Already-downloaded HTML of the page. When None, the page
            is fetched from ``detail_url`` with httpx.

    Returns:
        A dict with the keys ``status``, ``woningtype``, ``bouwjaar``,
        ``woonoppervlak``, ``kamers``, ``slaapkamers``, ``energielabel`` and
        ``postcode``. Values are the raw strings from the page, or None when
        the page does not provide them. ``status`` is lower-cased and
        ``postcode`` has the form "1234AB". An empty dict is returned when
        fetching or parsing fails; the failure is logged as a warning.
    """
    try:
        if html_text is None:
            import httpx

            r = httpx.get(
                detail_url,
                headers={"User-Agent": config.USER_AGENT},
                timeout=15,
                follow_redirects=True,
            )
            # Without this, a 404/500 error page would be parsed as a listing.
            r.raise_for_status()
            html_text = r.text

        soup = BeautifulSoup(html_text, "html.parser")

        # Pair each <dt> with the first <dd> that follows it in the document.
        # Zipping the global dt/dd lists shifts every later pair as soon as
        # one term has zero or several descriptions.
        kv: dict[str, str] = {}
        for dt in soup.select("dt"):
            dd = dt.find_next("dd")
            if dd is None:
                continue
            kv[dt.get_text(strip=True).lower()] = dd.get_text(strip=True)

        postcode = None
        m = _DS_POSTCODE_RE.search(html_text)
        if m:
            postcode = f"{m.group(1)}{m.group(2)}"

        # No invented default for status: the caller lets a truthy detail
        # status override the overview card's status, so a made-up
        # "beschikbaar" would wipe out e.g. a real "onder bod".
        status = kv.get("status")

        return {
            "status": status.lower() if status else None,
            "woningtype": kv.get("soort woning"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("woonoppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energielabel"),
            "postcode": postcode,
        }
    except Exception as e:
        # Deliberately best-effort: a broken detail page must not abort the
        # whole scrape, so the caller gets an empty dict instead.
        log.warning("dens: detail fetch fout %s: %s", detail_url, e)
        return {}
def _ds_first_int(value: str | None) -> int | None:
    """Return the first run of digits in *value* as an int, or None."""
    if not value:
        return None
    m = re.search(r"\d+", value)
    return int(m.group()) if m else None


def fetch_dens() -> list[RawListing]:
    """Fetch all D&S Makelaars sale listings, enriched from their detail pages.

    Walks the paginated overview at ``/aanbod/koopwoningen``, reads the basic
    fields from each card and then calls ``_ds_detail`` for the remaining
    attributes. Detail values take precedence over card values where both
    exist.

    Returns:
        One ``RawListing`` per successfully parsed card. A card that fails to
        parse is logged and skipped. Errors raised by ``fetch_soup`` for an
        overview page are not caught here.
    """
    from urllib.parse import urljoin

    listings: list[RawListing] = []
    seen_urls: set[str] = set()
    page = 1
    while True:
        url = f"{_DS_BASE}/aanbod/koopwoningen?page={page}"
        soup = fetch_soup(url)
        cards = soup.select(".col-12.col-md-4.object-wrapper")
        if not cards:
            break

        new_on_page = 0
        for card in cards:
            try:
                a_tag = card.select_one("a.property")
                href = a_tag.get("href") if a_tag is not None else None
                if not href:
                    continue
                # urljoin also copes with relative and protocol-relative
                # hrefs, which plain string concatenation would mangle.
                detail_url = urljoin(_DS_BASE + "/", href)
                if detail_url in seen_urls:
                    continue
                seen_urls.add(detail_url)
                new_on_page += 1

                # Fields available on the overview card.
                status_label = _text(card, "span.label") or "beschikbaar"
                status_label = status_label.strip().lower()
                status = _DS_STATUS_MAP.get(status_label, "beschikbaar")
                adres = _text(card, "h3")
                stad = _text(card, "h4")
                prijs = parse_prijs(_text(card, "div.price"))

                # Footer spans hold the living area and the room count.
                woonoppervlak = None
                kamers = None
                for span in card.select("div.footer span"):
                    text = span.get_text(strip=True)
                    lowered = text.lower()
                    # "m\u00b2" is "m" + superscript two. The previous test
                    # was `"" in text`, which is always true and made the
                    # room-count branch unreachable.
                    if "m\u00b2" in lowered or "m2" in lowered:
                        woonoppervlak = parse_m2(text)
                    elif "kamers" in lowered:
                        parsed = _ds_first_int(text)
                        if parsed is not None:
                            kamers = parsed

                # .get() so an <img> without a src attribute does not raise.
                img_tag = card.select_one("img")
                hero = img_tag.get("src") if img_tag is not None else None

                detail_data = _ds_detail(detail_url)
                postcode = detail_data.get("postcode")

                # A status from the detail page overrides the card's status.
                detail_status = detail_data.get("status")
                if detail_status:
                    status = _DS_STATUS_MAP.get(detail_status, status)

                detail_area = detail_data.get("woonoppervlak")
                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="dens",
                    adres=adres,
                    postcode=postcode,
                    stad=stad or _infer_stad(postcode),
                    prijs=prijs,
                    status=status,
                    hero_image_url=hero,
                    woningtype=detail_data.get("woningtype"),
                    # Detail values are free text, so take the first integer
                    # rather than int(), which raised and cost the listing.
                    bouwjaar=_ds_first_int(detail_data.get("bouwjaar")),
                    woonoppervlak=(parse_m2(detail_area) if detail_area else None) or woonoppervlak,
                    kamers=_ds_first_int(detail_data.get("kamers")) or kamers,
                    slaapkamers=_ds_first_int(detail_data.get("slaapkamers")),
                    energielabel=detail_data.get("energielabel"),
                ))
            except Exception as e:
                # Best-effort per card: one bad card must not abort the run.
                log.warning("dens: parse fout: %s", e)

        # Stop on a short page (fewer than 10 cards is taken as the last
        # page), or when a page adds nothing new. The second check prevents
        # an endless loop if the site repeats a page for out-of-range numbers.
        if len(cards) < 10 or new_on_page == 0:
            break
        page += 1

    log.info("dens: %d listings opgehaald", len(listings))
    return listings
# ---------------------------------------------------------------------------
# SCRAPERS — exporteer hier alle actieve SSR adapters
# ---------------------------------------------------------------------------
@@ -465,4 +617,5 @@ SCRAPERS = {
'woongoed': fetch_woongoed,
'dewittegarantiemakelaars': fetch_dewittegarantiemakelaars,
'wassenaar': fetch_wassenaar,
'dens': fetch_dens,
}