Add D&S Makelaars scraper (Schiedam)
Fetches 51+ listings from D&S with full details:
- Paginates through /aanbod/koopwoningen
- Extracts the property postcode from the Google Maps iframe URL
- Parses all kenmerken (features) from the detail pages
- Includes price, address, rooms, area, build year, and energy label

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -32,7 +32,7 @@
|
|||||||
| [x] | Makelaardij Wassenaar | makelaardijwassenaar.nl | Gerrit Verboonstraat 12 |
|
| [x] | Makelaardij Wassenaar | makelaardijwassenaar.nl | Gerrit Verboonstraat 12 |
|
||||||
| [ ] | 3D Makelaars | 3dmakelaars.nl | Gerrit Verboonstraat 17 |
|
| [ ] | 3D Makelaars | 3dmakelaars.nl | Gerrit Verboonstraat 17 |
|
||||||
| [ ] | Dupont Makelaars | dupont.nl | Rotterdamsedijk 437 |
|
| [ ] | Dupont Makelaars | dupont.nl | Rotterdamsedijk 437 |
|
||||||
| [ ] | D&S Makelaardij | densmakelaars.nl | Land van Belofte 50 |
|
| [x] | D&S Makelaardij | densmakelaars.nl | Land van Belofte 50 |
|
||||||
| [ ] | Moerman & De Jong Makelaars | moerman-dejong.nl | Lange Kerkstraat 80B |
|
| [ ] | Moerman & De Jong Makelaars | moerman-dejong.nl | Lange Kerkstraat 80B |
|
||||||
| [ ] | Hagestein Makelaardij | — | Degerfors 54 |
|
| [ ] | Hagestein Makelaardij | — | Degerfors 54 |
|
||||||
| [ ] | Schieland Borsboom NVM Makelaars | schielandborsboom.nl | (Rotterdam, actief in Schiedam) |
|
| [ ] | Schieland Borsboom NVM Makelaars | schielandborsboom.nl | (Rotterdam, actief in Schiedam) |
|
||||||
|
|||||||
@@ -456,6 +456,158 @@ def _infer_stad(postcode: str | None) -> str | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# D&S Makelaars (Schiedam)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_DS_BASE = "https://www.densmakelaars.nl"
|
||||||
|
|
||||||
|
_DS_STATUS_MAP = {
|
||||||
|
"onder bod": "onder_bod",
|
||||||
|
"te koop": "beschikbaar",
|
||||||
|
"nieuw": "beschikbaar",
|
||||||
|
"beschikbaar": "beschikbaar",
|
||||||
|
"verkocht": "verkocht",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _ds_detail(detail_url: str, html_text: str | None = None) -> dict:
    """Parse a D&S detail page into a dict of raw listing fields.

    Args:
        detail_url: Absolute URL of the detail page. Fetched only when
            ``html_text`` is not supplied; always used in the warning log.
        html_text: Already-downloaded HTML of the page. When ``None`` the page
            is downloaded with httpx.

    Returns:
        A dict with the keys ``status``, ``woningtype``, ``bouwjaar``,
        ``woonoppervlak``, ``kamers``, ``slaapkamers``, ``energielabel`` and
        ``postcode``. Values are the raw strings found on the page (``status``
        lower-cased, ``postcode`` without a space) or ``None`` when the page
        does not provide them. An empty dict is returned when fetching or
        parsing fails; the failure is logged as a warning, never raised.
    """
    try:
        if html_text is None:
            import httpx

            r = httpx.get(
                detail_url,
                headers={"User-Agent": config.USER_AGENT},
                timeout=15,
                follow_redirects=True,
            )
            # Do not parse a 4xx/5xx error page as if it were a listing.
            r.raise_for_status()
            html_text = r.text

        soup = BeautifulSoup(html_text, "html.parser")

        # Build a label -> value map from the <dt>/<dd> pairs. Each <dt> is
        # paired with the <dd> that follows it inside the same parent, so a
        # single <dt> without a <dd> (or with two) cannot shift every later
        # pair the way a global positional zip would.
        kv: dict[str, str] = {}
        dts = soup.select("dt")
        for dt in dts:
            dd = dt.find_next_sibling("dd")
            if dd is not None:
                kv[dt.get_text(strip=True).lower()] = dd.get_text(strip=True)
        if not kv:
            # Markup where <dt> and <dd> are not siblings: fall back to
            # pairing them by position in the document.
            for dt, dd in zip(dts, soup.select("dd")):
                kv[dt.get_text(strip=True).lower()] = dd.get_text(strip=True)

        # The postcode is taken from the Google Maps URL embedded in the page:
        # q=<street>,<4 digits><separator><2 letters>,<city>. The separator may
        # be literal whitespace, URL-encoded ("+" or "%20"), or absent.
        postcode = None
        m = re.search(r"q=.+?,(\d{4})(?:\s|\+|%20)*([A-Z]{2}),", html_text)
        if m:
            postcode = f"{m.group(1)}{m.group(2)}"

        # No default for a missing status: fetch_dens treats any truthy status
        # from this dict as authoritative, so defaulting to "beschikbaar" here
        # would overwrite the status read from the overview card.
        status = kv.get("status")

        return {
            "status": status.lower() if status else None,
            "woningtype": kv.get("soort woning"),
            "bouwjaar": kv.get("bouwjaar"),
            "woonoppervlak": kv.get("woonoppervlakte"),
            "kamers": kv.get("aantal kamers"),
            "slaapkamers": kv.get("aantal slaapkamers"),
            "energielabel": kv.get("energielabel"),
            "postcode": postcode,
        }
    except Exception as e:
        # Best effort: a broken detail page must not abort the whole scrape.
        log.warning("dens: detail fetch fout %s: %s", detail_url, e)
        return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _ds_int(value: str | None) -> int | None:
    """Return the first run of digits in *value* as an int, or None if there is none."""
    if not value:
        return None
    m = re.search(r"\d+", value)
    return int(m.group()) if m else None


def fetch_dens() -> list[RawListing]:
    """Fetch all D&S Makelaars sale listings, enriched with detail-page data.

    Walks ``/aanbod/koopwoningen?page=N`` starting at page 1. For every card
    on an overview page the basic fields are read from the card, then the
    detail page is parsed with ``_ds_detail`` for postcode, property type,
    build year, area, room counts and energy label. Detail values take
    precedence over card values where both exist.

    Pagination stops when a page has no cards, has fewer than 10 cards, or
    contains no listing URL that was not already seen.

    Returns:
        One ``RawListing`` per unique detail URL. A card that fails to parse
        is logged as a warning and skipped; it never aborts the run.
    """
    from urllib.parse import urljoin

    listings: list[RawListing] = []
    seen_urls: set[str] = set()
    page = 1

    while True:
        url = f"{_DS_BASE}/aanbod/koopwoningen?page={page}"
        soup = fetch_soup(url)
        cards = soup.select(".col-12.col-md-4.object-wrapper")
        if not cards:
            break

        new_on_page = 0
        for card in cards:
            try:
                # Detail URL; urljoin also copes with protocol-relative and
                # slash-less hrefs, which plain concatenation would mangle.
                a_tag = card.select_one("a.property")
                href = a_tag.get("href") if a_tag else None
                if not href:
                    continue
                detail_url = urljoin(_DS_BASE, href)

                # Skip listings already handled, so each detail page is
                # fetched at most once.
                if detail_url in seen_urls:
                    continue
                seen_urls.add(detail_url)
                new_on_page += 1

                # Fields available on the overview card.
                status_label = _text(card, "span.label") or "beschikbaar"
                status_label = status_label.strip().lower()
                status = _DS_STATUS_MAP.get(status_label, "beschikbaar")

                adres = _text(card, "h3")
                stad = _text(card, "h4")
                prijs_text = _text(card, "div.price")
                prijs = parse_prijs(prijs_text)

                # Living area and room count from the card footer.
                woonoppervlak = None
                kamers = None
                for span in card.select("div.footer span"):
                    text = span.get_text(strip=True)
                    if "m²" in text:
                        woonoppervlak = parse_m2(text)
                    elif "kamers" in text.lower():
                        kamers = _ds_int(text)

                # Hero image; an <img> without src yields None instead of a
                # KeyError that would drop the whole listing.
                img_tag = card.select_one("img")
                src = img_tag.get("src") if img_tag else None
                hero = urljoin(_DS_BASE, src) if src else None

                # Detail page (an empty dict when it could not be parsed).
                detail_data = _ds_detail(detail_url)
                postcode = detail_data.get("postcode")

                # Prefer the detail-page status when it is a known label.
                detail_status = detail_data.get("status")
                if detail_status:
                    status = _DS_STATUS_MAP.get(detail_status, status)

                # Detail values are free text (e.g. "ca. 1930"), so numbers
                # are extracted rather than passed straight to int().
                detail_m2_text = detail_data.get("woonoppervlak")
                detail_m2 = parse_m2(detail_m2_text) if detail_m2_text else None
                detail_kamers = _ds_int(detail_data.get("kamers"))

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="dens",
                    adres=adres,
                    postcode=postcode,
                    stad=stad or _infer_stad(postcode),
                    prijs=prijs,
                    status=status,
                    hero_image_url=hero,
                    woningtype=detail_data.get("woningtype"),
                    bouwjaar=_ds_int(detail_data.get("bouwjaar")),
                    woonoppervlak=detail_m2 or woonoppervlak,
                    kamers=detail_kamers if detail_kamers is not None else kamers,
                    slaapkamers=_ds_int(detail_data.get("slaapkamers")),
                    energielabel=detail_data.get("energielabel"),
                ))
            except Exception as e:
                log.warning("dens: parse fout: %s", e)

        # The site repeated a page we already processed: stop instead of
        # looping forever.
        if new_on_page == 0:
            break
        # NOTE(review): assumes a full overview page has at least 10 cards, so
        # a shorter page is the last one - confirm against the live site.
        if len(cards) < 10:
            break
        page += 1

    log.info("dens: %d listings opgehaald", len(listings))
    return listings
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -465,4 +617,5 @@ SCRAPERS = {
|
|||||||
'woongoed': fetch_woongoed,
|
'woongoed': fetch_woongoed,
|
||||||
'dewittegarantiemakelaars': fetch_dewittegarantiemakelaars,
|
'dewittegarantiemakelaars': fetch_dewittegarantiemakelaars,
|
||||||
'wassenaar': fetch_wassenaar,
|
'wassenaar': fetch_wassenaar,
|
||||||
|
'dens': fetch_dens,
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user