add scrapers: Moerman & De Jong (API) and Schieland Borsboom (SSR)
- fetch_moerman: OG Online realtime-listings API (same platform as bjornd); includes bouwjaar from dateOfConstruction and energielabel, and strips the space from the postcode
- fetch_schielandborsboom: paginated HTML scraper filtered to Schiedam; fetches the #kenmerken section of each detail page for full specs (bouwjaar, kamers, etc.)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -33,9 +33,9 @@
|
|||||||
| [x] | 3D Makelaars | 3dmakelaars.nl | Gerrit Verboonstraat 17 |
|
| [x] | 3D Makelaars | 3dmakelaars.nl | Gerrit Verboonstraat 17 |
|
||||||
| [x] | Dupont Makelaars | dupont.nl | Rotterdamsedijk 437 |
|
| [x] | Dupont Makelaars | dupont.nl | Rotterdamsedijk 437 |
|
||||||
| [x] | D&S Makelaardij | densmakelaars.nl | Land van Belofte 50 |
|
| [x] | D&S Makelaardij | densmakelaars.nl | Land van Belofte 50 |
|
||||||
| [ ] | Moerman & De Jong Makelaars | moerman-dejong.nl | Lange Kerkstraat 80B |
|
| [x] | Moerman & De Jong Makelaars | moerman-dejong.nl | Lange Kerkstraat 80B |
|
||||||
| [ ] | Hagestein Makelaardij | — | Degerfors 54 |
|
| [ ] | Hagestein Makelaardij | — | Degerfors 54 |
|
||||||
| [ ] | Schieland Borsboom NVM Makelaars | schielandborsboom.nl | (Rotterdam, actief in Schiedam) |
|
| [x] | Schieland Borsboom NVM Makelaars | schielandborsboom.nl | (Rotterdam, actief in Schiedam) |
|
||||||
|
|
||||||
|
|
||||||
## Leiden
|
## Leiden
|
||||||
|
|||||||
@@ -182,11 +182,74 @@ def fetch_ooms() -> list[RawListing]:
|
|||||||
log.info("ooms: %d listings opgehaald", len(listings))
|
log.info("ooms: %d listings opgehaald", len(listings))
|
||||||
return listings
|
return listings
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Moerman & De Jong Makelaars (Schiedam)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Zelfde OG Online / realtime-listings platform als Bjornd.
|
||||||
|
|
||||||
|
# Site root; used for the realtime-listings endpoint and to make listing URLs absolute.
_MOERMAN_BASE = "https://www.moerman-dejong.nl"
# `statusOrig` values for which fetch_moerman skips the item entirely.
_MOERMAN_SKIP = {"rented", "rented_ur"}

# Maps the API's `statusOrig` value to the status string stored on RawListing.
# fetch_moerman falls back to "beschikbaar" for values that are not listed here.
_MOERMAN_STATUS_MAP = {
    "available": "beschikbaar",
    "under_bid": "onder_bod",
    "under_option": "onder_bod",
    "sold": "verkocht",
    "sold_ur": "verkocht",
}
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_moerman() -> list[RawListing]:
    """Fetch Moerman & De Jong sale listings from the realtime-listings endpoint.

    An item is skipped when it is not flagged ``isSales``, when its
    ``statusOrig`` is in ``_MOERMAN_SKIP``, or when its ``salesPrice`` exceeds
    ``config.MAX_PRICE``. Items without a usable price (missing, ``null`` or
    ``0``) are kept with ``prijs=None``.

    Returns:
        One ``RawListing`` per remaining item.

    Raises:
        KeyError: if an item that passes the filters has no ``url`` field.
    """
    data = fetch_json(
        f"{_MOERMAN_BASE}/nl/realtime-listings/consumer",
        headers={"X-Requested-With": "XMLHttpRequest"},
    )

    listings = []
    for item in data:
        if not item.get("isSales"):
            continue
        if item.get("statusOrig") in _MOERMAN_SKIP:
            continue

        # `.get(key, 0)` only covers a *missing* key; an explicit null would
        # make the comparison raise TypeError, so normalise to None first.
        prijs = item.get("salesPrice") or None
        if prijs is not None and prijs > config.MAX_PRICE:
            continue

        postcode = (item.get("zipcode") or "").replace(" ", "") or None

        # The value may arrive as a JSON string or number; go through str() so
        # a numeric year does not break on a missing str method.
        # isdecimal() accepts exactly the digit characters int() can parse.
        raw_year = str(item.get("dateOfConstruction") or "").strip()
        bouwjaar = int(raw_year) if raw_year.isdecimal() else None

        listings.append(RawListing(
            url=_MOERMAN_BASE + item["url"],
            source_makelaar="moerman",
            # Unknown or missing status values are treated as available.
            status=_MOERMAN_STATUS_MAP.get(item.get("statusOrig", ""), "beschikbaar"),
            adres=item.get("address") or None,
            postcode=postcode,
            stad=item.get("city") or None,
            prijs=prijs,
            woningtype=item.get("type") or None,
            woonoppervlak=item.get("livingSurface") or None,
            # `or None` already turns a 0 plot size into None.
            perceeloppervlak=item.get("plotSurface") or None,
            kamers=item.get("rooms") or None,
            slaapkamers=item.get("bedrooms") or None,
            bouwjaar=bouwjaar,
            energielabel=item.get("energyLabel") or None,
            hero_image_url=item.get("photo") or None,
        ))

    log.info("moerman: %d koopwoningen opgehaald", len(listings))
    return listings
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# SCRAPERS — exporteer hier alle actieve API adapters
|
# SCRAPERS — exporteer hier alle actieve API adapters
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
SCRAPERS = {
|
SCRAPERS = {
|
||||||
'bjornd': fetch_bjornd,
|
'bjornd': fetch_bjornd,
|
||||||
'ooms': fetch_ooms,
|
'ooms': fetch_ooms,
|
||||||
|
'moerman': fetch_moerman,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -870,6 +870,174 @@ def fetch_dupont() -> list[RawListing]:
|
|||||||
return listings
|
return listings
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Schieland Borsboom NVM Makelaars (Rotterdam, actief in Schiedam)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Site root; used for the overview pages and to make relative links absolute.
_SCHIELAND_BASE = "https://www.schielandborsboom.nl"

# Maps a lower-cased status label from the site to the status string stored on
# RawListing. Callers fall back to another value when the label is not listed here.
_SCHIELAND_STATUS_MAP = {
    "beschikbaar": "beschikbaar",
    "onder bod": "onder_bod",
    "onder optie": "onder_bod",
    "verkocht o.v.": "verkocht",
    "verkocht": "verkocht",
}
|
||||||
|
|
||||||
|
|
||||||
|
def _schieland_detail(detail_url: str) -> dict:
    """Scrape one Schieland Borsboom detail page into a dict of raw values.

    Returns a dict with the keys ``postcode``, ``status``, ``woningtype``,
    ``bouwjaar``, ``woonoppervlak``, ``perceeloppervlak``, ``kamers``,
    ``slaapkamers`` and ``energielabel``. Apart from ``postcode`` and the
    lower-cased ``status``, values are the unparsed text from the page, or
    None when the label is absent. Any exception is logged as a warning and
    an empty dict is returned instead.
    """
    try:
        page = fetch_soup(detail_url)

        # The first <p> under div.house__status carries e.g. "3117 DP Schiedam".
        address_p = page.select_one("div.house__status p")
        postcode = None
        if address_p:
            postcode = _extract_postcode(address_p.get_text())

        # #kenmerken rows look like <li><strong>label</strong><span>value</span></li>.
        specs: dict[str, str] = {}
        section = page.select_one("#kenmerken")
        rows = section.select("li") if section else []
        for row in rows:
            label = row.select_one("strong")
            value = row.select_one("span")
            if not (label and value):
                continue
            # Drop nested links (e.g. "Hypotheek berekenen") from the value text.
            for link in value.select("a"):
                link.decompose()
            key = label.get_text(strip=True).lower()
            specs[key] = value.get_text(strip=True)

        details = {
            "postcode": postcode,
            "status": specs.get("status", "").lower(),
        }
        # Output field -> lower-cased label as shown in the #kenmerken list.
        label_by_field = (
            ("woningtype", "soort bouw"),
            ("bouwjaar", "bouwjaar"),
            ("woonoppervlak", "woonoppervlakte"),
            ("perceeloppervlak", "perceeloppervlakte"),
            ("kamers", "aantal kamers"),
            ("slaapkamers", "aantal slaapkamers"),
            ("energielabel", "energielabel"),
        )
        for field, spec_label in label_by_field:
            details[field] = specs.get(spec_label)
        return details
    except Exception as e:
        log.warning("schielandborsboom: detail fetch fout %s: %s", detail_url, e)
        return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _schieland_first_int(text):
    """Return the first run of digits in *text* as an int, or None if there is none."""
    match = re.search(r"\d+", text or "")
    return int(match.group()) if match else None


def _schieland_year(text):
    """Return the first standalone four-digit number in *text* as an int, or None.

    Tolerates free-form values such as "ca. 1930" or "1930-1940" (first year
    wins) instead of raising like a bare ``int()`` would.
    """
    match = re.search(r"\b\d{4}\b", text or "")
    return int(match.group()) if match else None


def fetch_schielandborsboom() -> list[RawListing]:
    """Fetch Schieland Borsboom NVM listings (koop only, Schiedam).

    Walks the paginated overview, keeps cards whose place is "Schiedam" and
    whose price does not exceed ``config.MAX_PRICE``, and fetches each kept
    card's detail page for the full specs. Values from the detail page take
    precedence over those shown on the card.

    A card that fails to parse is logged and skipped. When
    ``config.APP_ENV == "dev"`` the scrape stops after the first listing.

    Returns:
        The collected ``RawListing`` objects.
    """
    # A page with fewer cards than this is taken to be the last one, which
    # avoids requesting a page beyond the end of the result set.
    full_page_size = 18

    listings = []
    page = 1
    stop_after_first = False

    while True:
        if page == 1:
            url = f"{_SCHIELAND_BASE}/wonen?sure_koop_huur=koop"
        else:
            url = f"{_SCHIELAND_BASE}/wonen/page/{page}/?sure_koop_huur=koop"

        soup = fetch_soup(url)
        cards = soup.select("div.card.card--house")
        if not cards:
            break

        for card in cards:
            try:
                a_tag = card.select_one("a.card__anchor")
                if not a_tag or "href" not in a_tag.attrs:
                    continue
                detail_url = a_tag["href"]
                if not detail_url.startswith("http"):
                    detail_url = _SCHIELAND_BASE + detail_url

                # Filter: only Schiedam
                stad_el = card.select_one("p.house-place")
                stad = stad_el.get_text(strip=True) if stad_el else None
                if not stad or stad.lower() != "schiedam":
                    continue

                # Status is the extra CSS class on the thumb element, if any.
                thumb = card.select_one("div.card-house__thumb")
                status_classes = thumb.get("class", []) if thumb else []
                status_text = next(
                    (c for c in status_classes if c != "card-house__thumb"), "beschikbaar"
                ).lower()
                status = _SCHIELAND_STATUS_MAP.get(status_text, "beschikbaar")

                # Price filter runs before the detail fetch to save a request.
                prijs = parse_prijs(_text(card, "p.price"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                adres = _text(card, "h4.house-street")

                # Hero image from the <picture> source; fall back to the <img>
                # when that source is absent or has no srcset attribute.
                src_tag = card.select_one('picture source[media="(min-width:100px)"]')
                hero = (src_tag.get("srcset") if src_tag else None) or _src(card, "img")
                if hero and not hero.startswith("http"):
                    hero = _SCHIELAND_BASE + hero

                # Data icons on card: surface, bedrooms, energy label
                woonoppervlak_card = None
                slaapkamers_card = None
                energielabel_card = None
                for data_div in card.select("div.data"):
                    txt = data_div.get_text(strip=True)
                    if data_div.select_one("i.icon-surface"):
                        woonoppervlak_card = parse_m2(txt)
                    elif data_div.select_one("i.icon-bedrooms"):
                        slaapkamers_card = _schieland_first_int(txt)
                    elif data_div.select_one("i.icon-label"):
                        energielabel_card = txt.strip() or None

                # Detail page for the full kenmerken; {} when that fetch failed.
                kk = _schieland_detail(detail_url)

                # Refine status from the detail page; keep the card status when
                # the detail label is not a known one.
                if kk.get("status"):
                    status = _SCHIELAND_STATUS_MAP.get(kk["status"], status)

                # "5 kamers" -> 5
                kamers = _schieland_first_int(kk.get("kamers"))

                # "3" or "3 slaapkamers" -> 3; otherwise the card value.
                slaapkamers = _schieland_first_int(kk.get("slaapkamers"))
                if slaapkamers is None:
                    slaapkamers = slaapkamers_card

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="schielandborsboom",
                    status=status,
                    adres=adres,
                    postcode=kk.get("postcode"),
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    # A free-form year must not raise and drop the listing.
                    bouwjaar=_schieland_year(kk.get("bouwjaar")),
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=kamers,
                    slaapkamers=slaapkamers,
                    energielabel=kk.get("energielabel") or energielabel_card,
                ))
                if config.APP_ENV == "dev":
                    # One listing is enough in dev; also stop paginating so no
                    # further overview or detail pages are requested.
                    stop_after_first = True
                    break
            except Exception as e:
                # Best effort per card: log and carry on with the next one.
                log.warning("schielandborsboom: parse fout: %s", e)

        if stop_after_first or len(cards) < full_page_size:
            break
        page += 1

    log.info("schielandborsboom: %d listings opgehaald", len(listings))
    return listings
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -882,4 +1050,5 @@ SCRAPERS = {
|
|||||||
'dens': fetch_dens,
|
'dens': fetch_dens,
|
||||||
'3dmakelaars': fetch_3dmakelaars,
|
'3dmakelaars': fetch_3dmakelaars,
|
||||||
'dupont': fetch_dupont,
|
'dupont': fetch_dupont,
|
||||||
|
'schielandborsboom': fetch_schielandborsboom,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ logging.basicConfig(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# --- change this to test a different adapter ---
|
# --- change this to test a different adapter ---
|
||||||
ADAPTER = SCRAPERS['dupont']
|
ADAPTER = SCRAPERS['schielandborsboom']
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print(f"Testing adapter: {ADAPTER.__name__}")
|
print(f"Testing adapter: {ADAPTER.__name__}")
|
||||||
|
|||||||
Reference in New Issue
Block a user