add scrapers: Olsthoorn (SURE), Post Makelaardij, Morris (Realworks) for Delft
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
25
makelaars.md
25
makelaars.md
@@ -1,4 +1,4 @@
|
|||||||
# Verkoopmakelaars Delft & Schiedam
|
# Verkoopmakelaars Delft, Leiden, Den Haag & Schiedam
|
||||||
|
|
||||||
## Delft
|
## Delft
|
||||||
|
|
||||||
@@ -13,13 +13,17 @@
|
|||||||
| [x] | ZO makelaars | zomakelaars.nl | Van Foreestweg 4 |
|
| [x] | ZO makelaars | zomakelaars.nl | Van Foreestweg 4 |
|
||||||
| [ ] | Marloes Makelaars | — | Maerten Trompstraat 28 |
|
| [ ] | Marloes Makelaars | — | Maerten Trompstraat 28 |
|
||||||
| [ ] | Makelaarskantoor J.E. Mouthaan | — | Julianalaan 43 |
|
| [ ] | Makelaarskantoor J.E. Mouthaan | — | Julianalaan 43 |
|
||||||
| [ ] | Olsthoorn Makelaars Delft | olsthoornmakelaars.nl | Noordeinde 51 |
|
| [x] | Olsthoorn Makelaars Delft | olsthoornmakelaars.nl | Noordeinde 51 |
|
||||||
| [ ] | Post Makelaardij (v/h Bayense) | postmakelaardij.nl | Spoorsingel 1a |
|
| [x] | Post Makelaardij (v/h Bayense) | postmakelaardij.nl | Spoorsingel 1a |
|
||||||
| [ ] | Morris NVM Makelaars | morrismakelaardij.nl | — |
|
| [x] | Morris NVM Makelaars | morrismakelaardij.nl | — |
|
||||||
| [ ] | Prinsenstad Makelaardij | — | — |
|
| [ ] | Prinsenstad Makelaardij | — | — |
|
||||||
| [ ] | Oude Delft Makelaardij | — | — |
|
| [ ] | Oude Delft Makelaardij | — | — |
|
||||||
| [ ] | Dijksman Woningmakelaars | — | — |
|
| [ ] | Dijksman Woningmakelaars | — | — |
|
||||||
| [ ] | CORPOwonen | — | — |
|
| [ ] | CORPOwonen | — | — |
|
||||||
|
| [ ] | Bergklis Makelaars | bergklis.nl | — |
|
||||||
|
| [ ] | Van Gulden Makelaardij | vanguldenmakelaardij.nl | Zaïrestraat 1 |
|
||||||
|
| [ ] | Van der Togt Makelaardij | vdtmakelaardij.nl | — (Voorburg, actief in Delft) |
|
||||||
|
|
||||||
|
|
||||||
## Schiedam
|
## Schiedam
|
||||||
|
|
||||||
@@ -38,6 +42,19 @@
|
|||||||
| [x] | Schieland Borsboom NVM Makelaars | schielandborsboom.nl | (Rotterdam, actief in Schiedam) |
|
| [x] | Schieland Borsboom NVM Makelaars | schielandborsboom.nl | (Rotterdam, actief in Schiedam) |
|
||||||
|
|
||||||
|
|
||||||
|
## Den Haag
|
||||||
|
|
||||||
|
| Done | Naam | Website | Adres |
|
||||||
|
|------|------|---------|-------|
|
||||||
|
| [ ] | Yuvam Makelaardij | yuvammakelaardij.nl | — |
|
||||||
|
| [ ] | 88 Makelaars | 88makelaars.nl | — |
|
||||||
|
| [ ] | DIVA Makelaars | divamakelaars.nl | — |
|
||||||
|
| [ ] | Elzenaar NVM Makelaars | elzenaar.com | — |
|
||||||
|
| [ ] | Frisia Makelaars | frisiamakelaars.nl | — |
|
||||||
|
| [ ] | Borgdorff Makelaars | borgdorff.nl | — (vestiging Den Haag) |
|
||||||
|
| [ ] | SMASH Makelaars | smashmakelaars.nl | — |
|
||||||
|
| [ ] | DOEN NVM Makelaars | doenmakelaars.com | Doezastraat 30 (Leiden, ook actief in Den Haag) |
|
||||||
|
|
||||||
## Leiden
|
## Leiden
|
||||||
|
|
||||||
| Done | Naam | Website | Adres |
|
| Done | Naam | Website | Adres |
|
||||||
|
|||||||
@@ -1292,6 +1292,309 @@ def fetch_roepman() -> list[RawListing]:
|
|||||||
return listings
|
return listings
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Post Makelaardij (v/h Bayense) — Delft & omgeving
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Custom Tailwind CSS site; covers Delft, Pijnacker, Rijswijk etc.
|
||||||
|
# Filter for Delft only.
|
||||||
|
|
||||||
|
_POST_BASE = "https://www.postmakelaardij.nl"
|
||||||
|
|
||||||
|
_POST_STATUS_MAP = {
|
||||||
|
"te koop": "beschikbaar",
|
||||||
|
"onder bod": "onder_bod",
|
||||||
|
"verkocht": "verkocht",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _post_detail(detail_url: str) -> dict:
    """Fetch a Post Makelaardij detail page and extract its kenmerken.

    Args:
        detail_url: Absolute URL of the listing's detail page.

    Returns:
        A dict with the keys ``woonoppervlak``, ``perceeloppervlak``,
        ``slaapkamers`` and ``energielabel`` (each may be None when the page
        does not expose it). Returns an empty dict when fetching or parsing
        fails; the failure is logged as a warning (best-effort enrichment).
    """
    try:
        soup = fetch_soup(detail_url)

        # The energy label is only exposed as a CSS class "energielabel-{letter}".
        energielabel = None
        for el in soup.select("[class]"):
            for cls in el.get("class", []):
                if cls.startswith("energielabel-"):
                    # An empty suffix is treated as "no label" so the search continues.
                    energielabel = cls.removeprefix("energielabel-").upper() or None
                    break
            if energielabel:
                break

        # Living area, plot area and bedroom count come from the icon spans.
        woonoppervlak = None
        perceeloppervlak = None
        slaapkamers = None
        for span in soup.select("span.object-info-icon-text"):
            txt = span.get_text(strip=True)
            # Match keywords case-insensitively so a capitalised "Perceel ..."
            # is not mistaken for the living area by the generic m² branch.
            key = txt.lower()
            if "slaapkamer" in key:
                m = re.search(r"(\d+)", txt)
                slaapkamers = int(m.group(1)) if m else None
            elif "perceel" in key:
                perceeloppervlak = parse_m2(txt)
            elif "m²" in key or "m2" in key:
                woonoppervlak = parse_m2(txt)

        return {
            "woonoppervlak": woonoppervlak,
            "perceeloppervlak": perceeloppervlak,
            "slaapkamers": slaapkamers,
            "energielabel": energielabel,
        }
    except Exception as e:
        log.warning("post: detail fetch fout %s: %s", detail_url, e)
        return {}
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_post() -> list[RawListing]:
    """Fetch Post Makelaardij listings; only Delft, only koop.

    Walks the paginated ``/woningaanbod/koop?page=N`` overview. Paging stops
    when a page has no ``article`` cards, has fewer than 12 cards, or yields no
    listing URL that was not already seen (guards against a site that keeps
    serving the same page for out-of-range page numbers).

    Per card: non-Delft listings and listings priced above
    ``config.MAX_PRICE`` are skipped before the detail page is fetched. A card
    that fails to parse is logged and skipped; it does not abort the run.

    Returns:
        The collected ``RawListing`` objects, one per unique detail URL.
    """
    listings = []
    seen_urls = set()
    page = 1

    while True:
        url = f"{_POST_BASE}/woningaanbod/koop?page={page}"
        soup = fetch_soup(url)
        cards = soup.select("article")
        if not cards:
            break

        new_on_page = 0
        for card in cards:
            try:
                # URL — first link in image slider
                a_tag = card.select_one("a[href]")
                if not a_tag:
                    continue
                href = a_tag["href"]
                detail_url = href if href.startswith("http") else _POST_BASE + href

                # Skip listings already handled (repeated page or duplicate card).
                if detail_url in seen_urls:
                    continue
                seen_urls.add(detail_url)
                new_on_page += 1

                # Postcode + city from span.custom-postcode-text
                pc_el = card.select_one("span.custom-postcode-text")
                if not pc_el:
                    continue
                pc_parts = pc_el.get_text(strip=True).split()
                if len(pc_parts) < 3:
                    continue
                postcode = pc_parts[0] + pc_parts[1]  # "2613BD"
                stad = " ".join(pc_parts[2:])  # "Delft"

                # Filter: only Delft
                if stad.lower() != "delft":
                    continue

                # Price — filter early, before the extra detail-page request
                prijs = parse_prijs(_text(card, "span.price-block"))
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status from span.status text; unknown text counts as available
                status_text = (_text(card, "span.status") or "").lower()
                status = _POST_STATUS_MAP.get(status_text, "beschikbaar")

                # Address
                adres = _text(card, "h4.custom-address-text")

                # Hero: first img in article. Use .get() so an <img> without a
                # src attribute yields no hero image instead of dropping the
                # whole listing via KeyError.
                img = card.select_one("img")
                hero = img.get("src") if img else None

                kk = _post_detail(detail_url)

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="post",
                    status=status,
                    adres=adres,
                    postcode=postcode,
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woonoppervlak=kk.get("woonoppervlak"),
                    perceeloppervlak=kk.get("perceeloppervlak"),
                    slaapkamers=kk.get("slaapkamers"),
                    energielabel=kk.get("energielabel"),
                ))
                # NOTE(review): in dev this only leaves the card loop after the
                # first accepted listing; pagination below still continues —
                # confirm that is the intended dev behaviour.
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                log.warning("post: parse fout: %s", e)

        # A short page is the last page; a page without new URLs means the
        # site is repeating itself.
        if new_on_page == 0 or len(cards) < 12:
            break
        page += 1

    log.info("post: %d listings opgehaald", len(listings))
    return listings
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Morris NVM Makelaars (Delft) — Realworks CMS
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def fetch_morris() -> list[RawListing]:
    """Fetch Morris NVM Makelaars listings through the shared Realworks adapter."""
    base_url = "https://www.morrismakelaardij.nl"
    source_name = "morris"
    return fetch_realworks(base_url, source_name)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Olsthoorn Makelaars Delft (SURE WordPress plugin)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Covers Delft, Den Haag, Naaldwijk etc — we filter for Delft only.
|
||||||
|
# Detail page has no postcode; leave as None.
|
||||||
|
|
||||||
|
_OLSTHOORN_BASE = "https://www.olsthoornmakelaars.nl"
|
||||||
|
|
||||||
|
_OLSTHOORN_STATUS_MAP = {
|
||||||
|
"badge-available": "beschikbaar",
|
||||||
|
"badge-bid": "onder_bod",
|
||||||
|
"badge-option": "onder_bod",
|
||||||
|
"badge-sold": "verkocht",
|
||||||
|
}
|
||||||
|
|
||||||
|
_OLSTHOORN_DETAIL_STATUS_MAP = {
|
||||||
|
"beschikbaar": "beschikbaar",
|
||||||
|
"onder bod": "onder_bod",
|
||||||
|
"onder optie": "onder_bod",
|
||||||
|
"verkocht": "verkocht",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _olsthoorn_detail(detail_url: str) -> dict:
    """Fetch an Olsthoorn detail page and read its ``#kenmerken`` label/value pairs.

    Each ``#kenmerken li`` with at least two ``span`` children contributes one
    entry: the first span (lower-cased) is the label, the second the value.
    When a label occurs more than once, the last value is kept.

    Returns:
        A dict of raw string values (or None when absent) under the keys
        ``status``, ``woningtype``, ``bouwjaar``, ``woonoppervlak``,
        ``perceeloppervlak``, ``kamers``, ``slaapkamers`` and ``energielabel``;
        ``status`` is lower-cased and defaults to an empty string. Returns an
        empty dict when fetching or parsing fails (logged as a warning).
    """
    try:
        page = fetch_soup(detail_url)
        span_groups = (item.select("span") for item in page.select("#kenmerken li"))
        features: dict[str, str] = {
            group[0].get_text(strip=True).lower(): group[1].get_text(strip=True)
            for group in span_groups
            if len(group) >= 2
        }

        # The property type is published under one of several labels.
        woningtype = (
            features.get("soort object")
            or features.get("soort woning")
            or features.get("soort bouw")
        )
        return {
            "status": features.get("status", "").lower(),
            "woningtype": woningtype,
            "bouwjaar": features.get("bouwjaar"),
            "woonoppervlak": features.get("gebruiksoppervlakte"),
            "perceeloppervlak": features.get("perceeloppervlakte"),
            "kamers": features.get("aantal kamers"),
            "slaapkamers": features.get("aantal slaapkamers"),
            "energielabel": features.get("energielabel"),
        }
    except Exception as e:
        log.warning("olsthoorn: detail fetch fout %s: %s", detail_url, e)
        return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _olsthoorn_int(value):
    """Return the first run of digits in ``value`` as an int, or None.

    Tolerates None, empty strings and free-text values (e.g. "ca. 1930")
    instead of raising ValueError like a bare ``int()`` would.
    """
    if not value:
        return None
    match = re.search(r"\d+", str(value))
    return int(match.group()) if match else None


def fetch_olsthoorn() -> list[RawListing]:
    """Fetch Olsthoorn Makelaars listings; only Delft, only koop.

    Walks the paginated ``/wonen`` overview (``sure_koop_huur=koop``). Paging
    stops when a page has no ``a.card-house`` cards, has fewer than 15 cards,
    or yields no listing URL that was not already seen (guards against a site
    that keeps serving the same page for out-of-range page numbers).

    Per card: non-Delft listings and listings priced above
    ``config.MAX_PRICE`` are skipped before the detail page is fetched. Values
    from the detail page take precedence; living area, room count and energy
    label fall back to the overview card. A card that fails to parse is logged
    and skipped; it does not abort the run.

    Returns:
        The collected ``RawListing`` objects, one per unique detail URL.
        ``postcode`` is always None because the broker does not expose it.
    """
    listings = []
    seen_urls = set()
    page = 1

    while True:
        if page == 1:
            url = f"{_OLSTHOORN_BASE}/wonen?sure_koop_huur=koop"
        else:
            url = f"{_OLSTHOORN_BASE}/wonen/page/{page}/?sure_koop_huur=koop"

        soup = fetch_soup(url)
        cards = soup.select("a.card-house")
        if not cards:
            break

        new_on_page = 0
        for card in cards:
            try:
                href = card.get("href", "")
                if not href:
                    continue
                detail_url = href if href.startswith("http") else _OLSTHOORN_BASE + href

                # Skip listings already handled (repeated page or duplicate card).
                if detail_url in seen_urls:
                    continue
                seen_urls.add(detail_url)
                new_on_page += 1

                # Filter: only Delft (the card title holds the city name)
                stad_el = card.select_one("h2.card__title")
                stad = stad_el.get_text(strip=True) if stad_el else None
                if not stad or stad.lower() != "delft":
                    continue

                # Price from bold tag — filter early before detail fetch
                prijs_b = card.select_one("b")
                prijs = parse_prijs(prijs_b.get_text() if prijs_b else None)
                if prijs and prijs > config.MAX_PRICE:
                    continue

                # Status from badge class on label span
                label_span = card.select_one("span.card-house__label")
                status = "beschikbaar"
                if label_span:
                    for cls in label_span.get("class", []):
                        if cls in _OLSTHOORN_STATUS_MAP:
                            status = _OLSTHOORN_STATUS_MAP[cls]
                            break

                # Address: first direct <p> child of .short--info, with
                # internal whitespace collapsed.
                # NOTE(review): the original comment said "second <p>" while
                # the code reads index 0 — confirm against the live markup.
                adres_p = card.select("div.short--info > p")
                if adres_p:
                    adres = " ".join(adres_p[0].get_text().split())
                else:
                    adres = None

                # Hero image: the <source> for the widest breakpoint
                src_tag = card.select_one('picture source[media="(min-width:1024px)"]')
                hero = src_tag.get("data-srcset") if src_tag else None
                if hero and not hero.startswith("http"):
                    hero = _OLSTHOORN_BASE + hero

                # Woonoppervlak + kamers + energielabel from card data icons
                # NOTE(review): the selector is "date__inner" although the
                # parent is "div.data" — presumably the site's own class name;
                # verify it is not a typo for "data__inner".
                woonoppervlak_card = None
                kamers_card = None
                energielabel_card = None
                for data_div in card.select("div.data"):
                    inner = data_div.select_one("span.date__inner")
                    if not inner:
                        continue
                    txt = inner.get_text(strip=True)
                    if data_div.select_one("i.icon-sizes"):
                        woonoppervlak_card = parse_m2(txt)
                    elif data_div.select_one("i.icon-door"):
                        kamers_card = _olsthoorn_int(txt)
                    elif data_div.select_one("i.icon-energylabel"):
                        energielabel_card = txt or None

                kk = _olsthoorn_detail(detail_url)

                # Refine status from detail page when it reports a known value
                detail_status = _OLSTHOORN_DETAIL_STATUS_MAP.get(kk.get("status", ""), "")
                if detail_status:
                    status = detail_status

                # Lenient integer parsing: a free-text kenmerk must not make
                # the whole listing fail with ValueError.
                kamers = _olsthoorn_int(kk.get("kamers"))
                if kamers is None:
                    kamers = kamers_card

                listings.append(RawListing(
                    url=detail_url,
                    source_makelaar="olsthoorn",
                    status=status,
                    adres=adres,
                    postcode=None,  # not exposed by broker
                    stad=stad,
                    prijs=prijs,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=_olsthoorn_int(kk.get("bouwjaar")),
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")) or woonoppervlak_card,
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=kamers,
                    slaapkamers=_olsthoorn_int(kk.get("slaapkamers")),
                    energielabel=kk.get("energielabel") or energielabel_card,
                ))
                # NOTE(review): in dev this only leaves the card loop after the
                # first accepted listing; pagination below still continues —
                # confirm that is the intended dev behaviour.
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                log.warning("olsthoorn: parse fout: %s", e)

        # A short page is the last page; a page without new URLs means the
        # site is repeating itself.
        if new_on_page == 0 or len(cards) < 15:
            break
        page += 1

    log.info("olsthoorn: %d listings opgehaald", len(listings))
    return listings
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
# SCRAPERS — exporteer hier alle actieve SSR adapters
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -1309,4 +1612,7 @@ SCRAPERS = {
|
|||||||
'vwmakelaars': fetch_vwmakelaars,
|
'vwmakelaars': fetch_vwmakelaars,
|
||||||
'roepman': fetch_roepman,
|
'roepman': fetch_roepman,
|
||||||
'zomakelaars': fetch_zomakelaars,
|
'zomakelaars': fetch_zomakelaars,
|
||||||
|
'post': fetch_post,
|
||||||
|
'morris': fetch_morris,
|
||||||
|
'olsthoorn': fetch_olsthoorn,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ logging.basicConfig(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# --- change this to test a different adapter ---
|
# --- change this to test a different adapter ---
|
||||||
ADAPTER = SCRAPERS['zomakelaars']
|
ADAPTER = SCRAPERS['post']
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print(f"Testing adapter: {ADAPTER.__name__}")
|
print(f"Testing adapter: {ADAPTER.__name__}")
|
||||||
|
|||||||
Reference in New Issue
Block a user