Compare commits
2 Commits
c6328cee46
...
fc6f3ff809
| Author | SHA1 | Date | |
|---|---|---|---|
| fc6f3ff809 | |||
| 1841412c93 |
@@ -24,6 +24,7 @@ from .realworks import (
|
|||||||
fetch_wassenaar,
|
fetch_wassenaar,
|
||||||
fetch_roepman,
|
fetch_roepman,
|
||||||
fetch_post,
|
fetch_post,
|
||||||
|
fetch_vankleef,
|
||||||
)
|
)
|
||||||
from .sure import (
|
from .sure import (
|
||||||
fetch_schielandborsboom,
|
fetch_schielandborsboom,
|
||||||
@@ -60,4 +61,5 @@ SCRAPERS = {
|
|||||||
'borgdorff': fetch_borgdorff,
|
'borgdorff': fetch_borgdorff,
|
||||||
'vanherk': fetch_vanherk,
|
'vanherk': fetch_vanherk,
|
||||||
'vanoord': fetch_vanoord,
|
'vanoord': fetch_vanoord,
|
||||||
|
'vankleef': fetch_vankleef,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -151,6 +151,72 @@ def fetch_morris() -> list[RawListing]:
|
|||||||
return fetch_realworks("https://www.morrismakelaardij.nl", "morris")
|
return fetch_realworks("https://www.morrismakelaardij.nl", "morris")
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_vankleef() -> list[RawListing]:
    """Fetch Van Kleef makelaars listings — only Schiedam, as specified.

    Requests the paginated Schiedam "koop" overview one page at a time and
    turns every ``li.aanbodEntry`` card into a ``RawListing``. Address, price,
    status and hero image come from the overview card; the remaining
    characteristics come from ``_realworks_detail`` for the listing URL.

    Cards whose parsed price exceeds ``config.MAX_PRICE`` are skipped before
    the detail page is requested. A failure while handling a single card is
    logged and that card is skipped; a failure while fetching an overview
    page is not caught here and propagates to the caller.

    Returns:
        The listings collected over all visited pages (possibly empty).
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    base_url = "https://www.vankleefmakelaars.nl"
    listings_path = "/aanbod/woningaanbod/schiedam/koop"
    listings = []
    page = 1

    while True:
        url = f"{base_url}{listings_path}/pagina-{page}/"
        soup = fetch_soup(url)
        cards = soup.select("li.aanbodEntry")
        if not cards:
            break

        for card in cards:
            try:
                a_tag = card.select_one("a.aanbodEntryLink")
                if not a_tag:
                    continue
                # urljoin handles site-relative, relative and absolute hrefs;
                # plain concatenation only worked for hrefs starting with "/".
                listing_url = urljoin(base_url, a_tag["href"])

                adres = _text(card, ".street-address")
                # Drop spaces from the postcode; an empty result becomes None.
                postcode = (_text(card, ".postal-code") or "").replace(" ", "") or None
                stad = _text(card, ".locality")
                prijs = parse_prijs(_text(card, ".koopprijs .kenmerkValue"))

                # Filter on price before the (extra) detail request below.
                if prijs and prijs > config.MAX_PRICE:
                    continue

                status_text = (_text(card, ".objectstatusbanner") or "").lower()
                # Unknown or missing banner text falls back to "beschikbaar".
                status = _REALWORKS_STATUS_MAP.get(status_text, "beschikbaar")

                img_tag = card.select_one(".hoofdfoto img")
                # .get() so an <img> without a src attribute yields no hero
                # image instead of raising and discarding the whole listing.
                hero = img_tag.get("src") if img_tag else None

                # kk is used as a mapping of characteristic name -> raw value.
                kk = _realworks_detail(listing_url, "vankleef")

                listings.append(RawListing(
                    url=listing_url,
                    source_makelaar="vankleef",
                    adres=adres,
                    postcode=postcode,
                    stad=stad,
                    prijs=prijs,
                    status=status,
                    hero_image_url=hero,
                    woningtype=kk.get("woningtype"),
                    bouwjaar=int(kk["bouwjaar"]) if kk.get("bouwjaar") else None,
                    woonoppervlak=parse_m2(kk.get("woonoppervlak")),
                    perceeloppervlak=parse_m2(kk.get("perceeloppervlak")),
                    kamers=int(kk["kamers"]) if kk.get("kamers") else None,
                    slaapkamers=int(kk["slaapkamers"]) if kk.get("slaapkamers") else None,
                    energielabel=kk.get("energielabel"),
                ))
                # In dev, stop after the first listing appended from this page.
                # NOTE(review): this only leaves the card loop; pagination
                # below still continues on full pages — confirm that is
                # intended.
                if config.APP_ENV == "dev":
                    break
            except Exception as e:
                # Best-effort scraping: one bad card must not abort the run.
                log.warning("vankleef: parse fout: %s", e)

        # Fewer than 10 cards is treated as the last page.
        # NOTE(review): assumes the site shows 10 listings per page — confirm.
        if len(cards) < 10:
            break
        page += 1

    log.info("vankleef: %d listings opgehaald", len(listings))
    return listings
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Makelaardij Wassenaar (Schiedam) — Realworks CMS, JSON-LD listing page
|
# Makelaardij Wassenaar (Schiedam) — Realworks CMS, JSON-LD listing page
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -38,29 +38,39 @@ def _schieland_detail(detail_url: str) -> dict:
|
|||||||
postcode_el = soup.select_one("div.house__status p")
|
postcode_el = soup.select_one("div.house__status p")
|
||||||
postcode = _extract_postcode(postcode_el.get_text()) if postcode_el else None
|
postcode = _extract_postcode(postcode_el.get_text()) if postcode_el else None
|
||||||
|
|
||||||
# Parse #kenmerken section: <li><strong>label</strong><span>value</span></li>
|
# Parse house-features__block sections: div.house-features__block > ul > li
|
||||||
kv: dict[str, str] = {}
|
kv: dict[str, str] = {}
|
||||||
kenmerken = soup.select_one("#kenmerken")
|
for block in soup.select("div.house-features__block"):
|
||||||
if kenmerken:
|
h4 = block.select_one("h4")
|
||||||
for li in kenmerken.select("li"):
|
if not h4:
|
||||||
label_el = li.select_one("strong")
|
continue
|
||||||
value_el = li.select_one("span")
|
section_title = h4.get_text(strip=True).lower()
|
||||||
if label_el and value_el:
|
|
||||||
# Strip nested links (e.g. "Hypotheek berekenen")
|
for li in block.select("ul > li"):
|
||||||
for a in value_el.select("a"):
|
strong = li.select_one("strong")
|
||||||
a.decompose()
|
span = li.select_one("span")
|
||||||
kv[label_el.get_text(strip=True).lower()] = value_el.get_text(strip=True)
|
if not strong or not span:
|
||||||
|
continue
|
||||||
|
|
||||||
|
label = strong.get_text(strip=True).lower()
|
||||||
|
value = span.get_text(strip=True)
|
||||||
|
|
||||||
|
# Remove links from value
|
||||||
|
for a in span.select("a"):
|
||||||
|
value = value.replace(a.get_text(strip=True), "").strip()
|
||||||
|
|
||||||
|
kv[f"{section_title}.{label}"] = value
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"postcode": postcode,
|
"postcode": postcode,
|
||||||
"status": kv.get("status", "").lower(),
|
"status": kv.get("overdracht.status", "").lower(),
|
||||||
"woningtype": kv.get("soort bouw"),
|
"woningtype": kv.get("bouwvorm.soort bouw"),
|
||||||
"bouwjaar": kv.get("bouwjaar"),
|
"bouwjaar": kv.get("bouwvorm.bouwjaar"),
|
||||||
"woonoppervlak": kv.get("woonoppervlakte"),
|
"woonoppervlak": kv.get("indeling.woonoppervlakte"),
|
||||||
"perceeloppervlak": kv.get("perceeloppervlakte"),
|
"perceeloppervlak": kv.get("indeling.perceeloppervlakte"),
|
||||||
"kamers": kv.get("aantal kamers"),
|
"kamers": kv.get("indeling.aantal kamers"),
|
||||||
"slaapkamers": kv.get("aantal slaapkamers"),
|
"slaapkamers": kv.get("indeling.aantal slaapkamers"),
|
||||||
"energielabel": kv.get("energielabel"),
|
"energielabel": kv.get("energie & installatie.energielabel"),
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.warning("schielandborsboom: detail fetch fout %s: %s", detail_url, e)
|
log.warning("schielandborsboom: detail fetch fout %s: %s", detail_url, e)
|
||||||
@@ -74,9 +84,9 @@ def fetch_schielandborsboom() -> list[RawListing]:
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
if page == 1:
|
if page == 1:
|
||||||
url = f"{_SCHIELAND_BASE}/wonen?sure_koop_huur=koop"
|
url = f"{_SCHIELAND_BASE}/wonen/zoeken/heel-nederland/prijs=200000-300000/schiedam/"
|
||||||
else:
|
else:
|
||||||
url = f"{_SCHIELAND_BASE}/wonen/page/{page}/?sure_koop_huur=koop"
|
url = f"{_SCHIELAND_BASE}/wonen/zoeken/heel-nederland/prijs=200000-300000/schiedam/?pagina={page}"
|
||||||
|
|
||||||
soup = fetch_soup(url)
|
soup = fetch_soup(url)
|
||||||
cards = soup.select("div.card.card--house")
|
cards = soup.select("div.card.card--house")
|
||||||
@@ -98,13 +108,18 @@ def fetch_schielandborsboom() -> list[RawListing]:
|
|||||||
if not stad or stad.lower() != "schiedam":
|
if not stad or stad.lower() != "schiedam":
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Status from card-house__thumb second class
|
# Status from card-house__status badge
|
||||||
thumb = card.select_one("div.card-house__thumb")
|
status_el = card.select_one("div.card-house__status")
|
||||||
status_classes = thumb.get("class", []) if thumb else []
|
status_text = status_el.get_text(strip=True).lower() if status_el else ""
|
||||||
status_text = next(
|
# Check for known status keywords in badge text
|
||||||
(c for c in status_classes if c != "card-house__thumb"), "beschikbaar"
|
if "beschikbaar" in status_text:
|
||||||
).lower()
|
status = "beschikbaar"
|
||||||
status = _SCHIELAND_STATUS_MAP.get(status_text, "beschikbaar")
|
elif "onder bod" in status_text:
|
||||||
|
status = "onder_bod"
|
||||||
|
elif "verkocht" in status_text:
|
||||||
|
status = "verkocht"
|
||||||
|
else:
|
||||||
|
status = "beschikbaar"
|
||||||
|
|
||||||
# Price
|
# Price
|
||||||
prijs = parse_prijs(_text(card, "p.price"))
|
prijs = parse_prijs(_text(card, "p.price"))
|
||||||
|
|||||||
Reference in New Issue
Block a user