# scraper.py
import requests
from bs4 import BeautifulSoup
from crawler.extractor import extract_structured_data
from crawler.utils import log


def fetch_and_parse(url):
    """Fetch *url* over HTTP and run the project extractor on the raw HTML.

    Args:
        url: Absolute URL to download.

    Returns:
        Whatever ``extract_structured_data(html, url)`` returns.

    Raises:
        requests.RequestException: on any network/HTTP failure
            (connection error, timeout, or non-2xx status via
            ``raise_for_status``) — logged, then re-raised.
        Exception: anything raised by the extractor — logged, then
            re-raised unchanged so callers keep the original type.
    """
    log.info(f"Abruf: {url}")
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
    # RequestException is the base of all requests errors (ConnectionError,
    # Timeout, HTTPError, ...) — narrower than the previous bare Exception,
    # so unrelated bugs are no longer swallowed into a "fetch error" log line.
    except requests.RequestException as e:
        log.error(f"Fehler beim Abruf von {url}: {e}")
        raise
    # IMPORTANT: pass the full HTML document to the extractor, not
    # pre-extracted text. ``r.text`` is the decoded HTML body.
    html = r.text
    try:
        return extract_structured_data(html, url)
    # Extractor errors are intentionally caught broadly: this is the
    # module's outer boundary — log for diagnostics, then re-raise.
    except Exception as e:
        log.error(f"Extraktionsfehler bei {url}: {e}")
        raise