You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

28 lines
671 B

# scraper.py
import requests
from bs4 import BeautifulSoup
from crawler.extractor import extract_structured_data
from crawler.utils import log
def fetch_and_parse(url):
    """Download *url* and return the structured data extracted from its HTML.

    Logs the fetch, issues a GET request with a ``Mozilla/5.0`` User-Agent
    header and a timeout of 10, then passes the response body together with
    the URL to ``extract_structured_data``.

    Args:
        url: Address of the page to fetch.

    Returns:
        Whatever ``extract_structured_data(html, url)`` returns.

    Raises:
        Exception: Any error raised while fetching (including the one
            ``raise_for_status`` raises for an unsuccessful HTTP status) or
            while extracting is logged and then re-raised unchanged.
    """
    log.info(f"Abruf: {url}")
    headers = {"User-Agent": "Mozilla/5.0"}

    # Phase 1: fetch. Failures are logged here and propagated to the caller.
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except Exception as e:
        log.error(f"Fehler beim Abruf von {url}: {e}")
        raise

    # IMPORTANT: pass the HTML to the extractor, NOT text!
    html = response.text

    # Phase 2: extract. Logged separately so the log shows which phase failed.
    try:
        return extract_structured_data(html, url)
    except Exception as e:
        log.error(f"Extraktionsfehler bei {url}: {e}")
        raise