You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
35 lines
858 B
35 lines
858 B
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
|
|
|
|
# Overview page that extract_urls() downloads and scans for links.
URL = "https://futureoffestivals.com/ausstellerinnen-2026/"
# Only links whose target starts with this prefix are kept.
PREFIX = "https://futureoffestivals.com/exhibitors/"
# Text file that receives the collected links, one per line.
OUTPUT_FILE = "aussteller_urls.txt"
|
|
|
|
def extract_urls():
    """Collect exhibitor links from the overview page and save them to disk.

    Downloads ``URL``, resolves every anchor's ``href`` against that page
    address, keeps the targets that start with ``PREFIX``, and writes the
    de-duplicated, alphabetically sorted result to ``OUTPUT_FILE`` (UTF-8,
    one link per line). Progress messages are printed to stdout.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: If the request itself fails, e.g. on a
            timeout or connection error.
        OSError: If the output file cannot be opened or written.
    """
    print(f"Lade Seite: {URL}")

    # Send a browser-like User-Agent instead of the requests default.
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(URL, headers=headers, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # Resolve each href against the page URL before filtering so that
    # relative ("/exhibitors/x") and protocol-relative links are matched
    # as well; absolute links pass through urljoin unchanged.
    resolved = (
        urljoin(URL, anchor["href"].strip())
        for anchor in soup.find_all("a", href=True)
    )
    # The set comprehension removes duplicates; sorting keeps the output
    # file stable between runs.
    urls = sorted({link for link in resolved if link.startswith(PREFIX)})

    print(f"{len(urls)} URLs gefunden. Speichere in {OUTPUT_FILE} ...")

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.writelines(f"{link}\n" for link in urls)

    print("Fertig!")
|
|
|
|
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    extract_urls()
|