You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

38 lines
876 B

# __main__.py
import sys
import pandas as pd
import time
from crawler.scraper import fetch_and_parse
from crawler.utils import log
def main():
    """Crawl every URL listed in a text file and save the results to Excel.

    Command-line arguments (read from ``sys.argv``):
        1. Path to a UTF-8 text file with one URL per line; blank lines
           are skipped and surrounding whitespace is stripped.
        2. Path of the Excel file to write.

    Each URL is passed to ``fetch_and_parse`` and its return value is
    collected. A URL that raises is logged and skipped so one failure does
    not abort the whole run. The collected values are handed to
    ``pandas.DataFrame`` and written without the index column.

    Raises:
        SystemExit: with exit code 1 when fewer than two arguments are given.
    """
    if len(sys.argv) < 3:
        # Usage errors go to stderr so they never mix with regular output.
        print("Usage: python -m crawler <urls.txt> <output.xlsx>", file=sys.stderr)
        sys.exit(1)

    url_file, output_file = sys.argv[1], sys.argv[2]

    log.info(f"Lade URL-Liste aus {url_file}")
    with open(url_file, "r", encoding="utf-8") as f:
        urls = [line.strip() for line in f if line.strip()]

    results = []
    for index, url in enumerate(urls):
        # NOTE(review): the source lost its indentation; the one-second pause
        # is assumed to separate consecutive requests (not to run only after
        # a failure) -- confirm. Sleeping *before* every request except the
        # first keeps that spacing without a pointless wait after the last URL.
        if index:
            time.sleep(1)
        try:
            results.append(fetch_and_parse(url))
        except Exception as e:
            # Deliberate best-effort: report the failing URL and carry on.
            log.error(f"Fehler bei {url}: {e}")

    df = pd.DataFrame(results)
    df.to_excel(output_file, index=False)
    log.info(f"Fertig! Datei gespeichert: {output_file}")
# Run the crawler only when executed as a script / ``python -m crawler``,
# not when this module is imported.
if __name__ == "__main__":
    main()