Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import csv | |
| import os | |
| import pandas as pd | |
| import random | |
| from huggingface_hub import Repository, HfApi, HfFolder | |
| import openai | |
# Hugging Face Hub client plus the credentials taken from the environment.
api = HfApi()
token = os.environ.get("token")
tokenread = os.environ.get("tokenread")

# Directory that holds the local clone of the Space repository.
localdir = "HeadlinePrediction"
repo = Repository(
    local_dir=localdir,
    clone_from="https://huggingface.co/spaces/Add1E/HeadlinePrediction",
    token=token,
)
def add_to_csv(var1, var2, var3, var4, var5, var6, var7, filename):
    """Append one row made of the seven given values to the results CSV.

    The row is always appended to ``results.csv`` inside the directory
    named by the module-level ``localdir``.

    NOTE(review): ``filename`` is accepted but never used -- confirm whether
    the target path was meant to be derived from it.
    """
    row = [var1, var2, var3, var4, var5, var6, var7]
    target_path = os.path.abspath(f'{localdir}/results.csv')
    # Append mode keeps the rows already stored; newline='' leaves line
    # endings to the csv module.
    with open(target_path, 'a', newline='', encoding="utf-8") as csv_file:
        csv.writer(csv_file).writerow(row)
# Scrapes article text, headline and summary from a web page
def scrape_website(url, timeout=10):
    """Download *url* and extract the article text, headline and summary.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        On success a dict with the keys ``'scraped_html'``,
        ``'heute_überschrift'`` and ``'heute_zsm'``. Each value is the
        space-joined text of every element that carries the matching tag
        and CSS class (an empty string when nothing matches).
        On any failure the exception message is returned as a plain
        ``str`` instead, so callers have to check the type of the result.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        def joined_text(tag, css_class):
            # Concatenate the text of all matching elements with spaces.
            return ' '.join(node.get_text() for node in soup.find_all(tag, class_=css_class))

        content = {
            'scraped_html': joined_text('p', "sc-beqWaB jOAegM"),
            'heute_überschrift': joined_text('h1', "sc-beqWaB iTcspr"),
            'heute_zsm': joined_text('p', "sc-beqWaB iOdRIJ"),
        }
        return content
    except Exception as e:
        # Best-effort contract kept from the original: report the failure
        # as text instead of raising.
        return str(e)
def send_to_chatgpt(api_key, prompt_file, transcript):
    """Send a prompt read from disk together with *transcript* to the chat model.

    Args:
        api_key: OpenAI API key; it is stored on the ``openai`` module.
        prompt_file: Path of a UTF-8 text file that holds the prompt.
        transcript: Text that is sent along with the prompt.

    Returns:
        The content of the first choice of the completion. If anything
        raises, the exception message is returned as a string instead.
    """
    try:
        # Read the prompt from the file
        with open(prompt_file, 'r', encoding='utf-8') as prompt_handle:
            system_prompt = prompt_handle.read().strip()

        openai.api_key = api_key
        # NOTE(review): both messages are sent with the "system" role --
        # confirm the transcript is not meant to be a "user" message.
        conversation = [
            {"role": "system", "content": system_prompt},
            {"role": "system", "content": transcript},
        ]
        completion = openai.ChatCompletion.create(
            model="gpt-4-1106-preview",
            messages=conversation,
        )
        first_choice = completion.choices[0]
        return first_choice.message["content"]
    except Exception as error:
        return str(error)
st.title("Webseiten-Scraper")

# Settings used by the scraping workflow below
api_key = os.environ.get("api_key")  # OpenAI API key, read from the environment
base_prompt = "txt.txt"
csv_name = "results.csv"

# Load the stored results and replace spaces in the column names
df = pd.read_csv(csv_name, encoding='utf-8')
space_free_names = [name.replace(' ', '_') for name in df.columns]
df.columns = space_free_names
def _parse_prediction(response):
    """Split the model answer into two headlines and two key statements.

    The answer is expected to contain a headline section followed by a
    section introduced by the literal marker ``Kernaussagen:``. In each
    section the first line is skipped and the next two lines are read as
    numbered entries ("1. ...", "2. ..."), with the numbering removed.

    Returns:
        A 4-tuple ``(headline 1, headline 2, statement 1, statement 2)``,
        or ``None`` when the answer does not have that shape.
    """
    sections = response.split("Kernaussagen:")
    if len(sections) < 2:
        return None
    entries = []
    for section in sections[:2]:
        lines = section.split("\n")[1:]  # skip the first line of the section
        if len(lines) < 2:
            return None
        for line in lines[:2]:
            numbered = line.split(" ", 1)  # separate "1." from the text
            if len(numbered) < 2:
                return None
            entries.append(numbered[1])
    return tuple(entries)


# Input field for the URL to scrape
url = st.text_input("URL eingeben", "https://www.beispielwebsite.com")
if st.button("Scrape"):
    if url:
        scraped_html = scrape_website(url)
        if not isinstance(scraped_html, dict):
            # scrape_website reports failures as a plain error string;
            # indexing it like a dict would raise a TypeError.
            st.error(scraped_html)
        else:
            response = send_to_chatgpt(api_key, base_prompt, scraped_html['scraped_html'])
            st.code(response)
            parsed = _parse_prediction(response)
            if parsed is None:
                # The answer (or an error text returned in its place) does
                # not have the expected layout, so nothing is stored.
                st.error("Die Antwort konnte nicht verarbeitet werden.")
            else:
                ueberschrift_1, ueberschrift_2, kernaussage_1, kernaussage_2 = parsed
                add_to_csv(
                    url, scraped_html['heute_überschrift'], ueberschrift_1, ueberschrift_2,
                    scraped_html['heute_zsm'], kernaussage_1, kernaussage_2, csv_name
                )
    else:
        st.error("Bitte geben Sie eine gültige URL ein.")
# Sidebar action: stage the results file of the local clone, commit and push
if st.sidebar.button("Upload Data"):
    results_path = os.path.abspath(f'{localdir}/results.csv')
    repo.git_add(results_path)
    repo.git_commit("Add new headlines.csv")
    repo.git_push()
prediction_options = ["None", "Prediction 1", "Prediction 2", "Prediction 3", "Prediction 4", "Prediction 5"]
auswahl = st.sidebar.selectbox("Wählen Sie eine Prediction:", prediction_options)

# Draw distinct row positions, one per prediction slot. random.sample is
# capped at the number of rows, so a table with fewer than five rows no
# longer causes an endless loop and an empty table no longer raises.
sample_size = min(len(prediction_options) - 1, len(df))
random_numbers = random.sample(range(len(df)), sample_size)

if auswahl != "None":
    # "Prediction k" sits at position k in the option list -> slot k - 1
    slot = prediction_options.index(auswahl) - 1
    if slot < len(random_numbers):
        st.dataframe(df.iloc[random_numbers[slot]])
    else:
        st.warning("Für diese Prediction sind nicht genügend Daten vorhanden.")

if st.sidebar.button("Show Full Data"):
    # Re-read the CSV from disk; column names are shown as stored here
    df = pd.read_csv(csv_name, encoding='utf-8')
    st.dataframe(df)