Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import csv | |
| import os | |
| import pandas as pd | |
| import random | |
| from huggingface_hub import Repository, HfApi, HfFolder | |
| import openai | |
# Credentials are read from environment variables; `token` is handed to the
# Repository below.
token = os.getenv("token")
tokenread = os.getenv("tokenread")
api = HfApi()

# Local working copy of the Space repository (cloned from the URL below).
localdir = "HeadlinePrediction"
repo = Repository(
    local_dir=localdir,
    clone_from="https://huggingface.co/spaces/Add1E/HeadlinePrediction",
    token=token,
)
def add_to_csv(var1, var2, var3, var4, var5, var6, var7, filename):
    """Append one seven-column row to a CSV file inside the local repo directory.

    Args:
        var1, var2, var3, var4, var5, var6, var7: Cell values, written to the
            row in exactly this order.
        filename: Name of the CSV file, resolved relative to ``localdir``.
    """
    target_path = os.path.abspath(f'{localdir}/{filename}')
    row = [var1, var2, var3, var4, var5, var6, var7]
    # Append mode keeps the rows already in the file; newline='' leaves line
    # endings to the csv module.
    with open(target_path, 'a', newline='', encoding="utf-8") as csv_file:
        csv.writer(csv_file).writerow(row)
def scrape_website(url, timeout=30):
    """Download a page and extract the article text, headline and summary.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an unresponsive
            host, which would freeze the app.

    Returns:
        On success, a dict with the keys ``'scraped_html'``,
        ``'heute_überschrift'`` and ``'heute_zsm'``. Each value is the
        space-joined text of all elements matching the corresponding tag and
        CSS class (an empty string when nothing matches).
        On any failure, the exception message as a plain ``str``. Callers must
        check the type before indexing the result.
    """
    # Result key -> (tag name, CSS class string) of the elements to collect.
    selectors = {
        'scraped_html': ('p', "sc-8df4b824-0 iGqRWd"),
        'heute_überschrift': ('h1', "sc-8df4b824-0 geJYKG"),
        'heute_zsm': ('p', "sc-8df4b824-0 AwWvY"),
    }
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return {
            key: ' '.join(
                element.get_text()
                for element in soup.find_all(tag, class_=css_class)
            )
            for key, (tag, css_class) in selectors.items()
        }
    except Exception as e:
        # Best-effort contract kept from the original: report the problem as
        # text instead of raising.
        return str(e)
def send_to_chatgpt(prompt_file, transcript):
    """Send a prompt plus a text to the chat-completion API and return the reply.

    Args:
        prompt_file: Despite the name, this value is used directly as the
            content of the first system message; it is not opened as a file.
        transcript: Text sent as the content of a second system message.

    Returns:
        The message content of the first returned choice, or the exception
        message as a string if anything in the call fails.
    """
    # openai module attribute -> environment variable that provides its value.
    env_settings = {
        "api_key": "OPENAI_API_KEY",
        "api_base": "OPENAI_API_BASE",
        "api_type": "OPENAI_API_TYPE",
        "api_version": "OPENAI_API_VERSION",
    }
    try:
        # The connection settings are re-read from the environment on every call.
        for attribute, env_name in env_settings.items():
            setattr(openai, attribute, os.getenv(env_name))

        conversation = [
            {"role": "system", "content": prompt_file},
            {"role": "system", "content": transcript},
        ]
        completion = openai.ChatCompletion.create(
            engine="gpt-4-0613",
            messages=conversation,
        )
        return completion.choices[0].message["content"]
    except Exception as error:
        return str(error)
st.title("Webseiten-Scraper")

# The prompts are supplied through environment variables.
base_prompt, prompt2, prompt3 = (
    os.getenv(name) for name in ("prompt", "prompt2", "prompt3")
)

# Load the stored results; spaces in column names are replaced by underscores.
csv_name = "results.csv"
df = pd.read_csv(csv_name, encoding='utf-8').rename(
    columns=lambda label: label.replace(' ', '_')
)

# Input field for the URL of the page to scrape.
url = st.text_input("URL eingeben", "https://www.beispielwebsite.com")
# "Scrape" button: fetch the page, show the extracted parts and the model's
# answer to the base prompt.
if st.button("Scrape"):
    if url:
        scraped_html = scrape_website(url)
        if isinstance(scraped_html, str):
            # scrape_website reports a failure by returning the error text
            # instead of the result dict; indexing that string by key would
            # raise TypeError, so show the message and stop here.
            st.error(scraped_html)
        else:
            st.code(scraped_html)
            response = send_to_chatgpt(base_prompt, scraped_html['scraped_html'])
            st.write("Heute-Überschrift: ")
            st.code(scraped_html['heute_überschrift'])
            st.write("Heute-Kernaussage: ")
            st.code(scraped_html['heute_zsm'])
            st.write("ChatGPT: ")
            st.code(response)
            # NOTE: splitting the reply into two headlines and two key
            # statements and appending them to the CSV via add_to_csv is
            # currently disabled.
    else:
        st.error("Bitte geben Sie eine gültige URL ein.")
# "What does GPT need?" button: send headline, sub-headline and article text
# together with the second prompt and show the model's answer.
if st.button("What does GPT need?"):
    if url:
        scraped_html = scrape_website(url)
        if isinstance(scraped_html, str):
            # scrape_website reports a failure by returning the error text
            # instead of the result dict; indexing that string by key would
            # raise TypeError, so show the message and stop here.
            st.error(scraped_html)
        else:
            scraped_txt = (
                f"Headline: {scraped_html['heute_überschrift']}"
                f" Sub-headline: {scraped_html['heute_zsm']}"
                f" news text: {scraped_html['scraped_html']}"
            )
            response = send_to_chatgpt(prompt2, scraped_txt)
            st.write("Heute-Artikel: ")
            st.code(scraped_html['heute_überschrift'])
            st.code(scraped_html['heute_zsm'])
            st.write(scraped_html['scraped_html'])
            st.write("ChatGPT says it needs: ")
            st.code(response)
            # NOTE: a follow-up call that feeds this answer back with prompt3
            # is currently disabled.
    else:
        st.error("Bitte geben Sie eine gültige URL ein.")
# Sidebar "Upload Data" button: stage, commit and push the results CSV from
# the local repository clone.
if st.sidebar.button("Upload Data"):
    # The original referenced `filename`, which exists only as a parameter of
    # add_to_csv; at module level it raised NameError. csv_name is the CSV
    # file name this script defines.
    repo.git_add(os.path.abspath(f'{localdir}/{csv_name}'))
    repo.git_commit("Add new headlines.csv")
    repo.git_push()
# Sidebar selection: show one of up to five randomly chosen rows of df.
prediction_options = [
    "None",
    "Prediction 1",
    "Prediction 2",
    "Prediction 3",
    "Prediction 4",
    "Prediction 5",
]
auswahl = st.sidebar.selectbox("Wählen Sie eine Prediction:", prediction_options)

# Draw distinct row positions. random.sample never repeats a value, and
# capping the count at len(df) avoids the endless loop the previous
# "collect until five distinct numbers" approach hit with fewer than five rows
# (and the ValueError from randint(0, -1) on an empty table).
random_numbers = random.sample(range(len(df)), min(5, len(df)))

if auswahl != "None":
    # "Prediction k" sits at position k in the option list -> slot k - 1.
    slot = prediction_options.index(auswahl) - 1
    if slot < len(random_numbers):
        st.dataframe(df.iloc[random_numbers[slot]])
    else:
        st.warning("Nicht genügend Datensätze für diese Prediction vorhanden.")
# Sidebar "Show Full Data" button: re-read the CSV from disk and display the
# whole table (column names are shown as stored in the file).
show_full_data = st.sidebar.button("Show Full Data")
if show_full_data:
    df = pd.read_csv(csv_name, encoding='utf-8')
    st.dataframe(df)