tdurzynski committed on
Commit
4290320
·
verified ·
1 Parent(s): 142e92e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import openai
4
+ import gradio as gr
5
+ import os
6
+
7
# Read the OpenAI API key from the OPENAI_API_KEY environment variable
# (e.g. a Hugging Face secret, or a value exported manually).
openai.api_key = os.environ.get("OPENAI_API_KEY")
9
+
10
def scrape_and_summarize(url: str) -> str:
    """Fetch a web page and return a GPT-4o mini summary of its paragraph text.

    Args:
        url: Address of the page to summarize. Surrounding whitespace is
            ignored; ``None`` is treated like an empty string.

    Returns:
        The model's summary on success. Otherwise a human-readable message
        explaining why no summary was produced (empty URL, fetch failure,
        page without paragraph text, or any other error). Failures are
        returned as text rather than raised so the caller can display them.
    """
    url = (url or "").strip()
    if not url:
        # Avoid a pointless request and the confusing "Invalid URL ''"
        # message that requests would otherwise produce.
        return "Please enter a URL to summarize."

    try:
        # Fetch website content
        headers = {"User-Agent": "Mozilla/5.0"}
        page = requests.get(url, headers=headers, timeout=10)
        page.raise_for_status()

        # Parse HTML content, keeping only the text of non-blank <p> elements
        soup = BeautifulSoup(page.text, "html.parser")
        paragraph_texts = (p.get_text() for p in soup.find_all("p"))
        text_content = "\n".join(t for t in paragraph_texts if t.strip())

        if not text_content:
            return "No readable content found on this page."

        # Limit text to 4000 characters for summarization
        text_content = text_content[:4000]

        messages = [
            {"role": "system", "content": "You are a helpful assistant that summarizes webpage content."},
            {"role": "user", "content": f"Summarize the following webpage content:\n\n{text_content}"},
        ]

        # Call OpenAI GPT-4o mini for summarization.
        if hasattr(openai, "OpenAI"):
            # openai>=1.0 removed openai.ChatCompletion; use the module-level
            # client, which honours the openai.api_key set at import time.
            completion = openai.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages,
            )
            return completion.choices[0].message.content

        # Legacy (<1.0) SDK: dict-style response.
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=messages,
        )
        return completion["choices"][0]["message"]["content"]

    except requests.exceptions.RequestException as e:
        return f"Error fetching the webpage: {e}"
    except Exception as e:
        # Top-level UI boundary: surface the failure as text instead of
        # letting it propagate to the caller.
        return f"An error occurred: {e}"
47
+
48
# Gradio UI: a URL field, a read-only summary box, and a button that runs
# scrape_and_summarize on the entered URL.
with gr.Blocks() as demo:
    gr.Markdown("# Web Page Summarizer")
    gr.Markdown("Enter a website URL to get a summary of its content.")

    url_input = gr.Textbox(placeholder="https://example.com", label="Website URL")
    output = gr.Textbox(interactive=False, label="Summary")
    submit_button = gr.Button("Summarize")

    submit_button.click(
        fn=scrape_and_summarize,
        inputs=[url_input],
        outputs=[output],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()