bala00712200502 committed on
Commit
5e0460c
·
verified ·
1 Parent(s): b25c6b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -22
app.py CHANGED
@@ -5,57 +5,84 @@ import gradio as gr
5
  import requests
6
  from bs4 import BeautifulSoup
7
  from dotenv import load_dotenv
8
- import openai
 
9
 
10
  # Load environment variables
11
  load_dotenv()
12
- openai.api_key = os.getenv("OPENAI_API_KEY")
13
- model = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # 🌐 Web Scraper
16
  def scrape_text_from_url(url):
 
 
 
 
17
  try:
18
  response = requests.get(url, timeout=10)
 
 
19
  soup = BeautifulSoup(response.content, "html.parser")
20
 
21
- # Remove scripts and style
22
  for tag in soup(["script", "style"]):
23
  tag.decompose()
24
 
25
- # Extract visible text
26
  text = " ".join(chunk.strip() for chunk in soup.stripped_strings)
27
  return text[:5000] # limit to avoid token overflow
 
 
28
  except Exception as e:
29
- return f"❌ Error scraping the page: {str(e)}"
 
 
 
 
 
 
 
 
30
 
31
- # 🧠 LLM Summarizer
32
- def summarize_with_gpt(text):
33
  try:
34
- response = openai.ChatCompletion.create(
35
- model=model,
36
- messages=[
37
- {"role": "system", "content": "You are a helpful assistant that summarizes articles."},
38
- {"role": "user", "content": f"Please summarize the following content:\n\n{text}"}
39
- ],
40
- temperature=0.7,
41
- max_tokens=500
42
- )
43
- return response.choices[0].message.content.strip()
44
  except Exception as e:
45
- return f"❌ Error from OpenAI: {str(e)}"
46
 
47
  # πŸ” Combined Function
48
  def scrape_and_summarize(url):
 
 
 
49
  raw_text = scrape_text_from_url(url)
 
50
  if "❌" in raw_text:
51
- return raw_text, ""
52
- summary = summarize_with_gpt(raw_text)
 
53
  return raw_text, summary
54
 
55
  # 🎨 Gradio UI
56
  with gr.Blocks(title="πŸ”Ž Web Summarizer with AI") as demo:
57
  gr.Markdown("## 🧠🌐 Web Article Summarizer")
58
- gr.Markdown("Enter a webpage URL below. The AI will scrape and summarize the content.")
59
 
60
  with gr.Row():
61
  url_input = gr.Textbox(label="πŸ”— Enter URL", placeholder="https://example.com", scale=4)
@@ -67,8 +94,10 @@ with gr.Blocks(title="πŸ”Ž Web Summarizer with AI") as demo:
67
  with gr.Column(scale=1):
68
  summary_output = gr.Textbox(label="πŸ“„ AI Summary", lines=15, interactive=False)
69
 
 
70
  btn.click(scrape_and_summarize, inputs=[url_input], outputs=[raw_output, summary_output])
71
 
72
  # πŸš€ Launch app
73
  if __name__ == "__main__":
 
74
  demo.launch()
 
5
  import requests
6
  from bs4 import BeautifulSoup
7
  from dotenv import load_dotenv
8
+ # Import the Google Generative AI library
9
+ import google.generativeai as genai
10
 
11
# Load environment variables from a local .env file (if present)
load_dotenv()

# Get the Gemini API key
gemini_api_key = os.getenv("GEMINI_API_KEY")

# Fail fast at import time so a missing key is reported before the UI starts.
# Ensure GEMINI_API_KEY is set in your .env file.
if not gemini_api_key:
    raise ValueError("GEMINI_API_KEY environment variable not set.")

genai.configure(api_key=gemini_api_key)

# Module-level model shared by the summarizer below.
# Overridable via GEMINI_MODEL; defaults to Gemini 1.5 Flash as requested.
model_name = os.getenv("GEMINI_MODEL", "gemini-1.5-flash-latest")
model = genai.GenerativeModel(model_name)
28
 
29
# 🌐 Web Scraper
def scrape_text_from_url(url):
    """
    Fetch a page and return its visible text, capped at 5000 characters.

    On failure this returns an error string prefixed with "❌" instead of
    raising, so callers can detect problems with a simple substring check.
    """
    try:
        page = requests.get(url, timeout=10)
        # Treat HTTP 4xx/5xx responses as failures, not as scrapable content
        page.raise_for_status()
        parsed = BeautifulSoup(page.content, "html.parser")

        # Drop non-visible markup before extracting text
        for element in parsed(["script", "style"]):
            element.decompose()

        # Collapse all visible text fragments into one whitespace-joined string
        fragments = [piece.strip() for piece in parsed.stripped_strings]
        return " ".join(fragments)[:5000]  # limit to avoid token overflow
    except requests.exceptions.RequestException as e:
        return f"❌ Error fetching the page: {str(e)}"
    except Exception as e:
        return f"❌ An unexpected error occurred during scraping: {str(e)}"
52
+
53
# 🧠 LLM Summarizer (using Gemini)
def summarize_with_gemini(text):
    """
    Summarize *text* with the module-level Gemini model.

    Returns the summary string, or a human-readable message when the input
    is empty / already a scraper error, or when the API call fails.
    """
    # Guard: nothing useful to summarize (empty input or an error string)
    if not text or "❌" in text:
        return "Cannot summarize due to scraping error or empty text."

    prompt = f"Please summarize the following content:\n\n{text}"
    try:
        # Gemini uses generate_content; keep the .text access inside the
        # try as well, since reading the response may also fail
        result = model.generate_content(prompt)
        return result.text.strip()
    except Exception as e:
        return f"❌ Error from Gemini API: {str(e)}"
68
 
69
  # πŸ” Combined Function
70
  def scrape_and_summarize(url):
71
+ """
72
+ Combines scraping and summarizing functionalities.
73
+ """
74
  raw_text = scrape_text_from_url(url)
75
+ # Check if scraping failed before attempting summarization
76
  if "❌" in raw_text:
77
+ return raw_text, "Summarization skipped due to scraping error."
78
+
79
+ summary = summarize_with_gemini(raw_text)
80
  return raw_text, summary
81
 
82
  # 🎨 Gradio UI
83
  with gr.Blocks(title="πŸ”Ž Web Summarizer with AI") as demo:
84
  gr.Markdown("## 🧠🌐 Web Article Summarizer")
85
+ gr.Markdown("Enter a webpage URL below. The AI will scrape and summarize the content using Gemini 1.5 Flash.")
86
 
87
  with gr.Row():
88
  url_input = gr.Textbox(label="πŸ”— Enter URL", placeholder="https://example.com", scale=4)
 
94
  with gr.Column(scale=1):
95
  summary_output = gr.Textbox(label="πŸ“„ AI Summary", lines=15, interactive=False)
96
 
97
+ # Link the button click event to the combined function
98
  btn.click(scrape_and_summarize, inputs=[url_input], outputs=[raw_output, summary_output])
99
 
100
  # πŸš€ Launch app
101
  if __name__ == "__main__":
102
+ # You can set share=True to create a public link (be cautious with API keys)
103
  demo.launch()