Spaces:

bala00712200502
/

webscraping

Sleeping

App Files Files Community

bala00712200502 committed on Apr 24, 2025

Commit

5e0460c

verified ·

1 Parent(s): b25c6b8

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -22

app.py CHANGED Viewed

@@ -5,57 +5,84 @@ import gradio as gr
 import requests
 from bs4 import BeautifulSoup
 from dotenv import load_dotenv
-import openai
 # Load environment variables
 load_dotenv()
-openai.api_key = os.getenv("OPENAI_API_KEY")
-model = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
 # 🌐 Web Scraper
 def scrape_text_from_url(url):
     try:
         response = requests.get(url, timeout=10)
         soup = BeautifulSoup(response.content, "html.parser")
-        # Remove scripts and style
         for tag in soup(["script", "style"]):
             tag.decompose()
-        # Extract visible text
         text = " ".join(chunk.strip() for chunk in soup.stripped_strings)
         return text[:5000]  # limit to avoid token overflow
     except Exception as e:
-        return f"❌ Error scraping the page: {str(e)}"
-# 🧠 LLM Summarizer
-def summarize_with_gpt(text):
     try:
-        response = openai.ChatCompletion.create(
-            model=model,
-            messages=[
-                {"role": "system", "content": "You are a helpful assistant that summarizes articles."},
-                {"role": "user", "content": f"Please summarize the following content:\n\n{text}"}
-            ],
-            temperature=0.7,
-            max_tokens=500
-        )
-        return response.choices[0].message.content.strip()
     except Exception as e:
-        return f"❌ Error from OpenAI: {str(e)}"
 # 🔁 Combined Function
 def scrape_and_summarize(url):
     raw_text = scrape_text_from_url(url)
     if "❌" in raw_text:
-        return raw_text, ""
-    summary = summarize_with_gpt(raw_text)
     return raw_text, summary
 # 🎨 Gradio UI
 with gr.Blocks(title="🔎 Web Summarizer with AI") as demo:
     gr.Markdown("## 🧠🌐 Web Article Summarizer")
-    gr.Markdown("Enter a webpage URL below. The AI will scrape and summarize the content.")
     with gr.Row():
         url_input = gr.Textbox(label="🔗 Enter URL", placeholder="https://example.com", scale=4)
@@ -67,8 +94,10 @@ with gr.Blocks(title="🔎 Web Summarizer with AI") as demo:
         with gr.Column(scale=1):
             summary_output = gr.Textbox(label="📄 AI Summary", lines=15, interactive=False)
     btn.click(scrape_and_summarize, inputs=[url_input], outputs=[raw_output, summary_output])
 # 🚀 Launch app
 if __name__ == "__main__":
     demo.launch()

 import requests
 from bs4 import BeautifulSoup
 from dotenv import load_dotenv
+# Import the Google Generative AI library
+import google.generativeai as genai
 # Load environment variables
 load_dotenv()
+# Get the Gemini API key
+gemini_api_key = os.getenv("GEMINI_API_KEY")
+# Configure the generative AI model
+# Ensure GEMINI_API_KEY is set in your .env file
+if not gemini_api_key:
+    raise ValueError("GEMINI_API_KEY environment variable not set.")
+genai.configure(api_key=gemini_api_key)
+# Use the specified Gemini model
+# You can change 'gemini-1.5-flash-latest' if needed, but the user requested 1.5 Flash
+model_name = os.getenv("GEMINI_MODEL", "gemini-1.5-flash-latest")
+model = genai.GenerativeModel(model_name)
 # 🌐 Web Scraper
 def scrape_text_from_url(url, max_chars=5000, timeout=10):
     """Fetch a web page and return its visible text.

     Args:
         url: Address of the page to download. Surrounding whitespace is ignored.
         max_chars: Maximum number of characters to return; longer text is
             truncated so the summarizer input stays small.
         timeout: Seconds to wait for the HTTP response before giving up.

     Returns:
         The page text with <script> and <style> contents removed, pieces joined
         by single spaces and cut to ``max_chars``. On failure, a human-readable
         message that starts with "❌" is returned instead of raising.
     """
     # Reject missing/blank input up front so the user gets a clear message
     # rather than a low-level "invalid URL" error from the HTTP library.
     if not isinstance(url, str) or not url.strip():
         return "❌ Error fetching the page: no URL provided."
     url = url.strip()
     try:
         response = requests.get(url, timeout=timeout)
         # Treat 4xx/5xx responses as failures instead of scraping an error page.
         response.raise_for_status()
         soup = BeautifulSoup(response.content, "html.parser")
         # Script and style bodies are not visible text; drop them before extraction.
         for tag in soup(["script", "style"]):
             tag.decompose()
         # stripped_strings already yields whitespace-trimmed, non-empty pieces.
         text = " ".join(soup.stripped_strings)
         return text[:max_chars]
     except requests.exceptions.RequestException as e:
         return f"❌ Error fetching the page: {e}"
     except Exception as e:
         # Last-resort boundary: the UI expects a string, never an exception.
         return f"❌ An unexpected error occurred during scraping: {e}"
+# 🧠 LLM Summarizer (using Gemini)
+def summarize_with_gemini(text):
+    """
+    Summarizes the provided text using the configured Gemini model.
+    """
+    if not text or "❌" in text:
+        return "Cannot summarize due to scraping error or empty text."
     try:
+        # Use the generate_content method for Gemini
+        response = model.generate_content(f"Please summarize the following content:\n\n{text}")
+        # Access the text content from the response
+        return response.text.strip()
     except Exception as e:
+        return f"❌ Error from Gemini API: {str(e)}"
 # 🔁 Combined Function
 def scrape_and_summarize(url):
     """Scrape ``url`` and summarize the extracted text.

     Args:
         url: Address of the page to process.

     Returns:
         A ``(raw_text, summary)`` tuple for the two output boxes. When scraping
         fails, ``raw_text`` is the scraper's error message and ``summary`` is a
         fixed notice that summarization was skipped.
     """
     raw_text = scrape_text_from_url(url)
     # The scraper signals failure with a message that begins with "❌". Test the
     # prefix only, so pages whose content contains that character are still summarized.
     if raw_text.startswith("❌"):
         return raw_text, "Summarization skipped due to scraping error."
     summary = summarize_with_gemini(raw_text)
     return raw_text, summary
 # 🎨 Gradio UI
 with gr.Blocks(title="🔎 Web Summarizer with AI") as demo:
     gr.Markdown("## 🧠🌐 Web Article Summarizer")
+    gr.Markdown("Enter a webpage URL below. The AI will scrape and summarize the content using Gemini 1.5 Flash.")
     with gr.Row():
         url_input = gr.Textbox(label="🔗 Enter URL", placeholder="https://example.com", scale=4)
         with gr.Column(scale=1):
             summary_output = gr.Textbox(label="📄 AI Summary", lines=15, interactive=False)
+    # Link the button click event to the combined function
     btn.click(scrape_and_summarize, inputs=[url_input], outputs=[raw_output, summary_output])
 # 🚀 Launch app
 if __name__ == "__main__":
+    # You can set share=True to create a public link (be cautious with API keys)
     demo.launch()