Spaces:

MHamdan
/

SmartWebAnalyzerPlus

Sleeping

App Files Files Community

MHamdan committed on Feb 15, 2025

Commit

37ff7dd

1 Parent(s): 5b0db0d

Initial commit with full functionality

Browse files

Files changed (4) hide show

app.py +61 -0
requirements.txt +6 -0
smart_web_analyzer.py +54 -0
space.yml +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,61 @@

+# app.py
+import gradio as gr
+from smart_web_analyzer import WebAnalyzer
+# Single shared analyzer, created at import time; WebAnalyzer.__init__ builds
+# all three model pipelines, so this line does the heavy loading.
+analyzer = WebAnalyzer()
+def format_results(results: dict) -> dict:
+    """Format analysis results for Gradio tabs"""
+    outputs = {}
+    if 'error' in results:
+        return {"📜 Error": f"❌ {results['error']}"}
+    outputs["📜 Clean Text"] = results.get('clean_text', 'No text extracted')
+    if 'summary' in results:
+        outputs["📝 Summary"] = f"**AI Summary:**\n{results['summary']}"
+    if 'sentiment' in results:
+        outputs["🎭 Sentiment"] = f"**Sentiment Score:**\n{results['sentiment']}"
+    if 'topics' in results:
+        topics = "\n".join([f"- **{k}**: {v:.2f}" for k,v in results['topics'].items()])
+        outputs["📊 Topics"] = f"**Detected Topics:**\n{topics}"
+    return outputs
+with gr.Blocks(title="Smart Web Analyzer Plus") as demo:
+    gr.Markdown("# 🌐 Smart Web Analyzer Plus")
+    with gr.Row():
+        url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com")
+        modes = gr.CheckboxGroup(["summarize", "sentiment", "topics"],
+                                label="Analysis Types")
+        submit_btn = gr.Button("Analyze", variant="primary")
+    with gr.Tabs():
+        with gr.Tab("📜 Clean Text"):
+            clean_text = gr.Markdown()
+        with gr.Tab("📝 Summary"):
+            summary = gr.Markdown()
+        with gr.Tab("🎭 Sentiment"):
+            sentiment = gr.Markdown()
+        with gr.Tab("📊 Topics"):
+            topics = gr.Markdown()
+    examples = gr.Examples(
+        examples=[
+            ["https://www.bbc.com/news/technology-67881954", ["summarize", "sentiment"]],
+            ["https://arxiv.org/html/2312.17296v1", ["topics", "summarize"]]
+        ],
+        inputs=[url_input, modes]
+    )
+    submit_btn.click(
+        fn=lambda url, m: format_results(analyzer.analyze(url, m)),
+        inputs=[url_input, modes],
+        outputs=[clean_text, summary, sentiment, topics]
+    )
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+# requirements.txt
+gradio>=4.0.0
+beautifulsoup4>=4.12.0
+requests>=2.31.0
+transformers>=4.40.0
+torch>=2.2.0

smart_web_analyzer.py ADDED Viewed

	@@ -0,0 +1,54 @@

+# smart_web_analyzer.py
+import requests
+from bs4 import BeautifulSoup
+from transformers import pipeline
+import torch
+class WebAnalyzer:
+    def __init__(self):
+        self.device = 0 if torch.cuda.is_available() else -1
+        self.models = {
+            'summarize': pipeline("summarization", model="facebook/bart-large-cnn"),
+            'sentiment': pipeline("text-classification",
+                                model="nlptown/bert-base-multilingual-uncased-sentiment"),
+            'topics': pipeline("zero-shot-classification",
+                             model="facebook/bart-large-mnli")
+        }
+    def fetch_content(self, url: str) -> str:
+        """Fetch webpage content with custom headers"""
+        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
+        response = requests.get(url, headers=headers, timeout=15)
+        response.raise_for_status()
+        return response.text
+    def clean_html(self, html: str) -> str:
+        """Basic HTML cleaning preserving all tags"""
+        soup = BeautifulSoup(html, 'html.parser')
+        return soup.prettify()
+    def analyze(self, url: str, modes: list) -> dict:
+        """Core analysis pipeline"""
+        results = {}
+        try:
+            html = self.fetch_content(url)
+            results['clean_text'] = self.clean_html(html)
+            if 'summarize' in modes:
+                results['summary'] = self.models['summarize'](html, max_length=150)[0]['summary_text']
+            if 'sentiment' in modes:
+                sentiment = self.models['sentiment'](html[:512])[0]
+                results['sentiment'] = f"{sentiment['label']} ({sentiment['score']:.2f})"
+            if 'topics' in modes:
+                topics = self.models['topics'](html[:512],
+                                            candidate_labels=["Technology", "AI", "Business",
+                                                             "Science", "Politics"])
+                results['topics'] = {topic: score for topic, score
+                                   in zip(topics['labels'], topics['scores'])}
+        except Exception as e:
+            results['error'] = str(e)
+        return results

space.yml ADDED Viewed

	@@ -0,0 +1,5 @@

+# space.yml
+title: Smart Web Analyzer Plus
+sdk: gradio
+python:
+  version: "3.10"