Spaces:
Runtime error
| from smolagents import Tool | |
| from typing import Any, Optional | |
class SimpleTool(Tool):
    """smolagents tool: analyze a URL or raw text and return JSON results."""

    name = "analyze_content"
    description = "Enhanced web content analyzer with multiple analysis modes."
    inputs = {"input_text":{"type":"string","description":"URL or direct text to analyze."},"mode":{"type":"string","nullable":True,"description":"Analysis mode ('analyze', 'summarize', 'sentiment', 'topics')."}}
    output_type = "string"

    def forward(self, input_text: str, mode: str = "analyze") -> str:
        """Enhanced web content analyzer with multiple analysis modes.

        Args:
            input_text: URL or direct text to analyze.
            mode: Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').

        Returns:
            str: JSON-formatted analysis results
        """
        import requests
        from bs4 import BeautifulSoup
        import re
        from transformers import pipeline
        import json

        # 1-5 star rating -> human-readable label (index = rating - 1).
        sentiment_labels = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"]
        try:
            # Setup request headers
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

            # Process input: fetch a URL, or treat the input as raw text.
            if input_text.startswith(('http://', 'https://')):
                response = requests.get(input_text, headers=headers, timeout=10)
                # FIX: the original ignored HTTP failures (404/500) and would
                # "analyze" the error page; raise so the request_error handler
                # below reports the failure instead.
                response.raise_for_status()
                soup = BeautifulSoup(response.text, 'html.parser')
                # Clean page content: drop non-text tags before extraction.
                for tag in soup(['script', 'style', 'meta']):
                    tag.decompose()
                # FIX: soup.title.string can be None for an empty <title>.
                title = (soup.title.string if soup.title else None) or "No title found"
                content = soup.get_text()
            else:
                title = "Text Analysis"
                content = input_text

            # FIX: the original collapsed ALL whitespace (r'\s+' -> ' '),
            # destroying newlines — so the "paragraphs" stat was always 1 and
            # sentiment mode could never find more than one section. Collapse
            # spaces/tabs within each line but keep line breaks.
            stripped = [re.sub(r'[ \t]+', ' ', line).strip() for line in content.splitlines()]
            clean_text = "\n".join(line for line in stripped if line)

            if len(clean_text) < 100:
                return json.dumps({
                    "status": "error",
                    "message": "Content too short for analysis (minimum 100 characters)"
                })

            # Lazily build the HF pipelines: "topics" is pure regex, so don't
            # pay the model-load cost for it (the original always loaded both
            # models regardless of mode).
            def _summarizer():
                # BART summarization model; input capped at ~1024 chars by callers.
                return pipeline("summarization", model="facebook/bart-large-cnn")

            def _classifier():
                # 1-5 star sentiment model; label looks like "4 stars".
                return pipeline("text-classification",
                                model="nlptown/bert-base-multilingual-uncased-sentiment")

            # Basic stats, reported for every mode.
            word_count = len(clean_text.split())
            stats = {
                "title": title,
                "characters": len(clean_text),
                "words": word_count,
                "paragraphs": len([p for p in clean_text.split("\n") if p.strip()]),
                "reading_time": f"{word_count // 200} minutes"  # ~200 wpm
            }
            result = {"status": "success", "stats": stats}

            # Mode-specific processing
            if mode == "analyze":
                # Summary of the leading text plus one overall sentiment call.
                summary = _summarizer()(clean_text[:1024], max_length=100, min_length=30)[0]['summary_text']
                sentiment = _classifier()(clean_text[:512])[0]
                # Label is e.g. "4 stars": first char is the 1-5 rating.
                score = int(sentiment['label'][0])
                result.update({
                    "summary": summary,
                    "sentiment": {
                        "overall": sentiment_labels[score - 1],
                        "score": score,
                        # FIX: the original reported score/5 (the rating
                        # rescaled) as "confidence"; report the model's own
                        # probability for the predicted label instead.
                        "confidence": f"{sentiment['score'] * 100:.1f}%"
                    }
                })
            elif mode == "sentiment":
                classifier = _classifier()
                # Per-paragraph sentiment for up to 5 substantial sections.
                paragraphs = [p for p in clean_text.split("\n") if len(p.strip()) > 50]
                sentiments = []
                for i, para in enumerate(paragraphs[:5]):
                    sent = classifier(para[:512])[0]
                    score = int(sent['label'][0])
                    sentiments.append({
                        "section": i + 1,
                        "text": para[:100] + "...",
                        "sentiment": sentiment_labels[score - 1],
                        "score": score
                    })
                result.update({
                    "sentiment_analysis": {
                        "sections": sentiments,
                        "total_sections": len(sentiments)
                    }
                })
            elif mode == "summarize":
                summarizer = _summarizer()
                # Process in chunks: up to three 1024-char windows.
                chunks = [clean_text[i:i + 1024] for i in range(0, min(len(clean_text), 3072), 1024)]
                summaries = []
                for chunk in chunks:
                    if len(chunk) > 100:
                        summaries.append(
                            summarizer(chunk, max_length=100, min_length=30)[0]['summary_text'])
                result.update({
                    "summaries": summaries,
                    "chunks_analyzed": len(summaries)
                })
            elif mode == "topics":
                # Basic topic categorization by keyword counts; no model needed.
                categories = {
                    "Technology": r"tech|software|hardware|digital|computer|AI|data",
                    "Business": r"business|market|finance|economy|industry",
                    "Science": r"science|research|study|discovery",
                    "Health": r"health|medical|medicine|wellness",
                    "General": r"news|world|people|life"
                }
                # FIX: the original matched patterns against clean_text.lower(),
                # so the uppercase "AI" alternative could never match; match
                # case-insensitively against the original text instead.
                topic_scores = {
                    topic: len(re.findall(pattern, clean_text, flags=re.IGNORECASE))
                    for topic, pattern in categories.items()
                }
                result.update({
                    "topic_analysis": {
                        "detected_topics": topic_scores,
                        "primary_topic": max(topic_scores.items(), key=lambda x: x[1])[0]
                    }
                })
            # Unknown modes fall through and return stats only (original behavior).

            return json.dumps(result, indent=2)
        except requests.exceptions.RequestException as e:
            return json.dumps({
                "status": "error",
                "message": f"Failed to fetch content: {str(e)}",
                "type": "request_error"
            })
        except Exception as e:
            return json.dumps({
                "status": "error",
                "message": f"Analysis failed: {str(e)}",
                "type": "general_error"
            })