# News scraping, summarization, sentiment analysis, and Hindi TTS utilities.
import base64
import tempfile
from io import BytesIO
from itertools import combinations
from pathlib import Path
from typing import Dict, List

import requests
from bs4 import BeautifulSoup
from deep_translator import GoogleTranslator
from gtts import gTTS
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer
from transformers import pipeline
# --- Shared model initialization (loaded once at import time) ---

# DistilBERT fine-tuned on SST-2: labels text POSITIVE / NEGATIVE.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

# BART-large (CNN/DailyMail) for abstractive summarization.
summary_pipeline = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
)

# KeyBERT keyphrase extractor, used on the generated summaries.
kw_model = KeyBERT()
def extract_articles(company: str) -> List[Dict]:
    """Scrape up to five recent India TV articles about *company*.

    Each returned dict has keys "Title", "Summary" (BART-generated),
    "URL", and "Topics" (KeyBERT keyphrases of the summary).

    Raises:
        requests.HTTPError: if the topic listing page cannot be fetched.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    url = f"https://www.indiatvnews.com/topic/{company}"
    response = requests.get(url, headers=headers, timeout=15)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    articles = []
    for item in soup.select('li.eventTracking')[:5]:
        link = item.find('a', class_='thumb')
        # Original crashed with TypeError on entries with no thumb anchor
        # (link['href'] on None); skip malformed listing items instead.
        if link is None or not link.get('href'):
            continue
        article_url = link['href']
        try:
            # Original sent this request without headers, timeout, or a
            # status check, and one bad article aborted the whole scrape.
            article_resp = requests.get(article_url, headers=headers, timeout=15)
            article_resp.raise_for_status()
        except requests.RequestException as exc:
            print(f"Skipping {article_url}: {exc}")
            continue
        article_soup = BeautifulSoup(article_resp.content, "html.parser")
        # Leaf <p> tags only — skips wrapper paragraphs containing markup.
        paragraphs = article_soup.select('p:not(:has(*))')
        merged_text = ' '.join(p.get_text(strip=True) for p in paragraphs)
        if not merged_text.strip():
            continue  # nothing to summarize
        # truncation=True keeps BART within its input limit instead of
        # erroring on long article bodies.
        summary = summary_pipeline(
            merged_text, max_length=50, min_length=20, do_sample=False,
            truncation=True,
        )[0]['summary_text']
        keywords = kw_model.extract_keywords(
            summary, keyphrase_ngram_range=(1, 2), stop_words='english'
        )
        articles.append({
            "Title": item.get('title', ''),  # .get: avoid KeyError on odd markup
            "Summary": summary,
            "URL": article_url,
            "Topics": [phrase for phrase, _score in keywords],
        })
    return articles
def analyze_sentiment(text: str) -> Dict:
    """Classify *text* with the shared sentiment pipeline.

    Blank input or the placeholder 'No Summary Available' short-circuits
    to a NEUTRAL result so downstream counting always sees a label.
    """
    if not text or text == 'No Summary Available':
        return {"label": "NEUTRAL", "score": 0.5}
    return sentiment_pipeline(text)[0]
def perform_comparative_analysis(articles):
    """Compare sentiment and topic coverage across *articles*.

    Annotates each article dict in place with a "Sentiment" label and
    returns (sentiment_counts, coverage_differences, topic_overlap).
    Handles an empty list (the original crashed on articles[0]).
    """
    sentiment_counts = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
    if not articles:
        # Well-formed empty result instead of IndexError.
        return sentiment_counts, [], {"Common Topics": [], "Unique Topics": []}

    topic_sets = [set(article['Topics']) for article in articles]
    # Single intersection over all articles — the original recomputed
    # `&=` for every (i, j) pair, intersecting each set repeatedly.
    common_topics = set.intersection(*topic_sets)

    # Symmetric-difference of topics for every unordered article pair.
    coverage_differences = [
        {
            "Comparison": f"{first['Title']} vs {second['Title']}",
            "Unique Topics": list(set(first['Topics']) ^ set(second['Topics'])),
        }
        for first, second in combinations(articles, 2)
    ]

    topic_overlap = {
        "Common Topics": list(common_topics),
        "Unique Topics": [list(topics - common_topics) for topics in topic_sets],
    }

    for article in articles:
        result = analyze_sentiment(article['Summary'])
        article['Sentiment'] = result['label']
        sentiment_counts[result['label']] += 1
    return sentiment_counts, coverage_differences, topic_overlap
def generate_hindi_tts(text: str) -> str:
    """Translate *text* to Hindi and return the speech as base64 audio.

    Returns "" when the input is blank or under 50 characters, when
    translation yields nothing, or when any step raises.

    Note: gTTS emits MP3 data; the original saved it under a ".wav"
    name, mislabeling the payload for anyone inspecting the temp file.
    """
    # Validate before allocating anything (original created a temp dir
    # even for inputs it immediately rejected).
    if not text or len(text.strip()) < 50:
        return ""
    try:
        # Cap the request size before translating, as the original did.
        translated = GoogleTranslator(source='en', target='hi').translate(text[:1000])
        print(f"Translated text: {translated}")
        if not translated:
            return ""
        # TemporaryDirectory guarantees cleanup even on error, replacing
        # the original's manual mkdtemp/glob/unlink/rmdir bookkeeping
        # (which leaked the directory whenever rmdir failed).
        with tempfile.TemporaryDirectory() as temp_dir:
            audio_path = Path(temp_dir) / "hindi_summary.mp3"
            gTTS(translated, lang='hi').save(str(audio_path))
            audio_bytes = audio_path.read_bytes()
        return base64.b64encode(audio_bytes).decode('utf-8')
    except Exception as e:
        # Best-effort feature: log and degrade to "no audio".
        print(f"Audio Generation Error: {str(e)}")
        return ""