swap1411 committed on
Commit
ce04e85
·
verified ·
1 Parent(s): 3e24682

Upload 2 files

Browse files
Files changed (2) hide show
  1. api.py +24 -0
  2. utils.py +139 -0
api.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, HTTPException
from utils import generate_report
from fastapi.middleware.cors import CORSMiddleware

# ASGI application instance (served by uvicorn, see the __main__ guard below).
app = FastAPI()

# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# contradictory under the CORS spec (a wildcard origin may not be used with
# credentialed requests) and is wide open for production — confirm whether
# the origin list should be restricted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
14
+
15
@app.get("/report")
async def get_report(company: str):
    """Return the generated news report (and TTS audio filename) for *company*.

    Responds with 404 when the report generator signals an error
    (e.g. no articles were found for the company).
    """
    body, audio_path = generate_report(company)
    if "error" in body:
        raise HTTPException(status_code=404, detail=body["error"])
    return {"report": body, "audio": audio_path}
21
+
22
if __name__ == "__main__":
    # Local development entry point; in other deployments the app would be
    # launched via an external ASGI server (e.g. `uvicorn api:app`).
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8000)
utils.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
from gtts import gTTS
from dotenv import load_dotenv
import torch
from collections import defaultdict
import spacy

# Load variables from a local .env file into the process environment
# (must run before NEWS_API_KEY is read below).
load_dotenv()
nlp = spacy.load("en_core_web_sm")  # For topic extraction

# Initialize environment variables
# NOTE(review): this is None when the variable is unset, which makes
# fetch_news() send apiKey=None and fail — consider failing fast here.
NEWS_API_KEY = os.environ.get('NEWS_API_KEY')
16
+
17
def fetch_news(company):
    """Fetch news articles about *company* from NewsAPI.

    Returns the list of article dicts from the API response, or an empty
    list on any HTTP/network failure (callers treat [] as "no articles").
    """
    url = "https://newsapi.org/v2/everything"
    # Pass the query via `params` so requests URL-encodes it — the previous
    # f-string interpolation broke on company names containing spaces, '&',
    # '#', etc., and leaked them raw into the query string.
    params = {"q": company, "apiKey": NEWS_API_KEY}
    try:
        # timeout added: the original call could hang indefinitely
        # (scrape_article already uses timeout=10; keep them consistent).
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []
    return response.json().get('articles', [])
25
+
26
def scrape_article(url):
    """Scrape an article's title and paragraph text using BeautifulSoup.

    Returns (title, content); returns (None, None) on any fetch/parse
    failure — this is deliberately best-effort, errors are printed, not
    raised, so one bad article does not abort report generation.
    """
    try:
        response = requests.get(url, timeout=10)
        # Skip HTTP error pages (403/404/5xx): previously their boilerplate
        # <p> text was scraped as if it were the article body. The raised
        # HTTPError is caught by the handler below.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        title = soup.title.text.strip() if soup.title else "No Title"
        paragraphs = soup.find_all('p')
        content = ' '.join(p.text.strip() for p in paragraphs if p.text.strip())
        return title, content
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return None, None
38
+
39
+ # Load models
40
# Load models
# Sentiment classifier; downstream code compares its labels against
# "POSITIVE"/"NEGATIVE" (see generate_report), so "Neutral" is presumably
# never produced by this SST-2 model — TODO confirm.
sentiment_analyzer = pipeline("sentiment-analysis",
                              model="distilbert-base-uncased-finetuned-sst-2-english",
                              framework="pt")

# Abstractive summarizer (PyTorch backend; torch is imported above).
summarizer = pipeline("summarization",
                      model="facebook/bart-large-cnn",
                      framework="pt")
47
+
48
def analyze_sentiment(text):
    """Classify the sentiment of *text* and return the model's label.

    Only the first 512 characters are fed to the model (note: this is a
    character cut, not a token cut).
    """
    snippet = text[:512]
    prediction = sentiment_analyzer(snippet)
    return prediction[0]['label']
53
+
54
def generate_report(company):
    """Build a structured news report for *company* from up to 10 unique articles.

    Pipeline: fetch articles from NewsAPI, dedupe by URL (max 10), scrape
    each page, summarize, classify sentiment, extract named-entity topics,
    then compute a comparative analysis and a Hindi TTS audio summary.

    Returns:
        (report_dict, audio_filename) on success;
        ({"error": ...}, None) when no articles could be fetched.
    """
    articles = fetch_news(company)
    if not articles:
        return {"error": "No articles found"}, None

    report = {
        "Company": company,
        "Articles": [],
        "Comparative Analysis": {
            # NOTE(review): the sentiment model appears to emit only
            # POSITIVE/NEGATIVE, so "Neutral" likely stays 0 — confirm.
            "Sentiment Distribution": {"Positive": 0, "Negative": 0, "Neutral": 0},
            "Coverage Differences": [],
            "Topic Overlap": {}
        }
    }

    unique_articles = []
    seen_urls = set()
    all_topics = []

    # Collect up to 10 unique articles, deduplicated by URL.
    for article in articles:
        url = article.get('url')
        if url and url not in seen_urls:
            seen_urls.add(url)
            unique_articles.append(article)
            if len(unique_articles) >= 10:
                break

    # Process articles; pages that yield no content are skipped silently.
    for article in unique_articles:
        url = article.get('url')
        title, content = scrape_article(url)
        if not content:
            continue

        # Summarize; fall back to a plain truncation when the model rejects
        # the input (e.g. content exceeding the model's max length).
        try:
            summary = summarizer(content, max_length=130, min_length=30)[0]['summary_text']
        # Narrowed from a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt.
        except Exception:
            summary = content[:100] + '...' if len(content) > 100 else content

        # Tally sentiment for the comparative analysis.
        sentiment = analyze_sentiment(content)
        sentiment_key = "Positive" if sentiment == "POSITIVE" else "Negative" if sentiment == "NEGATIVE" else "Neutral"
        report["Comparative Analysis"]["Sentiment Distribution"][sentiment_key] += 1

        # Extract up to three org/product/law entities as the article's topics.
        doc = nlp(content)
        topics = [ent.text for ent in doc.ents if ent.label_ in ("ORG", "PRODUCT", "LAW")][:3]
        all_topics.extend(topics)

        report["Articles"].append({
            "Title": title,
            "Summary": summary,
            "Sentiment": sentiment_key,
            "Topics": topics
        })

    # Comparative Analysis: topics appearing in more than one article are "common".
    topic_counts = defaultdict(int)
    for topic in all_topics:
        topic_counts[topic] += 1

    common_topics = [topic for topic, count in topic_counts.items() if count > 1]
    unique_topics = list(set(all_topics))

    # Coverage difference entry (only meaningful with at least two articles).
    if len(report["Articles"]) >= 2:
        report["Comparative Analysis"]["Coverage Differences"].append({
            "Comparison": f"{report['Articles'][0]['Title']} vs {report['Articles'][1]['Title']}",
            "Impact": "Different aspects of the company covered"
        })

    report["Comparative Analysis"]["Topic Overlap"] = {
        "Common Topics": common_topics,
        "Unique Topics": unique_topics
    }

    # Generate a Hindi spoken summary of the sentiment counts with gTTS.
    # NOTE(review): fixed filename means concurrent requests overwrite each
    # other's audio — consider a per-request name.
    tts_text = f"{company} के लिए समाचार सारांश। सकारात्मक लेख: {report['Comparative Analysis']['Sentiment Distribution']['Positive']}, नकारात्मक: {report['Comparative Analysis']['Sentiment Distribution']['Negative']}, तटस्थ: {report['Comparative Analysis']['Sentiment Distribution']['Neutral']}."
    tts = gTTS(tts_text, lang='hi')
    tts_file = "summary_hi.mp3"
    tts.save(tts_file)

    return report, tts_file