sujoy0011 commited on
Commit
b8b6651
·
verified ·
1 Parent(s): cee837c

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +18 -0
  2. api.py +315 -0
  3. main.py +1039 -0
  4. requirements.txt +0 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python base image
FROM python:3.9

# Stream logs straight to stdout/stderr so container logs are not lost in buffers
ENV PYTHONUNBUFFERED=1

# Set the working directory
WORKDIR /app

# Copy the requirements file first and install dependencies so this layer is
# cached independently of source-code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the model and code files
COPY . .

# Expose the port FastAPI will run on
EXPOSE 7860

# Command to run the FastAPI app using Uvicorn
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]
api.py ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import StreamingResponse, JSONResponse
4
+ from pydantic import BaseModel
5
+ import os
6
+ import json
7
+ import time
8
+ import io
9
+ import requests
10
+ from typing import Optional, List, Dict, Any
11
+ import gtts
12
+
13
+ # Import NewsAgent class
14
+ from main import NewsAgent
15
+
16
# FastAPI application instance; this metadata feeds the auto-generated docs
app = FastAPI(
    title="NewsAI API",
    description="A FastAPI backend for a location-specific news agent that provides news based on pincode and preferred language.",
    version="1.0.0"
)

# Add CORS middleware to allow frontend to communicate with the backend
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Replace with specific origins in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the NewsAgent (heavy: constructs the LLM, embeddings and vector
# store at import time, so app startup blocks until it finishes)
news_agent = NewsAgent()

# Session storage for user conversations.
# NOTE(review): in-memory only — sessions are lost on restart and are not
# shared across worker processes; confirm a single-worker deployment.
user_sessions = {}

# Supported languages for translation (ISO code -> human-readable name)
SUPPORTED_LANGUAGES = {
    "en": "English",
    "hi": "Hindi",
    "bn": "Bengali",
    "te": "Telugu",
    "ta": "Tamil",
    "mr": "Marathi",
    "gu": "Gujarati",
    "kn": "Kannada",
    "ml": "Malayalam",
    "pa": "Punjabi",
    "or": "Odia",
    "as": "Assamese",
    "fr": "French",
    "de": "German",
    "es": "Spanish",
    "zh-CN": "Chinese (Simplified)",
    "ja": "Japanese",
    "ko": "Korean",
    "ar": "Arabic",
    "ru": "Russian"
}
60
+
61
+ # Request/response models
62
class ChatRequest(BaseModel):
    """Payload for /api/chat: one user message plus session context."""
    message: str
    session_id: str
    pincode: Optional[str] = None  # optional Indian pincode to set/refresh the session location
    language: str = "en"  # ISO language code for the reply

class PincodeRequest(BaseModel):
    """Payload for /api/pincode lookups."""
    pincode: str

class TextToSpeechRequest(BaseModel):
    """Payload for /api/text-to-speech."""
    text: str
    lang: str = "en"  # gTTS language code

class TranslateRequest(BaseModel):
    """Payload for /api/translate."""
    text: str
    target_language: str

class NewsResponse(BaseModel):
    """Response envelope for news answers.

    NOTE(review): not referenced by any visible route — routes return plain
    dicts; confirm whether this model is still needed.
    """
    response: str
    audio_url: Optional[str] = None
    translated: bool = False
+
84
+ # Helper function to get or create a session
85
def get_session(session_id: str) -> Dict:
    """Return the session for *session_id*, creating a fresh one if absent."""
    fresh_session = {"location": None, "language": "en", "history": []}
    return user_sessions.setdefault(session_id, fresh_session)
90
+
91
+ # Helper function to get location from pincode
92
def get_location_from_pincode(pincode: str) -> Optional[str]:
    """Resolve an Indian pincode to a human-readable location.

    Tries the India Post API first (returns "District, State"); if the API is
    unreachable or reports failure, falls back to a coarse static map keyed on
    the first two digits of the pincode (returns "State" only).

    Returns:
        The resolved location string, or None when the pincode cannot be
        resolved at all.
    """
    # Query the India Post API in its own try-block: failures here (timeouts,
    # bad JSON, network down) must NOT abort the lookup, because the static
    # fallback map below still applies. Previously one try wrapped both paths,
    # so any API exception skipped the fallback entirely.
    try:
        url = f"https://api.postalpincode.in/pincode/{pincode}"
        # TLS verification is enabled; the original passed verify=False, which
        # allowed man-in-the-middle responses to spoof the lookup.
        response = requests.get(url, timeout=5)
        data = response.json()
        if data and data[0]["Status"] == "Success":
            post_office = data[0]["PostOffice"][0]
            district = post_office["District"]
            state = post_office["State"]
            return f"{district}, {state}"
    except Exception as e:
        print(f"Error querying India Post API for pincode {pincode}: {e}")

    # Fallback: map the first two pincode digits to a state.
    pincode_map = {
        "11": "Delhi",
        "12": "Haryana",
        "13": "Haryana",
        "14": "Punjab",
        "15": "Punjab",
        "16": "Punjab",
        "17": "Himachal Pradesh",
        "18": "Jammu & Kashmir",
        "19": "Jammu & Kashmir",
        "20": "Uttar Pradesh",
        "21": "Uttar Pradesh",
        "22": "Uttar Pradesh",
        "23": "Uttar Pradesh",
        "24": "Uttar Pradesh",
        "25": "Uttar Pradesh",
        "26": "Uttar Pradesh",
        "27": "Uttar Pradesh",
        "28": "Uttar Pradesh",
        "30": "Rajasthan",
        "31": "Rajasthan",
        "32": "Rajasthan",
        "33": "Rajasthan",
        "34": "Rajasthan",
        "36": "Gujarat",
        "37": "Gujarat",
        "38": "Gujarat",
        "39": "Gujarat",
        "40": "Maharashtra",
        "41": "Maharashtra",
        "42": "Maharashtra",
        "43": "Maharashtra",
        "44": "Maharashtra",
        "45": "Madhya Pradesh",
        "46": "Madhya Pradesh",
        "47": "Madhya Pradesh",
        "48": "Madhya Pradesh",
        "49": "Chhattisgarh",
        "50": "Andhra Pradesh",
        "51": "Andhra Pradesh",
        "52": "Telangana",
        "53": "Telangana",
        "56": "Karnataka",
        "57": "Karnataka",
        "58": "Karnataka",
        "59": "Karnataka",
        "60": "Tamil Nadu",
        "61": "Tamil Nadu",
        "62": "Tamil Nadu",
        "63": "Tamil Nadu",
        "64": "Tamil Nadu",
        "67": "Kerala",
        "68": "Kerala",
        "69": "Kerala",
        "70": "West Bengal",
        "71": "West Bengal",
        "72": "West Bengal",
        "73": "West Bengal",
        "74": "West Bengal",
        "75": "Odisha",
        "76": "Odisha",
        "77": "Odisha",
        "78": "Assam",
        "79": "North East India",
        "80": "Bihar",
        "81": "Bihar",
        "82": "Bihar",
        "83": "Jharkhand",
        "84": "Jharkhand",
        "85": "Jharkhand"
    }

    # Return None for unknown prefixes. Previously this returned the literal
    # string "Unknown", which callers' `if not location` checks accepted as a
    # real location, turning failed lookups into bogus 200 responses.
    state = pincode_map.get(pincode[:2])
    if state is None:
        print(f"Unknown pincode prefix: {pincode[:2]!r}")
    return state
186
+
187
+ # Routes
188
@app.get("/")
async def root():
    """Liveness probe: confirms the API process is up."""
    payload = {"status": "online", "message": "NewsAI API is running"}
    return payload
192
+
193
@app.get("/api/languages")
async def get_languages():
    """Return the language-code -> name map the API can translate into."""
    supported = SUPPORTED_LANGUAGES
    return {"languages": supported}
197
+
198
@app.post("/api/pincode")
async def lookup_pincode(request: PincodeRequest):
    """Resolve a pincode to a location; 404 when nothing can be found."""
    resolved = get_location_from_pincode(request.pincode)
    if not resolved:
        raise HTTPException(status_code=404, detail="Could not find location for this pincode")
    return {"pincode": request.pincode, "location": resolved}
205
+
206
@app.post("/api/chat")
async def chat(request: ChatRequest):
    """Process a chat message and return a response in the requested language"""
    session = get_session(request.session_id)

    # Remember the caller's language preference for this session
    if request.language:
        session["language"] = request.language

    # Refresh the session location whenever a pincode is supplied
    if request.pincode:
        resolved = get_location_from_pincode(request.pincode)
        if resolved:
            session["location"] = resolved

    # Scope news requests to the stored location when the user did not
    # already mention it themselves
    query = request.message
    location = session["location"]
    lowered = query.lower()
    if location and "news" in lowered and location.lower() not in lowered:
        query = f"{query} in {location}"

    answer = news_agent.process_query(query)
    original_answer = answer

    # Translate when the session language is not English; on failure the
    # untranslated answer is kept
    lang = session["language"]
    if lang != "en":
        try:
            payload = json.dumps({"text": answer, "lang": lang})
            translated = news_agent.translate_text(payload)
            # The tool replies as "Translated text: {text}" — strip the prefix
            if "Translated text: " in translated:
                answer = translated.replace("Translated text: ", "")
            else:
                answer = translated
        except Exception as e:
            print(f"Translation error: {e}")

    # Record the exchange in the session history
    session["history"].append({"role": "user", "content": request.message})
    session["history"].append({"role": "assistant", "content": answer})

    return {
        "response": answer,
        "original_response": original_answer if lang != "en" else None,
        "language": lang,
        "location": session["location"]
    }
258
+
259
@app.post("/api/translate")
async def translate_text(request: TranslateRequest):
    """Translate text to the specified language"""
    try:
        payload = json.dumps({"text": request.text, "lang": request.target_language})
        result = news_agent.translate_text(payload)

        # The tool replies as "Translated text: {text}" — strip the prefix
        if "Translated text: " in result:
            result = result.replace("Translated text: ", "")

        return {"translated_text": result, "language": request.target_language}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
273
+
274
@app.post("/api/text-to-speech")
async def text_to_speech(request: TextToSpeechRequest):
    """Convert text to speech and return audio file"""
    try:
        speech = gtts.gTTS(text=request.text, lang=request.lang, slow=False)

        # Render the MP3 into an in-memory buffer instead of touching disk
        buffer = io.BytesIO()
        speech.write_to_fp(buffer)
        buffer.seek(0)

        headers = {"Content-Disposition": "attachment; filename=speech.mp3"}
        return StreamingResponse(buffer, media_type="audio/mpeg", headers=headers)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating speech: {str(e)}")
294
+
295
@app.get("/api/news/{location}")
async def get_location_news(location: str, count: int = 5, language: str = "en"):
    """Fetch news for a specific location and optionally translate it"""
    try:
        news = news_agent.fetch_city_news(f"{location}, {count}")

        if language != "en":
            payload = json.dumps({"text": news, "lang": language})
            translated = news_agent.translate_text(payload)
            # The tool replies as "Translated text: {text}" — strip the prefix
            if "Translated text: " in translated:
                news = translated.replace("Translated text: ", "")
            else:
                news = translated

        return {"news": news, "language": language}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error fetching news: {str(e)}")
main.py ADDED
@@ -0,0 +1,1039 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import time
4
+ import feedparser
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
+ import re
8
+ from datetime import datetime, timedelta
9
+ import gtts
10
+ from playsound3 import playsound
11
+ from googletrans import Translator
12
+ import urllib.parse
13
+ from deep_translator import GoogleTranslator
14
+ from dotenv import load_dotenv
15
+
16
+
17
+
18
+ # LangChain imports
19
+ from langchain_google_genai import GoogleGenerativeAI
20
+ from langchain_chroma import Chroma
21
+ from langchain_huggingface import HuggingFaceEmbeddings
22
+ from langchain.schema import Document
23
+ from langchain_core.prompts import ChatPromptTemplate
24
+ from langchain.memory import ConversationSummaryBufferMemory
25
+ from langchain.tools import Tool
26
+ from langchain.agents import AgentExecutor, create_react_agent
27
+ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
28
+
29
+ # Load environment variables
30
+ load_dotenv()
31
+
32
+
33
+ class NewsAgent:
34
    def __init__(self):
        """Build the full agent: LLM, embeddings, vector DB, memory, tools.

        Order matters: the LLM must exist before memory is created (the
        summary memory passes ``llm=self.llm``), and the vector store must
        exist before it is tested and pruned.
        """
        print("πŸš€ Initializing News Agent...")
        self.setup_llm()
        self.setup_embeddings()
        self.setup_vector_store()
        self.test_vector_db()  # Test the vector DB
        self.delete_old_news()  # Delete old news on startup
        self.setup_memory()
        self.setup_search_tools()
        self.setup_tools()
        self.setup_agent()
        self.locations = set()  # Track locations we've already fetched
        print("βœ… News Agent initialized and ready!")
47
+
48
+ def setup_llm(self):
49
+ """Initialize the Gemini model."""
50
+ try:
51
+ api_key = os.getenv("GOOGLE_API_KEY")
52
+ if not api_key:
53
+ raise ValueError("GOOGLE_API_KEY environment variable not set")
54
+
55
+ self.llm = GoogleGenerativeAI(
56
+ model="gemini-1.5-flash",
57
+ google_api_key=api_key,
58
+ temperature=0.2,
59
+ top_p=0.8,
60
+ max_output_tokens=2048
61
+ )
62
+ print("βœ… Gemini 1.5 Flash model initialized")
63
+ except Exception as e:
64
+ print(f"❌ Error initializing Gemini model: {e}")
65
+ raise
66
+
67
+ def setup_embeddings(self):
68
+ """Initialize the embedding model."""
69
+ try:
70
+ self.embedding_model = HuggingFaceEmbeddings(
71
+ model_name="sentence-transformers/all-MiniLM-L6-v2"
72
+ )
73
+ print("βœ… Embedding model initialized")
74
+ except Exception as e:
75
+ print(f"❌ Error initializing embedding model: {e}")
76
+ raise
77
+
78
+ def setup_vector_store(self):
79
+ """Initialize ChromaDB vector store."""
80
+ try:
81
+ self.vector_store = Chroma(
82
+ persist_directory="./chroma_db",
83
+ embedding_function=self.embedding_model
84
+ )
85
+ print("βœ… Vector store initialized")
86
+ except Exception as e:
87
+ print(f"❌ Error initializing vector store: {e}")
88
+ raise
89
+
90
+ def test_vector_db(self):
91
+ """Test if the vector database is working properly."""
92
+ try:
93
+ # Check if DB is empty
94
+ db_info = self.vector_store.get()
95
+ print(f"Vector DB contains {len(db_info['ids'])} documents")
96
+
97
+ if len(db_info['ids']) > 0:
98
+ # Try a simple search
99
+ results = self.vector_store.similarity_search("test", k=1)
100
+ print(f"Test search returned {len(results)} results")
101
+ if results:
102
+ print(f"Sample document: {results[0].metadata['title']}")
103
+ return True
104
+ else:
105
+ print("Vector DB is empty")
106
+ return False
107
+ except Exception as e:
108
+ print(f"❌ Error testing vector DB: {e}")
109
+ return False
110
+
111
+ def is_recent_news_available(self, location, max_age_minutes=180):
112
+ """Check if recent news for a location is available in the database."""
113
+ try:
114
+ now = datetime.now()
115
+ # Search for news related to the location
116
+ results = self.vector_store.similarity_search(location, k=10)
117
+
118
+ # Filter results to those within max_age_minutes
119
+ recent_news = []
120
+ for doc in results:
121
+ metadata = doc.metadata
122
+ if metadata.get('location', '').lower() == location.lower():
123
+ timestamp_str = metadata.get('timestamp')
124
+ if timestamp_str:
125
+ try:
126
+ timestamp = datetime.fromisoformat(timestamp_str)
127
+ if now - timestamp <= timedelta(minutes=max_age_minutes):
128
+ recent_news.append(doc)
129
+ except Exception:
130
+ # Ignore parsing errors
131
+ continue
132
+
133
+ print(f"Found {len(recent_news)} recent news items for {location} in database")
134
+ return recent_news
135
+ except Exception as e:
136
+ print(f"❌ Error checking recent news: {e}")
137
+ return []
138
+
139
+ def delete_old_news(self, max_age_minutes=60):
140
+ """Delete news older than the specified age from the database."""
141
+ try:
142
+ now = datetime.now()
143
+ # Get all documents
144
+ all_docs = self.vector_store.get()
145
+ all_ids = all_docs['ids']
146
+ all_metadatas = all_docs['metadatas']
147
+
148
+ # Identify documents older than max_age_minutes
149
+ ids_to_delete = []
150
+ for doc_id, metadata in zip(all_ids, all_metadatas):
151
+ timestamp_str = metadata.get('timestamp') if metadata else None
152
+ if timestamp_str:
153
+ try:
154
+ timestamp = datetime.fromisoformat(timestamp_str)
155
+ if now - timestamp > timedelta(minutes=max_age_minutes):
156
+ ids_to_delete.append(doc_id)
157
+ except Exception:
158
+ # Ignore parsing errors
159
+ continue
160
+
161
+ # Delete old documents
162
+ if ids_to_delete:
163
+ self.vector_store.delete(ids=ids_to_delete)
164
+ print(f"βœ… Deleted {len(ids_to_delete)} old news items from database")
165
+
166
+ return len(ids_to_delete)
167
+ except Exception as e:
168
+ print(f"❌ Error deleting old news: {e}")
169
+ return 0
170
+
171
+ def determine_news_count(self, user_request):
172
+ """Determine how many news articles to fetch based on user request."""
173
+ # Check if user is asking for more news
174
+ more_patterns = ["more news", "additional news", "more articles", "show more", "get more"]
175
+
176
+ if any(pattern in user_request.lower() for pattern in more_patterns):
177
+ # Check if user specified a number
178
+ number_match = re.search(r'(\d+)\s+(more|additional|extra)', user_request.lower())
179
+ if number_match:
180
+ try:
181
+ count = int(number_match.group(1))
182
+ # Cap at a reasonable maximum
183
+ return min(count, 20)
184
+ except ValueError:
185
+ pass
186
+
187
+ return 15 # Return more news if requested without specific number
188
+ else:
189
+ return 5 # Default number of news
190
+
191
+ def setup_memory(self):
192
+ """Initialize conversation memory."""
193
+ try:
194
+ self.memory = ConversationSummaryBufferMemory(
195
+ llm=self.llm,
196
+ max_token_limit=4000, # Increased token limit for better context retention
197
+ return_messages=True,
198
+ memory_key="chat_history",
199
+ input_key="input", # Explicitly define input key
200
+ output_key="output" # Explicitly define output key
201
+ )
202
+ print("βœ… Conversation memory initialized")
203
+ except Exception as e:
204
+ print(f"❌ Error initializing memory: {e}")
205
+ raise
206
+
207
+ def setup_search_tools(self):
208
+ """Set up search tools."""
209
+ try:
210
+ # Setup DuckDuckGo search
211
+ self.ddg_wrapper = DuckDuckGoSearchAPIWrapper(
212
+ time="d", # Search for content from the past day
213
+ max_results=5
214
+ )
215
+
216
+ # Setup DuckDuckGo news search
217
+ self.ddg_news_wrapper = DuckDuckGoSearchAPIWrapper(
218
+ time="d", # Search for content from the past day
219
+ max_results=5
220
+ )
221
+
222
+ print("βœ… Search tools initialized")
223
+ except Exception as e:
224
+ print(f"❌ Error initializing search tools: {e}")
225
+ raise
226
+
227
    def setup_tools(self):
        """Set up tools for the agent.

        Each Tool wraps one bound method. The description strings are what
        the ReAct agent reads when deciding which tool to invoke, so they
        must state the expected input format precisely.
        """
        self.tools = [
            Tool(
                name="FetchNews",
                func=self.fetch_city_news,
                description="Fetches the latest news for a specific city or location. Input should be the name of the city or 'city, number' to specify how many articles to fetch."
            ),
            Tool(
                name="SearchNewsArticle",
                func=self.search_news_article,
                description="Searches for news articles on a specific topic or title and returns summaries. Input should be the topic or title to search for."
            ),
            Tool(
                name="GetMoreInfoOnNews",
                func=self.get_more_info_on_news,
                description="Gets more detailed information about a specific news story. Input should be the news title or topic you want more information about."
            ),
            Tool(
                name="GetArticleContent",
                func=self.get_article_content,
                description="Gets the content of a news article from a URL. Input should be the URL of the article."
            ),
            Tool(
                name="SummarizeText",
                func=self.summarize_text,
                description="Summarizes a text. Input should be the text to summarize."
            ),
            Tool(
                name="TextToSpeech",
                func=self.text_to_speech,
                description="Converts text to speech in a specified language. Input should be a JSON string with 'text' and 'lang' keys."
            ),
            Tool(
                name="TranslateText",
                func=self.translate_text,
                description="Translates text to a specified language. Input should be a JSON string with 'text' and 'lang' keys."
            ),
            Tool(
                name="SearchNewsInDB",
                func=self.search_news_in_db,
                description="Searches for news in the database. Input should be the search query."
            ),
            Tool(
                name="GetRecentNewsFromDB",
                func=self.get_recent_news_from_db,
                description="Gets recent news for a location from the database. Input should be the location name."
            )
        ]
        print("βœ… Agent tools initialized")
277
+
278
    def setup_agent(self):
        """Set up the LangChain agent.

        Builds a ReAct-style prompt (the Thought/Action/Observation loop in
        the system message is required by create_react_agent's output parser)
        and wires it, the tools, and the conversation memory into an
        AgentExecutor.
        """
        # The {tools}, {tool_names}, {chat_history}, {input} and
        # {agent_scratchpad} placeholders are filled in by LangChain at
        # runtime — do not rename them.
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a helpful AI assistant that specializes in providing location-specific news.
You can fetch news, search for articles, get more information on specific news stories, summarize text, translate content, and convert text to speech.
Always try to understand what location the user is asking about and provide relevant news.
If you're not sure about a location, ask for clarification.

IMPORTANT: Maintain conversation context. When the user asks follow-up questions about previously mentioned news articles,
use your memory of the conversation to understand which article they're referring to. If they ask for more details about a
news story you've mentioned, use the GetMoreInfoOnNews tool with the appropriate title.

When providing news:
1. Always ensure you're providing the most recent news (from today if possible)
2. First check if recent news is available in the database before fetching from the web
3. If a user asks for more information about a specific news story, use the GetMoreInfoOnNews tool
4. Always include relevant links when providing detailed information about news
5. Summarize news articles in a concise and informative way
6. If a user asks for more news, provide additional articles (up to 15)
7. Remember which news articles you've already mentioned in the conversation

You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Chat History: {chat_history}
"""),
            ("human", "{input}"),
            ("ai", "{agent_scratchpad}")
        ])

        self.agent = create_react_agent(
            llm=self.llm,
            tools=self.tools,
            prompt=prompt
        )

        self.agent_executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            memory=self.memory,
            verbose=True,
            handle_parsing_errors=True,
            return_intermediate_steps=True  # Return intermediate steps for better debugging
        )
        print("βœ… Agent executor initialized")
335
+
336
+
337
+ def get_recent_news_from_db(self, location):
338
+ """Gets recent news for a location from the database."""
339
+ try:
340
+ recent_news = self.is_recent_news_available(location)
341
+
342
+ if not recent_news:
343
+ return f"No recent news found in database for {location}. Try fetching fresh news."
344
+
345
+ response = f"πŸ“° Recent News from {location} (from database):\n\n"
346
+ for i, doc in enumerate(recent_news, 1):
347
+ metadata = doc.metadata
348
+ response += f"{i}. {metadata.get('title', 'Unknown Title')}\n"
349
+ response += f" Source: {metadata.get('source', 'Unknown Source')}\n"
350
+ response += f" Published: {metadata.get('date', 'Unknown Date')}\n"
351
+ response += f" Link: {metadata.get('link', 'No Link Available')}\n"
352
+
353
+ # Extract summary from content
354
+ content = doc.page_content
355
+ summary_match = re.search(r"SUMMARY: (.*?)(?:CONTENT:|$)", content, re.DOTALL)
356
+ if summary_match:
357
+ summary = summary_match.group(1).strip()
358
+ response += f" Summary: {summary}\n"
359
+
360
+ response += "\n"
361
+
362
+ return response
363
+ except Exception as e:
364
+ print(f"❌ Error getting recent news from DB: {e}")
365
+ return f"Error retrieving recent news for {location} from database."
366
+
367
+ def search_news_article(self, query):
368
+ """Search for news articles on a specific topic using DuckDuckGo News."""
369
+ try:
370
+ print(f"πŸ” Searching for news articles on: {query}")
371
+
372
+ # Parse input for number of results if provided
373
+ parts = query.split(',')
374
+ search_query = parts[0].strip()
375
+ max_results = 5
376
+
377
+ if len(parts) > 1:
378
+ try:
379
+ max_results = int(parts[1].strip())
380
+ max_results = min(max_results, 20) # Cap at 20 results
381
+ except ValueError:
382
+ pass
383
+
384
+ # Use DuckDuckGo search with news-specific query
385
+ search_results = self.ddg_news_wrapper.results(f"{search_query} news", max_results=max_results)
386
+
387
+ if not search_results:
388
+ return f"No news articles found for: {search_query}"
389
+
390
+ # Process search results
391
+ articles = []
392
+ for i, result in enumerate(search_results[:max_results]):
393
+ title = result.get("title", "No title")
394
+ link = result.get("link", "No link")
395
+ snippet = result.get("snippet", "No snippet")
396
+ published_date = result.get("published", datetime.now().strftime("%a, %d %b %Y %H:%M:%S"))
397
+ source = result.get("source", "Unknown source")
398
+
399
+ # Create article object
400
+ article = {
401
+ "title": title,
402
+ "source": source,
403
+ "link": link,
404
+ "published": published_date,
405
+ "snippet": snippet,
406
+ "query": search_query
407
+ }
408
+
409
+ articles.append(article)
410
+
411
+ # Store in vector database for RAG
412
+ self.store_article_in_db(article)
413
+
414
+ # Format response
415
+ response = f"πŸ“° Latest News Articles on '{search_query}':\n\n"
416
+ for i, article in enumerate(articles, 1):
417
+ response += f"{i}. {article['title']}\n"
418
+ response += f" Source: {article['source']}\n"
419
+ response += f" Published: {article['published']}\n"
420
+ response += f" Link: {article['link']}\n"
421
+ response += f" Summary: {article['snippet']}\n\n"
422
+
423
+ return response
424
+
425
+ except Exception as e:
426
+ print(f"❌ Error searching for news articles: {e}")
427
+ return f"Error searching for news articles on '{query}': {str(e)}"
428
+
429
    def get_more_info_on_news(self, news_title):
        """Gets more detailed information about a specific news story.

        Strategy: first look the story up in the local vector DB; if a link
        can be extracted from that result, fetch and summarise the linked
        article. Otherwise fall back to a DuckDuckGo search, summarise the
        top hit, and cache it in the DB for future reference.
        """
        try:
            print(f"πŸ” Getting more information on: {news_title}")

            # First, search for the news in our database
            db_results = self.search_news_in_db(news_title, k=1)

            # If we found something relevant in the database
            if "No relevant news found" not in db_results:
                # Extract the URL from the database results (the DB listing
                # embeds links as "Link: <url>")
                url_match = re.search(r"Link: (https?://[^\s]+)", db_results)
                if url_match:
                    article_url = url_match.group(1)

                    # Get the full content of the article
                    content = self.get_article_content(article_url)

                    # Summarize the content
                    summary = self.summarize_text(content)

                    return f"πŸ“° More Information on '{news_title}':\n\n{summary}\n\nSource: {article_url}"

            # If we didn't find anything in the database or couldn't extract the URL,
            # search for the news using DuckDuckGo
            search_results = self.ddg_wrapper.results(f"{news_title} latest news", max_results=5)

            if not search_results:
                return f"Could not find more information on: {news_title}"

            # Get the first result (assumed most relevant)
            result = search_results[0]
            article_url = result.get("link")

            if not article_url:
                return f"Could not find a relevant article for: {news_title}"

            # Get the content of the article
            content = self.get_article_content(article_url)

            # Summarize the content
            summary = self.summarize_text(content)

            # Store in vector database for future reference
            self.store_article_in_db({
                "title": news_title,
                "link": article_url,
                "content": content,
                "summary": summary,
                "source": result.get("source", "Unknown source"),
                "published": datetime.now().strftime("%a, %d %b %Y")
            })

            return f"πŸ“° More Information on '{news_title}':\n\n{summary}\n\nSource: {article_url}"

        except Exception as e:
            print(f"❌ Error getting more information: {e}")
            return f"Error getting more information on '{news_title}': {str(e)}"
487
+
488
+ def get_article_content(self, url):
489
+ """Extract content from a news article URL."""
490
+ try:
491
+ headers = {
492
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
493
+ }
494
+
495
+ # Check if URL is valid
496
+ if not url.startswith('http'):
497
+ return "Invalid URL. Please provide a URL starting with http:// or https://"
498
+
499
+ # Send request
500
+ response = requests.get(url, headers=headers, timeout=10)
501
+ response.raise_for_status() # Raise exception for 4XX/5XX status codes
502
+
503
+ # Parse HTML
504
+ soup = BeautifulSoup(response.text, 'html.parser')
505
+
506
+ # Remove script, style, and nav elements
507
+ for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
508
+ element.decompose()
509
+
510
+ # Try to find the main content
511
+ main_content = None
512
+
513
+ # Look for article tag
514
+ article = soup.find('article')
515
+ if article:
516
+ main_content = article
517
+
518
+ # Look for main tag if article not found
519
+ if not main_content:
520
+ main_tag = soup.find('main')
521
+ if main_tag:
522
+ main_content = main_tag
523
+
524
+ # Look for div with content-related class names
525
+ if not main_content:
526
+ content_div = soup.find('div', class_=lambda c: c and any(x in c.lower() for x in ['content', 'article', 'story', 'entry', 'post']))
527
+ if content_div:
528
+ main_content = content_div
529
+
530
+ # Extract text from main content or fallback to body
531
+ if main_content:
532
+ paragraphs = main_content.find_all('p')
533
+ else:
534
+ paragraphs = soup.find_all('p')
535
+
536
+ # Join paragraphs
537
+ content = '\n\n'.join([p.get_text().strip() for p in paragraphs if len(p.get_text().strip()) > 40])
538
+
539
+ # If content is too short, try a different approach
540
+ if len(content) < 200:
541
+ # Get all text from body
542
+ body = soup.find('body')
543
+ if body:
544
+ content = body.get_text(separator='\n')
545
+
546
+ # Clean up content
547
+ lines = [line.strip() for line in content.split('\n') if line.strip()]
548
+ content = '\n'.join(lines)
549
+
550
+ # If still no content, return error
551
+ if not content or len(content) < 100:
552
+ return "Could not extract meaningful content from the article."
553
+
554
+ # Truncate if too long
555
+ if len(content) > 8000:
556
+ content = content[:8000] + "...[content truncated]"
557
+
558
+ return content
559
+
560
+ except requests.exceptions.RequestException as e:
561
+ return f"Error fetching article: {str(e)}"
562
+ except Exception as e:
563
+ return f"Error extracting content: {str(e)}"
564
+
565
+ def summarize_text(self, text):
566
+ """Summarize text using the LLM."""
567
+ try:
568
+ if not text or len(text) < 100:
569
+ return "Text is too short to summarize."
570
+
571
+ # Truncate text if it's too long
572
+ if len(text) > 10000:
573
+ text = text[:10000] + "...[content truncated]"
574
+
575
+ prompt = f"""
576
+ Summarize the following news article in a concise way (3-5 sentences), highlighting the key points:
577
+
578
+ {text}
579
+
580
+ Summary:
581
+ """
582
+
583
+ response = self.llm.invoke(prompt)
584
+ return response
585
+ except Exception as e:
586
+ print(f"❌ Error summarizing text: {e}")
587
+ return "Could not generate summary due to an error."
588
+
589
    def fetch_city_news(self, city_input, max_articles=5):
        """Fetch news for a specific city using Google News RSS first, then enhance with search.

        Args:
            city_input: City name, optionally followed by ",<count>" to request
                a specific number of articles (capped at 20).
            max_articles: Default number of articles when no count is supplied.

        Returns:
            A formatted multi-line string of headlines (source, date, link,
            summary), or an error / "no news" message.

        Side effects: stores each fetched article in the vector DB via
        store_article_in_db and records the city in self.locations.
        """
        # Parse input for city and optional count ("city, N")
        parts = city_input.split(',')
        city = parts[0].strip()

        if len(parts) > 1:
            try:
                max_articles = int(parts[1].strip())
                max_articles = min(max_articles, 20)  # Cap at 20 articles
            except ValueError:
                pass  # non-numeric count: keep the default

        print(f"🔍 Fetching {max_articles} news articles for: {city}")

        # Serve from the vector DB when enough recent items are cached there.
        recent_news = self.is_recent_news_available(city)
        if recent_news and len(recent_news) >= max_articles:
            print(f"✅ Found {len(recent_news)} recent news items in database for {city}")
            response = f"📰 Latest News from {city} (from database):\n\n"
            for i, doc in enumerate(recent_news[:max_articles], 1):
                metadata = doc.metadata
                response += f"{i}. {metadata.get('title', 'Unknown Title')}\n"
                response += f" Source: {metadata.get('source', 'Unknown Source')}\n"
                response += f" Published: {metadata.get('date', 'Unknown Date')}\n"
                response += f" Link: {metadata.get('link', 'No Link Available')}\n"

                # Extract the summary from the labelled document layout
                # written by store_article_in_db ("SUMMARY: ... CONTENT: ...").
                content = doc.page_content
                summary_match = re.search(r"SUMMARY: (.*?)(?:CONTENT:|$)", content, re.DOTALL)
                if summary_match:
                    summary = summary_match.group(1).strip()
                    response += f" Summary: {summary}\n"

                response += "\n"

            return response

        # Clean the city name to avoid URL issues
        clean_city = city.strip().replace("\n", "").replace("\r", "")
        encoded_city = urllib.parse.quote(clean_city)

        try:
            # First get news from Google News RSS (restricted to the last day)
            rss_url = f"https://news.google.com/rss/search?q={encoded_city}+when:1d&hl=en-US&gl=US&ceid=US:en"
            feed = feedparser.parse(rss_url)

            if not feed.entries:
                return f"No news found for {city}"

            # Process articles from RSS feed
            articles = []
            for entry in feed.entries[:max_articles]:
                # Google News titles typically look like "Headline - Source"
                title_parts = entry.title.split(" - ")
                title = title_parts[0].strip() if len(title_parts) > 1 else entry.title.strip()
                source = title_parts[-1].strip() if len(title_parts) > 1 else "Unknown"

                # Get the article link
                google_news_link = entry.link

                # Extract publication date
                published_date = entry.get("published", datetime.now().strftime("%a, %d %b %Y"))

                print(f"📰 Found news: {title}")
                print(f"🔍 Searching for more details about: {title}")

                # Now search for more details about this specific news
                try:
                    search_results = self.ddg_wrapper.results(f"{title} {city} news", max_results=3)

                    if search_results:
                        # Get the first result
                        result = search_results[0]
                        article_url = result.get("link")

                        # Get the content of the article
                        content = self.get_article_content(article_url)

                        # Summarize the content
                        summary = self.summarize_text(content)
                    else:
                        article_url = google_news_link
                        content = ""
                        summary = "No additional details available."
                except Exception as e:
                    # Search/scrape failed: fall back to the bare RSS entry.
                    print(f"❌ Error getting more details: {e}")
                    article_url = google_news_link
                    content = ""
                    summary = "Could not retrieve additional details due to an error."

                # Create article object
                article = {
                    "title": title,
                    "source": source,
                    "link": article_url,
                    "published": published_date,
                    "location": city,
                    "summary": summary,
                    # NOTE(review): content is bound on every path above, so the
                    # locals() guard appears redundant — kept byte-identical.
                    "content": content if 'content' in locals() else ""
                }

                articles.append(article)

                # Store in vector database for RAG
                self.store_article_in_db(article)

            # Add location to tracked locations
            self.locations.add(city.lower())

            # Format response
            response = f"📰 Latest News from {city}:\n\n"
            for i, article in enumerate(articles, 1):
                response += f"{i}. {article['title']}\n"
                response += f" Source: {article['source']}\n"
                response += f" Published: {article['published']}\n"
                response += f" Link: {article['link']}\n"
                response += f" Summary: {article['summary']}\n\n"

            return response

        except Exception as e:
            print(f"❌ Error fetching news: {e}")
            return f"Error fetching news for {city}: {str(e)}"
713
+
714
+ def store_article_in_db(self, article):
715
+ """Store an article in the vector database."""
716
+ try:
717
+ # Create document text
718
+ doc_text = f"""
719
+ TITLE: {article.get('title', 'Unknown Title')}
720
+ SOURCE: {article.get('source', 'Unknown Source')}
721
+ PUBLISHED: {article.get('published', datetime.now().strftime('%a, %d %b %Y'))}
722
+ LOCATION: {article.get('location', 'Unknown Location')}
723
+ LINK: {article.get('link', 'No Link Available')}
724
+ SUMMARY: {article.get('summary', article.get('snippet', 'No Summary Available'))}
725
+ CONTENT: {article.get('content', 'No Content Available')}
726
+ """
727
+
728
+ # Add metadata
729
+ metadata = {
730
+ "title": article.get('title', 'Unknown Title'),
731
+ "source": article.get('source', 'Unknown Source'),
732
+ "location": article.get('location', 'Unknown Location'),
733
+ "date": article.get('published', datetime.now().strftime('%a, %d %b %Y')),
734
+ "link": article.get('link', 'No Link Available'),
735
+ "type": "news",
736
+ "timestamp": datetime.now().isoformat() # Add timestamp for recency filtering
737
+ }
738
+
739
+ # Create document
740
+ document = Document(page_content=doc_text, metadata=metadata)
741
+
742
+ # Add to vector store - this automatically persists the data
743
+ self.vector_store.add_documents([document])
744
+
745
+ # Verify storage
746
+ print(f"βœ… Stored article in vector database: {article.get('title', 'Unknown Title')}")
747
+ try:
748
+ db_info = self.vector_store.get()
749
+ print(f" Current DB size: {len(db_info['ids'])} documents")
750
+ except:
751
+ print(" Could not verify DB size")
752
+
753
+ return True
754
+ except Exception as e:
755
+ print(f"❌ Error storing article: {e}")
756
+ print(f"Article data: {article}")
757
+ return False
758
+
759
+ def text_to_speech(self, input_json):
760
+ """Convert text to speech in the specified language."""
761
+ try:
762
+ # Parse input JSON
763
+ try:
764
+ data = json.loads(input_json)
765
+ text = data.get("text", "")
766
+ lang = data.get("lang", "en")
767
+ except json.JSONDecodeError:
768
+ # If not valid JSON, assume it's just text
769
+ text = input_json
770
+ lang = "en"
771
+
772
+ if not text:
773
+ return "No text provided for speech conversion."
774
+
775
+ # Get supported languages
776
+ supported_languages = gtts.lang.tts_langs()
777
+
778
+ if lang not in supported_languages:
779
+ return f"Language '{lang}' is not supported for text-to-speech."
780
+
781
+ # Generate speech
782
+ output_file = f"speech_{int(time.time())}.mp3"
783
+ tts = gtts.gTTS(text=text, lang=lang, slow=False)
784
+ tts.save(output_file)
785
+ playsound(output_file)
786
+
787
+ return f"Successfully converted text to speech in {supported_languages[lang]}."
788
+ except Exception as e:
789
+ print(f"❌ Error in text-to-speech: {e}")
790
+ return f"Error in text-to-speech: {str(e)}"
791
+
792
+
793
+ def translate_text(self, input_json):
794
+ """Translate text to the specified language."""
795
+ try:
796
+ # Parse input JSON
797
+ try:
798
+ data = json.loads(input_json)
799
+ text = data.get("text", "")
800
+ lang = data.get("lang", "en")
801
+ except json.JSONDecodeError:
802
+ # If not valid JSON, assume format is "text|lang"
803
+ parts = input_json.split("|")
804
+ text = parts[0]
805
+ lang = parts[1] if len(parts) > 1 else "en"
806
+
807
+ if not text:
808
+ return "No text provided for translation."
809
+
810
+ # Translate text using deep-translator
811
+ translator = GoogleTranslator(source='auto', target=lang)
812
+ translated_text = translator.translate(text)
813
+
814
+ return f"Translated text: {translated_text}"
815
+ except Exception as e:
816
+ print(f"❌ Error in translation: {e}")
817
+ return f"Error in translation: {str(e)}"
818
+
819
+
820
+
821
+
822
+ def search_news_in_db(self, query, k=3):
823
+ """Search for news in the vector database with recency filtering."""
824
+ try:
825
+ # Get current date
826
+ current_date = datetime.now()
827
+
828
+ # First, perform the similarity search
829
+ results = self.vector_store.similarity_search(query, k=k*2) # Get more results than needed for filtering
830
+
831
+ if not results:
832
+ return "No relevant news found in the database."
833
+
834
+ # Filter for recent news (prioritize news from the last 24 hours)
835
+ recent_results = []
836
+ older_results = []
837
+
838
+ for doc in results:
839
+ metadata = doc.metadata
840
+ timestamp_str = metadata.get("timestamp")
841
+
842
+ if timestamp_str:
843
+ try:
844
+ timestamp = datetime.fromisoformat(timestamp_str)
845
+ # If news is from the last 24 hours
846
+ if current_date - timestamp <= timedelta(days=1):
847
+ recent_results.append(doc)
848
+ else:
849
+ older_results.append(doc)
850
+ except (ValueError, TypeError):
851
+ older_results.append(doc)
852
+ else:
853
+ older_results.append(doc)
854
+
855
+ # Combine recent and older results, prioritizing recent ones
856
+ filtered_results = recent_results + older_results
857
+
858
+ # Limit to the requested number of results
859
+ filtered_results = filtered_results[:k]
860
+
861
+ if not filtered_results:
862
+ return "No relevant news found in the database."
863
+
864
+ response = "πŸ“° Related News from Database:\n\n"
865
+ for i, doc in enumerate(filtered_results, 1):
866
+ metadata = doc.metadata
867
+ response += f"{i}. {metadata.get('title', 'Unknown Title')}\n"
868
+ response += f" Source: {metadata.get('source', 'Unknown Source')}\n"
869
+ response += f" Location: {metadata.get('location', 'Unknown Location')}\n"
870
+ response += f" Published: {metadata.get('date', 'Unknown Date')}\n"
871
+ response += f" Link: {metadata.get('link', 'No Link Available')}\n\n"
872
+
873
+ return response
874
+ except Exception as e:
875
+ print(f"❌ Error searching news in DB: {e}")
876
+ return "Error searching the news database."
877
+
878
+ def extract_locations(self, query):
879
+ """Extract potential location names from the query."""
880
+ try:
881
+ prompt = f"""
882
+ Extract any city or country names from this text. Return ONLY the names separated by commas, or 'None' if no locations are found:
883
+
884
+ Text: {query}
885
+ """
886
+
887
+ response = self.llm.invoke(prompt)
888
+ locations = [loc.strip() for loc in response.strip().split(',') if loc.strip().lower() != 'none']
889
+ return locations
890
+ except Exception:
891
+ # Fallback to simple keyword extraction
892
+ common_cities = ["new york", "london", "tokyo", "paris", "delhi", "mumbai", "kolkata", "bangalore", "bhubaneswar"]
893
+ found = []
894
+ for city in common_cities:
895
+ if city.lower() in query.lower():
896
+ found.append(city)
897
+ return found
898
+
899
    def process_query(self, query):
        """Process a user query through the agent.

        Pipeline: purge stale news, infer how many articles the user wants,
        detect locations in the query, pre-fetch web news for locations that
        are new or lack recent coverage, then hand the (possibly augmented)
        query plus chat history to the agent executor.

        Returns the agent's textual answer, or an apology string on error.
        """
        # Clean up old news first
        self.delete_old_news()

        # Get conversation history to provide context
        chat_history = self.get_conversation_context()

        # Determine how many news to fetch
        news_count = self.determine_news_count(query)

        # Check if query contains a location
        potential_locations = self.extract_locations(query)

        # Check if user is asking for more details about a specific news
        is_asking_for_details = any(pattern in query.lower() for pattern in
                                    ["more details", "tell me more about", "more information on",
                                     "details on", "what about", "tell me about"])

        # If asking for details about specific news, try to extract the news title from context
        if is_asking_for_details and not any(word in query.lower() for word in ["news", "article"]):
            # Try to extract news title from the query or recent conversation
            news_title = self.extract_news_title_from_context(query, chat_history)
            if news_title:
                print(f"📝 Extracted news title from context: {news_title}")
                # Append the extracted title to the query for clarity
                query = f"{query} about '{news_title}'"

        # For location-based queries
        for location in potential_locations:
            # Check if we have recent news in the database
            recent_news = self.is_recent_news_available(location)

            # If user wants more news or we don't have recent news, fetch from web
            if not recent_news or "more" in query.lower():
                if location.lower() not in [loc.lower() for loc in self.locations]:
                    print(f"🔄 Detected new location: {location}. Fetching news...")
                    self.fetch_city_news(f"{location}, {news_count}")

        # Process through the agent with enhanced context
        try:
            # Re-read history so it is fresh at invocation time.
            chat_history = self.get_conversation_context()
            response = self.agent_executor.invoke({
                "input": query,
                "chat_history": chat_history  # This will be included in the system message
            })
            return response["output"]
        except Exception as e:
            print(f"❌ Error processing query: {e}")
            return "I'm sorry, I encountered an error while processing your question. Please try again."
949
+
950
+ def get_conversation_context(self):
951
+ """Get formatted conversation history for context."""
952
+ try:
953
+ # Get messages from memory
954
+ messages = self.memory.chat_memory.messages
955
+
956
+ if not messages:
957
+ return []
958
+
959
+ return messages
960
+ except Exception as e:
961
+ print(f"❌ Error retrieving conversation context: {e}")
962
+ return []
963
+
964
+ def extract_news_title_from_context(self, query, chat_history):
965
+ """Extract relevant news title from conversation context or query."""
966
+ try:
967
+ # First, check if there are any news titles in the recent AI messages
968
+ recent_ai_messages = [msg.content for msg in chat_history[-4:] if hasattr(msg, 'type') and msg.type == 'ai']
969
+
970
+ # Combine recent AI messages
971
+ context_text = " ".join(recent_ai_messages)
972
+
973
+ # Look for news titles in the format typically used in our responses
974
+ title_matches = re.findall(r'\d+\.\s+(.*?)\n', context_text)
975
+
976
+ if title_matches:
977
+ # Use the LLM to determine which title is most relevant to the query
978
+ titles_text = "\n".join([f"{i+1}. {title}" for i, title in enumerate(title_matches)])
979
+
980
+ prompt = f"""
981
+ Given the user query and the list of recently mentioned news titles, which title is the user most likely referring to?
982
+ Return ONLY the title, or "None" if none seem relevant.
983
+
984
+ User query: {query}
985
+
986
+ Recently mentioned titles:
987
+ {titles_text}
988
+ """
989
+
990
+ response = self.llm.invoke(prompt).strip()
991
+
992
+ if response and response.lower() != "none":
993
+ return response
994
+
995
+ # If we couldn't find a title from context, try to extract it from the query
996
+ # This is a fallback for explicit mentions
997
+ query_words = query.lower().split()
998
+ for i, word in enumerate(query_words):
999
+ if word in ["about", "regarding", "concerning", "on"]:
1000
+ if i+1 < len(query_words):
1001
+ potential_title = " ".join(query_words[i+1:])
1002
+ # Remove quotes if present
1003
+ potential_title = potential_title.strip('"\'')
1004
+ if len(potential_title) > 3: # Minimum length check
1005
+ return potential_title
1006
+
1007
+ return None
1008
+ except Exception as e:
1009
+ print(f"❌ Error extracting news title from context: {e}")
1010
+ return None
1011
+
1012
+
1013
+
1014
def main():
    """Interactive CLI loop for the location-specific news agent."""
    banner = "=" * 50
    print(banner)
    print("🌍 Location-Specific News Agent")
    print(banner)
    print("Initializing system...")

    agent = NewsAgent()

    print("\nChat with the news agent! Type 'exit' to quit.")
    print("Example: 'What's happening in Delhi today?'")

    # Read-eval-print loop; empty input is ignored, exit words break out.
    while True:
        user_input = input("\nYou: ").strip()

        if user_input.lower() in ['exit', 'quit', 'bye']:
            print("Thank you for using the news agent. Goodbye!")
            break

        if not user_input:
            continue

        print(f"\nAI: {agent.process_query(user_input)}")


if __name__ == "__main__":
    main()
requirements.txt ADDED
Binary file (6.89 kB). View file