Spaces:
Paused
Paused
Upload 4 files
Browse files- Dockerfile +18 -0
- api.py +315 -0
- main.py +1039 -0
- requirements.txt +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use the official Python base image
|
| 2 |
+
FROM python:3.9
|
| 3 |
+
|
| 4 |
+
# Set the working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Copy the requirements file and install dependencies
|
| 8 |
+
COPY requirements.txt .
|
| 9 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 10 |
+
|
| 11 |
+
# Copy the model and code files
|
| 12 |
+
COPY . .
|
| 13 |
+
|
| 14 |
+
# Expose the port FastAPI will run on
|
| 15 |
+
EXPOSE 7860
|
| 16 |
+
|
| 17 |
+
# Command to run the FastAPI app using Uvicorn
|
| 18 |
+
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]
|
api.py
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from fastapi.responses import StreamingResponse, JSONResponse
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
import os
|
| 6 |
+
import json
|
| 7 |
+
import time
|
| 8 |
+
import io
|
| 9 |
+
import requests
|
| 10 |
+
from typing import Optional, List, Dict, Any
|
| 11 |
+
import gtts
|
| 12 |
+
|
| 13 |
+
# Import NewsAgent class
|
| 14 |
+
from main import NewsAgent
|
| 15 |
+
|
| 16 |
+
# FastAPI application object exposing the NewsAgent over HTTP.
app = FastAPI(
    title="NewsAI API",
    description="A FastAPI backend for a location-specific news agent that provides news based on pincode and preferred language.",
    version="1.0.0"
)

# Add CORS middleware to allow frontend to communicate with the backend.
# NOTE(review): browsers reject credentialed requests when the origin is the
# wildcard "*" — allow_origins must list concrete origins before
# allow_credentials=True is useful in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Replace with specific origins in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the NewsAgent (loads LLM, embeddings and the vector store at
# import time — startup is slow by design).
news_agent = NewsAgent()

# Session storage for user conversations.
# In-memory only: state is lost on restart and not shared between workers.
user_sessions = {}
|
| 36 |
+
|
| 37 |
+
# Languages the /api/translate and /api/chat endpoints can target,
# keyed by ISO code (gTTS/Google Translate code for Chinese).
SUPPORTED_LANGUAGES = {
    # Indian languages
    "en": "English",
    "hi": "Hindi",
    "bn": "Bengali",
    "te": "Telugu",
    "ta": "Tamil",
    "mr": "Marathi",
    "gu": "Gujarati",
    "kn": "Kannada",
    "ml": "Malayalam",
    "pa": "Punjabi",
    "or": "Odia",
    "as": "Assamese",
    # International languages
    "fr": "French",
    "de": "German",
    "es": "Spanish",
    "zh-CN": "Chinese (Simplified)",
    "ja": "Japanese",
    "ko": "Korean",
    "ar": "Arabic",
    "ru": "Russian",
}
|
| 60 |
+
|
| 61 |
+
# Request/response models
class ChatRequest(BaseModel):
    """Payload for POST /api/chat."""
    message: str
    session_id: str
    pincode: Optional[str] = None
    language: str = "en"

class PincodeRequest(BaseModel):
    """Payload for POST /api/pincode."""
    pincode: str

class TextToSpeechRequest(BaseModel):
    """Payload for POST /api/text-to-speech."""
    text: str
    lang: str = "en"

class TranslateRequest(BaseModel):
    """Payload for POST /api/translate."""
    text: str
    target_language: str

class NewsResponse(BaseModel):
    """Response shape pairing a text reply with an optional audio URL."""
    response: str
    audio_url: Optional[str] = None
    translated: bool = False
|
| 83 |
+
|
| 84 |
+
# Helper function to get or create a session
def get_session(session_id: str) -> Dict:
    """Get or create a user session by ID."""
    session = user_sessions.get(session_id)
    if session is None:
        # First contact from this session id: start with defaults.
        session = {"location": None, "language": "en", "history": []}
        user_sessions[session_id] = session
    return session
|
| 90 |
+
|
| 91 |
+
# Helper function to get location from pincode
def get_location_from_pincode(pincode: str) -> Optional[str]:
    """Get location (city/state) from an Indian pincode.

    Tries the India Post API first ("District, State"); if the call fails or
    returns no match, falls back to a coarse state lookup on the first two
    digits. Returns None when the pincode cannot be resolved.
    """
    try:
        # First try India Post API.
        url = f"https://api.postalpincode.in/pincode/{pincode}"
        # Fix: the original passed verify=False, disabling TLS certificate
        # verification and exposing the lookup to man-in-the-middle attacks.
        response = requests.get(url, timeout=5)
        data = response.json()

        if data and data[0]["Status"] == "Success":
            post_office = data[0]["PostOffice"][0]
            district = post_office["District"]
            state = post_office["State"]
            return f"{district}, {state}"
    except Exception as e:
        # Network/parsing problems are non-fatal: fall through to the
        # offline prefix map below (the old code returned None here, so a
        # transient outage lost the fallback entirely).
        print(f"Error getting location from pincode: {e}")

    # Fallback: first two pincode digits -> state/region.
    pincode_map = {
        "11": "Delhi",
        "12": "Haryana",
        "13": "Haryana",
        "14": "Punjab",
        "15": "Punjab",
        "16": "Punjab",
        "17": "Himachal Pradesh",
        "18": "Jammu & Kashmir",
        "19": "Jammu & Kashmir",
        "20": "Uttar Pradesh",
        "21": "Uttar Pradesh",
        "22": "Uttar Pradesh",
        "23": "Uttar Pradesh",
        "24": "Uttar Pradesh",
        "25": "Uttar Pradesh",
        "26": "Uttar Pradesh",
        "27": "Uttar Pradesh",
        "28": "Uttar Pradesh",
        "30": "Rajasthan",
        "31": "Rajasthan",
        "32": "Rajasthan",
        "33": "Rajasthan",
        "34": "Rajasthan",
        "36": "Gujarat",
        "37": "Gujarat",
        "38": "Gujarat",
        "39": "Gujarat",
        "40": "Maharashtra",
        "41": "Maharashtra",
        "42": "Maharashtra",
        "43": "Maharashtra",
        "44": "Maharashtra",
        "45": "Madhya Pradesh",
        "46": "Madhya Pradesh",
        "47": "Madhya Pradesh",
        "48": "Madhya Pradesh",
        "49": "Chhattisgarh",
        "50": "Andhra Pradesh",
        "51": "Andhra Pradesh",
        "52": "Telangana",
        "53": "Telangana",
        "56": "Karnataka",
        "57": "Karnataka",
        "58": "Karnataka",
        "59": "Karnataka",
        "60": "Tamil Nadu",
        "61": "Tamil Nadu",
        "62": "Tamil Nadu",
        "63": "Tamil Nadu",
        "64": "Tamil Nadu",
        "67": "Kerala",
        "68": "Kerala",
        "69": "Kerala",
        "70": "West Bengal",
        "71": "West Bengal",
        "72": "West Bengal",
        "73": "West Bengal",
        "74": "West Bengal",
        "75": "Odisha",
        "76": "Odisha",
        "77": "Odisha",
        "78": "Assam",
        "79": "North East India",
        "80": "Bihar",
        "81": "Bihar",
        "82": "Bihar",
        "83": "Jharkhand",
        "84": "Jharkhand",
        "85": "Jharkhand"
    }

    # Fix: return None for unknown prefixes. The old code returned the
    # truthy string "Unknown", which made lookup_pincode's 404 branch
    # unreachable.
    return pincode_map.get(pincode[:2])
|
| 186 |
+
|
| 187 |
+
# Routes
@app.get("/")
async def root():
    """Health check endpoint"""
    # Static payload: reachable == healthy.
    status_payload = {"status": "online", "message": "NewsAI API is running"}
    return status_payload
|
| 192 |
+
|
| 193 |
+
@app.get("/api/languages")
async def get_languages():
    """Get list of supported languages"""
    # Expose the module-level code -> name mapping verbatim.
    return {"languages": SUPPORTED_LANGUAGES}
|
| 197 |
+
|
| 198 |
+
@app.post("/api/pincode")
async def lookup_pincode(request: PincodeRequest):
    """Look up location from pincode"""
    resolved = get_location_from_pincode(request.pincode)
    if resolved:
        return {"pincode": request.pincode, "location": resolved}
    # Unresolvable pincode -> 404 for the client.
    raise HTTPException(status_code=404, detail="Could not find location for this pincode")
|
| 205 |
+
|
| 206 |
+
@app.post("/api/chat")
async def chat(request: ChatRequest):
    """Process a chat message and return a response in the requested language.

    Side effects: updates the session's language/location preferences and
    appends the exchange to the session history.
    """
    session = get_session(request.session_id)

    # Update language preference
    if request.language:
        session["language"] = request.language

    # Update location if pincode provided
    if request.pincode:
        location = get_location_from_pincode(request.pincode)
        if location:
            session["location"] = location

    # Process the query
    query = request.message

    # If location is set, include it in the query for location-specific news
    if session["location"] and "news" in query.lower() and session["location"].lower() not in query.lower():
        query = f"{query} in {session['location']}"

    # Process the query
    response = news_agent.process_query(query)

    # Translate response if needed
    original_response = response
    if session["language"] != "en":
        try:
            # Use the translate_text method from NewsAgent
            translation_input = json.dumps({"text": response, "lang": session["language"]})
            translated_response = news_agent.translate_text(translation_input)

            # Fix: strip only the leading "Translated text: " marker.
            # The old str.replace() removed *every* occurrence, corrupting
            # body text that legitimately contained the marker.
            prefix = "Translated text: "
            if translated_response.startswith(prefix):
                response = translated_response[len(prefix):]
            else:
                response = translated_response
        except Exception as e:
            print(f"Translation error: {e}")
            # Keep original response if translation fails

    # Store in session history
    session["history"].append({"role": "user", "content": request.message})
    session["history"].append({"role": "assistant", "content": response})

    return {
        "response": response,
        "original_response": original_response if session["language"] != "en" else None,
        "language": session["language"],
        "location": session["location"]
    }
|
| 258 |
+
|
| 259 |
+
@app.post("/api/translate")
async def translate_text(request: TranslateRequest):
    """Translate text to the specified language.

    Raises HTTP 500 when the underlying NewsAgent translation fails.
    """
    try:
        translation_input = json.dumps({"text": request.text, "lang": request.target_language})
        translated_text = news_agent.translate_text(translation_input)

        # Fix: strip only the leading "Translated text: " marker; the old
        # str.replace() deleted every occurrence, including any that appeared
        # inside the translated body itself.
        prefix = "Translated text: "
        if translated_text.startswith(prefix):
            translated_text = translated_text[len(prefix):]

        return {"translated_text": translated_text, "language": request.target_language}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
|
| 273 |
+
|
| 274 |
+
@app.post("/api/text-to-speech")
async def text_to_speech(request: TextToSpeechRequest):
    """Convert text to speech and return audio file"""
    try:
        # Synthesize speech with gTTS, keeping the whole MP3 in memory.
        speech = gtts.gTTS(text=request.text, lang=request.lang, slow=False)
        buffer = io.BytesIO()
        speech.write_to_fp(buffer)
        buffer.seek(0)  # rewind so StreamingResponse reads from the start

        # Stream the MP3 back as a downloadable attachment.
        headers = {"Content-Disposition": "attachment; filename=speech.mp3"}
        return StreamingResponse(buffer, media_type="audio/mpeg", headers=headers)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating speech: {str(e)}")
|
| 294 |
+
|
| 295 |
+
@app.get("/api/news/{location}")
async def get_location_news(location: str, count: int = 5, language: str = "en"):
    """Fetch news for a specific location and optionally translate it.

    Raises HTTP 500 when fetching or translating fails.
    """
    try:
        # Fetch news; fetch_city_news takes a "city, count" string.
        news = news_agent.fetch_city_news(f"{location}, {count}")

        # Translate if needed
        if language != "en":
            translation_input = json.dumps({"text": news, "lang": language})
            translated_news = news_agent.translate_text(translation_input)

            # Fix: strip only the leading "Translated text: " marker; the old
            # str.replace() removed every occurrence, mangling article bodies
            # containing the marker.
            prefix = "Translated text: "
            if translated_news.startswith(prefix):
                news = translated_news[len(prefix):]
            else:
                news = translated_news

        return {"news": news, "language": language}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error fetching news: {str(e)}")
|
main.py
ADDED
|
@@ -0,0 +1,1039 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import time
|
| 4 |
+
import feedparser
|
| 5 |
+
import requests
|
| 6 |
+
from bs4 import BeautifulSoup
|
| 7 |
+
import re
|
| 8 |
+
from datetime import datetime, timedelta
|
| 9 |
+
import gtts
|
| 10 |
+
from playsound3 import playsound
|
| 11 |
+
from googletrans import Translator
|
| 12 |
+
import urllib.parse
|
| 13 |
+
from deep_translator import GoogleTranslator
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# LangChain imports
|
| 19 |
+
from langchain_google_genai import GoogleGenerativeAI
|
| 20 |
+
from langchain_chroma import Chroma
|
| 21 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 22 |
+
from langchain.schema import Document
|
| 23 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 24 |
+
from langchain.memory import ConversationSummaryBufferMemory
|
| 25 |
+
from langchain.tools import Tool
|
| 26 |
+
from langchain.agents import AgentExecutor, create_react_agent
|
| 27 |
+
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
|
| 28 |
+
|
| 29 |
+
# Load environment variables
|
| 30 |
+
load_dotenv()
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class NewsAgent:
|
| 34 |
+
    def __init__(self):
        """Build the full agent pipeline.

        Setup order matters: embeddings must exist before the vector store,
        the vector store before the DB test/cleanup, and tools before the
        agent that uses them.
        """
        print("🚀 Initializing News Agent...")
        self.setup_llm()
        self.setup_embeddings()
        self.setup_vector_store()
        self.test_vector_db()  # Test the vector DB
        self.delete_old_news()  # Delete old news on startup
        self.setup_memory()
        self.setup_search_tools()
        self.setup_tools()
        self.setup_agent()
        # Track locations we've already fetched news for in this process.
        self.locations = set()
        print("✅ News Agent initialized and ready!")
|
| 47 |
+
|
| 48 |
+
    def setup_llm(self):
        """Initialize the Gemini model.

        Reads GOOGLE_API_KEY from the environment (loaded via dotenv at
        module import). Raises if the key is missing or model init fails.
        """
        try:
            api_key = os.getenv("GOOGLE_API_KEY")
            if not api_key:
                raise ValueError("GOOGLE_API_KEY environment variable not set")

            # Low temperature keeps news summaries factual/deterministic.
            self.llm = GoogleGenerativeAI(
                model="gemini-1.5-flash",
                google_api_key=api_key,
                temperature=0.2,
                top_p=0.8,
                max_output_tokens=2048
            )
            print("✅ Gemini 1.5 Flash model initialized")
        except Exception as e:
            print(f"❌ Error initializing Gemini model: {e}")
            raise
|
| 66 |
+
|
| 67 |
+
    def setup_embeddings(self):
        """Initialize the embedding model.

        Uses the small all-MiniLM-L6-v2 sentence-transformer (downloaded on
        first run); raises on failure so the agent never starts half-built.
        """
        try:
            self.embedding_model = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2"
            )
            print("✅ Embedding model initialized")
        except Exception as e:
            print(f"❌ Error initializing embedding model: {e}")
            raise
|
| 77 |
+
|
| 78 |
+
    def setup_vector_store(self):
        """Initialize ChromaDB vector store.

        Persists to ./chroma_db so cached news survives restarts; requires
        setup_embeddings() to have run first.
        """
        try:
            self.vector_store = Chroma(
                persist_directory="./chroma_db",
                embedding_function=self.embedding_model
            )
            print("✅ Vector store initialized")
        except Exception as e:
            print(f"❌ Error initializing vector store: {e}")
            raise
|
| 89 |
+
|
| 90 |
+
def test_vector_db(self):
|
| 91 |
+
"""Test if the vector database is working properly."""
|
| 92 |
+
try:
|
| 93 |
+
# Check if DB is empty
|
| 94 |
+
db_info = self.vector_store.get()
|
| 95 |
+
print(f"Vector DB contains {len(db_info['ids'])} documents")
|
| 96 |
+
|
| 97 |
+
if len(db_info['ids']) > 0:
|
| 98 |
+
# Try a simple search
|
| 99 |
+
results = self.vector_store.similarity_search("test", k=1)
|
| 100 |
+
print(f"Test search returned {len(results)} results")
|
| 101 |
+
if results:
|
| 102 |
+
print(f"Sample document: {results[0].metadata['title']}")
|
| 103 |
+
return True
|
| 104 |
+
else:
|
| 105 |
+
print("Vector DB is empty")
|
| 106 |
+
return False
|
| 107 |
+
except Exception as e:
|
| 108 |
+
print(f"β Error testing vector DB: {e}")
|
| 109 |
+
return False
|
| 110 |
+
|
| 111 |
+
def is_recent_news_available(self, location, max_age_minutes=180):
|
| 112 |
+
"""Check if recent news for a location is available in the database."""
|
| 113 |
+
try:
|
| 114 |
+
now = datetime.now()
|
| 115 |
+
# Search for news related to the location
|
| 116 |
+
results = self.vector_store.similarity_search(location, k=10)
|
| 117 |
+
|
| 118 |
+
# Filter results to those within max_age_minutes
|
| 119 |
+
recent_news = []
|
| 120 |
+
for doc in results:
|
| 121 |
+
metadata = doc.metadata
|
| 122 |
+
if metadata.get('location', '').lower() == location.lower():
|
| 123 |
+
timestamp_str = metadata.get('timestamp')
|
| 124 |
+
if timestamp_str:
|
| 125 |
+
try:
|
| 126 |
+
timestamp = datetime.fromisoformat(timestamp_str)
|
| 127 |
+
if now - timestamp <= timedelta(minutes=max_age_minutes):
|
| 128 |
+
recent_news.append(doc)
|
| 129 |
+
except Exception:
|
| 130 |
+
# Ignore parsing errors
|
| 131 |
+
continue
|
| 132 |
+
|
| 133 |
+
print(f"Found {len(recent_news)} recent news items for {location} in database")
|
| 134 |
+
return recent_news
|
| 135 |
+
except Exception as e:
|
| 136 |
+
print(f"β Error checking recent news: {e}")
|
| 137 |
+
return []
|
| 138 |
+
|
| 139 |
+
def delete_old_news(self, max_age_minutes=60):
|
| 140 |
+
"""Delete news older than the specified age from the database."""
|
| 141 |
+
try:
|
| 142 |
+
now = datetime.now()
|
| 143 |
+
# Get all documents
|
| 144 |
+
all_docs = self.vector_store.get()
|
| 145 |
+
all_ids = all_docs['ids']
|
| 146 |
+
all_metadatas = all_docs['metadatas']
|
| 147 |
+
|
| 148 |
+
# Identify documents older than max_age_minutes
|
| 149 |
+
ids_to_delete = []
|
| 150 |
+
for doc_id, metadata in zip(all_ids, all_metadatas):
|
| 151 |
+
timestamp_str = metadata.get('timestamp') if metadata else None
|
| 152 |
+
if timestamp_str:
|
| 153 |
+
try:
|
| 154 |
+
timestamp = datetime.fromisoformat(timestamp_str)
|
| 155 |
+
if now - timestamp > timedelta(minutes=max_age_minutes):
|
| 156 |
+
ids_to_delete.append(doc_id)
|
| 157 |
+
except Exception:
|
| 158 |
+
# Ignore parsing errors
|
| 159 |
+
continue
|
| 160 |
+
|
| 161 |
+
# Delete old documents
|
| 162 |
+
if ids_to_delete:
|
| 163 |
+
self.vector_store.delete(ids=ids_to_delete)
|
| 164 |
+
print(f"β
Deleted {len(ids_to_delete)} old news items from database")
|
| 165 |
+
|
| 166 |
+
return len(ids_to_delete)
|
| 167 |
+
except Exception as e:
|
| 168 |
+
print(f"β Error deleting old news: {e}")
|
| 169 |
+
return 0
|
| 170 |
+
|
| 171 |
+
def determine_news_count(self, user_request):
|
| 172 |
+
"""Determine how many news articles to fetch based on user request."""
|
| 173 |
+
# Check if user is asking for more news
|
| 174 |
+
more_patterns = ["more news", "additional news", "more articles", "show more", "get more"]
|
| 175 |
+
|
| 176 |
+
if any(pattern in user_request.lower() for pattern in more_patterns):
|
| 177 |
+
# Check if user specified a number
|
| 178 |
+
number_match = re.search(r'(\d+)\s+(more|additional|extra)', user_request.lower())
|
| 179 |
+
if number_match:
|
| 180 |
+
try:
|
| 181 |
+
count = int(number_match.group(1))
|
| 182 |
+
# Cap at a reasonable maximum
|
| 183 |
+
return min(count, 20)
|
| 184 |
+
except ValueError:
|
| 185 |
+
pass
|
| 186 |
+
|
| 187 |
+
return 15 # Return more news if requested without specific number
|
| 188 |
+
else:
|
| 189 |
+
return 5 # Default number of news
|
| 190 |
+
|
| 191 |
+
    def setup_memory(self):
        """Initialize conversation memory.

        Summary-buffer memory keeps recent turns verbatim and summarizes
        older ones with the LLM once the token budget is exceeded.
        Requires setup_llm() to have run first.
        """
        try:
            self.memory = ConversationSummaryBufferMemory(
                llm=self.llm,
                max_token_limit=4000,  # Increased token limit for better context retention
                return_messages=True,
                memory_key="chat_history",
                input_key="input",  # Explicitly define input key
                output_key="output"  # Explicitly define output key
            )
            print("✅ Conversation memory initialized")
        except Exception as e:
            print(f"❌ Error initializing memory: {e}")
            raise
|
| 206 |
+
|
| 207 |
+
    def setup_search_tools(self):
        """Set up search tools.

        NOTE(review): both wrappers are configured identically; presumably
        one was meant to use DuckDuckGo's news backend — confirm intent.
        """
        try:
            # Setup DuckDuckGo search
            self.ddg_wrapper = DuckDuckGoSearchAPIWrapper(
                time="d",  # Search for content from the past day
                max_results=5
            )

            # Setup DuckDuckGo news search
            self.ddg_news_wrapper = DuckDuckGoSearchAPIWrapper(
                time="d",  # Search for content from the past day
                max_results=5
            )

            print("✅ Search tools initialized")
        except Exception as e:
            print(f"❌ Error initializing search tools: {e}")
            raise
|
| 226 |
+
|
| 227 |
+
    def setup_tools(self):
        """Set up tools for the agent.

        Each Tool wraps a bound method of this class; the description strings
        are what the ReAct agent reads to decide which tool to invoke, so
        their wording is part of the agent's behavior.
        """
        self.tools = [
            Tool(
                name="FetchNews",
                func=self.fetch_city_news,
                description="Fetches the latest news for a specific city or location. Input should be the name of the city or 'city, number' to specify how many articles to fetch."
            ),
            Tool(
                name="SearchNewsArticle",
                func=self.search_news_article,
                description="Searches for news articles on a specific topic or title and returns summaries. Input should be the topic or title to search for."
            ),
            Tool(
                name="GetMoreInfoOnNews",
                func=self.get_more_info_on_news,
                description="Gets more detailed information about a specific news story. Input should be the news title or topic you want more information about."
            ),
            Tool(
                name="GetArticleContent",
                func=self.get_article_content,
                description="Gets the content of a news article from a URL. Input should be the URL of the article."
            ),
            Tool(
                name="SummarizeText",
                func=self.summarize_text,
                description="Summarizes a text. Input should be the text to summarize."
            ),
            Tool(
                name="TextToSpeech",
                func=self.text_to_speech,
                description="Converts text to speech in a specified language. Input should be a JSON string with 'text' and 'lang' keys."
            ),
            Tool(
                name="TranslateText",
                func=self.translate_text,
                description="Translates text to a specified language. Input should be a JSON string with 'text' and 'lang' keys."
            ),
            Tool(
                name="SearchNewsInDB",
                func=self.search_news_in_db,
                description="Searches for news in the database. Input should be the search query."
            ),
            Tool(
                name="GetRecentNewsFromDB",
                func=self.get_recent_news_from_db,
                description="Gets recent news for a location from the database. Input should be the location name."
            )
        ]
        print("✅ Agent tools initialized")
|
| 277 |
+
|
| 278 |
+
    def setup_agent(self):
        """Set up the LangChain agent.

        Builds a ReAct-style chat prompt (system rules + tool placeholders +
        chat history), wires ``self.llm`` and ``self.tools`` into a ReAct
        agent, and wraps it in an ``AgentExecutor`` backed by ``self.memory``.
        Assumes ``self.llm``, ``self.tools`` and ``self.memory`` were created
        earlier in ``__init__`` — TODO confirm against the constructor.
        """
        # {tools} and {tool_names} are filled in by create_react_agent;
        # {chat_history} is supplied at invoke time (see process_query).
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a helpful AI assistant that specializes in providing location-specific news.
            You can fetch news, search for articles, get more information on specific news stories, summarize text, translate content, and convert text to speech.
            Always try to understand what location the user is asking about and provide relevant news.
            If you're not sure about a location, ask for clarification.

            IMPORTANT: Maintain conversation context. When the user asks follow-up questions about previously mentioned news articles,
            use your memory of the conversation to understand which article they're referring to. If they ask for more details about a
            news story you've mentioned, use the GetMoreInfoOnNews tool with the appropriate title.

            When providing news:
            1. Always ensure you're providing the most recent news (from today if possible)
            2. First check if recent news is available in the database before fetching from the web
            3. If a user asks for more information about a specific news story, use the GetMoreInfoOnNews tool
            4. Always include relevant links when providing detailed information about news
            5. Summarize news articles in a concise and informative way
            6. If a user asks for more news, provide additional articles (up to 15)
            7. Remember which news articles you've already mentioned in the conversation

            You have access to the following tools:

            {tools}

            Use the following format:

            Question: the input question you must answer
            Thought: you should always think about what to do
            Action: the action to take, should be one of [{tool_names}]
            Action Input: the input to the action
            Observation: the result of the action
            ... (this Thought/Action/Action Input/Observation can repeat N times)
            Thought: I now know the final answer
            Final Answer: the final answer to the original input question

            Chat History: {chat_history}
            """),
            ("human", "{input}"),
            ("ai", "{agent_scratchpad}")
        ])

        # ReAct agent over the configured LLM and tool set.
        self.agent = create_react_agent(
            llm=self.llm,
            tools=self.tools,
            prompt=prompt
        )

        # Executor adds memory, verbose tracing and lenient output parsing.
        self.agent_executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            memory=self.memory,
            verbose=True,
            handle_parsing_errors=True,
            return_intermediate_steps=True  # Return intermediate steps for better debugging
        )
        print("✅ Agent executor initialized")
def get_recent_news_from_db(self, location):
|
| 338 |
+
"""Gets recent news for a location from the database."""
|
| 339 |
+
try:
|
| 340 |
+
recent_news = self.is_recent_news_available(location)
|
| 341 |
+
|
| 342 |
+
if not recent_news:
|
| 343 |
+
return f"No recent news found in database for {location}. Try fetching fresh news."
|
| 344 |
+
|
| 345 |
+
response = f"π° Recent News from {location} (from database):\n\n"
|
| 346 |
+
for i, doc in enumerate(recent_news, 1):
|
| 347 |
+
metadata = doc.metadata
|
| 348 |
+
response += f"{i}. {metadata.get('title', 'Unknown Title')}\n"
|
| 349 |
+
response += f" Source: {metadata.get('source', 'Unknown Source')}\n"
|
| 350 |
+
response += f" Published: {metadata.get('date', 'Unknown Date')}\n"
|
| 351 |
+
response += f" Link: {metadata.get('link', 'No Link Available')}\n"
|
| 352 |
+
|
| 353 |
+
# Extract summary from content
|
| 354 |
+
content = doc.page_content
|
| 355 |
+
summary_match = re.search(r"SUMMARY: (.*?)(?:CONTENT:|$)", content, re.DOTALL)
|
| 356 |
+
if summary_match:
|
| 357 |
+
summary = summary_match.group(1).strip()
|
| 358 |
+
response += f" Summary: {summary}\n"
|
| 359 |
+
|
| 360 |
+
response += "\n"
|
| 361 |
+
|
| 362 |
+
return response
|
| 363 |
+
except Exception as e:
|
| 364 |
+
print(f"β Error getting recent news from DB: {e}")
|
| 365 |
+
return f"Error retrieving recent news for {location} from database."
|
| 366 |
+
|
| 367 |
+
def search_news_article(self, query):
|
| 368 |
+
"""Search for news articles on a specific topic using DuckDuckGo News."""
|
| 369 |
+
try:
|
| 370 |
+
print(f"π Searching for news articles on: {query}")
|
| 371 |
+
|
| 372 |
+
# Parse input for number of results if provided
|
| 373 |
+
parts = query.split(',')
|
| 374 |
+
search_query = parts[0].strip()
|
| 375 |
+
max_results = 5
|
| 376 |
+
|
| 377 |
+
if len(parts) > 1:
|
| 378 |
+
try:
|
| 379 |
+
max_results = int(parts[1].strip())
|
| 380 |
+
max_results = min(max_results, 20) # Cap at 20 results
|
| 381 |
+
except ValueError:
|
| 382 |
+
pass
|
| 383 |
+
|
| 384 |
+
# Use DuckDuckGo search with news-specific query
|
| 385 |
+
search_results = self.ddg_news_wrapper.results(f"{search_query} news", max_results=max_results)
|
| 386 |
+
|
| 387 |
+
if not search_results:
|
| 388 |
+
return f"No news articles found for: {search_query}"
|
| 389 |
+
|
| 390 |
+
# Process search results
|
| 391 |
+
articles = []
|
| 392 |
+
for i, result in enumerate(search_results[:max_results]):
|
| 393 |
+
title = result.get("title", "No title")
|
| 394 |
+
link = result.get("link", "No link")
|
| 395 |
+
snippet = result.get("snippet", "No snippet")
|
| 396 |
+
published_date = result.get("published", datetime.now().strftime("%a, %d %b %Y %H:%M:%S"))
|
| 397 |
+
source = result.get("source", "Unknown source")
|
| 398 |
+
|
| 399 |
+
# Create article object
|
| 400 |
+
article = {
|
| 401 |
+
"title": title,
|
| 402 |
+
"source": source,
|
| 403 |
+
"link": link,
|
| 404 |
+
"published": published_date,
|
| 405 |
+
"snippet": snippet,
|
| 406 |
+
"query": search_query
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
articles.append(article)
|
| 410 |
+
|
| 411 |
+
# Store in vector database for RAG
|
| 412 |
+
self.store_article_in_db(article)
|
| 413 |
+
|
| 414 |
+
# Format response
|
| 415 |
+
response = f"π° Latest News Articles on '{search_query}':\n\n"
|
| 416 |
+
for i, article in enumerate(articles, 1):
|
| 417 |
+
response += f"{i}. {article['title']}\n"
|
| 418 |
+
response += f" Source: {article['source']}\n"
|
| 419 |
+
response += f" Published: {article['published']}\n"
|
| 420 |
+
response += f" Link: {article['link']}\n"
|
| 421 |
+
response += f" Summary: {article['snippet']}\n\n"
|
| 422 |
+
|
| 423 |
+
return response
|
| 424 |
+
|
| 425 |
+
except Exception as e:
|
| 426 |
+
print(f"β Error searching for news articles: {e}")
|
| 427 |
+
return f"Error searching for news articles on '{query}': {str(e)}"
|
| 428 |
+
|
| 429 |
+
def get_more_info_on_news(self, news_title):
|
| 430 |
+
"""Gets more detailed information about a specific news story."""
|
| 431 |
+
try:
|
| 432 |
+
print(f"π Getting more information on: {news_title}")
|
| 433 |
+
|
| 434 |
+
# First, search for the news in our database
|
| 435 |
+
db_results = self.search_news_in_db(news_title, k=1)
|
| 436 |
+
|
| 437 |
+
# If we found something relevant in the database
|
| 438 |
+
if "No relevant news found" not in db_results:
|
| 439 |
+
# Extract the URL from the database results
|
| 440 |
+
url_match = re.search(r"Link: (https?://[^\s]+)", db_results)
|
| 441 |
+
if url_match:
|
| 442 |
+
article_url = url_match.group(1)
|
| 443 |
+
|
| 444 |
+
# Get the full content of the article
|
| 445 |
+
content = self.get_article_content(article_url)
|
| 446 |
+
|
| 447 |
+
# Summarize the content
|
| 448 |
+
summary = self.summarize_text(content)
|
| 449 |
+
|
| 450 |
+
return f"π° More Information on '{news_title}':\n\n{summary}\n\nSource: {article_url}"
|
| 451 |
+
|
| 452 |
+
# If we didn't find anything in the database or couldn't extract the URL,
|
| 453 |
+
# search for the news using DuckDuckGo
|
| 454 |
+
search_results = self.ddg_wrapper.results(f"{news_title} latest news", max_results=5)
|
| 455 |
+
|
| 456 |
+
if not search_results:
|
| 457 |
+
return f"Could not find more information on: {news_title}"
|
| 458 |
+
|
| 459 |
+
# Get the first result
|
| 460 |
+
result = search_results[0]
|
| 461 |
+
article_url = result.get("link")
|
| 462 |
+
|
| 463 |
+
if not article_url:
|
| 464 |
+
return f"Could not find a relevant article for: {news_title}"
|
| 465 |
+
|
| 466 |
+
# Get the content of the article
|
| 467 |
+
content = self.get_article_content(article_url)
|
| 468 |
+
|
| 469 |
+
# Summarize the content
|
| 470 |
+
summary = self.summarize_text(content)
|
| 471 |
+
|
| 472 |
+
# Store in vector database for future reference
|
| 473 |
+
self.store_article_in_db({
|
| 474 |
+
"title": news_title,
|
| 475 |
+
"link": article_url,
|
| 476 |
+
"content": content,
|
| 477 |
+
"summary": summary,
|
| 478 |
+
"source": result.get("source", "Unknown source"),
|
| 479 |
+
"published": datetime.now().strftime("%a, %d %b %Y")
|
| 480 |
+
})
|
| 481 |
+
|
| 482 |
+
return f"π° More Information on '{news_title}':\n\n{summary}\n\nSource: {article_url}"
|
| 483 |
+
|
| 484 |
+
except Exception as e:
|
| 485 |
+
print(f"β Error getting more information: {e}")
|
| 486 |
+
return f"Error getting more information on '{news_title}': {str(e)}"
|
| 487 |
+
|
| 488 |
+
def get_article_content(self, url):
|
| 489 |
+
"""Extract content from a news article URL."""
|
| 490 |
+
try:
|
| 491 |
+
headers = {
|
| 492 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
| 493 |
+
}
|
| 494 |
+
|
| 495 |
+
# Check if URL is valid
|
| 496 |
+
if not url.startswith('http'):
|
| 497 |
+
return "Invalid URL. Please provide a URL starting with http:// or https://"
|
| 498 |
+
|
| 499 |
+
# Send request
|
| 500 |
+
response = requests.get(url, headers=headers, timeout=10)
|
| 501 |
+
response.raise_for_status() # Raise exception for 4XX/5XX status codes
|
| 502 |
+
|
| 503 |
+
# Parse HTML
|
| 504 |
+
soup = BeautifulSoup(response.text, 'html.parser')
|
| 505 |
+
|
| 506 |
+
# Remove script, style, and nav elements
|
| 507 |
+
for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
|
| 508 |
+
element.decompose()
|
| 509 |
+
|
| 510 |
+
# Try to find the main content
|
| 511 |
+
main_content = None
|
| 512 |
+
|
| 513 |
+
# Look for article tag
|
| 514 |
+
article = soup.find('article')
|
| 515 |
+
if article:
|
| 516 |
+
main_content = article
|
| 517 |
+
|
| 518 |
+
# Look for main tag if article not found
|
| 519 |
+
if not main_content:
|
| 520 |
+
main_tag = soup.find('main')
|
| 521 |
+
if main_tag:
|
| 522 |
+
main_content = main_tag
|
| 523 |
+
|
| 524 |
+
# Look for div with content-related class names
|
| 525 |
+
if not main_content:
|
| 526 |
+
content_div = soup.find('div', class_=lambda c: c and any(x in c.lower() for x in ['content', 'article', 'story', 'entry', 'post']))
|
| 527 |
+
if content_div:
|
| 528 |
+
main_content = content_div
|
| 529 |
+
|
| 530 |
+
# Extract text from main content or fallback to body
|
| 531 |
+
if main_content:
|
| 532 |
+
paragraphs = main_content.find_all('p')
|
| 533 |
+
else:
|
| 534 |
+
paragraphs = soup.find_all('p')
|
| 535 |
+
|
| 536 |
+
# Join paragraphs
|
| 537 |
+
content = '\n\n'.join([p.get_text().strip() for p in paragraphs if len(p.get_text().strip()) > 40])
|
| 538 |
+
|
| 539 |
+
# If content is too short, try a different approach
|
| 540 |
+
if len(content) < 200:
|
| 541 |
+
# Get all text from body
|
| 542 |
+
body = soup.find('body')
|
| 543 |
+
if body:
|
| 544 |
+
content = body.get_text(separator='\n')
|
| 545 |
+
|
| 546 |
+
# Clean up content
|
| 547 |
+
lines = [line.strip() for line in content.split('\n') if line.strip()]
|
| 548 |
+
content = '\n'.join(lines)
|
| 549 |
+
|
| 550 |
+
# If still no content, return error
|
| 551 |
+
if not content or len(content) < 100:
|
| 552 |
+
return "Could not extract meaningful content from the article."
|
| 553 |
+
|
| 554 |
+
# Truncate if too long
|
| 555 |
+
if len(content) > 8000:
|
| 556 |
+
content = content[:8000] + "...[content truncated]"
|
| 557 |
+
|
| 558 |
+
return content
|
| 559 |
+
|
| 560 |
+
except requests.exceptions.RequestException as e:
|
| 561 |
+
return f"Error fetching article: {str(e)}"
|
| 562 |
+
except Exception as e:
|
| 563 |
+
return f"Error extracting content: {str(e)}"
|
| 564 |
+
|
| 565 |
+
def summarize_text(self, text):
|
| 566 |
+
"""Summarize text using the LLM."""
|
| 567 |
+
try:
|
| 568 |
+
if not text or len(text) < 100:
|
| 569 |
+
return "Text is too short to summarize."
|
| 570 |
+
|
| 571 |
+
# Truncate text if it's too long
|
| 572 |
+
if len(text) > 10000:
|
| 573 |
+
text = text[:10000] + "...[content truncated]"
|
| 574 |
+
|
| 575 |
+
prompt = f"""
|
| 576 |
+
Summarize the following news article in a concise way (3-5 sentences), highlighting the key points:
|
| 577 |
+
|
| 578 |
+
{text}
|
| 579 |
+
|
| 580 |
+
Summary:
|
| 581 |
+
"""
|
| 582 |
+
|
| 583 |
+
response = self.llm.invoke(prompt)
|
| 584 |
+
return response
|
| 585 |
+
except Exception as e:
|
| 586 |
+
print(f"β Error summarizing text: {e}")
|
| 587 |
+
return "Could not generate summary due to an error."
|
| 588 |
+
|
| 589 |
+
def fetch_city_news(self, city_input, max_articles=5):
|
| 590 |
+
"""Fetch news for a specific city using Google News RSS first, then enhance with search."""
|
| 591 |
+
# Parse input for city and optional count
|
| 592 |
+
parts = city_input.split(',')
|
| 593 |
+
city = parts[0].strip()
|
| 594 |
+
|
| 595 |
+
if len(parts) > 1:
|
| 596 |
+
try:
|
| 597 |
+
max_articles = int(parts[1].strip())
|
| 598 |
+
max_articles = min(max_articles, 20) # Cap at 20 articles
|
| 599 |
+
except ValueError:
|
| 600 |
+
pass
|
| 601 |
+
|
| 602 |
+
print(f"π Fetching {max_articles} news articles for: {city}")
|
| 603 |
+
|
| 604 |
+
# Check if we have recent news in the database
|
| 605 |
+
recent_news = self.is_recent_news_available(city)
|
| 606 |
+
if recent_news and len(recent_news) >= max_articles:
|
| 607 |
+
print(f"β
Found {len(recent_news)} recent news items in database for {city}")
|
| 608 |
+
response = f"π° Latest News from {city} (from database):\n\n"
|
| 609 |
+
for i, doc in enumerate(recent_news[:max_articles], 1):
|
| 610 |
+
metadata = doc.metadata
|
| 611 |
+
response += f"{i}. {metadata.get('title', 'Unknown Title')}\n"
|
| 612 |
+
response += f" Source: {metadata.get('source', 'Unknown Source')}\n"
|
| 613 |
+
response += f" Published: {metadata.get('date', 'Unknown Date')}\n"
|
| 614 |
+
response += f" Link: {metadata.get('link', 'No Link Available')}\n"
|
| 615 |
+
|
| 616 |
+
# Extract summary from content
|
| 617 |
+
content = doc.page_content
|
| 618 |
+
summary_match = re.search(r"SUMMARY: (.*?)(?:CONTENT:|$)", content, re.DOTALL)
|
| 619 |
+
if summary_match:
|
| 620 |
+
summary = summary_match.group(1).strip()
|
| 621 |
+
response += f" Summary: {summary}\n"
|
| 622 |
+
|
| 623 |
+
response += "\n"
|
| 624 |
+
|
| 625 |
+
return response
|
| 626 |
+
|
| 627 |
+
# Clean the city name to avoid URL issues
|
| 628 |
+
clean_city = city.strip().replace("\n", "").replace("\r", "")
|
| 629 |
+
encoded_city = urllib.parse.quote(clean_city)
|
| 630 |
+
|
| 631 |
+
try:
|
| 632 |
+
# First get news from Google News RSS
|
| 633 |
+
rss_url = f"https://news.google.com/rss/search?q={encoded_city}+when:1d&hl=en-US&gl=US&ceid=US:en"
|
| 634 |
+
feed = feedparser.parse(rss_url)
|
| 635 |
+
|
| 636 |
+
if not feed.entries:
|
| 637 |
+
return f"No news found for {city}"
|
| 638 |
+
|
| 639 |
+
# Process articles from RSS feed
|
| 640 |
+
articles = []
|
| 641 |
+
for entry in feed.entries[:max_articles]:
|
| 642 |
+
# Extract title and source
|
| 643 |
+
title_parts = entry.title.split(" - ")
|
| 644 |
+
title = title_parts[0].strip() if len(title_parts) > 1 else entry.title.strip()
|
| 645 |
+
source = title_parts[-1].strip() if len(title_parts) > 1 else "Unknown"
|
| 646 |
+
|
| 647 |
+
# Get the article link
|
| 648 |
+
google_news_link = entry.link
|
| 649 |
+
|
| 650 |
+
# Extract publication date
|
| 651 |
+
published_date = entry.get("published", datetime.now().strftime("%a, %d %b %Y"))
|
| 652 |
+
|
| 653 |
+
print(f"π° Found news: {title}")
|
| 654 |
+
print(f"π Searching for more details about: {title}")
|
| 655 |
+
|
| 656 |
+
# Now search for more details about this specific news
|
| 657 |
+
try:
|
| 658 |
+
search_results = self.ddg_wrapper.results(f"{title} {city} news", max_results=3)
|
| 659 |
+
|
| 660 |
+
if search_results:
|
| 661 |
+
# Get the first result
|
| 662 |
+
result = search_results[0]
|
| 663 |
+
article_url = result.get("link")
|
| 664 |
+
|
| 665 |
+
# Get the content of the article
|
| 666 |
+
content = self.get_article_content(article_url)
|
| 667 |
+
|
| 668 |
+
# Summarize the content
|
| 669 |
+
summary = self.summarize_text(content)
|
| 670 |
+
else:
|
| 671 |
+
article_url = google_news_link
|
| 672 |
+
content = ""
|
| 673 |
+
summary = "No additional details available."
|
| 674 |
+
except Exception as e:
|
| 675 |
+
print(f"β Error getting more details: {e}")
|
| 676 |
+
article_url = google_news_link
|
| 677 |
+
content = ""
|
| 678 |
+
summary = "Could not retrieve additional details due to an error."
|
| 679 |
+
|
| 680 |
+
# Create article object
|
| 681 |
+
article = {
|
| 682 |
+
"title": title,
|
| 683 |
+
"source": source,
|
| 684 |
+
"link": article_url,
|
| 685 |
+
"published": published_date,
|
| 686 |
+
"location": city,
|
| 687 |
+
"summary": summary,
|
| 688 |
+
"content": content if 'content' in locals() else ""
|
| 689 |
+
}
|
| 690 |
+
|
| 691 |
+
articles.append(article)
|
| 692 |
+
|
| 693 |
+
# Store in vector database for RAG
|
| 694 |
+
self.store_article_in_db(article)
|
| 695 |
+
|
| 696 |
+
# Add location to tracked locations
|
| 697 |
+
self.locations.add(city.lower())
|
| 698 |
+
|
| 699 |
+
# Format response
|
| 700 |
+
response = f"π° Latest News from {city}:\n\n"
|
| 701 |
+
for i, article in enumerate(articles, 1):
|
| 702 |
+
response += f"{i}. {article['title']}\n"
|
| 703 |
+
response += f" Source: {article['source']}\n"
|
| 704 |
+
response += f" Published: {article['published']}\n"
|
| 705 |
+
response += f" Link: {article['link']}\n"
|
| 706 |
+
response += f" Summary: {article['summary']}\n\n"
|
| 707 |
+
|
| 708 |
+
return response
|
| 709 |
+
|
| 710 |
+
except Exception as e:
|
| 711 |
+
print(f"β Error fetching news: {e}")
|
| 712 |
+
return f"Error fetching news for {city}: {str(e)}"
|
| 713 |
+
|
| 714 |
+
def store_article_in_db(self, article):
|
| 715 |
+
"""Store an article in the vector database."""
|
| 716 |
+
try:
|
| 717 |
+
# Create document text
|
| 718 |
+
doc_text = f"""
|
| 719 |
+
TITLE: {article.get('title', 'Unknown Title')}
|
| 720 |
+
SOURCE: {article.get('source', 'Unknown Source')}
|
| 721 |
+
PUBLISHED: {article.get('published', datetime.now().strftime('%a, %d %b %Y'))}
|
| 722 |
+
LOCATION: {article.get('location', 'Unknown Location')}
|
| 723 |
+
LINK: {article.get('link', 'No Link Available')}
|
| 724 |
+
SUMMARY: {article.get('summary', article.get('snippet', 'No Summary Available'))}
|
| 725 |
+
CONTENT: {article.get('content', 'No Content Available')}
|
| 726 |
+
"""
|
| 727 |
+
|
| 728 |
+
# Add metadata
|
| 729 |
+
metadata = {
|
| 730 |
+
"title": article.get('title', 'Unknown Title'),
|
| 731 |
+
"source": article.get('source', 'Unknown Source'),
|
| 732 |
+
"location": article.get('location', 'Unknown Location'),
|
| 733 |
+
"date": article.get('published', datetime.now().strftime('%a, %d %b %Y')),
|
| 734 |
+
"link": article.get('link', 'No Link Available'),
|
| 735 |
+
"type": "news",
|
| 736 |
+
"timestamp": datetime.now().isoformat() # Add timestamp for recency filtering
|
| 737 |
+
}
|
| 738 |
+
|
| 739 |
+
# Create document
|
| 740 |
+
document = Document(page_content=doc_text, metadata=metadata)
|
| 741 |
+
|
| 742 |
+
# Add to vector store - this automatically persists the data
|
| 743 |
+
self.vector_store.add_documents([document])
|
| 744 |
+
|
| 745 |
+
# Verify storage
|
| 746 |
+
print(f"β
Stored article in vector database: {article.get('title', 'Unknown Title')}")
|
| 747 |
+
try:
|
| 748 |
+
db_info = self.vector_store.get()
|
| 749 |
+
print(f" Current DB size: {len(db_info['ids'])} documents")
|
| 750 |
+
except:
|
| 751 |
+
print(" Could not verify DB size")
|
| 752 |
+
|
| 753 |
+
return True
|
| 754 |
+
except Exception as e:
|
| 755 |
+
print(f"β Error storing article: {e}")
|
| 756 |
+
print(f"Article data: {article}")
|
| 757 |
+
return False
|
| 758 |
+
|
| 759 |
+
def text_to_speech(self, input_json):
|
| 760 |
+
"""Convert text to speech in the specified language."""
|
| 761 |
+
try:
|
| 762 |
+
# Parse input JSON
|
| 763 |
+
try:
|
| 764 |
+
data = json.loads(input_json)
|
| 765 |
+
text = data.get("text", "")
|
| 766 |
+
lang = data.get("lang", "en")
|
| 767 |
+
except json.JSONDecodeError:
|
| 768 |
+
# If not valid JSON, assume it's just text
|
| 769 |
+
text = input_json
|
| 770 |
+
lang = "en"
|
| 771 |
+
|
| 772 |
+
if not text:
|
| 773 |
+
return "No text provided for speech conversion."
|
| 774 |
+
|
| 775 |
+
# Get supported languages
|
| 776 |
+
supported_languages = gtts.lang.tts_langs()
|
| 777 |
+
|
| 778 |
+
if lang not in supported_languages:
|
| 779 |
+
return f"Language '{lang}' is not supported for text-to-speech."
|
| 780 |
+
|
| 781 |
+
# Generate speech
|
| 782 |
+
output_file = f"speech_{int(time.time())}.mp3"
|
| 783 |
+
tts = gtts.gTTS(text=text, lang=lang, slow=False)
|
| 784 |
+
tts.save(output_file)
|
| 785 |
+
playsound(output_file)
|
| 786 |
+
|
| 787 |
+
return f"Successfully converted text to speech in {supported_languages[lang]}."
|
| 788 |
+
except Exception as e:
|
| 789 |
+
print(f"β Error in text-to-speech: {e}")
|
| 790 |
+
return f"Error in text-to-speech: {str(e)}"
|
| 791 |
+
|
| 792 |
+
|
| 793 |
+
def translate_text(self, input_json):
|
| 794 |
+
"""Translate text to the specified language."""
|
| 795 |
+
try:
|
| 796 |
+
# Parse input JSON
|
| 797 |
+
try:
|
| 798 |
+
data = json.loads(input_json)
|
| 799 |
+
text = data.get("text", "")
|
| 800 |
+
lang = data.get("lang", "en")
|
| 801 |
+
except json.JSONDecodeError:
|
| 802 |
+
# If not valid JSON, assume format is "text|lang"
|
| 803 |
+
parts = input_json.split("|")
|
| 804 |
+
text = parts[0]
|
| 805 |
+
lang = parts[1] if len(parts) > 1 else "en"
|
| 806 |
+
|
| 807 |
+
if not text:
|
| 808 |
+
return "No text provided for translation."
|
| 809 |
+
|
| 810 |
+
# Translate text using deep-translator
|
| 811 |
+
translator = GoogleTranslator(source='auto', target=lang)
|
| 812 |
+
translated_text = translator.translate(text)
|
| 813 |
+
|
| 814 |
+
return f"Translated text: {translated_text}"
|
| 815 |
+
except Exception as e:
|
| 816 |
+
print(f"β Error in translation: {e}")
|
| 817 |
+
return f"Error in translation: {str(e)}"
|
| 818 |
+
|
| 819 |
+
|
| 820 |
+
|
| 821 |
+
|
| 822 |
+
def search_news_in_db(self, query, k=3):
|
| 823 |
+
"""Search for news in the vector database with recency filtering."""
|
| 824 |
+
try:
|
| 825 |
+
# Get current date
|
| 826 |
+
current_date = datetime.now()
|
| 827 |
+
|
| 828 |
+
# First, perform the similarity search
|
| 829 |
+
results = self.vector_store.similarity_search(query, k=k*2) # Get more results than needed for filtering
|
| 830 |
+
|
| 831 |
+
if not results:
|
| 832 |
+
return "No relevant news found in the database."
|
| 833 |
+
|
| 834 |
+
# Filter for recent news (prioritize news from the last 24 hours)
|
| 835 |
+
recent_results = []
|
| 836 |
+
older_results = []
|
| 837 |
+
|
| 838 |
+
for doc in results:
|
| 839 |
+
metadata = doc.metadata
|
| 840 |
+
timestamp_str = metadata.get("timestamp")
|
| 841 |
+
|
| 842 |
+
if timestamp_str:
|
| 843 |
+
try:
|
| 844 |
+
timestamp = datetime.fromisoformat(timestamp_str)
|
| 845 |
+
# If news is from the last 24 hours
|
| 846 |
+
if current_date - timestamp <= timedelta(days=1):
|
| 847 |
+
recent_results.append(doc)
|
| 848 |
+
else:
|
| 849 |
+
older_results.append(doc)
|
| 850 |
+
except (ValueError, TypeError):
|
| 851 |
+
older_results.append(doc)
|
| 852 |
+
else:
|
| 853 |
+
older_results.append(doc)
|
| 854 |
+
|
| 855 |
+
# Combine recent and older results, prioritizing recent ones
|
| 856 |
+
filtered_results = recent_results + older_results
|
| 857 |
+
|
| 858 |
+
# Limit to the requested number of results
|
| 859 |
+
filtered_results = filtered_results[:k]
|
| 860 |
+
|
| 861 |
+
if not filtered_results:
|
| 862 |
+
return "No relevant news found in the database."
|
| 863 |
+
|
| 864 |
+
response = "π° Related News from Database:\n\n"
|
| 865 |
+
for i, doc in enumerate(filtered_results, 1):
|
| 866 |
+
metadata = doc.metadata
|
| 867 |
+
response += f"{i}. {metadata.get('title', 'Unknown Title')}\n"
|
| 868 |
+
response += f" Source: {metadata.get('source', 'Unknown Source')}\n"
|
| 869 |
+
response += f" Location: {metadata.get('location', 'Unknown Location')}\n"
|
| 870 |
+
response += f" Published: {metadata.get('date', 'Unknown Date')}\n"
|
| 871 |
+
response += f" Link: {metadata.get('link', 'No Link Available')}\n\n"
|
| 872 |
+
|
| 873 |
+
return response
|
| 874 |
+
except Exception as e:
|
| 875 |
+
print(f"β Error searching news in DB: {e}")
|
| 876 |
+
return "Error searching the news database."
|
| 877 |
+
|
| 878 |
+
def extract_locations(self, query):
|
| 879 |
+
"""Extract potential location names from the query."""
|
| 880 |
+
try:
|
| 881 |
+
prompt = f"""
|
| 882 |
+
Extract any city or country names from this text. Return ONLY the names separated by commas, or 'None' if no locations are found:
|
| 883 |
+
|
| 884 |
+
Text: {query}
|
| 885 |
+
"""
|
| 886 |
+
|
| 887 |
+
response = self.llm.invoke(prompt)
|
| 888 |
+
locations = [loc.strip() for loc in response.strip().split(',') if loc.strip().lower() != 'none']
|
| 889 |
+
return locations
|
| 890 |
+
except Exception:
|
| 891 |
+
# Fallback to simple keyword extraction
|
| 892 |
+
common_cities = ["new york", "london", "tokyo", "paris", "delhi", "mumbai", "kolkata", "bangalore", "bhubaneswar"]
|
| 893 |
+
found = []
|
| 894 |
+
for city in common_cities:
|
| 895 |
+
if city.lower() in query.lower():
|
| 896 |
+
found.append(city)
|
| 897 |
+
return found
|
| 898 |
+
|
| 899 |
+
def process_query(self, query):
|
| 900 |
+
"""Process a user query through the agent."""
|
| 901 |
+
# Clean up old news first
|
| 902 |
+
self.delete_old_news()
|
| 903 |
+
|
| 904 |
+
# Get conversation history to provide context
|
| 905 |
+
chat_history = self.get_conversation_context()
|
| 906 |
+
|
| 907 |
+
# Determine how many news to fetch
|
| 908 |
+
news_count = self.determine_news_count(query)
|
| 909 |
+
|
| 910 |
+
# Check if query contains a location
|
| 911 |
+
potential_locations = self.extract_locations(query)
|
| 912 |
+
|
| 913 |
+
# Check if user is asking for more details about a specific news
|
| 914 |
+
is_asking_for_details = any(pattern in query.lower() for pattern in
|
| 915 |
+
["more details", "tell me more about", "more information on",
|
| 916 |
+
"details on", "what about", "tell me about"])
|
| 917 |
+
|
| 918 |
+
# If asking for details about specific news, try to extract the news title from context
|
| 919 |
+
if is_asking_for_details and not any(word in query.lower() for word in ["news", "article"]):
|
| 920 |
+
# Try to extract news title from the query or recent conversation
|
| 921 |
+
news_title = self.extract_news_title_from_context(query, chat_history)
|
| 922 |
+
if news_title:
|
| 923 |
+
print(f"π Extracted news title from context: {news_title}")
|
| 924 |
+
# Append the extracted title to the query for clarity
|
| 925 |
+
query = f"{query} about '{news_title}'"
|
| 926 |
+
|
| 927 |
+
# For location-based queries
|
| 928 |
+
for location in potential_locations:
|
| 929 |
+
# Check if we have recent news in the database
|
| 930 |
+
recent_news = self.is_recent_news_available(location)
|
| 931 |
+
|
| 932 |
+
# If user wants more news or we don't have recent news, fetch from web
|
| 933 |
+
if not recent_news or "more" in query.lower():
|
| 934 |
+
if location.lower() not in [loc.lower() for loc in self.locations]:
|
| 935 |
+
print(f"π Detected new location: {location}. Fetching news...")
|
| 936 |
+
self.fetch_city_news(f"{location}, {news_count}")
|
| 937 |
+
|
| 938 |
+
# Process through the agent with enhanced context
|
| 939 |
+
try:
|
| 940 |
+
chat_history = self.get_conversation_context()
|
| 941 |
+
response = self.agent_executor.invoke({
|
| 942 |
+
"input": query,
|
| 943 |
+
"chat_history": chat_history # This will be included in the system message
|
| 944 |
+
})
|
| 945 |
+
return response["output"]
|
| 946 |
+
except Exception as e:
|
| 947 |
+
print(f"β Error processing query: {e}")
|
| 948 |
+
return "I'm sorry, I encountered an error while processing your question. Please try again."
|
| 949 |
+
|
| 950 |
+
def get_conversation_context(self):
|
| 951 |
+
"""Get formatted conversation history for context."""
|
| 952 |
+
try:
|
| 953 |
+
# Get messages from memory
|
| 954 |
+
messages = self.memory.chat_memory.messages
|
| 955 |
+
|
| 956 |
+
if not messages:
|
| 957 |
+
return []
|
| 958 |
+
|
| 959 |
+
return messages
|
| 960 |
+
except Exception as e:
|
| 961 |
+
print(f"β Error retrieving conversation context: {e}")
|
| 962 |
+
return []
|
| 963 |
+
|
| 964 |
+
def extract_news_title_from_context(self, query, chat_history):
|
| 965 |
+
"""Extract relevant news title from conversation context or query."""
|
| 966 |
+
try:
|
| 967 |
+
# First, check if there are any news titles in the recent AI messages
|
| 968 |
+
recent_ai_messages = [msg.content for msg in chat_history[-4:] if hasattr(msg, 'type') and msg.type == 'ai']
|
| 969 |
+
|
| 970 |
+
# Combine recent AI messages
|
| 971 |
+
context_text = " ".join(recent_ai_messages)
|
| 972 |
+
|
| 973 |
+
# Look for news titles in the format typically used in our responses
|
| 974 |
+
title_matches = re.findall(r'\d+\.\s+(.*?)\n', context_text)
|
| 975 |
+
|
| 976 |
+
if title_matches:
|
| 977 |
+
# Use the LLM to determine which title is most relevant to the query
|
| 978 |
+
titles_text = "\n".join([f"{i+1}. {title}" for i, title in enumerate(title_matches)])
|
| 979 |
+
|
| 980 |
+
prompt = f"""
|
| 981 |
+
Given the user query and the list of recently mentioned news titles, which title is the user most likely referring to?
|
| 982 |
+
Return ONLY the title, or "None" if none seem relevant.
|
| 983 |
+
|
| 984 |
+
User query: {query}
|
| 985 |
+
|
| 986 |
+
Recently mentioned titles:
|
| 987 |
+
{titles_text}
|
| 988 |
+
"""
|
| 989 |
+
|
| 990 |
+
response = self.llm.invoke(prompt).strip()
|
| 991 |
+
|
| 992 |
+
if response and response.lower() != "none":
|
| 993 |
+
return response
|
| 994 |
+
|
| 995 |
+
# If we couldn't find a title from context, try to extract it from the query
|
| 996 |
+
# This is a fallback for explicit mentions
|
| 997 |
+
query_words = query.lower().split()
|
| 998 |
+
for i, word in enumerate(query_words):
|
| 999 |
+
if word in ["about", "regarding", "concerning", "on"]:
|
| 1000 |
+
if i+1 < len(query_words):
|
| 1001 |
+
potential_title = " ".join(query_words[i+1:])
|
| 1002 |
+
# Remove quotes if present
|
| 1003 |
+
potential_title = potential_title.strip('"\'')
|
| 1004 |
+
if len(potential_title) > 3: # Minimum length check
|
| 1005 |
+
return potential_title
|
| 1006 |
+
|
| 1007 |
+
return None
|
| 1008 |
+
except Exception as e:
|
| 1009 |
+
print(f"β Error extracting news title from context: {e}")
|
| 1010 |
+
return None
|
| 1011 |
+
|
| 1012 |
+
|
| 1013 |
+
|
| 1014 |
+
def main():
    """Interactive console loop for chatting with the news agent."""
    banner = "=" * 50
    print(banner)
    print("🌍 Location-Specific News Agent")
    print(banner)
    print("Initializing system...")

    agent = NewsAgent()

    print("\nChat with the news agent! Type 'exit' to quit.")
    print("Example: 'What's happening in Delhi today?'")

    while True:
        user_input = input("\nYou: ").strip()

        # Any exit keyword ends the session.
        if user_input.lower() in ['exit', 'quit', 'bye']:
            print("Thank you for using the news agent. Goodbye!")
            break

        # Ignore empty lines.
        if not user_input:
            continue

        print(f"\nAI: {agent.process_query(user_input)}")


if __name__ == "__main__":
    main()
requirements.txt
ADDED
|
Binary file (6.89 kB). View file
|
|
|