Spaces:
Sleeping
Sleeping
Commit
·
ca2bd00
1
Parent(s):
60572ef
Bug fix
Browse files- app.py +150 -208
- src/core/recommender.py +138 -78
- src/database/mongodb.py +14 -9
app.py
CHANGED
|
@@ -1,174 +1,57 @@
|
|
| 1 |
-
# import gradio as gr
|
| 2 |
-
# from src.main import app as fastapi_app
|
| 3 |
-
# from src.core.recommender import recommender
|
| 4 |
-
# import logging
|
| 5 |
-
# from src.database.mongodb import mongodb
|
| 6 |
-
|
| 7 |
-
# logger = logging.getLogger(__name__)
|
| 8 |
-
# # Configure basic logging if not already set up elsewhere for Gradio context
|
| 9 |
-
# logging.basicConfig(level=logging.INFO)
|
| 10 |
-
|
| 11 |
-
# # Manually initialize and attach recommender if not already present
|
| 12 |
-
# if not hasattr(fastapi_app.state, "recommender"):
|
| 13 |
-
# recommender.load_components()
|
| 14 |
-
# fastapi_app.state.recommender = recommender
|
| 15 |
-
|
| 16 |
-
# def get_recommendations_gradio(query: str, k: int = 5):
|
| 17 |
-
# try:
|
| 18 |
-
# response = fastapi_app.state.recommender.get_recommendations(query, k)
|
| 19 |
-
# return response
|
| 20 |
-
# except Exception as e:
|
| 21 |
-
# return {"error": str(e)}
|
| 22 |
-
|
| 23 |
-
# def get_recommendations_by_id_gradio(msid: str, k: int = 5):
|
| 24 |
-
# try:
|
| 25 |
-
# # Corrected method name here
|
| 26 |
-
# response = fastapi_app.state.recommender.get_recommendations_by_id(msid, k)
|
| 27 |
-
# return response
|
| 28 |
-
# except Exception as e:
|
| 29 |
-
# return {"error": str(e)}
|
| 30 |
-
|
| 31 |
-
# def get_recommendations_user_feedback_gradio(user_id: str, msid: str, clicked_msid: str, k: int = 5):
|
| 32 |
-
# """
|
| 33 |
-
# Handles user feedback via Gradio:
|
| 34 |
-
# 1. (Optionally) Computes recommendations based on the feedback (mimicking FastAPI endpoint action).
|
| 35 |
-
# 2. Saves the feedback to MongoDB.
|
| 36 |
-
# 3. Returns a success message.
|
| 37 |
-
# """
|
| 38 |
-
# try:
|
| 39 |
-
# # Step 1: (Optional) Compute recommendations based on feedback, similar to FastAPI endpoint.
|
| 40 |
-
# # The result of this call is not the final output for this Gradio UI, per the request.
|
| 41 |
-
# # This ensures the Gradio function performs similar actions to the API endpoint.
|
| 42 |
-
# _ = fastapi_app.state.recommender.get_recommendations_user_feedback(user_id, msid, clicked_msid, k)
|
| 43 |
-
# logger.info(f"Gradio: (Computed recommendations for user '{user_id}' based on click, not shown in this UI)")
|
| 44 |
-
|
| 45 |
-
# # Step 2: Save feedback to MongoDB
|
| 46 |
-
# actual_clicked_msids = [s.strip() for s in clicked_msid.split(',') if s.strip()]
|
| 47 |
-
# if not actual_clicked_msids:
|
| 48 |
-
# logger.warning(f"Gradio: Invalid clicked_msid: {clicked_msid} for user {user_id}")
|
| 49 |
-
# return {"error": "clicked_msid parameter is invalid or does not contain valid MSIDs."}
|
| 50 |
-
|
| 51 |
-
# logger.info(
|
| 52 |
-
# f"Gradio: Saving feedback for user '{user_id}', context msid: '{msid}', clicked msids: {actual_clicked_msids}"
|
| 53 |
-
# )
|
| 54 |
-
|
| 55 |
-
# feedback_collection_name = "user_feedback_tracking"
|
| 56 |
-
# # Assuming mongodb.db is the PyMongo Database object, consistent with recommender.py
|
| 57 |
-
# feedback_collection = mongodb.db[feedback_collection_name]
|
| 58 |
-
|
| 59 |
-
# user_doc = feedback_collection.find_one({"user_id": user_id})
|
| 60 |
-
|
| 61 |
-
# if user_doc:
|
| 62 |
-
# # This update logic mirrors the one in routes.py
|
| 63 |
-
# feedback_collection.update_one(
|
| 64 |
-
# {"user_id": user_id},
|
| 65 |
-
# {"$addToSet": {"Articles": {"msid": msid, "Read": {"$each": actual_clicked_msids}}}}
|
| 66 |
-
# )
|
| 67 |
-
# else:
|
| 68 |
-
# feedback_collection.insert_one({
|
| 69 |
-
# "user_id": user_id,
|
| 70 |
-
# "Articles": [{"msid": msid, "Read": actual_clicked_msids}]
|
| 71 |
-
# })
|
| 72 |
-
|
| 73 |
-
# logger.info(f"Gradio: Successfully saved feedback for user {user_id}")
|
| 74 |
-
# return {"message": "Response saved successfully"}
|
| 75 |
-
|
| 76 |
-
# except Exception as e:
|
| 77 |
-
# logger.error(f"Gradio: Error in get_recommendations_user_feedback_gradio for user {user_id}: {e}", exc_info=True)
|
| 78 |
-
# return {"error": f"An error occurred: {str(e)}"}
|
| 79 |
-
|
| 80 |
-
# def get_recommendations_summary_gradio(msid: str, k: int = 5, summary: bool = True, smart_tip: bool = True):
|
| 81 |
-
# try:
|
| 82 |
-
# response = fastapi_app.state.recommender.get_recommendations_summary(msid, k, summary, smart_tip)
|
| 83 |
-
# return response
|
| 84 |
-
# except Exception as e:
|
| 85 |
-
# return {"error": str(e)}
|
| 86 |
-
|
| 87 |
-
# iface1 = gr.Interface(
|
| 88 |
-
# fn=get_recommendations_gradio,
|
| 89 |
-
# inputs=[
|
| 90 |
-
# gr.Textbox(label="Query", placeholder="Enter your search query..."),
|
| 91 |
-
# gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Number of recommendations")
|
| 92 |
-
# ],
|
| 93 |
-
# outputs=gr.JSON(),
|
| 94 |
-
# title="Recommendation System",
|
| 95 |
-
# description="Enter a query to get personalized recommendations."
|
| 96 |
-
# )
|
| 97 |
-
|
| 98 |
-
# iface2 = gr.Interface(
|
| 99 |
-
# fn=get_recommendations_by_id_gradio,
|
| 100 |
-
# inputs=[
|
| 101 |
-
# gr.Textbox(label="MSID", placeholder="Enter the MSID..."),
|
| 102 |
-
# gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Number of recommendations")
|
| 103 |
-
# ],
|
| 104 |
-
# outputs=gr.JSON(),
|
| 105 |
-
# title="Recommendations by MSID",
|
| 106 |
-
# description="Enter an MSID to get recommendations based on it."
|
| 107 |
-
# )
|
| 108 |
-
|
| 109 |
-
# iface3 = gr.Interface(
|
| 110 |
-
# fn=get_recommendations_user_feedback_gradio,
|
| 111 |
-
# inputs=[
|
| 112 |
-
# gr.Textbox(label="User ID", placeholder="Enter your user ID..."),
|
| 113 |
-
# gr.Textbox(label="MSID", placeholder="Enter the MSID..."),
|
| 114 |
-
# gr.Textbox(label="Clicked MSID", placeholder="Enter the clicked MSID..."),
|
| 115 |
-
# gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Number of recommendations")
|
| 116 |
-
# ],
|
| 117 |
-
# outputs=gr.JSON(),
|
| 118 |
-
# title="User Feedback Recommendations",
|
| 119 |
-
# description="Enter your user ID, MSID, and clicked MSID to get recommendations based on user feedback."
|
| 120 |
-
# )
|
| 121 |
-
|
| 122 |
-
# iface4 = gr.Interface(
|
| 123 |
-
# fn=get_recommendations_summary_gradio,
|
| 124 |
-
# inputs=[
|
| 125 |
-
# gr.Textbox(label="MSID", placeholder="Enter the MSID..."),
|
| 126 |
-
# gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Number of recommendations"),
|
| 127 |
-
# gr.Checkbox(label="Summary", value=True),
|
| 128 |
-
# gr.Checkbox(label="Smart Tip", value=True)
|
| 129 |
-
# ],
|
| 130 |
-
# outputs=gr.JSON(),
|
| 131 |
-
# title="Recommendations Summary",
|
| 132 |
-
# description="Enter an MSID to get a summary of recommendations."
|
| 133 |
-
# )
|
| 134 |
-
|
| 135 |
-
# demo = gr.TabbedInterface([iface1, iface2, iface3, iface4], ["Query Recommendations", "MSID Recommendations", "User Feedback Recommendations", "Recommendations Summary"])
|
| 136 |
-
|
| 137 |
-
# if __name__ == "__main__":
|
| 138 |
-
# demo.launch()
|
| 139 |
-
|
| 140 |
-
# app = fastapi_app
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
import logging
|
| 146 |
import uvicorn
|
| 147 |
-
from fastapi import HTTPException
|
|
|
|
| 148 |
from pydantic import BaseModel, Field
|
| 149 |
-
from typing import List
|
|
|
|
| 150 |
|
| 151 |
-
from src.main import app as fastapi_app # Existing FastAPI app instance
|
| 152 |
from src.core.recommender import recommender
|
| 153 |
from src.database.mongodb import mongodb
|
|
|
|
| 154 |
|
| 155 |
logger = logging.getLogger(__name__)
|
| 156 |
# Configure basic logging if not already set up elsewhere
|
| 157 |
if not logger.hasHandlers():
|
| 158 |
logging.basicConfig(level=logging.INFO)
|
| 159 |
|
| 160 |
-
#
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
# Pydantic models for request/response bodies
|
| 174 |
class FeedbackPayload(BaseModel):
|
|
@@ -181,23 +64,69 @@ class FeedbackResponse(BaseModel):
|
|
| 181 |
message: str
|
| 182 |
|
| 183 |
# API Endpoints
|
| 184 |
-
@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
async def get_recommendations_api(query: str, k: int = 5):
|
| 186 |
"""
|
| 187 |
Get recommendations based on a textual query.
|
| 188 |
"""
|
| 189 |
try:
|
| 190 |
-
if not hasattr(
|
| 191 |
logger.error("Recommender is not available.")
|
| 192 |
raise HTTPException(status_code=503, detail="Recommender service not available")
|
| 193 |
|
| 194 |
-
response =
|
| 195 |
return response
|
| 196 |
except Exception as e:
|
| 197 |
logger.error(f"API Error in get_recommendations_api for query '{query}': {e}", exc_info=True)
|
| 198 |
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
| 199 |
|
| 200 |
-
@
|
| 201 |
async def get_recommendations_by_id_api(msid: str, k: int = 5):
|
| 202 |
"""
|
| 203 |
Get recommendations based on a given MSID.
|
|
@@ -211,13 +140,21 @@ async def get_recommendations_by_id_api(msid: str, k: int = 5):
|
|
| 211 |
raise HTTPException(status_code=400, detail="k must be an integer between 1 and 10")
|
| 212 |
|
| 213 |
# Check if recommender service is available
|
| 214 |
-
if not hasattr(
|
| 215 |
logger.error("Recommender is not available.")
|
| 216 |
raise HTTPException(status_code=503, detail="Recommender service not available")
|
| 217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
# Get recommendations with error handling
|
| 219 |
try:
|
| 220 |
-
response =
|
| 221 |
if not response:
|
| 222 |
raise HTTPException(status_code=404, detail=f"No recommendations found for MSID: {msid}")
|
| 223 |
return response
|
|
@@ -236,7 +173,7 @@ async def get_recommendations_by_id_api(msid: str, k: int = 5):
|
|
| 236 |
logger.error(f"Unexpected error in get_recommendations_by_id_api for msid '{msid}': {e}", exc_info=True)
|
| 237 |
raise HTTPException(status_code=500, detail="An unexpected error occurred")
|
| 238 |
|
| 239 |
-
@
|
| 240 |
async def submit_user_feedback_api(payload: FeedbackPayload):
|
| 241 |
"""
|
| 242 |
Submit user feedback (e.g., clicked articles) and save it.
|
|
@@ -244,49 +181,58 @@ async def submit_user_feedback_api(payload: FeedbackPayload):
|
|
| 244 |
though the primary response here is the status of feedback submission.
|
| 245 |
"""
|
| 246 |
try:
|
| 247 |
-
if not hasattr(
|
| 248 |
logger.error("Recommender is not available.")
|
| 249 |
raise HTTPException(status_code=503, detail="Recommender service not available")
|
| 250 |
|
| 251 |
# (Optional) Compute recommendations based on feedback, similar to Gradio function.
|
| 252 |
# The result of this call is not the primary output of this API endpoint.
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
if not actual_clicked_msids:
|
| 261 |
-
logger.warning(f"API: Invalid clicked_msid: '{payload.clicked_msid}' for user '{payload.user_id}'")
|
| 262 |
-
raise HTTPException(status_code=400, detail="clicked_msid parameter is invalid or does not contain valid MSIDs.")
|
| 263 |
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
if mongodb.db is None:
|
| 271 |
-
logger.error("MongoDB database connection is not available.")
|
| 272 |
-
raise HTTPException(status_code=503, detail="Database service not available")
|
| 273 |
-
|
| 274 |
-
feedback_collection = mongodb.db[feedback_collection_name]
|
| 275 |
-
user_doc = feedback_collection.find_one({"user_id": payload.user_id})
|
| 276 |
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
{"user_id": payload.user_id},
|
| 280 |
-
{"$addToSet": {"Articles": {"msid": payload.msid, "Read": actual_clicked_msids}}}
|
| 281 |
)
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
except HTTPException:
|
| 292 |
raise # Re-raise HTTPException directly
|
|
@@ -294,18 +240,18 @@ async def submit_user_feedback_api(payload: FeedbackPayload):
|
|
| 294 |
logger.error(f"API Error in submit_user_feedback_api for user '{payload.user_id}': {e}", exc_info=True)
|
| 295 |
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
| 296 |
|
| 297 |
-
@
|
| 298 |
async def get_recommendations_summary_api(msid: str, k: int = 5, summary: bool = True, smart_tip: bool = True):
|
| 299 |
"""
|
| 300 |
Get recommendations with optional summary and smart tip for a given MSID.
|
| 301 |
"""
|
| 302 |
try:
|
| 303 |
-
if not hasattr(
|
| 304 |
logger.error("Recommender is not available.")
|
| 305 |
raise HTTPException(status_code=503, detail="Recommender service not available")
|
| 306 |
|
| 307 |
try:
|
| 308 |
-
response =
|
| 309 |
except RuntimeError as e:
|
| 310 |
# Catch the meta tensor error and return a fallback
|
| 311 |
if "meta tensor" in str(e):
|
|
@@ -325,9 +271,5 @@ async def get_recommendations_summary_api(msid: str, k: int = 5, summary: bool =
|
|
| 325 |
logger.error(f"API Error in get_recommendations_summary_api for msid '{msid}': {e}", exc_info=True)
|
| 326 |
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
| 327 |
|
| 328 |
-
# This makes fastapi_app (imported from src.main and extended here) available as 'app'
|
| 329 |
-
# for ASGI servers like Uvicorn.
|
| 330 |
-
app = fastapi_app
|
| 331 |
-
|
| 332 |
if __name__ == "__main__":
|
| 333 |
uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import logging
|
| 2 |
import uvicorn
|
| 3 |
+
from fastapi import FastAPI, HTTPException
|
| 4 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 5 |
from pydantic import BaseModel, Field
|
| 6 |
+
from typing import List, Optional
|
| 7 |
+
from datetime import datetime
|
| 8 |
|
|
|
|
| 9 |
from src.core.recommender import recommender
|
| 10 |
from src.database.mongodb import mongodb
|
| 11 |
+
from src.config.settings import API_TITLE, API_DESCRIPTION, API_VERSION
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
# Configure basic logging if not already set up elsewhere
|
| 15 |
if not logger.hasHandlers():
|
| 16 |
logging.basicConfig(level=logging.INFO)
|
| 17 |
|
| 18 |
+
# Create a new FastAPI app instance instead of importing from src.main
|
| 19 |
+
app = FastAPI(
|
| 20 |
+
title=API_TITLE,
|
| 21 |
+
description=API_DESCRIPTION,
|
| 22 |
+
version=API_VERSION
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
# Add CORS middleware
|
| 26 |
+
app.add_middleware(
|
| 27 |
+
CORSMiddleware,
|
| 28 |
+
allow_origins=["*"],
|
| 29 |
+
allow_credentials=True,
|
| 30 |
+
allow_methods=["*"],
|
| 31 |
+
allow_headers=["*"],
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
# Initialize recommender on startup
|
| 35 |
+
@app.on_event("startup")
|
| 36 |
+
async def startup_event():
|
| 37 |
+
"""Initialize recommender system on startup."""
|
| 38 |
+
try:
|
| 39 |
+
logger.info("Initializing recommender system...")
|
| 40 |
+
recommender.load_components()
|
| 41 |
+
app.state.recommender = recommender
|
| 42 |
+
logger.info("Recommender system initialized successfully")
|
| 43 |
+
except Exception as e:
|
| 44 |
+
logger.error(f"Failed to initialize recommender system: {e}", exc_info=True)
|
| 45 |
+
# Don't raise here to allow the app to start even if recommender fails
|
| 46 |
+
|
| 47 |
+
@app.on_event("shutdown")
|
| 48 |
+
async def shutdown_event():
|
| 49 |
+
"""Cleanup on shutdown."""
|
| 50 |
+
try:
|
| 51 |
+
mongodb.close()
|
| 52 |
+
logger.info("MongoDB connection closed")
|
| 53 |
+
except Exception as e:
|
| 54 |
+
logger.error(f"Error during shutdown: {e}", exc_info=True)
|
| 55 |
|
| 56 |
# Pydantic models for request/response bodies
|
| 57 |
class FeedbackPayload(BaseModel):
|
|
|
|
| 64 |
message: str
|
| 65 |
|
| 66 |
# API Endpoints
|
| 67 |
+
@app.get("/health")
|
| 68 |
+
async def health_check():
|
| 69 |
+
"""
|
| 70 |
+
Health check endpoint to diagnose system status.
|
| 71 |
+
"""
|
| 72 |
+
health_status = {
|
| 73 |
+
"status": "healthy",
|
| 74 |
+
"timestamp": datetime.now().isoformat(),
|
| 75 |
+
"components": {}
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
# Check recommender system
|
| 79 |
+
if hasattr(app.state, "recommender") and app.state.recommender is not None:
|
| 80 |
+
health_status["components"]["recommender"] = {
|
| 81 |
+
"status": "available",
|
| 82 |
+
"models_loaded": {
|
| 83 |
+
"embed_model": app.state.recommender.embed_model is not None,
|
| 84 |
+
"reranker": app.state.recommender.reranker is not None,
|
| 85 |
+
"generator": app.state.recommender.generator is not None
|
| 86 |
+
},
|
| 87 |
+
"data_available": app.state.recommender.df is not None and not app.state.recommender.df.empty,
|
| 88 |
+
"faiss_index_available": app.state.recommender.index is not None,
|
| 89 |
+
"faiss_vectors": app.state.recommender.index.ntotal if app.state.recommender.index else 0
|
| 90 |
+
}
|
| 91 |
+
else:
|
| 92 |
+
health_status["components"]["recommender"] = {"status": "not_available"}
|
| 93 |
+
health_status["status"] = "degraded"
|
| 94 |
+
|
| 95 |
+
# Check MongoDB connection
|
| 96 |
+
try:
|
| 97 |
+
if mongodb.db is not None:
|
| 98 |
+
# Try a simple operation to test connection
|
| 99 |
+
mongodb.db.command("ping")
|
| 100 |
+
health_status["components"]["mongodb"] = {"status": "connected"}
|
| 101 |
+
else:
|
| 102 |
+
health_status["components"]["mongodb"] = {"status": "not_connected"}
|
| 103 |
+
health_status["status"] = "degraded"
|
| 104 |
+
except Exception as e:
|
| 105 |
+
health_status["components"]["mongodb"] = {
|
| 106 |
+
"status": "error",
|
| 107 |
+
"error": str(e)
|
| 108 |
+
}
|
| 109 |
+
health_status["status"] = "degraded"
|
| 110 |
+
|
| 111 |
+
return health_status
|
| 112 |
+
|
| 113 |
+
@app.get("/recommendations/")
|
| 114 |
async def get_recommendations_api(query: str, k: int = 5):
|
| 115 |
"""
|
| 116 |
Get recommendations based on a textual query.
|
| 117 |
"""
|
| 118 |
try:
|
| 119 |
+
if not hasattr(app.state, "recommender") or app.state.recommender is None:
|
| 120 |
logger.error("Recommender is not available.")
|
| 121 |
raise HTTPException(status_code=503, detail="Recommender service not available")
|
| 122 |
|
| 123 |
+
response = app.state.recommender.get_recommendations(query, k)
|
| 124 |
return response
|
| 125 |
except Exception as e:
|
| 126 |
logger.error(f"API Error in get_recommendations_api for query '{query}': {e}", exc_info=True)
|
| 127 |
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
| 128 |
|
| 129 |
+
@app.get("/recommendations/msid/")
|
| 130 |
async def get_recommendations_by_id_api(msid: str, k: int = 5):
|
| 131 |
"""
|
| 132 |
Get recommendations based on a given MSID.
|
|
|
|
| 140 |
raise HTTPException(status_code=400, detail="k must be an integer between 1 and 10")
|
| 141 |
|
| 142 |
# Check if recommender service is available
|
| 143 |
+
if not hasattr(app.state, "recommender") or app.state.recommender is None:
|
| 144 |
logger.error("Recommender is not available.")
|
| 145 |
raise HTTPException(status_code=503, detail="Recommender service not available")
|
| 146 |
|
| 147 |
+
# Check if recommender has the necessary data
|
| 148 |
+
if app.state.recommender.df is None or app.state.recommender.df.empty:
|
| 149 |
+
logger.error("Recommender data not available (MongoDB connection issue).")
|
| 150 |
+
raise HTTPException(
|
| 151 |
+
status_code=503,
|
| 152 |
+
detail="Recommender data not available. The service is currently unable to access the required data."
|
| 153 |
+
)
|
| 154 |
+
|
| 155 |
# Get recommendations with error handling
|
| 156 |
try:
|
| 157 |
+
response = app.state.recommender.get_recommendations_by_id(msid, k)
|
| 158 |
if not response:
|
| 159 |
raise HTTPException(status_code=404, detail=f"No recommendations found for MSID: {msid}")
|
| 160 |
return response
|
|
|
|
| 173 |
logger.error(f"Unexpected error in get_recommendations_by_id_api for msid '{msid}': {e}", exc_info=True)
|
| 174 |
raise HTTPException(status_code=500, detail="An unexpected error occurred")
|
| 175 |
|
| 176 |
+
@app.post("/recommendations/feedback/user/", response_model=FeedbackResponse)
|
| 177 |
async def submit_user_feedback_api(payload: FeedbackPayload):
|
| 178 |
"""
|
| 179 |
Submit user feedback (e.g., clicked articles) and save it.
|
|
|
|
| 181 |
though the primary response here is the status of feedback submission.
|
| 182 |
"""
|
| 183 |
try:
|
| 184 |
+
if not hasattr(app.state, "recommender") or app.state.recommender is None:
|
| 185 |
logger.error("Recommender is not available.")
|
| 186 |
raise HTTPException(status_code=503, detail="Recommender service not available")
|
| 187 |
|
| 188 |
# (Optional) Compute recommendations based on feedback, similar to Gradio function.
|
| 189 |
# The result of this call is not the primary output of this API endpoint.
|
| 190 |
+
try:
|
| 191 |
+
_ = app.state.recommender.get_recommendations_user_feedback(
|
| 192 |
+
payload.user_id, payload.msid, payload.clicked_msid, payload.k
|
| 193 |
+
)
|
| 194 |
+
logger.info(f"API: (Computed recommendations for user '{payload.user_id}' based on click, not part of this response)")
|
| 195 |
+
except Exception as e:
|
| 196 |
+
logger.warning(f"Could not compute recommendations based on feedback: {e}")
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
+
# Save feedback to MongoDB (optional - only if MongoDB is available)
|
| 199 |
+
try:
|
| 200 |
+
actual_clicked_msids = [s.strip() for s in payload.clicked_msid.split(',') if s.strip()]
|
| 201 |
+
if not actual_clicked_msids:
|
| 202 |
+
logger.warning(f"API: Invalid clicked_msid: '{payload.clicked_msid}' for user '{payload.user_id}'")
|
| 203 |
+
raise HTTPException(status_code=400, detail="clicked_msid parameter is invalid or does not contain valid MSIDs.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
+
logger.info(
|
| 206 |
+
f"API: Saving feedback for user '{payload.user_id}', context msid: '{payload.msid}', clicked msids: {actual_clicked_msids}"
|
|
|
|
|
|
|
| 207 |
)
|
| 208 |
+
|
| 209 |
+
feedback_collection_name = "user_feedback_tracking"
|
| 210 |
+
# Check if MongoDB is available
|
| 211 |
+
if mongodb.db is None:
|
| 212 |
+
logger.warning("MongoDB database connection is not available. Skipping feedback storage.")
|
| 213 |
+
return FeedbackResponse(message="Response processed successfully (feedback storage unavailable)")
|
| 214 |
+
|
| 215 |
+
feedback_collection = mongodb.db[feedback_collection_name]
|
| 216 |
+
user_doc = feedback_collection.find_one({"user_id": payload.user_id})
|
| 217 |
+
|
| 218 |
+
if user_doc:
|
| 219 |
+
feedback_collection.update_one(
|
| 220 |
+
{"user_id": payload.user_id},
|
| 221 |
+
{"$addToSet": {"Articles": {"msid": payload.msid, "Read": actual_clicked_msids}}}
|
| 222 |
+
)
|
| 223 |
+
else:
|
| 224 |
+
feedback_collection.insert_one({
|
| 225 |
+
"user_id": payload.user_id,
|
| 226 |
+
"Articles": [{"msid": payload.msid, "Read": actual_clicked_msids}]
|
| 227 |
+
})
|
| 228 |
+
|
| 229 |
+
logger.info(f"API: Successfully saved feedback for user '{payload.user_id}'")
|
| 230 |
+
return FeedbackResponse(message="Response saved successfully")
|
| 231 |
+
|
| 232 |
+
except Exception as e:
|
| 233 |
+
logger.error(f"Error saving feedback to MongoDB: {e}", exc_info=True)
|
| 234 |
+
# Don't fail the entire request if MongoDB is unavailable
|
| 235 |
+
return FeedbackResponse(message="Response processed successfully (feedback storage failed)")
|
| 236 |
|
| 237 |
except HTTPException:
|
| 238 |
raise # Re-raise HTTPException directly
|
|
|
|
| 240 |
logger.error(f"API Error in submit_user_feedback_api for user '{payload.user_id}': {e}", exc_info=True)
|
| 241 |
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
| 242 |
|
| 243 |
+
@app.get("/recommendations/summary/")
|
| 244 |
async def get_recommendations_summary_api(msid: str, k: int = 5, summary: bool = True, smart_tip: bool = True):
|
| 245 |
"""
|
| 246 |
Get recommendations with optional summary and smart tip for a given MSID.
|
| 247 |
"""
|
| 248 |
try:
|
| 249 |
+
if not hasattr(app.state, "recommender") or app.state.recommender is None:
|
| 250 |
logger.error("Recommender is not available.")
|
| 251 |
raise HTTPException(status_code=503, detail="Recommender service not available")
|
| 252 |
|
| 253 |
try:
|
| 254 |
+
response = app.state.recommender.get_recommendations_summary(msid, k, summary, smart_tip)
|
| 255 |
except RuntimeError as e:
|
| 256 |
# Catch the meta tensor error and return a fallback
|
| 257 |
if "meta tensor" in str(e):
|
|
|
|
| 271 |
logger.error(f"API Error in get_recommendations_summary_api for msid '{msid}': {e}", exc_info=True)
|
| 272 |
raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
|
| 273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
if __name__ == "__main__":
|
| 275 |
uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
|
src/core/recommender.py
CHANGED
|
@@ -98,7 +98,13 @@ class RecoRecommender:
|
|
| 98 |
logger.error(f"Error during Intel XPU check: {e}. Using CPU.")
|
| 99 |
logger.info(f"Selected device: {self.device}")
|
| 100 |
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
# Initialize model trainer
|
| 103 |
self.model_trainer = ModelTrainer(RERANKER_MODEL_NAME, device=self.device) # Pass the determined device
|
| 104 |
self._setup_indic_nlp()
|
|
@@ -179,6 +185,13 @@ class RecoRecommender:
|
|
| 179 |
def _get_model_metadata(self) -> Dict:
|
| 180 |
"""Retrieve current model metadata from MongoDB."""
|
| 181 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
metadata = self.faiss_meta_collection.find_one({"_id": self.MODEL_METADATA_DOC_ID})
|
| 183 |
if metadata:
|
| 184 |
return metadata
|
|
@@ -194,11 +207,15 @@ class RecoRecommender:
|
|
| 194 |
except Exception as e:
|
| 195 |
logger.error(f"Error retrieving model metadata: {e}")
|
| 196 |
# Return a minimal fallback to ensure core functionalities can proceed if possible
|
| 197 |
-
return {"_id": self.MODEL_METADATA_DOC_ID, "embedding_model_name": EMBED_MODEL_NAME}
|
| 198 |
|
| 199 |
def _update_model_metadata(self, updates: Dict) -> bool:
|
| 200 |
"""Update model metadata in MongoDB."""
|
| 201 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
result = self.faiss_meta_collection.update_one(
|
| 203 |
{"_id": self.MODEL_METADATA_DOC_ID},
|
| 204 |
{"$set": {**updates, "metadata_last_updated": datetime.now()}}, # Key changed for clarity
|
|
@@ -220,6 +237,10 @@ class RecoRecommender:
|
|
| 220 |
def _needs_reembedding_batch(self, doc_ids: List[str], current_checksum: str) -> List[str]:
|
| 221 |
"""Check which documents from a batch need reembedding."""
|
| 222 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
# Query for all documents in one go
|
| 224 |
metadata_docs = self.faiss_meta_collection.find(
|
| 225 |
{"_id": {"$in": doc_ids}}
|
|
@@ -688,30 +709,35 @@ class RecoRecommender:
|
|
| 688 |
self.index = None # Ensure index is None on failure
|
| 689 |
raise # Re-raise the exception
|
| 690 |
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 708 |
|
| 709 |
-
except Exception as e:
|
| 710 |
-
logger.error(f"Error loading FAISS index IDs from MongoDB: {e}", exc_info=True)
|
| 711 |
-
self.indexed_ids = [] # Ensure IDs list is empty on failure
|
| 712 |
-
# We don't re-raise here, as load_components will decide if a rebuild is needed
|
| 713 |
-
# based on whether self.indexed_ids is populated.
|
| 714 |
-
|
| 715 |
def build_indexes_and_save(self, data_already_loaded: bool = False):
|
| 716 |
"""
|
| 717 |
Load data (if not already loaded), build FAISS index from current self.df, and save.
|
|
@@ -744,15 +770,19 @@ class RecoRecommender:
|
|
| 744 |
# Save indexed_ids to MongoDB
|
| 745 |
logger.info(f"Saving FAISS index IDs to MongoDB collection '{MONGO_FAISS_META_COLLECTION_NAME}', document_id '{self.FAISS_IDS_DOC_ID}'")
|
| 746 |
try:
|
| 747 |
-
self.faiss_meta_collection
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
|
|
|
|
|
|
|
|
|
| 753 |
except Exception as e:
|
| 754 |
logger.error(f"Error saving FAISS index IDs to MongoDB: {e}", exc_info=True)
|
| 755 |
-
|
|
|
|
| 756 |
logger.info("Index building and saving complete") # Log successful completion
|
| 757 |
except Exception as e: # Catch any exception during the process
|
| 758 |
logger.error(f"Error during index building: {e}", exc_info=True) # Log the error with traceback
|
|
@@ -763,64 +793,94 @@ class RecoRecommender:
|
|
| 763 |
logger.info("Loading components...") # Log the start of component loading
|
| 764 |
try: # Start a try-except block for error handling
|
| 765 |
self.load_models() # Load all machine learning models
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
|
| 773 |
-
logger.info("
|
| 774 |
-
|
| 775 |
-
|
|
|
|
|
|
|
| 776 |
try:
|
| 777 |
self._load_faiss_index_and_ids() # Tries to load .bin and IDs from Mongo.
|
| 778 |
# Sets self.index and self.indexed_ids.
|
| 779 |
# self.indexed_ids will be [] if Mongo data for IDs is missing.
|
| 780 |
# Raises FileNotFoundError if .bin (INDEX_PATH) is missing.
|
| 781 |
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 813 |
else:
|
| 814 |
-
|
|
|
|
|
|
|
|
|
|
| 815 |
else:
|
| 816 |
# This case handles if self.index is None (FileNotFoundError caught below)
|
| 817 |
# or if index was loaded but empty and no IDs from Mongo.
|
| 818 |
-
|
| 819 |
-
|
|
|
|
|
|
|
|
|
|
| 820 |
|
| 821 |
except FileNotFoundError: # This means INDEX_PATH (.bin file) was not found.
|
| 822 |
-
logger.warning(f"FAISS index file ({INDEX_PATH}) not found.
|
| 823 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 824 |
logger.info("Components loaded successfully") # Log successful loading of all components
|
| 825 |
except Exception as e: # Catch any exception during component loading
|
| 826 |
logger.error(f"Error loading components: {e}", exc_info=True) # Log the error with traceback
|
|
|
|
| 98 |
logger.error(f"Error during Intel XPU check: {e}. Using CPU.")
|
| 99 |
logger.info(f"Selected device: {self.device}")
|
| 100 |
|
| 101 |
+
# Initialize MongoDB collection only if connection is available
|
| 102 |
+
if mongodb.db is not None:
|
| 103 |
+
self.faiss_meta_collection = mongodb.db[MONGO_FAISS_META_COLLECTION_NAME]
|
| 104 |
+
else:
|
| 105 |
+
self.faiss_meta_collection = None
|
| 106 |
+
logger.warning("MongoDB not available. Some features may be limited.")
|
| 107 |
+
|
| 108 |
# Initialize model trainer
|
| 109 |
self.model_trainer = ModelTrainer(RERANKER_MODEL_NAME, device=self.device) # Pass the determined device
|
| 110 |
self._setup_indic_nlp()
|
|
|
|
| 185 |
def _get_model_metadata(self) -> Dict:
|
| 186 |
"""Retrieve current model metadata from MongoDB."""
|
| 187 |
try:
|
| 188 |
+
if self.faiss_meta_collection is None:
|
| 189 |
+
logger.warning("MongoDB not available. Returning default metadata.")
|
| 190 |
+
return {
|
| 191 |
+
"_id": self.MODEL_METADATA_DOC_ID,
|
| 192 |
+
"embedding_model_name": EMBED_MODEL_NAME,
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
metadata = self.faiss_meta_collection.find_one({"_id": self.MODEL_METADATA_DOC_ID})
|
| 196 |
if metadata:
|
| 197 |
return metadata
|
|
|
|
| 207 |
except Exception as e:
|
| 208 |
logger.error(f"Error retrieving model metadata: {e}")
|
| 209 |
# Return a minimal fallback to ensure core functionalities can proceed if possible
|
| 210 |
+
return {"_id": self.MODEL_METADATA_DOC_ID, "embedding_model_name": EMBED_MODEL_NAME}
|
| 211 |
|
| 212 |
def _update_model_metadata(self, updates: Dict) -> bool:
|
| 213 |
"""Update model metadata in MongoDB."""
|
| 214 |
try:
|
| 215 |
+
if self.faiss_meta_collection is None:
|
| 216 |
+
logger.warning("MongoDB not available. Cannot update model metadata.")
|
| 217 |
+
return False
|
| 218 |
+
|
| 219 |
result = self.faiss_meta_collection.update_one(
|
| 220 |
{"_id": self.MODEL_METADATA_DOC_ID},
|
| 221 |
{"$set": {**updates, "metadata_last_updated": datetime.now()}}, # Key changed for clarity
|
|
|
|
| 237 |
def _needs_reembedding_batch(self, doc_ids: List[str], current_checksum: str) -> List[str]:
|
| 238 |
"""Check which documents from a batch need reembedding."""
|
| 239 |
try:
|
| 240 |
+
if self.faiss_meta_collection is None:
|
| 241 |
+
logger.warning("MongoDB not available. Assuming all documents need reembedding.")
|
| 242 |
+
return doc_ids
|
| 243 |
+
|
| 244 |
# Query for all documents in one go
|
| 245 |
metadata_docs = self.faiss_meta_collection.find(
|
| 246 |
{"_id": {"$in": doc_ids}}
|
|
|
|
| 709 |
self.index = None # Ensure index is None on failure
|
| 710 |
raise # Re-raise the exception
|
| 711 |
|
| 712 |
+
# Try to load IDs from MongoDB if available
|
| 713 |
+
if self.faiss_meta_collection is not None:
|
| 714 |
+
logger.info(f"Loading FAISS index IDs from MongoDB collection '{MONGO_FAISS_META_COLLECTION_NAME}', document_id '{self.FAISS_IDS_DOC_ID}'")
|
| 715 |
+
try:
|
| 716 |
+
ids_document = self.faiss_meta_collection.find_one({"_id": self.FAISS_IDS_DOC_ID})
|
| 717 |
+
if ids_document and "ids" in ids_document:
|
| 718 |
+
self.indexed_ids = ids_document["ids"]
|
| 719 |
+
logger.info(f"Loaded {len(self.indexed_ids)} indexed IDs from MongoDB.")
|
| 720 |
+
|
| 721 |
+
# Basic consistency check
|
| 722 |
+
if self.index and self.index.ntotal != len(self.indexed_ids):
|
| 723 |
+
logger.warning(
|
| 724 |
+
f"FAISS index vector count ({self.index.ntotal}) "
|
| 725 |
+
f"does not match loaded ID count from MongoDB ({len(self.indexed_ids)}). "
|
| 726 |
+
"Index might be inconsistent. Consider rebuilding."
|
| 727 |
+
)
|
| 728 |
+
else:
|
| 729 |
+
logger.warning(f"FAISS index IDs document not found in MongoDB or 'ids' field missing. Will attempt to build if necessary.")
|
| 730 |
+
self.indexed_ids = [] # Initialize as empty if not found
|
| 731 |
+
|
| 732 |
+
except Exception as e:
|
| 733 |
+
logger.error(f"Error loading FAISS index IDs from MongoDB: {e}", exc_info=True)
|
| 734 |
+
self.indexed_ids = [] # Ensure IDs list is empty on failure
|
| 735 |
+
# We don't re-raise here, as load_components will decide if a rebuild is needed
|
| 736 |
+
# based on whether self.indexed_ids is populated.
|
| 737 |
+
else:
|
| 738 |
+
logger.warning("MongoDB not available. Cannot load indexed IDs. Operating with empty ID list.")
|
| 739 |
+
self.indexed_ids = []
|
| 740 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 741 |
def build_indexes_and_save(self, data_already_loaded: bool = False):
|
| 742 |
"""
|
| 743 |
Load data (if not already loaded), build FAISS index from current self.df, and save.
|
|
|
|
| 770 |
# Save indexed_ids to MongoDB
|
| 771 |
logger.info(f"Saving FAISS index IDs to MongoDB collection '{MONGO_FAISS_META_COLLECTION_NAME}', document_id '{self.FAISS_IDS_DOC_ID}'")
|
| 772 |
try:
|
| 773 |
+
if self.faiss_meta_collection is not None:
|
| 774 |
+
self.faiss_meta_collection.update_one(
|
| 775 |
+
{"_id": self.FAISS_IDS_DOC_ID},
|
| 776 |
+
{"$set": {"ids": self.indexed_ids, "last_updated": datetime.now()}},
|
| 777 |
+
upsert=True
|
| 778 |
+
)
|
| 779 |
+
logger.info(f"Saved {len(self.indexed_ids)} indexed IDs to MongoDB.")
|
| 780 |
+
else:
|
| 781 |
+
logger.warning("MongoDB not available. Skipping indexed IDs save.")
|
| 782 |
except Exception as e:
|
| 783 |
logger.error(f"Error saving FAISS index IDs to MongoDB: {e}", exc_info=True)
|
| 784 |
+
# Don't raise here as the FAISS index was saved successfully
|
| 785 |
+
# The IDs can be regenerated if needed
|
| 786 |
logger.info("Index building and saving complete") # Log successful completion
|
| 787 |
except Exception as e: # Catch any exception during the process
|
| 788 |
logger.error(f"Error during index building: {e}", exc_info=True) # Log the error with traceback
|
|
|
|
| 793 |
logger.info("Loading components...") # Log the start of component loading
|
| 794 |
try: # Start a try-except block for error handling
|
| 795 |
self.load_models() # Load all machine learning models
|
| 796 |
+
|
| 797 |
+
# Try to load data from MongoDB, but handle failures gracefully
|
| 798 |
+
try:
|
| 799 |
+
self._load_data_from_mongo() # Load data from MongoDB and preprocess it
|
| 800 |
+
logger.info("Successfully loaded data from MongoDB")
|
| 801 |
+
except Exception as mongo_error:
|
| 802 |
+
logger.warning(f"Failed to load data from MongoDB: {mongo_error}")
|
| 803 |
+
logger.info("Attempting to work with existing FAISS index without MongoDB data...")
|
| 804 |
+
# Set df to None to indicate no MongoDB data is available
|
| 805 |
+
self.df = None
|
| 806 |
+
|
| 807 |
+
# Try to load FAISS index and IDs
|
| 808 |
try:
|
| 809 |
self._load_faiss_index_and_ids() # Tries to load .bin and IDs from Mongo.
|
| 810 |
# Sets self.index and self.indexed_ids.
|
| 811 |
# self.indexed_ids will be [] if Mongo data for IDs is missing.
|
| 812 |
# Raises FileNotFoundError if .bin (INDEX_PATH) is missing.
|
| 813 |
|
| 814 |
+
if self.index and self.index.ntotal > 0:
|
| 815 |
+
logger.info(f"FAISS index loaded successfully with {self.index.ntotal} vectors")
|
| 816 |
+
|
| 817 |
+
# If we have MongoDB data, proceed with normal logic
|
| 818 |
+
if self.df is not None and not self.df.empty:
|
| 819 |
+
# Consistency check and incremental update logic
|
| 820 |
+
if not self.indexed_ids:
|
| 821 |
+
logger.warning("FAISS index file loaded, but no corresponding IDs found in MongoDB. Rebuilding for consistency.")
|
| 822 |
+
self.build_indexes_and_save(data_already_loaded=True)
|
| 823 |
+
else:
|
| 824 |
+
logger.info("Existing FAISS index and IDs loaded from storage.")
|
| 825 |
+
# Proceed with incremental update logic
|
| 826 |
+
current_df_ids = set(self.df[self.id_col].tolist())
|
| 827 |
+
indexed_ids_set = set(self.indexed_ids)
|
| 828 |
+
new_ids_to_add = list(current_df_ids - indexed_ids_set)
|
| 829 |
+
|
| 830 |
+
if new_ids_to_add:
|
| 831 |
+
logger.info(f"Found {len(new_ids_to_add)} new documents to add to the index.")
|
| 832 |
+
new_docs_df = self.df[self.df[self.id_col].isin(new_ids_to_add)].copy()
|
| 833 |
+
|
| 834 |
+
new_embeddings, new_doc_ids_added = self._generate_embeddings(new_docs_df)
|
| 835 |
+
|
| 836 |
+
if new_embeddings.size > 0:
|
| 837 |
+
self.index.add(new_embeddings.astype(np.float32))
|
| 838 |
+
self.indexed_ids.extend(new_doc_ids_added)
|
| 839 |
+
logger.info(f"Added {len(new_doc_ids_added)} new vectors to FAISS index. Total vectors: {self.index.ntotal}")
|
| 840 |
+
|
| 841 |
+
# Save the updated FAISS index
|
| 842 |
+
faiss.write_index(self.index, INDEX_PATH)
|
| 843 |
+
# Try to save the updated IDs to MongoDB, but don't fail if it doesn't work
|
| 844 |
+
try:
|
| 845 |
+
self.faiss_meta_collection.update_one(
|
| 846 |
+
{"_id": self.FAISS_IDS_DOC_ID},
|
| 847 |
+
{"$set": {"ids": self.indexed_ids, "last_updated": datetime.now()}},
|
| 848 |
+
upsert=True
|
| 849 |
+
)
|
| 850 |
+
logger.info("Updated FAISS index and IDs saved to MongoDB.")
|
| 851 |
+
except Exception as e:
|
| 852 |
+
logger.warning(f"Could not save IDs to MongoDB: {e}")
|
| 853 |
+
else:
|
| 854 |
+
logger.info("No new documents found to add to the index. Index is up-to-date.")
|
| 855 |
else:
|
| 856 |
+
# No MongoDB data available, but we have a FAISS index
|
| 857 |
+
logger.info("FAISS index available but no MongoDB data. Operating in limited mode.")
|
| 858 |
+
if not self.indexed_ids:
|
| 859 |
+
logger.warning("No indexed IDs available. Some functionality may be limited.")
|
| 860 |
else:
|
| 861 |
# This case handles if self.index is None (FileNotFoundError caught below)
|
| 862 |
# or if index was loaded but empty and no IDs from Mongo.
|
| 863 |
+
if self.df is not None and not self.df.empty:
|
| 864 |
+
logger.info("FAISS index and/or IDs not found or empty. Building new index.")
|
| 865 |
+
self.build_indexes_and_save(data_already_loaded=True)
|
| 866 |
+
else:
|
| 867 |
+
logger.warning("No data available (neither MongoDB nor FAISS index). Cannot build index.")
|
| 868 |
|
| 869 |
except FileNotFoundError: # This means INDEX_PATH (.bin file) was not found.
|
| 870 |
+
logger.warning(f"FAISS index file ({INDEX_PATH}) not found.")
|
| 871 |
+
if self.df is not None and not self.df.empty:
|
| 872 |
+
logger.info("Building index from scratch.")
|
| 873 |
+
self.build_indexes_and_save(data_already_loaded=True)
|
| 874 |
+
else:
|
| 875 |
+
logger.error("Cannot build index: no data available.")
|
| 876 |
+
except Exception as e:
|
| 877 |
+
logger.error(f"Error loading FAISS index: {e}", exc_info=True)
|
| 878 |
+
if self.df is not None and not self.df.empty:
|
| 879 |
+
logger.info("Attempting to rebuild index due to loading error.")
|
| 880 |
+
self.build_indexes_and_save(data_already_loaded=True)
|
| 881 |
+
else:
|
| 882 |
+
logger.error("Cannot rebuild index: no data available.")
|
| 883 |
+
|
| 884 |
logger.info("Components loaded successfully") # Log successful loading of all components
|
| 885 |
except Exception as e: # Catch any exception during component loading
|
| 886 |
logger.error(f"Error loading components: {e}", exc_info=True) # Log the error with traceback
|
src/database/mongodb.py
CHANGED
|
@@ -44,7 +44,7 @@ class MongoDB:
|
|
| 44 |
"""
|
| 45 |
Establishes connection to MongoDB Atlas.
|
| 46 |
Handles connection errors and sets up the database instance.
|
| 47 |
-
|
| 48 |
"""
|
| 49 |
try:
|
| 50 |
logger.info(f"Connecting to MongoDB Atlas: DB='{MONGO_DB_NAME}'")
|
|
@@ -56,26 +56,31 @@ class MongoDB:
|
|
| 56 |
self._db = self._client[MONGO_DB_NAME]
|
| 57 |
logger.info("Successfully connected to MongoDB.")
|
| 58 |
except (ConnectionFailure, ConfigurationError) as e:
|
| 59 |
-
logger.
|
| 60 |
self._client = None
|
| 61 |
self._db = None
|
| 62 |
-
raise
|
| 63 |
except Exception as e:
|
| 64 |
-
logger.
|
| 65 |
self._client = None
|
| 66 |
self._db = None
|
| 67 |
-
raise
|
| 68 |
|
| 69 |
@property
|
| 70 |
-
def db(self) -> Database:
|
| 71 |
"""
|
| 72 |
Property to get the database instance.
|
| 73 |
-
|
| 74 |
Returns:
|
| 75 |
-
Database: MongoDB database instance
|
| 76 |
"""
|
| 77 |
if self._db is None:
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
return self._db
|
| 80 |
|
| 81 |
@property
|
|
|
|
| 44 |
"""
|
| 45 |
Establishes connection to MongoDB Atlas.
|
| 46 |
Handles connection errors and sets up the database instance.
|
| 47 |
+
Does not raise exceptions if connection fails - allows graceful degradation.
|
| 48 |
"""
|
| 49 |
try:
|
| 50 |
logger.info(f"Connecting to MongoDB Atlas: DB='{MONGO_DB_NAME}'")
|
|
|
|
| 56 |
self._db = self._client[MONGO_DB_NAME]
|
| 57 |
logger.info("Successfully connected to MongoDB.")
|
| 58 |
except (ConnectionFailure, ConfigurationError) as e:
|
| 59 |
+
logger.warning(f"MongoDB connection failed: {e}")
|
| 60 |
self._client = None
|
| 61 |
self._db = None
|
| 62 |
+
# Don't raise the exception - allow the application to continue without MongoDB
|
| 63 |
except Exception as e:
|
| 64 |
+
logger.warning(f"An unexpected error occurred during MongoDB connection: {e}")
|
| 65 |
self._client = None
|
| 66 |
self._db = None
|
| 67 |
+
# Don't raise the exception - allow the application to continue without MongoDB
|
| 68 |
|
| 69 |
@property
|
| 70 |
+
def db(self) -> Optional[Database]:
|
| 71 |
"""
|
| 72 |
Property to get the database instance.
|
| 73 |
+
Returns None if the connection is not available.
|
| 74 |
Returns:
|
| 75 |
+
Database: MongoDB database instance or None if not connected
|
| 76 |
"""
|
| 77 |
if self._db is None:
|
| 78 |
+
# Try to connect once, but don't keep retrying
|
| 79 |
+
try:
|
| 80 |
+
self._connect()
|
| 81 |
+
except Exception as e:
|
| 82 |
+
logger.warning(f"Failed to establish MongoDB connection: {e}")
|
| 83 |
+
return None
|
| 84 |
return self._db
|
| 85 |
|
| 86 |
@property
|