# CompassIA / app.py
# Latest commit by dembasowmr:
#   "Added userId and date-time to the conversations." (a2967ae)
import sys

# CRITICAL: this must remain the first executable code in app.py.
# Registering pysqlite3 under the name 'sqlite3' in sys.modules makes every
# later `import sqlite3` (e.g. the one performed by chromadb) resolve to
# pysqlite3 instead of the standard-library module.
try:
    import pysqlite3
except ImportError:
    # Deliberately non-fatal: the app keeps starting and only reports the
    # problem. In a production environment you might want to raise here to
    # prevent the application from starting without this dependency.
    print("ERROR: pysqlite3-binary could not be imported. ChromaDB will likely fail due to old sqlite3 version.")
else:
    # Only reached when the import succeeded, so the swap cannot be half-done.
    sys.modules['sqlite3'] = pysqlite3
    print("pysqlite3 successfully imported and set as default sqlite3 module.")
import base64
import json
import os
from typing import Optional

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Load variables from a .env file into the process environment. This runs
# before the project imports below so that configuration they may read at
# import time (e.g. FIREBASE_CONFIG_BASE64 -- presumably consumed by
# src/config.py; verify) is already set.
load_dotenv()
# Put the 'src' directory that sits next to this file at the front of the
# module search path.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
# Project imports. These must stay below load_dotenv() and the sys.path tweak.
# - CHROMADB_* constants: storage location and collection name passed to DocumentRAG.
# - DocumentRAG, embedding_model: used below to build the global RAG system.
# - initialize_firebase_client: called below to obtain the Firestore client.
from src.config import CHROMADB_PERSIST_DIRECTORY, CHROMADB_COLLECTION_NAME
from src.compassia import DocumentRAG, embedding_model, initialize_firebase_client # RAG components and Firebase initializer
# --- Firebase initialization (global, once per process) ---
# Keep the value returned by the initializer: the very same Firestore client
# is handed to DocumentRAG below and reused by the startup indexing step.
FIRESTORE_DB_INSTANCE = initialize_firebase_client()

# --- Build the process-wide RAG system ---
# Executed at import time, i.e. once when this module is loaded.
print("--- FastAPI App Startup: Initializing RAG System ---")
rag_system = DocumentRAG(
    firestore_db_instance=FIRESTORE_DB_INSTANCE,  # CRITICAL: reuse the initialized client
    collection_name=CHROMADB_COLLECTION_NAME,
    persist_directory=CHROMADB_PERSIST_DIRECTORY,
    embedding_model=embedding_model,
)
# --- Index documents on startup ---
# Runs once, at import time. Every document in the Firestore collection
# 'documents' that has a 'fileUrl' field is passed to rag_system.add_document().
# Per the original notes, add_document is expected to skip documents that are
# already indexed and to filter out non-PDF files (not verifiable from this file).
#
# The work is split in two phases so that errors are attributed correctly:
#   1. fetch the document list from Firestore (one failure aborts the phase);
#   2. index each document (a failure only skips that one document).
print("--- FastAPI App Startup: Indexing Documents from Firestore ---")
if FIRESTORE_DB_INSTANCE:
    try:
        docs_ref = FIRESTORE_DB_INSTANCE.collection('documents').stream()
        documents_to_process = []
        for doc in docs_ref:
            # Guard against a snapshot without data so the membership test
            # below cannot fail on None.
            doc_data = doc.to_dict() or {}
            if 'fileUrl' in doc_data:
                documents_to_process.append(
                    {"url": doc_data['fileUrl'], "name": doc_data.get('name_en')}
                )
            else:
                print(f"Skipping document ID: {doc.id} - 'fileUrl' field missing.")
    except Exception as e:
        # Startup is best-effort: report the problem and keep the app running.
        print(f"API Error: Error fetching documents from Firestore during startup: {e}")
        print("Please ensure your Firestore database is accessible and the service account key (FIREBASE_CONFIG_BASE64 secret) is correctly set in your Hugging Face Space secrets.")
    else:
        if documents_to_process:
            for doc_info in documents_to_process:
                try:
                    rag_system.add_document(doc_info['url'], doc_info['name'])
                except Exception as e:
                    # One bad document must not prevent the others from
                    # being indexed.
                    doc_label = doc_info['name'] or doc_info['url']
                    print(f"API Error: Failed to index document '{doc_label}' during startup: {e}")
        else:
            print("No documents with 'fileUrl' found in Firestore collection 'documents' to index.")
else:
    print("API Error: Firestore client not initialized. Cannot fetch documents from Firestore on startup.")
    print("Ensure FIREBASE_CONFIG_BASE64 secret is correctly set in your Hugging Face Space secrets.")
print("--- FastAPI App Startup: Document indexing complete ---")
# --- FastAPI application instance ---
# The route decorators further down attach their handlers to this object.
app = FastAPI(
    version="0.1.0",
    title="CompassIA",
    description="Backend API for querying PDFs using DeepSeek (via OpenRouter) and BGE-M3 embeddings, with conversational memory and document indexing from Firestore.",
)
# Pydantic model for request body validation
class QueryRequest(BaseModel):
    """Request body accepted by the ``/compassia/`` endpoint."""

    # The question to answer.
    question: str
    # Identifier of the requesting user; required on every request.
    user_id: str
    # Identifier of an ongoing conversation. Optional: the client may omit it
    # or send JSON null, in which case the endpoint returns the ID it ended up
    # using. Declared Optional so that an explicit null passes validation.
    conversation_id: Optional[str] = None
# --- API Endpoint Definition ---
@app.post("/compassia/")
async def compassia_endpoint(request: QueryRequest):
    """
    Answers a question about the indexed PDF documents using RAG, with conversational memory.
    Requires a user_id from the authenticated user.
    If `conversation_id` is not provided, a new one will be generated and returned in the response.

    Args:
        request: Validated request body (question, user_id, optional conversation_id).

    Returns:
        A dict with the keys "answer" and "conversation_id".

    Raises:
        HTTPException: status 500 when the RAG system raises any exception;
            the original exception is chained as the cause.
    """
    # NOTE(review): answer_question is called without await inside an
    # `async def`, so the event loop is blocked while it runs. Consider a plain
    # `def` endpoint or a threadpool once the RAG system's thread-safety is
    # confirmed.
    try:
        # answer_question returns a tuple: (answer_text, conversation_id).
        answer_text, final_conversation_id = rag_system.answer_question(
            request.question,
            conversation_id=request.conversation_id,
            user_id=request.user_id,
        )
    except Exception as e:
        print(f"Error processing /compassia/ request: {e}")
        # Chain the cause so the original traceback is preserved in server logs.
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {e}") from e
    # Return both the answer and the (potentially new) conversation_id to the client.
    return {"answer": answer_text, "conversation_id": final_conversation_id}
# Basic health check endpoint
@app.get("/")
async def root():
    """Confirm the API is up and point callers at the query endpoint."""
    status_message = "CompassIA API is running. Use /compassia/ for queries."
    return {"message": status_message}