"""CompassIA FastAPI application entry point.

On import this module, in order:

1. swaps the standard ``sqlite3`` module for ``pysqlite3`` (when available),
2. loads environment variables from a ``.env`` file,
3. initializes the Firestore client and the global ``DocumentRAG`` instance,
4. indexes every Firestore ``documents`` entry that has a ``fileUrl`` field,
5. defines the FastAPI ``app`` with the ``/compassia/`` and ``/`` endpoints.
"""

import sys

# CRITICAL: this try/except MUST stay ahead of every other import in app.py.
# It ensures pysqlite3 replaces the standard sqlite3 module in sys.modules
# before any other module (like chromadb) attempts to import sqlite3.
try:
    import pysqlite3

    sys.modules['sqlite3'] = pysqlite3
    print("pysqlite3 successfully imported and set as default sqlite3 module.")
except ImportError:
    print("ERROR: pysqlite3-binary could not be imported. ChromaDB will likely fail due to old sqlite3 version.")
    # In a production environment, you might want to raise an exception here
    # to prevent the application from starting if this critical dependency fails.

import base64
import json
import os
from typing import Optional

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Load environment variables (after the pysqlite3 fix) so that values such as
# FIREBASE_CONFIG_BASE64 are available before the src.* modules imported
# below attempt to read them.
load_dotenv()

# Add the 'src' directory (next to this file) to the Python path.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))

# These imports must come after load_dotenv() and the sys.path tweak above.
# DocumentRAG and embedding_model are needed to instantiate the RAG system;
# initialize_firebase_client is called right below.
from src.config import CHROMADB_PERSIST_DIRECTORY, CHROMADB_COLLECTION_NAME
from src.compassia import DocumentRAG, embedding_model, initialize_firebase_client

# --- Firebase Initialization (global, once per process) ---
# Capture the returned Firestore client so the very same instance can be
# handed to DocumentRAG and reused for the startup indexing below.
FIRESTORE_DB_INSTANCE = initialize_firebase_client()

# --- Initialize the RAG system globally ---
# This happens once, when the module is imported at app startup.
print("--- FastAPI App Startup: Initializing RAG System ---")
rag_system = DocumentRAG(
    embedding_model=embedding_model,
    persist_directory=CHROMADB_PERSIST_DIRECTORY,
    collection_name=CHROMADB_COLLECTION_NAME,
    firestore_db_instance=FIRESTORE_DB_INSTANCE,  # CRITICAL: pass the initialized Firestore instance
)

# --- Index documents on startup ---
# Runs once at import time. Already-indexed documents are expected to be
# skipped thanks to ChromaDB persistence (handled inside add_document —
# TODO confirm against DocumentRAG).
print("--- FastAPI App Startup: Indexing Documents from Firestore ---")
if FIRESTORE_DB_INSTANCE:
    try:
        docs_ref = FIRESTORE_DB_INSTANCE.collection('documents').stream()
        documents_to_process = []
        for doc in docs_ref:
            doc_data = doc.to_dict()
            if 'fileUrl' in doc_data:
                # add_document is expected to do its own PDF filtering, so only
                # the URL and the optional display name are collected here.
                pdf_url = doc_data['fileUrl']
                display_name = doc_data.get('name_en', None)
                documents_to_process.append({"url": pdf_url, "name": display_name})
            else:
                print(f"Skipping document ID: {doc.id} - 'fileUrl' field missing.")

        if documents_to_process:
            for doc_info in documents_to_process:
                # Isolate failures per document: one bad file must not abort
                # indexing of the remaining ones, nor be misreported as a
                # Firestore fetch error by the outer handler.
                try:
                    rag_system.add_document(doc_info['url'], doc_info['name'])
                except Exception as e:
                    doc_label = doc_info['name'] or doc_info['url']
                    print(f"API Error: Failed to index document '{doc_label}' during startup: {e}")
        else:
            print("No documents with 'fileUrl' found in Firestore collection 'documents' to index.")
    except Exception as e:
        print(f"API Error: Error fetching documents from Firestore during startup: {e}")
        print("Please ensure your Firestore database is accessible and the service account key (FIREBASE_CONFIG_BASE64 secret) is correctly set in your Hugging Face Space secrets.")
        # Decide if app should crash or continue. For now, it prints the error but continues.
else:
    print("API Error: Firestore client not initialized. Cannot fetch documents from Firestore on startup.")
    print("Ensure FIREBASE_CONFIG_BASE64 secret is correctly set in your Hugging Face Space secrets.")

print("--- FastAPI App Startup: Document indexing complete ---")

# --- FastAPI Application Instance ---
app = FastAPI(
    title="CompassIA",
    description="Backend API for querying PDFs using DeepSeek (via OpenRouter) and BGE-M3 embeddings, with conversational memory and document indexing from Firestore.",
    version="0.1.0",
)


class QueryRequest(BaseModel):
    """Request body accepted by the ``/compassia/`` endpoint."""

    # The question to answer.
    question: str
    # Required on every request; forwarded to the RAG system.
    user_id: str
    # Optional: the client may supply the ID of an ongoing conversation.
    # Declared Optional so that an explicit JSON null is accepted as well as
    # an omitted field.
    conversation_id: Optional[str] = None


# --- API Endpoint Definition ---
@app.post("/compassia/")
async def compassia_endpoint(request: QueryRequest):
    """Answer a question about the indexed documents using the RAG system.

    The request's ``question``, ``conversation_id`` and ``user_id`` are passed
    to ``rag_system.answer_question``, which returns a tuple of
    ``(answer_text, conversation_id)``.

    Returns:
        A dict with ``answer`` and ``conversation_id``. Presumably a new
        conversation ID is generated by the RAG system when none is supplied
        — confirm against ``DocumentRAG.answer_question``.

    Raises:
        HTTPException: status 500 when answering the question raises any
            exception; the detail carries the exception text.
    """
    try:
        # NOTE(review): answer_question is called without await inside a
        # coroutine; if it performs blocking I/O it will stall the event loop.
        # Consider a plain `def` endpoint or a threadpool once DocumentRAG's
        # thread-safety is confirmed.
        answer_text, final_conversation_id = rag_system.answer_question(
            request.question,
            conversation_id=request.conversation_id,
            user_id=request.user_id,
        )
        # Return both the answer and the (potentially new) conversation ID.
        return {"answer": answer_text, "conversation_id": final_conversation_id}
    except Exception as e:
        print(f"Error processing /compassia/ request: {e}")
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")


# Basic health check endpoint
@app.get("/")
async def root():
    """Return a static message confirming that the API is running."""
    return {"message": "CompassIA API is running. Use /compassia/ for queries."}