# Hugging Face Space status when this file was captured: Runtime error
import sys

# The sqlite3 override below has to run before anything else in app.py is
# imported: once another module (chromadb, for instance) has imported the
# standard sqlite3, replacing the entry in sys.modules no longer affects it.
try:
    import pysqlite3
except ImportError:
    print("ERROR: pysqlite3-binary could not be imported. ChromaDB will likely fail due to old sqlite3 version.")
    # Startup deliberately carries on here. A production deployment may prefer
    # to raise instead, so the app refuses to start without this dependency.
else:
    # Any later `import sqlite3` now resolves to pysqlite3.
    sys.modules['sqlite3'] = pysqlite3
    print("pysqlite3 successfully imported and set as default sqlite3 module.")
import base64
import json
import os
from typing import Optional

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Read the .env file first (after the pysqlite3 override) so that settings
# such as FIREBASE_CONFIG_BASE64 are already in the environment when the
# project modules imported below (e.g. config.py) look them up.
load_dotenv()

# Put the 'src' directory that sits next to this file at the front of the
# module search path.
sys.path.insert(
    0,
    os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')),
)

# Project imports. These must stay below load_dotenv() and the sys.path tweak.
#   - CHROMADB_* constants configure the vector store used by the RAG system.
#   - DocumentRAG and embedding_model are needed to instantiate the RAG system.
#   - initialize_firebase_client is called from this module during startup.
from src.config import CHROMADB_PERSIST_DIRECTORY, CHROMADB_COLLECTION_NAME
from src.compassia import DocumentRAG, embedding_model, initialize_firebase_client
# --- Firebase initialisation (module level, so once per process) ---
# Keep the value returned by initialize_firebase_client(): the same object is
# handed to DocumentRAG and reused by the startup indexing code.
FIRESTORE_DB_INSTANCE = initialize_firebase_client()

# --- RAG system (module level, built when this module is imported) ---
print("--- FastAPI App Startup: Initializing RAG System ---")
rag_system = DocumentRAG(
    embedding_model=embedding_model,
    persist_directory=CHROMADB_PERSIST_DIRECTORY,
    collection_name=CHROMADB_COLLECTION_NAME,
    # Must be the instance captured above (it may be falsy if init failed).
    firestore_db_instance=FIRESTORE_DB_INSTANCE,
)
# --- Index documents on startup ---
# Runs once, when this module is imported. Phase 1 collects the URL and the
# optional display name of every Firestore record in the 'documents'
# collection that has a 'fileUrl'; phase 2 hands each one to
# rag_system.add_document().
# NOTE(review): the original comments say add_document filters non-PDF URLs
# and that already-indexed documents are skipped via ChromaDB persistence;
# neither is visible from this file - confirm in src/compassia.
print("--- FastAPI App Startup: Indexing Documents from Firestore ---")
if FIRESTORE_DB_INSTANCE:
    documents_to_process = []
    try:
        for doc in FIRESTORE_DB_INSTANCE.collection('documents').stream():
            # to_dict() can return None for a snapshot without data; treat
            # that the same as a record with no 'fileUrl'.
            doc_data = doc.to_dict() or {}
            if 'fileUrl' in doc_data:
                documents_to_process.append(
                    {"url": doc_data['fileUrl'], "name": doc_data.get('name_en')}
                )
            else:
                print(f"Skipping document ID: {doc.id} - 'fileUrl' field missing.")
    except Exception as e:
        # Best effort: a Firestore failure is reported but does not stop the app.
        print(f"API Error: Error fetching documents from Firestore during startup: {e}")
        print("Please ensure your Firestore database is accessible and the service account key (FIREBASE_CONFIG_BASE64 secret) is correctly set in your Hugging Face Space secrets.")
    else:
        if not documents_to_process:
            print("No documents with 'fileUrl' found in Firestore collection 'documents' to index.")
        for doc_info in documents_to_process:
            # Isolate failures per document: one bad URL must not abort the
            # remaining documents, nor be misreported as a Firestore error.
            try:
                rag_system.add_document(doc_info['url'], doc_info['name'])
            except Exception as e:
                print(f"API Error: Failed to index document '{doc_info['url']}' during startup: {e}")
else:
    print("API Error: Firestore client not initialized. Cannot fetch documents from Firestore on startup.")
    print("Ensure FIREBASE_CONFIG_BASE64 secret is correctly set in your Hugging Face Space secrets.")
print("--- FastAPI App Startup: Document indexing complete ---")
# --- FastAPI application object ---
# Created after the RAG system and startup indexing above have run.
app = FastAPI(
    title="CompassIA",
    version="0.1.0",
    description="Backend API for querying PDFs using DeepSeek (via OpenRouter) and BGE-M3 embeddings, with conversational memory and document indexing from Firestore.",
)
# Pydantic model for request body validation
class QueryRequest(BaseModel):
    """Request body accepted by the query endpoint."""

    # The user's question, passed to rag_system.answer_question().
    question: str
    # Required on every request; forwarded to the RAG system as user_id.
    user_id: str
    # Optional: the client may send the ID of an ongoing conversation.
    # Declared Optional so that both omitting the field and sending an
    # explicit JSON null validate (a bare `str = None` rejects null under
    # Pydantic v2).
    conversation_id: Optional[str] = None
# --- API Endpoint Definition ---
# The route decorator registers the handler with the app; without it the
# function is defined but never reachable over HTTP.
@app.post("/compassia/")
async def compassia_endpoint(request: QueryRequest):
    """
    Answers a question about the indexed PDF documents using RAG, with conversational memory.

    Requires a user_id from the authenticated user.
    If `conversation_id` is not provided, a new one will be generated and returned in the response.

    Returns:
        A dict with "answer" and "conversation_id", taken from the
        (answer_text, conversation_id) tuple returned by
        rag_system.answer_question().

    Raises:
        HTTPException: status 500 if answering the question raises.
    """
    # NOTE(review): answer_question() is called synchronously inside an async
    # handler; if it does blocking work it will stall the event loop. Consider
    # running it in a worker thread once its thread-safety is confirmed.
    try:
        answer_text, final_conversation_id = rag_system.answer_question(
            request.question,
            conversation_id=request.conversation_id,
            user_id=request.user_id,
        )
    except Exception as e:
        print(f"Error processing /compassia/ request: {e}")
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}") from e
    # Return the answer together with the (potentially new) conversation ID.
    return {"answer": answer_text, "conversation_id": final_conversation_id}
# Basic health check endpoint
# Registered on "/" so the status message is actually served by the app.
@app.get("/")
async def root():
    """Return a static message confirming the API process is up."""
    return {"message": "CompassIA API is running. Use /compassia/ for queries."}