import sys
# CRITICAL: This shim must run before any other module (like chromadb) gets a
# chance to import sqlite3. It registers pysqlite3 under the name 'sqlite3' in
# sys.modules, so every later `import sqlite3` resolves to pysqlite3 instead of
# the interpreter's bundled module.
try:
    # Only the import itself can raise ImportError, so it is the only
    # statement kept inside the try body.
    import pysqlite3
except ImportError:
    # Best effort: report the problem and continue with the stock sqlite3.
    # A production deployment may prefer to raise here so the application
    # refuses to start without this dependency.
    print("ERROR: pysqlite3-binary could not be imported. ChromaDB will likely fail due to old sqlite3 version.")
else:
    sys.modules['sqlite3'] = pysqlite3
    print("pysqlite3 successfully imported and set as default sqlite3 module.")
import base64
import json
import os
from typing import Optional

import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Load environment variables from a .env file. This runs after the pysqlite3
# shim and before the src.* imports below, so that variables such as
# FIREBASE_CONFIG_BASE64 are already set when modules like src.config read them.
load_dotenv()
# Put the sibling 'src' directory at the front of the module search path.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
# Project imports (kept after load_dotenv() and the sys.path change on purpose):
#   - CHROMADB_PERSIST_DIRECTORY / CHROMADB_COLLECTION_NAME are passed to
#     DocumentRAG as its persist directory and collection name.
#   - DocumentRAG and embedding_model are needed to instantiate the RAG system.
#   - initialize_firebase_client is called in this file to get the Firestore client.
from src.config import CHROMADB_PERSIST_DIRECTORY, CHROMADB_COLLECTION_NAME
from src.compassia import DocumentRAG, embedding_model, initialize_firebase_client
# --- Firebase Initialization (module level, runs once per process) ---
# Keep the value returned by initialize_firebase_client(): it is handed to
# DocumentRAG below and reused by the startup indexing code to read Firestore.
# The startup code treats a falsy value as "Firestore client not initialized".
FIRESTORE_DB_INSTANCE = initialize_firebase_client()
# --- Initialize the RAG system globally ---
# Built once at import time; the request handlers in this file use this
# module-level instance.
print("--- FastAPI App Startup: Initializing RAG System ---")
rag_system = DocumentRAG(
    embedding_model=embedding_model,
    persist_directory=CHROMADB_PERSIST_DIRECTORY,
    collection_name=CHROMADB_COLLECTION_NAME,
    firestore_db_instance=FIRESTORE_DB_INSTANCE # the Firestore client captured above
)
# --- Index documents on startup ---
# Runs once at import time. Reads the Firestore collection 'documents' through
# FIRESTORE_DB_INSTANCE and hands every entry that has a 'fileUrl' to
# rag_system.add_document.
# NOTE(review): the original notes say add_document skips documents that are
# already indexed (ChromaDB persistence) and filters non-PDF URLs itself;
# confirm against src.compassia.
def _collect_indexable_documents(firestore_db):
    """Return a list of ``{"url", "name"}`` dicts read from Firestore.

    Streams the 'documents' collection and keeps every document that has a
    'fileUrl' field; 'name' is the optional 'name_en' field (None if absent).
    Documents without 'fileUrl' are reported on stdout and skipped.
    Exceptions raised by the Firestore client propagate to the caller.
    """
    pending = []
    for doc in firestore_db.collection('documents').stream():
        # Guard against a snapshot with no data so the membership test is safe.
        doc_data = doc.to_dict() or {}
        if 'fileUrl' in doc_data:
            pending.append({"url": doc_data['fileUrl'], "name": doc_data.get('name_en')})
        else:
            print(f"Skipping document ID: {doc.id} - 'fileUrl' field missing.")
    return pending


def _index_documents(rag, documents):
    """Call ``rag.add_document(url, name)`` for each entry; return the failure count.

    Each document is indexed inside its own try/except so that one broken
    document does not prevent the remaining ones from being indexed.
    """
    failures = 0
    for doc_info in documents:
        try:
            rag.add_document(doc_info['url'], doc_info['name'])
        except Exception as e:
            # Startup boundary: report and move on to the next document.
            failures += 1
            print(f"API Error: Failed to index document '{doc_info['url']}' during startup: {e}")
    return failures


print("--- FastAPI App Startup: Indexing Documents from Firestore ---")
if FIRESTORE_DB_INSTANCE:
    try:
        documents_to_process = _collect_indexable_documents(FIRESTORE_DB_INSTANCE)
    except Exception as e:
        # The app deliberately keeps starting even if Firestore is unreachable.
        print(f"API Error: Error fetching documents from Firestore during startup: {e}")
        print("Please ensure your Firestore database is accessible and the service account key (FIREBASE_CONFIG_BASE64 secret) is correctly set in your Hugging Face Space secrets.")
    else:
        if documents_to_process:
            _index_documents(rag_system, documents_to_process)
        else:
            print("No documents with 'fileUrl' found in Firestore collection 'documents' to index.")
else:
    print("API Error: Firestore client not initialized. Cannot fetch documents from Firestore on startup.")
    print("Ensure FIREBASE_CONFIG_BASE64 secret is correctly set in your Hugging Face Space secrets.")
print("--- FastAPI App Startup: Document indexing complete ---")
# --- FastAPI Application Instance ---
# The application object that the route decorators in this file register
# their handlers on. title, description and version are the API metadata
# passed to FastAPI.
app = FastAPI(
    title="CompassIA",
    description="Backend API for querying PDFs using DeepSeek (via OpenRouter) and BGE-M3 embeddings, with conversational memory and document indexing from Firestore.",
    version="0.1.0",
)
# Pydantic model for request body validation
class QueryRequest(BaseModel):
    """Request body for the /compassia/ endpoint."""

    # The question to answer.
    question: str
    # Required on every request; forwarded to the RAG system.
    user_id: str
    # Optional: the client may provide an ID for an ongoing conversation.
    # Annotated as Optional[str] (not plain str) so that both omitting the
    # field and sending an explicit null are accepted by validation.
    conversation_id: Optional[str] = None
# --- API Endpoint Definition ---
@app.post("/compassia/")
async def compassia_endpoint(request: QueryRequest):
    """
    Answers a question about the indexed PDF documents using RAG, with conversational memory.
    Requires a user_id from the authenticated user.
    If `conversation_id` is not provided, a new one will be generated and returned in the response.
    """
    # NOTE(review): answer_question is called without await, so it presumably
    # runs synchronously inside this coroutine - confirm whether it should be
    # moved off the event loop.
    try:
        # answer_question returns a tuple (answer_text, conversation_id).
        answer_text, final_conversation_id = rag_system.answer_question(
            request.question,
            conversation_id=request.conversation_id,
            user_id=request.user_id,
        )
    except HTTPException:
        # An HTTP error raised deliberately by a lower layer keeps its own
        # status code instead of being rewrapped as a 500.
        raise
    except Exception as e:
        print(f"Error processing /compassia/ request: {e}")
        # NOTE(review): the detail string exposes the internal exception text
        # to the client; kept unchanged for backward compatibility.
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}") from e
    # Return both the answer and the (potentially new) conversation_id to the client.
    return {"answer": answer_text, "conversation_id": final_conversation_id}
# Basic health check endpoint
@app.get("/")
async def root():
    """Report that the API is running and point callers at /compassia/."""
    status = {"message": "CompassIA API is running. Use /compassia/ for queries."}
    return status