Spaces:
Runtime error
Runtime error
Commit ·
1c2a47a
1
Parent(s): 539d3e9
Small updates on app.py
Browse files
app.py
CHANGED
|
@@ -1,14 +1,15 @@
|
|
| 1 |
import sys
|
| 2 |
-
#
|
| 3 |
-
#
|
| 4 |
-
#
|
| 5 |
try:
|
| 6 |
import pysqlite3
|
| 7 |
sys.modules['sqlite3'] = pysqlite3
|
|
|
|
| 8 |
except ImportError:
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
|
| 13 |
import os
|
| 14 |
from fastapi import FastAPI, HTTPException
|
|
@@ -18,22 +19,17 @@ import json
|
|
| 18 |
import base64
|
| 19 |
from dotenv import load_dotenv
|
| 20 |
|
| 21 |
-
# Load environment variables
|
| 22 |
-
# This ensures that variables like FIREBASE_CONFIG_BASE64 are available
|
| 23 |
-
# before other modules (like config.py) attempt to read them.
|
| 24 |
load_dotenv()
|
| 25 |
|
| 26 |
# Add the 'src' directory to the Python path
|
| 27 |
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
# Ensure config is imported after dotenv for FIREBASE_CONFIG_BASE64
|
| 31 |
from src.config import CHROMADB_PERSIST_DIRECTORY, CHROMADB_COLLECTION_NAME
|
| 32 |
from src.compassia import DocumentRAG, embedding_model, initialize_firebase_client, FIRESTORE_DATABASE
|
| 33 |
|
| 34 |
# --- Firebase Initialization (Global, once per process) ---
|
| 35 |
-
# Initialize Firebase Admin SDK using a secret from Hugging Face Spaces
|
| 36 |
-
# This function is now called directly from app.py startup.
|
| 37 |
initialize_firebase_client()
|
| 38 |
|
| 39 |
|
|
@@ -46,9 +42,6 @@ rag_system = DocumentRAG(
|
|
| 46 |
)
|
| 47 |
|
| 48 |
# --- Index documents on startup ---
|
| 49 |
-
# This loop will run when the FastAPI app first starts.
|
| 50 |
-
# It uses ChromaDB's persistence, so documents already indexed will be skipped.
|
| 51 |
-
# Now fetches document URLs directly from Firestore using FIRESTORE_DATABASE
|
| 52 |
print("--- FastAPI App Startup: Indexing Documents from Firestore ---")
|
| 53 |
if FIRESTORE_DATABASE:
|
| 54 |
try:
|
|
@@ -58,10 +51,8 @@ if FIRESTORE_DATABASE:
|
|
| 58 |
for doc in docs_ref:
|
| 59 |
doc_data = doc.to_dict()
|
| 60 |
if 'fileUrl' in doc_data:
|
| 61 |
-
# IMPORTANT: Pass the full URL and optional name to add_document.
|
| 62 |
-
# The add_document method in compassia.py now handles PDF filtering.
|
| 63 |
pdf_url = doc_data['fileUrl']
|
| 64 |
-
display_name = doc_data.get('name_en', None)
|
| 65 |
documents_found_in_firestore.append({"url": pdf_url, "name": display_name})
|
| 66 |
else:
|
| 67 |
print(f"Skipping document ID: {doc.id} - 'fileUrl' field missing.")
|
|
@@ -74,8 +65,6 @@ if FIRESTORE_DATABASE:
|
|
| 74 |
except Exception as e:
|
| 75 |
print(f"API Error: Error fetching documents from Firestore: {e}")
|
| 76 |
print("Please ensure your Firestore database is accessible and the service account key (FIREBASE_CONFIG_BASE64 secret) is correct.")
|
| 77 |
-
# If document fetching fails, consider if the app should still start or crash.
|
| 78 |
-
# For now, it will print the error but continue to try to start the API.
|
| 79 |
else:
|
| 80 |
print("API Error: Firestore client not initialized. Cannot fetch documents from Firestore on startup.")
|
| 81 |
print("Ensure FIREBASE_CONFIG_BASE64 secret is correctly set in your Hugging Face Space secrets.")
|
|
@@ -94,7 +83,7 @@ app = FastAPI(
|
|
| 94 |
# Pydantic model for request body validation
|
| 95 |
class QueryRequest(BaseModel):
|
| 96 |
question: str
|
| 97 |
-
conversation_id: str = None
|
| 98 |
|
| 99 |
# --- API Endpoint Definition ---
|
| 100 |
@app.post("/compassia/")
|
|
@@ -103,9 +92,6 @@ async def compassia_endpoint(request: QueryRequest):
|
|
| 103 |
Answers a question about the indexed PDF documents using RAG, with conversational memory.
|
| 104 |
"""
|
| 105 |
try:
|
| 106 |
-
# Pass conversation_id to the answer_question function.
|
| 107 |
-
# The add_document logic is now handled internally by rag_system during initialization
|
| 108 |
-
# and when new documents are added (though here they are all added at startup).
|
| 109 |
answer = rag_system.answer_question(request.question, conversation_id=request.conversation_id)
|
| 110 |
return {"answer": answer, "conversation_id": request.conversation_id}
|
| 111 |
except Exception as e:
|
|
|
|
| 1 |
import sys
|
| 2 |
+
# CRITICAL: This block MUST run before any other import in app.py (only `import sys` may precede it).
|
| 3 |
+
# This ensures pysqlite3 is loaded and replaces the standard sqlite3 module
|
| 4 |
+
# in sys.modules before any other module (like chromadb) attempts to import sqlite3.
|
| 5 |
try:
|
| 6 |
import pysqlite3
|
| 7 |
sys.modules['sqlite3'] = pysqlite3
|
| 8 |
+
print("pysqlite3 successfully imported and set as default sqlite3 module.")
|
| 9 |
except ImportError:
|
| 10 |
+
print("ERROR: pysqlite3-binary could not be imported. ChromaDB will likely fail due to old sqlite3 version.")
|
| 11 |
+
# You might want to raise an exception here or ensure the app exits gracefully
|
| 12 |
+
# if pysqlite3 is a hard dependency for your deployment.
|
| 13 |
|
| 14 |
import os
|
| 15 |
from fastapi import FastAPI, HTTPException
|
|
|
|
| 19 |
import base64
|
| 20 |
from dotenv import load_dotenv
|
| 21 |
|
| 22 |
+
# Load environment variables (after pysqlite3 fix)
|
|
|
|
|
|
|
| 23 |
load_dotenv()
|
| 24 |
|
| 25 |
# Add the 'src' directory to the Python path
|
| 26 |
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
|
| 27 |
|
| 28 |
+
# Now import components from src.config and src.compassia
|
|
|
|
| 29 |
from src.config import CHROMADB_PERSIST_DIRECTORY, CHROMADB_COLLECTION_NAME
|
| 30 |
from src.compassia import DocumentRAG, embedding_model, initialize_firebase_client, FIRESTORE_DATABASE
|
| 31 |
|
| 32 |
# --- Firebase Initialization (Global, once per process) ---
|
|
|
|
|
|
|
| 33 |
initialize_firebase_client()
|
| 34 |
|
| 35 |
|
|
|
|
| 42 |
)
|
| 43 |
|
| 44 |
# --- Index documents on startup ---
|
|
|
|
|
|
|
|
|
|
| 45 |
print("--- FastAPI App Startup: Indexing Documents from Firestore ---")
|
| 46 |
if FIRESTORE_DATABASE:
|
| 47 |
try:
|
|
|
|
| 51 |
for doc in docs_ref:
|
| 52 |
doc_data = doc.to_dict()
|
| 53 |
if 'fileUrl' in doc_data:
|
|
|
|
|
|
|
| 54 |
pdf_url = doc_data['fileUrl']
|
| 55 |
+
display_name = doc_data.get('name_en', None)
|
| 56 |
documents_found_in_firestore.append({"url": pdf_url, "name": display_name})
|
| 57 |
else:
|
| 58 |
print(f"Skipping document ID: {doc.id} - 'fileUrl' field missing.")
|
|
|
|
| 65 |
except Exception as e:
|
| 66 |
print(f"API Error: Error fetching documents from Firestore: {e}")
|
| 67 |
print("Please ensure your Firestore database is accessible and the service account key (FIREBASE_CONFIG_BASE64 secret) is correct.")
|
|
|
|
|
|
|
| 68 |
else:
|
| 69 |
print("API Error: Firestore client not initialized. Cannot fetch documents from Firestore on startup.")
|
| 70 |
print("Ensure FIREBASE_CONFIG_BASE64 secret is correctly set in your Hugging Face Space secrets.")
|
|
|
|
| 83 |
# Pydantic model for request body validation
|
| 84 |
class QueryRequest(BaseModel):
|
| 85 |
question: str
|
| 86 |
+
conversation_id: str = None
|
| 87 |
|
| 88 |
# --- API Endpoint Definition ---
|
| 89 |
@app.post("/compassia/")
|
|
|
|
| 92 |
Answers a question about the indexed PDF documents using RAG, with conversational memory.
|
| 93 |
"""
|
| 94 |
try:
|
|
|
|
|
|
|
|
|
|
| 95 |
answer = rag_system.answer_question(request.question, conversation_id=request.conversation_id)
|
| 96 |
return {"answer": answer, "conversation_id": request.conversation_id}
|
| 97 |
except Exception as e:
|