dembasowmr committed on
Commit
1c2a47a
·
1 Parent(s): 539d3e9

Small updates on app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -25
app.py CHANGED
@@ -1,14 +1,15 @@
1
  import sys
2
- # IMPORTANT: These lines MUST be at the very top of app.py
3
- # They ensure that any subsequent import of 'sqlite3' (even indirectly by chromadb)
4
- # will use the version provided by pysqlite3-binary.
5
  try:
6
  import pysqlite3
7
  sys.modules['sqlite3'] = pysqlite3
 
8
  except ImportError:
9
- # This should not happen if pysqlite3-binary is correctly installed via requirements.txt
10
- print("Warning: pysqlite3-binary could not be imported. ChromaDB might encounter SQLite version issues.")
11
- pass
12
 
13
  import os
14
  from fastapi import FastAPI, HTTPException
@@ -18,22 +19,17 @@ import json
18
  import base64
19
  from dotenv import load_dotenv
20
 
21
- # Load environment variables at the very top
22
- # This ensures that variables like FIREBASE_CONFIG_BASE64 are available
23
- # before other modules (like config.py) attempt to read them.
24
  load_dotenv()
25
 
26
  # Add the 'src' directory to the Python path
27
  sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
28
 
29
- # Import components from the new modular structure, specifically from src.compassia
30
- # Ensure config is imported after dotenv for FIREBASE_CONFIG_BASE64
31
  from src.config import CHROMADB_PERSIST_DIRECTORY, CHROMADB_COLLECTION_NAME
32
  from src.compassia import DocumentRAG, embedding_model, initialize_firebase_client, FIRESTORE_DATABASE
33
 
34
  # --- Firebase Initialization (Global, once per process) ---
35
- # Initialize Firebase Admin SDK using a secret from Hugging Face Spaces
36
- # This function is now called directly from app.py startup.
37
  initialize_firebase_client()
38
 
39
 
@@ -46,9 +42,6 @@ rag_system = DocumentRAG(
46
  )
47
 
48
  # --- Index documents on startup ---
49
- # This loop will run when the FastAPI app first starts.
50
- # It uses ChromaDB's persistence, so documents already indexed will be skipped.
51
- # Now fetches document URLs directly from Firestore using FIRESTORE_DATABASE
52
  print("--- FastAPI App Startup: Indexing Documents from Firestore ---")
53
  if FIRESTORE_DATABASE:
54
  try:
@@ -58,10 +51,8 @@ if FIRESTORE_DATABASE:
58
  for doc in docs_ref:
59
  doc_data = doc.to_dict()
60
  if 'fileUrl' in doc_data:
61
- # IMPORTANT: Pass the full URL and optional name to add_document.
62
- # The add_document method in compassia.py now handles PDF filtering.
63
  pdf_url = doc_data['fileUrl']
64
- display_name = doc_data.get('name_en', None) # Pass None if not present, add_document handles basename
65
  documents_found_in_firestore.append({"url": pdf_url, "name": display_name})
66
  else:
67
  print(f"Skipping document ID: {doc.id} - 'fileUrl' field missing.")
@@ -74,8 +65,6 @@ if FIRESTORE_DATABASE:
74
  except Exception as e:
75
  print(f"API Error: Error fetching documents from Firestore: {e}")
76
  print("Please ensure your Firestore database is accessible and the service account key (FIREBASE_CONFIG_BASE64 secret) is correct.")
77
- # If document fetching fails, consider if the app should still start or crash.
78
- # For now, it will print the error but continue to try to start the API.
79
  else:
80
  print("API Error: Firestore client not initialized. Cannot fetch documents from Firestore on startup.")
81
  print("Ensure FIREBASE_CONFIG_BASE64 secret is correctly set in your Hugging Face Space secrets.")
@@ -94,7 +83,7 @@ app = FastAPI(
94
  # Pydantic model for request body validation
95
  class QueryRequest(BaseModel):
96
  question: str
97
- conversation_id: str = None # Optional for new conversations
98
 
99
  # --- API Endpoint Definition ---
100
  @app.post("/compassia/")
@@ -103,9 +92,6 @@ async def compassia_endpoint(request: QueryRequest):
103
  Answers a question about the indexed PDF documents using RAG, with conversational memory.
104
  """
105
  try:
106
- # Pass conversation_id to the answer_question function.
107
- # The add_document logic is now handled internally by rag_system during initialization
108
- # and when new documents are added (though here they are all added at startup).
109
  answer = rag_system.answer_question(request.question, conversation_id=request.conversation_id)
110
  return {"answer": answer, "conversation_id": request.conversation_id}
111
  except Exception as e:
 
1
  import sys
2
+ # CRITICAL: These lines MUST be the absolute first executable lines in app.py.
3
+ # This ensures pysqlite3 is loaded and replaces the standard sqlite3 module
4
+ # in sys.modules before any other module (like chromadb) attempts to import sqlite3.
5
  try:
6
  import pysqlite3
7
  sys.modules['sqlite3'] = pysqlite3
8
+ print("pysqlite3 successfully imported and set as default sqlite3 module.")
9
  except ImportError:
10
+ print("ERROR: pysqlite3-binary could not be imported. ChromaDB will likely fail due to old sqlite3 version.")
11
+ # You might want to raise an exception here or ensure the app exits gracefully
12
+ # if pysqlite3 is a hard dependency for your deployment.
13
 
14
  import os
15
  from fastapi import FastAPI, HTTPException
 
19
  import base64
20
  from dotenv import load_dotenv
21
 
22
+ # Load environment variables (after pysqlite3 fix)
 
 
23
  load_dotenv()
24
 
25
  # Add the 'src' directory to the Python path
26
  sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
27
 
28
+ # Now import components from src.compassia
 
29
  from src.config import CHROMADB_PERSIST_DIRECTORY, CHROMADB_COLLECTION_NAME
30
  from src.compassia import DocumentRAG, embedding_model, initialize_firebase_client, FIRESTORE_DATABASE
31
 
32
  # --- Firebase Initialization (Global, once per process) ---
 
 
33
  initialize_firebase_client()
34
 
35
 
 
42
  )
43
 
44
  # --- Index documents on startup ---
 
 
 
45
  print("--- FastAPI App Startup: Indexing Documents from Firestore ---")
46
  if FIRESTORE_DATABASE:
47
  try:
 
51
  for doc in docs_ref:
52
  doc_data = doc.to_dict()
53
  if 'fileUrl' in doc_data:
 
 
54
  pdf_url = doc_data['fileUrl']
55
+ display_name = doc_data.get('name_en', None)
56
  documents_found_in_firestore.append({"url": pdf_url, "name": display_name})
57
  else:
58
  print(f"Skipping document ID: {doc.id} - 'fileUrl' field missing.")
 
65
  except Exception as e:
66
  print(f"API Error: Error fetching documents from Firestore: {e}")
67
  print("Please ensure your Firestore database is accessible and the service account key (FIREBASE_CONFIG_BASE64 secret) is correct.")
 
 
68
  else:
69
  print("API Error: Firestore client not initialized. Cannot fetch documents from Firestore on startup.")
70
  print("Ensure FIREBASE_CONFIG_BASE64 secret is correctly set in your Hugging Face Space secrets.")
 
83
  # Pydantic model for request body validation
84
  class QueryRequest(BaseModel):
85
  question: str
86
+ conversation_id: str = None
87
 
88
  # --- API Endpoint Definition ---
89
  @app.post("/compassia/")
 
92
  Answers a question about the indexed PDF documents using RAG, with conversational memory.
93
  """
94
  try:
 
 
 
95
  answer = rag_system.answer_question(request.question, conversation_id=request.conversation_id)
96
  return {"answer": answer, "conversation_id": request.conversation_id}
97
  except Exception as e: