Deepanshu7284 committed on
Commit
b0de19a
·
1 Parent(s): a3114c9

Fix ChromaDB permissions with PersistentClient

Browse files
Files changed (1) hide show
  1. app.py +30 -18
app.py CHANGED
@@ -1,7 +1,6 @@
1
- # app.py
2
-
3
  import os
4
  import gradio as gr
 
5
 
6
  from dotenv import load_dotenv
7
  import requests
@@ -17,10 +16,11 @@ from langchain_core.output_parsers import StrOutputParser
17
  # --- DEPLOYMENT-ONLY FUNCTION ---
18
  def build_brain_if_needed():
19
  """Checks if the ChromaDB exists and builds it if it doesn't."""
20
- if not os.path.exists("./chroma_db"):
 
 
21
  print("Database not found. Building now... (This will run only once on the server's first startup)")
22
  from langchain_community.document_loaders import TextLoader
23
-
24
  from langchain.text_splitter import RecursiveCharacterTextSplitter
25
 
26
  loader = TextLoader('knowledge.txt', encoding='utf-8')
@@ -29,10 +29,15 @@ def build_brain_if_needed():
29
  docs = text_splitter.split_documents(documents)
30
  embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
31
 
 
 
 
 
32
  db = Chroma.from_documents(
33
- docs,
34
- embedding_function,
35
- persist_directory="./chroma_db"
 
36
  )
37
  print("Database built successfully.")
38
  else:
@@ -59,7 +64,16 @@ if not ELEVENLABS_VOICE_ID:
59
  # Load RAG chain
60
  def load_and_build_chain():
61
  embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
62
- vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embedding_function)
 
 
 
 
 
 
 
 
 
63
  retriever = vectorstore.as_retriever()
64
 
65
  persona_prompt_template = """
@@ -168,7 +182,8 @@ def process_user_turn(user_input, chat_history):
168
  return chat_history, audio_file
169
  except Exception as e:
170
  print(f"Processing Error: {e}")
171
- chat_history.append((user_input, "I'm terribly sorry, something went wrong."))
 
172
  return chat_history, None
173
 
174
  # Gradio UI
@@ -195,21 +210,18 @@ with gr.Blocks(css="""
195
 
196
  def handle_text_submission(message, history):
197
  history, audio = process_user_turn(message, history)
198
- return history, audio
199
 
200
  def handle_audio_submission(audio_file, history):
201
  if not audio_file:
202
- return history, None
203
  transcribed = transcribe_speech(audio_file)
204
  history, audio = process_user_turn(transcribed, history)
205
- return history, audio
206
-
207
- text_in.submit(handle_text_submission, [text_in, chatbot], [chatbot, audio_out])
208
- send_btn.click(handle_text_submission, [text_in, chatbot], [chatbot, audio_out])
209
- audio_in.stop_recording(handle_audio_submission, [audio_in, chatbot], [chatbot, audio_out])
210
 
211
- text_in.submit(lambda: "", None, text_in)
212
- send_btn.click(lambda: "", None, text_in)
 
213
 
214
  # Launch app
215
  demo.launch(server_name="0.0.0.0")
 
 
 
1
  import os
2
  import gradio as gr
3
+ import chromadb # Added import
4
 
5
  from dotenv import load_dotenv
6
  import requests
 
16
  # --- DEPLOYMENT-ONLY FUNCTION ---
17
  def build_brain_if_needed():
18
  """Checks if the ChromaDB exists and builds it if it doesn't."""
19
+ # Use an absolute path inside the container for consistency
20
+ db_path = "/app/chroma_db"
21
+ if not os.path.exists(db_path):
22
  print("Database not found. Building now... (This will run only once on the server's first startup)")
23
  from langchain_community.document_loaders import TextLoader
 
24
  from langchain.text_splitter import RecursiveCharacterTextSplitter
25
 
26
  loader = TextLoader('knowledge.txt', encoding='utf-8')
 
29
  docs = text_splitter.split_documents(documents)
30
  embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
31
 
32
+ # Explicitly create a persistent client pointing to the absolute path
33
+ persistent_client = chromadb.PersistentClient(path=db_path)
34
+
35
+ # Create the Chroma vector store using the client
36
  db = Chroma.from_documents(
37
+ client=persistent_client,
38
+ documents=docs,
39
+ embedding=embedding_function, # Correct parameter name is 'embedding'
40
+ collection_name="churchill_collection" # Good practice to name the collection
41
  )
42
  print("Database built successfully.")
43
  else:
 
64
  # Load RAG chain
65
  def load_and_build_chain():
66
  embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
67
+
68
+ # Use the same persistent client to load the existing DB
69
+ persistent_client = chromadb.PersistentClient(path="/app/chroma_db")
70
+
71
+ vectorstore = Chroma(
72
+ client=persistent_client,
73
+ embedding_function=embedding_function,
74
+ collection_name="churchill_collection" # Must use the same collection name
75
+ )
76
+
77
  retriever = vectorstore.as_retriever()
78
 
79
  persona_prompt_template = """
 
182
  return chat_history, audio_file
183
  except Exception as e:
184
  print(f"Processing Error: {e}")
185
+ chat_history.append({"role": "user", "content": user_input})
186
+ chat_history.append({"role": "assistant", "content": "I'm terribly sorry, something went wrong."})
187
  return chat_history, None
188
 
189
  # Gradio UI
 
210
 
211
  def handle_text_submission(message, history):
212
  history, audio = process_user_turn(message, history)
213
+ return history, audio, ""
214
 
215
  def handle_audio_submission(audio_file, history):
216
  if not audio_file:
217
+ return history, None, ""
218
  transcribed = transcribe_speech(audio_file)
219
  history, audio = process_user_turn(transcribed, history)
220
+ return history, audio, ""
 
 
 
 
221
 
222
+ text_in.submit(handle_text_submission, [text_in, chatbot], [chatbot, audio_out, text_in])
223
+ send_btn.click(handle_text_submission, [text_in, chatbot], [chatbot, audio_out, text_in])
224
+ audio_in.stop_recording(handle_audio_submission, [audio_in, chatbot], [chatbot, audio_out, text_in])
225
 
226
  # Launch app
227
  demo.launch(server_name="0.0.0.0")