nehajiya8 committed on
Commit
1c1597c
·
verified ·
1 Parent(s): 56c91eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -40
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import requests
2
  import chromadb
 
3
  import gradio as gr
4
  import tempfile
5
  from utils.github_fetcher import GitHubRepoFetcher
@@ -143,7 +144,7 @@ def answer_question(repo_content, question, chat_history):
143
  return "Please load a valid repository first. " + (repo_content or "")
144
 
145
  llm = ChatOpenAI(api_key=OPENAI_API_KEY, temperature=0)
146
- embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
147
 
148
  text_splitter = RecursiveCharacterTextSplitter(
149
  chunk_size=1000,
@@ -167,26 +168,42 @@ def answer_question(repo_content, question, chat_history):
167
  docs = [Document(page_content=current_context)]
168
  splits = text_splitter.split_documents(docs)
169
 
170
- # Create vector store with explicit client settings
171
- # Use a temporary directory for Chroma persistence
172
  with tempfile.TemporaryDirectory() as temp_persist_dir:
173
- vectorstore = Chroma.from_documents(
174
- documents=splits,
175
- embedding=embeddings,
176
- client_settings=chromadb.config.Settings(
177
- chroma_db_impl="duckdb+parquet",
178
- persist_directory=temp_persist_dir,
179
- anonymized_telemetry=False
180
- )
181
  )
182
-
183
- retriever = vectorstore.as_retriever(
184
- search_kwargs={"k": 5}
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  )
186
-
187
- # Include chat history and repository content in the prompt
188
- chat_context = format_chat_history(chat_history) if chat_history else ""
189
- system_message = """You are a helpful assistant that explains code repositories.
 
 
 
 
 
 
190
  Answer questions based on the provided repository content and chat history.
191
  Repository Structure:
192
  {context}
@@ -202,29 +219,27 @@ Important Instructions:
202
  Current Question: {input}
203
  Please provide a clear, structured explanation focusing on the specific parts of the repository mentioned in the question.
204
  """
205
-
206
- prompt = ChatPromptTemplate.from_messages([
207
- ("system", system_message),
208
- ("human", "{input}")
209
- ])
210
-
211
- # Create and execute chain
212
- document_chain = create_stuff_documents_chain(
213
- llm,
214
- prompt,
215
- document_variable_name="context",
216
- )
217
- retrieval_chain = create_retrieval_chain(retriever, document_chain)
218
-
219
- result = retrieval_chain.invoke({
220
- "input": question,
221
- "chat_history": chat_context
222
- })
223
-
224
- if "answer" not in result:
225
- return "I apologize, but I couldn't process the repository content properly. Please try loading the repository again."
226
 
227
- return result["answer"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  except Exception as e:
229
  print(f"Error in answer_question: {str(e)}") # Debug log
230
  return f"Error processing question: {str(e)}"
 
1
  import requests
2
  import chromadb
3
+ from chromadb.config import Settings
4
  import gradio as gr
5
  import tempfile
6
  from utils.github_fetcher import GitHubRepoFetcher
 
144
  return "Please load a valid repository first. " + (repo_content or "")
145
 
146
  llm = ChatOpenAI(api_key=OPENAI_API_KEY, temperature=0)
147
+ embeddings_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
148
 
149
  text_splitter = RecursiveCharacterTextSplitter(
150
  chunk_size=1000,
 
168
  docs = [Document(page_content=current_context)]
169
  splits = text_splitter.split_documents(docs)
170
 
171
+ # Set up Chroma with new client architecture
 
172
  with tempfile.TemporaryDirectory() as temp_persist_dir:
173
+ client = chromadb.PersistentClient(path=temp_persist_dir)
174
+
175
+ # Create collection
176
+ collection = client.create_collection(
177
+ name="repo_content",
178
+ metadata={"hnsw:space": "cosine"}
 
 
179
  )
180
+
181
+ # Add documents to collection
182
+ for i, doc in enumerate(splits):
183
+ embedding = embeddings_model.embed_query(doc.page_content)
184
+ collection.add(
185
+ documents=[doc.page_content],
186
+ ids=[f"doc_{i}"],
187
+ embeddings=[embedding]
188
+ )
189
+
190
+ # Get relevant documents for the question
191
+ query_embedding = embeddings_model.embed_query(question)
192
+ results = collection.query(
193
+ query_embeddings=[query_embedding],
194
+ n_results=5,
195
+ include=["documents", "distances"]
196
  )
197
+
198
+ # Convert results to documents for the chain
199
+ retrieved_docs = [
200
+ Document(page_content=doc)
201
+ for doc in results['documents'][0]
202
+ ]
203
+
204
+ # Include chat history and repository content in the prompt
205
+ chat_context = format_chat_history(chat_history) if chat_history else ""
206
+ system_message = """You are a helpful assistant that explains code repositories.
207
  Answer questions based on the provided repository content and chat history.
208
  Repository Structure:
209
  {context}
 
219
  Current Question: {input}
220
  Please provide a clear, structured explanation focusing on the specific parts of the repository mentioned in the question.
221
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
+ prompt = ChatPromptTemplate.from_messages([
224
+ ("system", system_message),
225
+ ("human", "{input}")
226
+ ])
227
+
228
+ # Create and execute chain with retrieved documents
229
+ chain = create_stuff_documents_chain(
230
+ llm,
231
+ prompt,
232
+ document_variable_name="context"
233
+ )
234
+
235
+ response = chain.invoke({
236
+ "input": question,
237
+ "context": retrieved_docs,
238
+ "chat_history": chat_context
239
+ })
240
+
241
+ return response["answer"]
242
+
243
  except Exception as e:
244
  print(f"Error in answer_question: {str(e)}") # Debug log
245
  return f"Error processing question: {str(e)}"