Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import requests
|
| 2 |
import chromadb
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
import tempfile
|
| 5 |
from utils.github_fetcher import GitHubRepoFetcher
|
|
@@ -143,7 +144,7 @@ def answer_question(repo_content, question, chat_history):
|
|
| 143 |
return "Please load a valid repository first. " + (repo_content or "")
|
| 144 |
|
| 145 |
llm = ChatOpenAI(api_key=OPENAI_API_KEY, temperature=0)
|
| 146 |
-
|
| 147 |
|
| 148 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 149 |
chunk_size=1000,
|
|
@@ -167,26 +168,42 @@ def answer_question(repo_content, question, chat_history):
|
|
| 167 |
docs = [Document(page_content=current_context)]
|
| 168 |
splits = text_splitter.split_documents(docs)
|
| 169 |
|
| 170 |
-
#
|
| 171 |
-
# Use a temporary directory for Chroma persistence
|
| 172 |
with tempfile.TemporaryDirectory() as temp_persist_dir:
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
anonymized_telemetry=False
|
| 180 |
-
)
|
| 181 |
)
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
)
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
Answer questions based on the provided repository content and chat history.
|
| 191 |
Repository Structure:
|
| 192 |
{context}
|
|
@@ -202,29 +219,27 @@ Important Instructions:
|
|
| 202 |
Current Question: {input}
|
| 203 |
Please provide a clear, structured explanation focusing on the specific parts of the repository mentioned in the question.
|
| 204 |
"""
|
| 205 |
-
|
| 206 |
-
prompt = ChatPromptTemplate.from_messages([
|
| 207 |
-
("system", system_message),
|
| 208 |
-
("human", "{input}")
|
| 209 |
-
])
|
| 210 |
-
|
| 211 |
-
# Create and execute chain
|
| 212 |
-
document_chain = create_stuff_documents_chain(
|
| 213 |
-
llm,
|
| 214 |
-
prompt,
|
| 215 |
-
document_variable_name="context",
|
| 216 |
-
)
|
| 217 |
-
retrieval_chain = create_retrieval_chain(retriever, document_chain)
|
| 218 |
-
|
| 219 |
-
result = retrieval_chain.invoke({
|
| 220 |
-
"input": question,
|
| 221 |
-
"chat_history": chat_context
|
| 222 |
-
})
|
| 223 |
-
|
| 224 |
-
if "answer" not in result:
|
| 225 |
-
return "I apologize, but I couldn't process the repository content properly. Please try loading the repository again."
|
| 226 |
|
| 227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
except Exception as e:
|
| 229 |
print(f"Error in answer_question: {str(e)}") # Debug log
|
| 230 |
return f"Error processing question: {str(e)}"
|
|
|
|
| 1 |
import requests
|
| 2 |
import chromadb
|
| 3 |
+
from chromadb.config import Settings
|
| 4 |
import gradio as gr
|
| 5 |
import tempfile
|
| 6 |
from utils.github_fetcher import GitHubRepoFetcher
|
|
|
|
| 144 |
return "Please load a valid repository first. " + (repo_content or "")
|
| 145 |
|
| 146 |
llm = ChatOpenAI(api_key=OPENAI_API_KEY, temperature=0)
|
| 147 |
+
embeddings_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
|
| 148 |
|
| 149 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 150 |
chunk_size=1000,
|
|
|
|
| 168 |
docs = [Document(page_content=current_context)]
|
| 169 |
splits = text_splitter.split_documents(docs)
|
| 170 |
|
| 171 |
+
# Set up Chroma with new client architecture
|
|
|
|
| 172 |
with tempfile.TemporaryDirectory() as temp_persist_dir:
|
| 173 |
+
client = chromadb.PersistentClient(path=temp_persist_dir)
|
| 174 |
+
|
| 175 |
+
# Create collection
|
| 176 |
+
collection = client.create_collection(
|
| 177 |
+
name="repo_content",
|
| 178 |
+
metadata={"hnsw:space": "cosine"}
|
|
|
|
|
|
|
| 179 |
)
|
| 180 |
+
|
| 181 |
+
# Add documents to collection
|
| 182 |
+
for i, doc in enumerate(splits):
|
| 183 |
+
embedding = embeddings_model.embed_query(doc.page_content)
|
| 184 |
+
collection.add(
|
| 185 |
+
documents=[doc.page_content],
|
| 186 |
+
ids=[f"doc_{i}"],
|
| 187 |
+
embeddings=[embedding]
|
| 188 |
+
)
|
| 189 |
+
|
| 190 |
+
# Get relevant documents for the question
|
| 191 |
+
query_embedding = embeddings_model.embed_query(question)
|
| 192 |
+
results = collection.query(
|
| 193 |
+
query_embeddings=[query_embedding],
|
| 194 |
+
n_results=5,
|
| 195 |
+
include=["documents", "distances"]
|
| 196 |
)
|
| 197 |
+
|
| 198 |
+
# Convert results to documents for the chain
|
| 199 |
+
retrieved_docs = [
|
| 200 |
+
Document(page_content=doc)
|
| 201 |
+
for doc in results['documents'][0]
|
| 202 |
+
]
|
| 203 |
+
|
| 204 |
+
# Include chat history and repository content in the prompt
|
| 205 |
+
chat_context = format_chat_history(chat_history) if chat_history else ""
|
| 206 |
+
system_message = """You are a helpful assistant that explains code repositories.
|
| 207 |
Answer questions based on the provided repository content and chat history.
|
| 208 |
Repository Structure:
|
| 209 |
{context}
|
|
|
|
| 219 |
Current Question: {input}
|
| 220 |
Please provide a clear, structured explanation focusing on the specific parts of the repository mentioned in the question.
|
| 221 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
+
prompt = ChatPromptTemplate.from_messages([
|
| 224 |
+
("system", system_message),
|
| 225 |
+
("human", "{input}")
|
| 226 |
+
])
|
| 227 |
+
|
| 228 |
+
# Create and execute chain with retrieved documents
|
| 229 |
+
chain = create_stuff_documents_chain(
|
| 230 |
+
llm,
|
| 231 |
+
prompt,
|
| 232 |
+
document_variable_name="context"
|
| 233 |
+
)
|
| 234 |
+
|
| 235 |
+
response = chain.invoke({
|
| 236 |
+
"input": question,
|
| 237 |
+
"context": retrieved_docs,
|
| 238 |
+
"chat_history": chat_context
|
| 239 |
+
})
|
| 240 |
+
|
| 241 |
+
return response["answer"]
|
| 242 |
+
|
| 243 |
except Exception as e:
|
| 244 |
print(f"Error in answer_question: {str(e)}") # Debug log
|
| 245 |
return f"Error processing question: {str(e)}"
|