Spaces:

nehajiya8
/

github-repo-explorer

Sleeping

App Files Files Community

nehajiya8 committed on Jan 30, 2025

Commit

1c1597c

verified ·

1 Parent(s): 56c91eb

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -40

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import requests
 import chromadb
 import gradio as gr
 import tempfile
 from utils.github_fetcher import GitHubRepoFetcher
@@ -143,7 +144,7 @@ def answer_question(repo_content, question, chat_history):
             return "Please load a valid repository first. " + (repo_content or "")
         llm = ChatOpenAI(api_key=OPENAI_API_KEY, temperature=0)
-        embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=1000,
@@ -167,26 +168,42 @@ def answer_question(repo_content, question, chat_history):
         docs = [Document(page_content=current_context)]
         splits = text_splitter.split_documents(docs)
-        # Create vector store with explicit client settings
-        # Use a temporary directory for Chroma persistence
         with tempfile.TemporaryDirectory() as temp_persist_dir:
-            vectorstore = Chroma.from_documents(
-                documents=splits,
-                embedding=embeddings,
-                client_settings=chromadb.config.Settings(
-                    chroma_db_impl="duckdb+parquet",
-                    persist_directory=temp_persist_dir,
-                    anonymized_telemetry=False
-                )
             )
-            retriever = vectorstore.as_retriever(
-                search_kwargs={"k": 5}
             )
-        # Include chat history and repository content in the prompt
-        chat_context = format_chat_history(chat_history) if chat_history else ""
-        system_message = """You are a helpful assistant that explains code repositories.
 Answer questions based on the provided repository content and chat history.
 Repository Structure:
 {context}
@@ -202,29 +219,27 @@ Important Instructions:
 Current Question: {input}
 Please provide a clear, structured explanation focusing on the specific parts of the repository mentioned in the question.
 """
-        prompt = ChatPromptTemplate.from_messages([
-            ("system", system_message),
-            ("human", "{input}")
-        ])
-        # Create and execute chain
-        document_chain = create_stuff_documents_chain(
-            llm,
-            prompt,
-            document_variable_name="context",
-        )
-        retrieval_chain = create_retrieval_chain(retriever, document_chain)
-        result = retrieval_chain.invoke({
-            "input": question,
-            "chat_history": chat_context
-        })
-        if "answer" not in result:
-            return "I apologize, but I couldn't process the repository content properly. Please try loading the repository again."
-        return result["answer"]
     except Exception as e:
         print(f"Error in answer_question: {str(e)}")  # Debug log
         return f"Error processing question: {str(e)}"

 import requests
 import chromadb
+from chromadb.config import Settings
 import gradio as gr
 import tempfile
 from utils.github_fetcher import GitHubRepoFetcher
             return "Please load a valid repository first. " + (repo_content or "")
         llm = ChatOpenAI(api_key=OPENAI_API_KEY, temperature=0)
+        embeddings_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=1000,
         docs = [Document(page_content=current_context)]
         splits = text_splitter.split_documents(docs)
+        # Set up Chroma with new client architecture
         with tempfile.TemporaryDirectory() as temp_persist_dir:
+            client = chromadb.PersistentClient(path=temp_persist_dir)
+            # Create collection
+            collection = client.create_collection(
+                name="repo_content",
+                metadata={"hnsw:space": "cosine"}
             )
+            # Add documents to collection
+            for i, doc in enumerate(splits):
+                embedding = embeddings_model.embed_query(doc.page_content)
+                collection.add(
+                    documents=[doc.page_content],
+                    ids=[f"doc_{i}"],
+                    embeddings=[embedding]
+                )
+            # Get relevant documents for the question
+            query_embedding = embeddings_model.embed_query(question)
+            results = collection.query(
+                query_embeddings=[query_embedding],
+                n_results=5,
+                include=["documents", "distances"]
             )
+            # Convert results to documents for the chain
+            retrieved_docs = [
+                Document(page_content=doc)
+                for doc in results['documents'][0]
+            ]
+            # Include chat history and repository content in the prompt
+            chat_context = format_chat_history(chat_history) if chat_history else ""
+            system_message = """You are a helpful assistant that explains code repositories.
 Answer questions based on the provided repository content and chat history.
 Repository Structure:
 {context}
 Current Question: {input}
 Please provide a clear, structured explanation focusing on the specific parts of the repository mentioned in the question.
 """
+            prompt = ChatPromptTemplate.from_messages([
+                ("system", system_message),
+                ("human", "{input}")
+            ])
+            # Create and execute chain with retrieved documents
+            chain = create_stuff_documents_chain(
+                llm,
+                prompt,
+                document_variable_name="context"
+            )
+            response = chain.invoke({
+                "input": question,
+                "context": retrieved_docs,
+                "chat_history": chat_context
+            })
+            return response["answer"]
     except Exception as e:
         print(f"Error in answer_question: {str(e)}")  # Debug log
         return f"Error processing question: {str(e)}"