pratikshahp committed on
Commit
bad09fd
·
verified ·
1 Parent(s): 74ef341

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -55
app.py CHANGED
@@ -4,7 +4,7 @@ from github import Github
4
  from langchain_community.vectorstores import Chroma
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
- from langchain.llms import OpenAI
8
  from dotenv import load_dotenv
9
 
10
  # Load environment variables
@@ -12,71 +12,91 @@ load_dotenv()
12
  openai_api_key = os.getenv("OPENAI_API_KEY")
13
 
14
# Function to fetch repository data from GitHub
def fetch_github_repo_data(git_repo, github_token):
    """Walk the repository tree and concatenate every UTF-8 text file into one string."""
    try:
        g = Github(github_token)
        repo = g.get_repo(git_repo)
        contents = repo.get_contents("")
        repo_data = ""
        while contents:
            file_content = contents.pop(0)
            if file_content.type == "dir":
                # Descend into the directory: queue its entries for the walk.
                contents.extend(repo.get_contents(file_content.path))
                continue
            file_data = repo.get_contents(file_content.path).decoded_content
            try:
                text = file_data.decode("utf-8")
            except UnicodeDecodeError:
                # Skip non-text files
                continue
            repo_data += f"\n\nFile: {file_content.path}\n{text}"
        return repo_data
    except Exception as e:
        st.error(f"Error fetching GitHub repository data: {e}")
        return None
 
38
# Function to perform RAG using OpenAI and Chroma
def perform_rag(repo_data, prompt):
    """Embed the repository text, retrieve the most relevant chunk, and generate an answer."""
    try:
        if not repo_data:
            st.warning("No repository data found or error occurred.")
            return None

        # Create embeddings
        embeddings = HuggingFaceEmbeddings()

        # Chunk the repository text for embedding.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=20,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = splitter.create_documents([repo_data])

        # Store chunks in ChromaDB and persist to disk.
        persist_directory = 'github_repo_embeddings'
        vectordb = Chroma.from_documents(
            documents=chunks, embedding=embeddings, persist_directory=persist_directory
        )
        vectordb.persist()  # Persist ChromaDB

        # Re-open the persisted database for retrieval.
        vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

        # Retrieve the chunk most similar to the prompt.
        docs = vectordb.similarity_search(prompt)
        if not docs:
            st.warning("No relevant documents found.")
            return None
        text = docs[0].page_content

        # Generate an answer with the retrieved chunk as context.
        llm = OpenAI(api_key=openai_api_key, model="gpt-4o-mini", temperature=0.7, max_tokens=500)
        question_with_context = f"Context: {text}\n\nQuestion: {prompt}\n\nAnswer:"
        response = llm.generate([question_with_context])
        return response
    except Exception as e:
        st.error(f"Error performing RAG: {e}")
        return None
@@ -84,33 +104,28 @@ def perform_rag(repo_data, prompt):
84
# Streamlit application
def main():
    """Streamlit UI: collect a token and repository, then chat about the repository."""
    st.title("Chat with GitHub Repository")
    st.caption("This app allows you to chat with a GitHub Repo using OpenAI and ChromaDB")

    # Credentials and target repository from the user.
    github_token = st.text_input("Enter your GitHub Token", type="password")
    git_repo = st.text_input("Enter the GitHub Repo (owner/repo)", type="default")

    # Nothing to do until both inputs are provided.
    if not (github_token and git_repo):
        return

    repo_data = fetch_github_repo_data(git_repo, github_token)
    if not repo_data:
        st.error(f"Failed to fetch data for {git_repo}. Please check the repository name and your token's permissions.")
        return

    st.success(f"Added {git_repo} to knowledge base!")

    # Ask a question about the repository and answer it via RAG.
    prompt = st.text_input("Ask any question about the GitHub Repo")
    if prompt:
        answer = perform_rag(repo_data, prompt)
        if answer:
            st.subheader("Generated Answer:")
            st.write(answer)

if __name__ == "__main__":
    main()
 
4
  from langchain_community.vectorstores import Chroma
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ from openai import OpenAI
8
  from dotenv import load_dotenv
9
 
10
  # Load environment variables
 
12
  openai_api_key = os.getenv("OPENAI_API_KEY")
13
 
14
# Function to fetch repository data from GitHub
def fetch_github_repo_data(repo_name, github_token):
    """Fetch all text content from a GitHub repository.

    Walks the repository tree iteratively, concatenating every
    UTF-8-decodable file into a single string, each prefixed with its path.

    Args:
        repo_name: Repository in "owner/repo" form.
        github_token: GitHub personal access token for API authentication.

    Returns:
        The concatenated repository text (empty string if no text files),
        or None on any API error (the error is shown via st.error).
    """
    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        contents = repo.get_contents("")

        # Collect pieces and join once at the end: repeated `+=` on an
        # ever-growing string is quadratic in total repository size.
        pieces = []

        while contents:
            file_content = contents.pop(0)
            if file_content.type == "dir":
                contents.extend(repo.get_contents(file_content.path))
            else:
                try:
                    # decoded_content is fetched lazily by PyGithub, so a
                    # second repo.get_contents() call per file is unnecessary.
                    text = file_content.decoded_content.decode("utf-8")
                    pieces.append(f"\n\nFile: {file_content.path}\n{text}")
                except UnicodeDecodeError:
                    # Skip binary / non-UTF-8 files
                    continue

        return "".join(pieces)
    except Exception as e:
        st.error(f"Error fetching GitHub repository data: {e}")
        return None
40
 
41
# Function to generate a response using OpenAI
def generate_response(context, question):
    """Generate an answer to *question* grounded in *context* using OpenAI chat.

    Args:
        context: Retrieved repository text used to ground the answer.
        question: The user's question.

    Returns:
        The model's answer as a stripped string, or None on any API error
        (the error is shown via st.error).
    """
    try:
        # OpenAI is imported at module level; the previous function-level
        # re-import was redundant.
        client = OpenAI(api_key=openai_api_key)
        messages = [
            {"role": "system", "content": "You are an assistant that answers questions based on repository content."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"},
        ]
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            max_tokens=150,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        st.error(f"Error generating response: {e}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
# Function to perform RAG using OpenAI and Chroma
def perform_rag(repo_data, question):
    """Perform retrieval-augmented generation using ChromaDB and OpenAI.

    Embeds the repository text, retrieves the chunks most similar to
    *question*, and asks the LLM to answer from that context.

    Args:
        repo_data: Full repository text from fetch_github_repo_data().
        question: The user's question.

    Returns:
        The generated answer string, or None when input is empty, nothing is
        retrieved, or an error occurs (a warning/error is shown in the UI).
    """
    try:
        if not repo_data:
            st.warning("Repository data is empty.")
            return None

        # Create embeddings
        embeddings = HuggingFaceEmbeddings()

        # Split text into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=20, length_function=len
        )
        chunks = text_splitter.create_documents([repo_data])

        # Build the vector store in memory. The previous on-disk
        # persist_directory was never cleared between runs, so chunks from
        # earlier repositories leaked into retrieval for later ones; and
        # since the index is rebuilt on every call anyway, the
        # persist-then-reload round trip bought nothing.
        vectordb = Chroma.from_documents(documents=chunks, embedding=embeddings)

        # Perform retrieval using Chroma
        docs = vectordb.similarity_search(question)
        if not docs:
            st.warning("No relevant documents found.")
            return None

        # Use all retrieved chunks (not just the top one) as context.
        context = "\n\n".join(doc.page_content for doc in docs)
        return generate_response(context, question)

    except Exception as e:
        st.error(f"Error performing RAG: {e}")
        return None
 
104
# Streamlit application
def main():
    """Streamlit entry point: collect credentials, index a repo, answer questions."""
    st.title("Chat with GitHub Repository")
    st.caption("This app allows you to interact with a GitHub repository using OpenAI and ChromaDB.")

    # Get user inputs
    github_token = st.text_input("Enter your GitHub Token", type="password")
    git_repo = st.text_input("Enter the GitHub Repo (owner/repo)")

    if github_token and git_repo:
        # Streamlit reruns this script on every widget interaction, so the
        # previous code re-walked the entire GitHub tree on every keystroke
        # in the question box. Cache the fetched repository in session state,
        # keyed on (repo, token) so changing either triggers a refetch.
        cache_key = (git_repo, github_token)
        if st.session_state.get("repo_key") != cache_key:
            st.session_state["repo_key"] = cache_key
            st.session_state["repo_data"] = fetch_github_repo_data(git_repo, github_token)
        repo_data = st.session_state["repo_data"]

        if repo_data:
            st.success(f"Successfully added {git_repo} to the knowledge base!")

            question = st.text_input("Ask any question about the repository")

            if question:
                answer = perform_rag(repo_data, question)

                if answer:
                    st.subheader("Generated Answer:")
                    st.write(answer)
        else:
            st.error("Failed to fetch repository data. Ensure the repository name and token are correct.")

if __name__ == "__main__":
    main()