pratikshahp commited on
Commit
a4838d8
·
verified ·
1 Parent(s): 951134a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import streamlit as st
from dotenv import load_dotenv
from github import Github
from langchain_community.embeddings import HuggingFaceEmbeddings
# Use the langchain_community path for consistency with the other imports;
# the legacy ``langchain.llms`` location is deprecated.
from langchain_community.llms import OpenAI
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load environment variables from a local .env file (if present) so the
# OpenAI key does not have to be exported in the shell.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
13
+
14
# Function to fetch repository data from GitHub
def fetch_github_repo_data(git_repo, github_token):
    """Walk every file in a GitHub repository and concatenate the text files.

    Args:
        git_repo: Repository identifier in "owner/repo" form.
        github_token: Personal access token used to authenticate with GitHub.

    Returns:
        One string containing "\\n\\nFile: <path>\\n<contents>" per decodable
        text file, or None if the repository could not be read.
    """
    try:
        g = Github(github_token)
        repo = g.get_repo(git_repo)
        contents = repo.get_contents("")
        parts = []  # collect chunks and join once -- avoids quadratic ``+=``
        while contents:
            file_content = contents.pop(0)
            if file_content.type == "dir":
                # Descend into subdirectories by queueing their entries.
                contents.extend(repo.get_contents(file_content.path))
            else:
                try:
                    # The listed ContentFile already exposes the blob via
                    # ``decoded_content``; re-fetching with repo.get_contents()
                    # (as before) doubled the number of API calls per file.
                    text = file_content.decoded_content.decode("utf-8")
                    parts.append(f"\n\nFile: {file_content.path}\n{text}")
                except UnicodeDecodeError:
                    # Skip binary / non-UTF-8 files
                    continue
        return "".join(parts)
    except Exception as e:
        # Boundary handler: surface auth, rate-limit, or not-found problems
        # in the UI instead of crashing the Streamlit script.
        st.error(f"Error fetching GitHub repository data: {e}")
        return None
37
+
38
# Function to perform RAG using OpenAI and Chroma
def perform_rag(repo_data, prompt):
    """Answer *prompt* with retrieval-augmented generation over *repo_data*.

    The repository text is chunked, embedded with HuggingFace embeddings, and
    stored in a persistent Chroma collection; the chunk most similar to the
    prompt is then supplied to the OpenAI model as context.

    Args:
        repo_data: Concatenated repository text (from fetch_github_repo_data).
        prompt: The user's question about the repository.

    Returns:
        The generated answer string, or None on failure / missing data.
    """
    try:
        # Guard clause instead of wrapping the whole body in ``if repo_data:``.
        if not repo_data:
            st.warning("No repository data found or error occurred.")
            return None

        # Create embeddings
        embeddings = HuggingFaceEmbeddings()

        # Split text into overlapping chunks so retrieval stays focused.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=20,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = text_splitter.create_documents([repo_data])

        # Store chunks in ChromaDB and persist them to disk. The freshly
        # built store is queried directly -- re-opening the persisted
        # directory immediately afterwards (as the original code did) was
        # redundant work.
        persist_directory = 'github_repo_embeddings'
        vectordb = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_directory=persist_directory,
        )
        vectordb.persist()  # Persist ChromaDB

        # Perform retrieval using Chroma
        docs = vectordb.similarity_search(prompt)
        if not docs:
            st.warning("No relevant documents found.")
            return None
        context = docs[0].page_content

        # Perform generation using OpenAI. ``invoke`` takes one prompt string
        # and returns a string; the previous ``llm.generate(str)`` call was a
        # bug -- ``generate`` expects a *list* of prompts.
        llm = OpenAI(api_key=openai_api_key, model="gpt-4", temperature=0.7, max_tokens=500)
        question_with_context = f"Context: {context}\n\nQuestion: {prompt}\n\nAnswer:"
        return llm.invoke(question_with_context)

    except Exception as e:
        # Boundary handler: report embedding/vector-store/LLM failures in the UI.
        st.error(f"Error performing RAG: {e}")
        return None
83
+
84
# Streamlit application
def main():
    """Render the Streamlit UI for chatting with a GitHub repository."""
    st.title("Chat with GitHub Repository 💬")
    st.caption("This app allows you to chat with a GitHub Repo using OpenAI and ChromaDB")

    # Credentials and target repository supplied by the user.
    github_token = st.text_input("Enter your GitHub Token", type="password")
    git_repo = st.text_input("Enter the GitHub Repo (owner/repo)", type="default")

    # Nothing to do until both inputs are present.
    if not (github_token and git_repo):
        return

    # Fetch GitHub repository data for the knowledge base.
    repo_data = fetch_github_repo_data(git_repo, github_token)
    if not repo_data:
        st.error(f"Failed to fetch data for {git_repo}. Please check the repository name and your token's permissions.")
        return

    st.success(f"Added {git_repo} to knowledge base!")

    # Ask a question about the repository; Streamlit reruns on each submit.
    prompt = st.text_input("Ask any question about the GitHub Repo")
    if not prompt:
        return

    # Chat with the repository.
    answer = perform_rag(repo_data, prompt)
    if answer:
        st.subheader("Generated Answer:")
        st.write(answer)


if __name__ == "__main__":
    main()