pratikshahp committed on
Commit
bad09fd
·
verified ·
1 Parent(s): 74ef341

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -55
app.py CHANGED
@@ -4,7 +4,7 @@ from github import Github
4
  from langchain_community.vectorstores import Chroma
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
- from langchain.llms import OpenAI
8
  from dotenv import load_dotenv
9
 
10
  # Load environment variables
@@ -12,71 +12,91 @@ load_dotenv()
12
  openai_api_key = os.getenv("OPENAI_API_KEY")
13
 
14
# Function to fetch repository data from GitHub
def fetch_github_repo_data(git_repo, github_token):
    """Walk the repository tree and concatenate every UTF-8 text file into one string."""
    try:
        g = Github(github_token)
        repo = g.get_repo(git_repo)
        contents = repo.get_contents("")
        repo_data = ""
        while contents:
            file_content = contents.pop(0)
            if file_content.type == "dir":
                # Descend into the directory: queue its entries for the walk.
                contents.extend(repo.get_contents(file_content.path))
                continue
            file_data = repo.get_contents(file_content.path).decoded_content
            try:
                text = file_data.decode("utf-8")
            except UnicodeDecodeError:
                # Skip non-text files
                continue
            repo_data += f"\n\nFile: {file_content.path}\n{text}"
        return repo_data
    except Exception as e:
        st.error(f"Error fetching GitHub repository data: {e}")
        return None
 
38
# Function to perform RAG using OpenAI and Chroma
def perform_rag(repo_data, prompt):
    """Embed the repository text, retrieve the most relevant chunk, and generate an answer."""
    try:
        if not repo_data:
            st.warning("No repository data found or error occurred.")
            return None

        # Create embeddings
        embeddings = HuggingFaceEmbeddings()

        # Chunk the repository text for embedding.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=20,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = splitter.create_documents([repo_data])

        # Store chunks in ChromaDB and persist to disk.
        persist_directory = 'github_repo_embeddings'
        vectordb = Chroma.from_documents(
            documents=chunks, embedding=embeddings, persist_directory=persist_directory
        )
        vectordb.persist()  # Persist ChromaDB

        # Re-open the persisted database for retrieval.
        vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

        # Retrieve the chunk most similar to the prompt.
        docs = vectordb.similarity_search(prompt)
        if not docs:
            st.warning("No relevant documents found.")
            return None
        text = docs[0].page_content

        # Generate an answer with the retrieved chunk as context.
        llm = OpenAI(api_key=openai_api_key, model="gpt-4o-mini", temperature=0.7, max_tokens=500)
        question_with_context = f"Context: {text}\n\nQuestion: {prompt}\n\nAnswer:"
        response = llm.generate([question_with_context])
        return response
    except Exception as e:
        st.error(f"Error performing RAG: {e}")
        return None
@@ -84,33 +104,28 @@ def perform_rag(repo_data, prompt):
84
# Streamlit application
def main():
    """Streamlit UI: collect a token and repository, then chat about the repository."""
    st.title("Chat with GitHub Repository")
    st.caption("This app allows you to chat with a GitHub Repo using OpenAI and ChromaDB")

    # Credentials and target repository from the user.
    github_token = st.text_input("Enter your GitHub Token", type="password")
    git_repo = st.text_input("Enter the GitHub Repo (owner/repo)", type="default")

    # Nothing to do until both inputs are provided.
    if not (github_token and git_repo):
        return

    repo_data = fetch_github_repo_data(git_repo, github_token)
    if not repo_data:
        st.error(f"Failed to fetch data for {git_repo}. Please check the repository name and your token's permissions.")
        return

    st.success(f"Added {git_repo} to knowledge base!")

    # Ask a question about the repository and answer it via RAG.
    prompt = st.text_input("Ask any question about the GitHub Repo")
    if prompt:
        answer = perform_rag(repo_data, prompt)
        if answer:
            st.subheader("Generated Answer:")
            st.write(answer)

if __name__ == "__main__":
    main()
 
4
  from langchain_community.vectorstores import Chroma
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ from openai import OpenAI
8
  from dotenv import load_dotenv
9
 
10
  # Load environment variables
 
12
  openai_api_key = os.getenv("OPENAI_API_KEY")
13
 
14
# Function to fetch repository data from GitHub
def fetch_github_repo_data(repo_name, github_token):
    """Fetch all text content from a GitHub repository.

    Walks the repository tree iteratively, concatenating every
    UTF-8-decodable file into a single string, each prefixed with its path.

    Args:
        repo_name: Repository in "owner/repo" form.
        github_token: GitHub personal access token for API authentication.

    Returns:
        The concatenated repository text (empty string if no text files),
        or None on any API error (the error is shown via st.error).
    """
    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        contents = repo.get_contents("")

        # Collect pieces and join once at the end: repeated `+=` on an
        # ever-growing string is quadratic in total repository size.
        pieces = []

        while contents:
            file_content = contents.pop(0)
            if file_content.type == "dir":
                contents.extend(repo.get_contents(file_content.path))
            else:
                try:
                    # decoded_content is fetched lazily by PyGithub, so a
                    # second repo.get_contents() call per file is unnecessary.
                    text = file_content.decoded_content.decode("utf-8")
                    pieces.append(f"\n\nFile: {file_content.path}\n{text}")
                except UnicodeDecodeError:
                    # Skip binary / non-UTF-8 files
                    continue

        return "".join(pieces)
    except Exception as e:
        st.error(f"Error fetching GitHub repository data: {e}")
        return None
40
 
41
# Function to generate a response using OpenAI
def generate_response(context, question):
    """Generate an answer to *question* grounded in *context* using OpenAI chat.

    Args:
        context: Retrieved repository text used to ground the answer.
        question: The user's question.

    Returns:
        The model's answer as a stripped string, or None on any API error
        (the error is shown via st.error).
    """
    try:
        # OpenAI is imported at module level; the previous function-level
        # re-import was redundant.
        client = OpenAI(api_key=openai_api_key)
        messages = [
            {"role": "system", "content": "You are an assistant that answers questions based on repository content."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"},
        ]
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            max_tokens=150,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        st.error(f"Error generating response: {e}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
# Function to perform RAG using OpenAI and Chroma
def perform_rag(repo_data, question):
    """Perform retrieval-augmented generation using ChromaDB and OpenAI.

    Embeds the repository text, retrieves the chunks most similar to
    *question*, and asks the LLM to answer from that context.

    Args:
        repo_data: Full repository text from fetch_github_repo_data().
        question: The user's question.

    Returns:
        The generated answer string, or None when input is empty, nothing is
        retrieved, or an error occurs (a warning/error is shown in the UI).
    """
    try:
        if not repo_data:
            st.warning("Repository data is empty.")
            return None

        # Create embeddings
        embeddings = HuggingFaceEmbeddings()

        # Split text into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=20, length_function=len
        )
        chunks = text_splitter.create_documents([repo_data])

        # Build the vector store in memory. The previous on-disk
        # persist_directory was never cleared between runs, so chunks from
        # earlier repositories leaked into retrieval for later ones; and
        # since the index is rebuilt on every call anyway, the
        # persist-then-reload round trip bought nothing.
        vectordb = Chroma.from_documents(documents=chunks, embedding=embeddings)

        # Perform retrieval using Chroma
        docs = vectordb.similarity_search(question)
        if not docs:
            st.warning("No relevant documents found.")
            return None

        # Use all retrieved chunks (not just the top one) as context.
        context = "\n\n".join(doc.page_content for doc in docs)
        return generate_response(context, question)

    except Exception as e:
        st.error(f"Error performing RAG: {e}")
        return None
 
104
# Streamlit application
def main():
    """Streamlit entry point: collect credentials, index a repo, answer questions."""
    st.title("Chat with GitHub Repository")
    st.caption("This app allows you to interact with a GitHub repository using OpenAI and ChromaDB.")

    # Get user inputs
    github_token = st.text_input("Enter your GitHub Token", type="password")
    git_repo = st.text_input("Enter the GitHub Repo (owner/repo)")

    if github_token and git_repo:
        # Streamlit reruns this script on every widget interaction, so the
        # previous code re-walked the entire GitHub tree on every keystroke
        # in the question box. Cache the fetched repository in session state,
        # keyed on (repo, token) so changing either triggers a refetch.
        cache_key = (git_repo, github_token)
        if st.session_state.get("repo_key") != cache_key:
            st.session_state["repo_key"] = cache_key
            st.session_state["repo_data"] = fetch_github_repo_data(git_repo, github_token)
        repo_data = st.session_state["repo_data"]

        if repo_data:
            st.success(f"Successfully added {git_repo} to the knowledge base!")

            question = st.text_input("Ask any question about the repository")

            if question:
                answer = perform_rag(repo_data, question)

                if answer:
                    st.subheader("Generated Answer:")
                    st.write(answer)
        else:
            st.error("Failed to fetch repository data. Ensure the repository name and token are correct.")

if __name__ == "__main__":
    main()