ahmadsanafarooq committed on
Commit
db840b8
·
verified ·
1 Parent(s): eccecc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -47
app.py CHANGED
@@ -1,23 +1,23 @@
1
  import os
2
  import gradio as gr
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- from langchain_community.vectorstores import Chroma
5
- from langchain.chains import RetrievalQA
6
- from langchain_groq import ChatGroq
7
- from langchain_community.document_loaders import TextLoader, PyPDFLoader
8
- from langchain.schema import Document
9
  from pathlib import Path
10
  from typing import List
11
  import logging
 
12
  import numpy as np
13
  from sklearn.feature_extraction.text import TfidfVectorizer
14
- from dotenv import load_dotenv
 
 
 
 
 
15
 
16
- # Logger Configuration
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
20
- # Simple TF-IDF Fallback Embeddings
21
  class SimpleEmbeddings:
22
  def __init__(self):
23
  self.vectorizer = TfidfVectorizer(max_features=384, stop_words='english')
@@ -34,7 +34,7 @@ class SimpleEmbeddings:
34
  return [0.0] * 384
35
  return self.vectorizer.transform([text]).toarray()[0].tolist()
36
 
37
- # RAG Assistant Class
38
  class RAGAssistant:
39
  def __init__(self, groq_api_key: str):
40
  self.groq_api_key = groq_api_key
@@ -58,7 +58,7 @@ class RAGAssistant:
58
  model_kwargs={'device': 'cpu'},
59
  encode_kwargs={'normalize_embeddings': False}
60
  )
61
- print(f"Loaded HuggingFace model: {model_name}")
62
  return embeddings
63
  except Exception as e:
64
  print(f"Failed to load {model_name}: {e}")
@@ -79,7 +79,7 @@ class RAGAssistant:
79
  collection_name="code_documentation"
80
  )
81
  except Exception as e:
82
- logger.error(f"Vector store init error: {str(e)}")
83
 
84
  def load_documents(self, files: List[str], assistant_type: str) -> str:
85
  try:
@@ -88,27 +88,22 @@ class RAGAssistant:
88
 
89
  for file_path in files:
90
  print(f"Trying to load: {file_path}")
91
- print("File exists?", os.path.exists(file_path))
92
  try:
93
  if file_path.lower().endswith('.pdf'):
94
- loader = PyPDFLoader(file_path)
95
  else:
96
  loader = TextLoader(file_path, encoding='utf-8')
97
-
98
  docs = loader.load()
99
- print(f"Loaded {len(docs)} docs from: {file_path}")
100
- for doc in docs[:1]:
101
- print("Preview:", doc.page_content[:100])
102
  documents.extend(docs)
103
  except Exception as e:
104
- logger.error(f"Error loading {file_path}: {e}")
105
  continue
106
 
107
  if not documents:
108
  return "❌ No documents could be loaded. Please check your file type or content."
109
 
110
  chunks = self.text_splitter.split_documents(documents)
111
- print(f"Total chunks created: {len(chunks)}")
112
 
113
  for chunk in chunks:
114
  chunk.metadata['assistant_type'] = assistant_type
@@ -124,12 +119,12 @@ class RAGAssistant:
124
 
125
  except Exception as e:
126
  logger.error(f"Error loading documents: {str(e)}")
127
- return f"Error loading documents: {str(e)}"
128
 
129
  def get_learning_tutor_response(self, question: str) -> str:
130
  try:
131
  if not self.learning_vectorstore:
132
- return "⚠️ Please upload some learning materials first."
133
 
134
  qa_chain = RetrievalQA.from_chain_type(
135
  llm=self.llm,
@@ -138,7 +133,10 @@ class RAGAssistant:
138
  return_source_documents=True
139
  )
140
 
141
- result = qa_chain({"query": question})
 
 
 
142
  response = result['result']
143
 
144
  if result.get('source_documents'):
@@ -148,14 +146,15 @@ class RAGAssistant:
148
  response += f"- {Path(source).name}\n"
149
 
150
  return response
 
151
  except Exception as e:
152
- logger.error(f"Error in learning tutor: {str(e)}")
153
  return f"❌ Error: {str(e)}"
154
 
155
  def get_code_helper_response(self, question: str) -> str:
156
  try:
157
  if not self.code_vectorstore:
158
- return "⚠️ Please upload some code documentation first."
159
 
160
  qa_chain = RetrievalQA.from_chain_type(
161
  llm=self.llm,
@@ -164,7 +163,10 @@ class RAGAssistant:
164
  return_source_documents=True
165
  )
166
 
167
- result = qa_chain({"query": question})
 
 
 
168
  response = result['result']
169
 
170
  if result.get('source_documents'):
@@ -174,22 +176,23 @@ class RAGAssistant:
174
  response += f"- {Path(source).name}\n"
175
 
176
  return response
 
177
  except Exception as e:
178
- logger.error(f"Error in code helper: {str(e)}")
179
  return f"❌ Error: {str(e)}"
180
 
181
- # Gradio UI
182
  def create_gradio_interface(assistant: RAGAssistant):
183
  def upload_learning_files(files):
184
  if not files:
185
  return "No files uploaded."
186
- file_paths = [f.name for f in files] # ✅ FIXED HERE
187
  return assistant.load_documents(file_paths, "learning")
188
 
189
  def upload_code_files(files):
190
  if not files:
191
  return "No files uploaded."
192
- file_paths = [f.name for f in files] # ✅ FIXED HERE
193
  return assistant.load_documents(file_paths, "code")
194
 
195
  def learning_chat(message, history):
@@ -207,53 +210,53 @@ def create_gradio_interface(assistant: RAGAssistant):
207
  return history, ""
208
 
209
  with gr.Blocks(title="RAG-Based Learning & Code Assistant", theme=gr.themes.Soft()) as demo:
210
- gr.Markdown("# 📚 RAG-Based Learning & Code Assistant")
 
211
 
212
  with gr.Tabs():
213
- with gr.TabItem("📘 Learning Tutor"):
214
  with gr.Row():
215
  with gr.Column(scale=1):
216
  learning_files = gr.File(label="Upload Materials", file_count="multiple", file_types=[".pdf", ".txt", ".md"])
217
- learning_upload_btn = gr.Button("Upload", variant="primary")
218
  learning_status = gr.Textbox(label="Upload Status", interactive=False)
219
  with gr.Column(scale=2):
220
- learning_chatbot = gr.Chatbot(label="Learning Chat", height=400)
221
- learning_input = gr.Textbox(label="Ask your question", placeholder="e.g. What is overfitting?")
222
- learning_submit = gr.Button("Ask", variant="primary")
223
-
224
  learning_upload_btn.click(upload_learning_files, inputs=[learning_files], outputs=[learning_status])
225
  learning_submit.click(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
226
  learning_input.submit(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
227
 
228
- with gr.TabItem("💻 Code Helper"):
229
  with gr.Row():
230
  with gr.Column(scale=1):
231
- code_files = gr.File(label="Upload Docs", file_count="multiple", file_types=[".pdf", ".txt", ".md", ".py", ".json"])
232
- code_upload_btn = gr.Button("Upload", variant="primary")
233
  code_status = gr.Textbox(label="Upload Status", interactive=False)
234
  with gr.Column(scale=2):
235
  code_chatbot = gr.Chatbot(label="Code Chat", height=400)
236
- code_input = gr.Textbox(label="Ask question", placeholder="e.g. How to call this API?")
237
- code_submit = gr.Button("Ask", variant="primary")
238
-
239
  code_upload_btn.click(upload_code_files, inputs=[code_files], outputs=[code_status])
240
  code_submit.click(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
241
  code_input.submit(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
242
 
243
- gr.Markdown("Built with ❤️ using LangChain, ChromaDB, and Groq")
 
244
 
245
  return demo
246
 
247
- # Main
248
  def main():
249
  load_dotenv()
250
  groq_api_key = os.getenv("GROQ_API_KEY")
251
  if not groq_api_key:
252
- print(" Please set your GROQ_API_KEY in .env or environment.")
253
  return
254
  assistant = RAGAssistant(groq_api_key)
255
  demo = create_gradio_interface(assistant)
256
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
257
 
258
  if __name__ == "__main__":
259
- main()
 
1
  import os
2
  import gradio as gr
 
 
 
 
 
 
3
  from pathlib import Path
4
  from typing import List
5
  import logging
6
+ from dotenv import load_dotenv
7
  import numpy as np
8
  from sklearn.feature_extraction.text import TfidfVectorizer
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain_community.vectorstores import Chroma
11
+ from langchain.chains import RetrievalQA
12
+ from langchain_groq import ChatGroq
13
+ from langchain.schema import Document
14
+ from langchain_community.document_loaders import TextLoader, UnstructuredPDFLoader
15
 
16
+ # ----------------- Logger Setup -----------------
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
20
+ # ----------------- Fallback Embeddings -----------------
21
  class SimpleEmbeddings:
22
  def __init__(self):
23
  self.vectorizer = TfidfVectorizer(max_features=384, stop_words='english')
 
34
  return [0.0] * 384
35
  return self.vectorizer.transform([text]).toarray()[0].tolist()
36
 
37
+ # ----------------- RAG Assistant Class -----------------
38
  class RAGAssistant:
39
  def __init__(self, groq_api_key: str):
40
  self.groq_api_key = groq_api_key
 
58
  model_kwargs={'device': 'cpu'},
59
  encode_kwargs={'normalize_embeddings': False}
60
  )
61
+ print(f"Loaded: {model_name}")
62
  return embeddings
63
  except Exception as e:
64
  print(f"Failed to load {model_name}: {e}")
 
79
  collection_name="code_documentation"
80
  )
81
  except Exception as e:
82
+ logger.error(f"Error initializing vector stores: {str(e)}")
83
 
84
  def load_documents(self, files: List[str], assistant_type: str) -> str:
85
  try:
 
88
 
89
  for file_path in files:
90
  print(f"Trying to load: {file_path}")
 
91
  try:
92
  if file_path.lower().endswith('.pdf'):
93
+ loader = UnstructuredPDFLoader(file_path)
94
  else:
95
  loader = TextLoader(file_path, encoding='utf-8')
 
96
  docs = loader.load()
 
 
 
97
  documents.extend(docs)
98
  except Exception as e:
99
+ print(f"Error loading {file_path}: {e}")
100
  continue
101
 
102
  if not documents:
103
  return "❌ No documents could be loaded. Please check your file type or content."
104
 
105
  chunks = self.text_splitter.split_documents(documents)
106
+ print(f"Chunks created: {len(chunks)}")
107
 
108
  for chunk in chunks:
109
  chunk.metadata['assistant_type'] = assistant_type
 
119
 
120
  except Exception as e:
121
  logger.error(f"Error loading documents: {str(e)}")
122
+ return f"Error loading documents: {str(e)}"
123
 
124
  def get_learning_tutor_response(self, question: str) -> str:
125
  try:
126
  if not self.learning_vectorstore:
127
+ return "⚠️ Upload learning materials first."
128
 
129
  qa_chain = RetrievalQA.from_chain_type(
130
  llm=self.llm,
 
133
  return_source_documents=True
134
  )
135
 
136
+ prompt = f"""You are an educational assistant. Help the student understand the topic:
137
+ Question: {question}"""
138
+
139
+ result = qa_chain({"query": prompt})
140
  response = result['result']
141
 
142
  if result.get('source_documents'):
 
146
  response += f"- {Path(source).name}\n"
147
 
148
  return response
149
+
150
  except Exception as e:
151
+ logger.error(f"Learning tutor error: {str(e)}")
152
  return f"❌ Error: {str(e)}"
153
 
154
  def get_code_helper_response(self, question: str) -> str:
155
  try:
156
  if not self.code_vectorstore:
157
+ return "⚠️ Upload code documentation first."
158
 
159
  qa_chain = RetrievalQA.from_chain_type(
160
  llm=self.llm,
 
163
  return_source_documents=True
164
  )
165
 
166
+ prompt = f"""You are a code documentation assistant. Help the developer understand the code:
167
+ Question: {question}"""
168
+
169
+ result = qa_chain({"query": prompt})
170
  response = result['result']
171
 
172
  if result.get('source_documents'):
 
176
  response += f"- {Path(source).name}\n"
177
 
178
  return response
179
+
180
  except Exception as e:
181
+ logger.error(f"Code helper error: {str(e)}")
182
  return f"❌ Error: {str(e)}"
183
 
184
+ # ----------------- Gradio Interface -----------------
185
  def create_gradio_interface(assistant: RAGAssistant):
186
  def upload_learning_files(files):
187
  if not files:
188
  return "No files uploaded."
189
+ file_paths = [f.path for f in files]
190
  return assistant.load_documents(file_paths, "learning")
191
 
192
  def upload_code_files(files):
193
  if not files:
194
  return "No files uploaded."
195
+ file_paths = [f.path for f in files]
196
  return assistant.load_documents(file_paths, "code")
197
 
198
  def learning_chat(message, history):
 
210
  return history, ""
211
 
212
  with gr.Blocks(title="RAG-Based Learning & Code Assistant", theme=gr.themes.Soft()) as demo:
213
+ gr.Markdown("# 🎓 RAG-Based Learning & Code Assistant")
214
+ gr.Markdown("Upload your documents and ask intelligent questions.")
215
 
216
  with gr.Tabs():
217
+ with gr.TabItem("📚 Learning Tutor"):
218
  with gr.Row():
219
  with gr.Column(scale=1):
220
  learning_files = gr.File(label="Upload Materials", file_count="multiple", file_types=[".pdf", ".txt", ".md"])
221
+ learning_upload_btn = gr.Button("Upload")
222
  learning_status = gr.Textbox(label="Upload Status", interactive=False)
223
  with gr.Column(scale=2):
224
+ learning_chatbot = gr.Chatbot(label="Tutor Chat", height=400)
225
+ learning_input = gr.Textbox(label="Ask a question", placeholder="What is supervised learning?")
226
+ learning_submit = gr.Button("Ask")
 
227
  learning_upload_btn.click(upload_learning_files, inputs=[learning_files], outputs=[learning_status])
228
  learning_submit.click(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
229
  learning_input.submit(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
230
 
231
+ with gr.TabItem("💻 Code Documentation Helper"):
232
  with gr.Row():
233
  with gr.Column(scale=1):
234
+ code_files = gr.File(label="Upload Code Docs", file_count="multiple", file_types=[".pdf", ".txt", ".md", ".py", ".js", ".json"])
235
+ code_upload_btn = gr.Button("Upload")
236
  code_status = gr.Textbox(label="Upload Status", interactive=False)
237
  with gr.Column(scale=2):
238
  code_chatbot = gr.Chatbot(label="Code Chat", height=400)
239
+ code_input = gr.Textbox(label="Ask about the codebase", placeholder="How does this API authenticate users?")
240
+ code_submit = gr.Button("Ask")
 
241
  code_upload_btn.click(upload_code_files, inputs=[code_files], outputs=[code_status])
242
  code_submit.click(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
243
  code_input.submit(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
244
 
245
+ gr.Markdown("---")
246
+ gr.Markdown("🔧 Powered by LangChain, ChromaDB, and Groq")
247
 
248
  return demo
249
 
250
+ # ----------------- Main -----------------
251
  def main():
252
  load_dotenv()
253
  groq_api_key = os.getenv("GROQ_API_KEY")
254
  if not groq_api_key:
255
+ print("Set your GROQ_API_KEY in the .env file.")
256
  return
257
  assistant = RAGAssistant(groq_api_key)
258
  demo = create_gradio_interface(assistant)
259
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
260
 
261
  if __name__ == "__main__":
262
+ main()