ahmadsanafarooq committed on
Commit
eccecc3
·
verified ·
1 Parent(s): ff82bb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -49
app.py CHANGED
@@ -13,11 +13,11 @@ import numpy as np
13
  from sklearn.feature_extraction.text import TfidfVectorizer
14
  from dotenv import load_dotenv
15
 
16
- # Logger Configuration
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
20
- # Simple TF-IDF Fallback Embeddings
21
  class SimpleEmbeddings:
22
  def __init__(self):
23
  self.vectorizer = TfidfVectorizer(max_features=384, stop_words='english')
@@ -34,7 +34,7 @@ class SimpleEmbeddings:
34
  return [0.0] * 384
35
  return self.vectorizer.transform([text]).toarray()[0].tolist()
36
 
37
- # RAG Assistant Class
38
  class RAGAssistant:
39
  def __init__(self, groq_api_key: str):
40
  self.groq_api_key = groq_api_key
@@ -58,7 +58,7 @@ class RAGAssistant:
58
  model_kwargs={'device': 'cpu'},
59
  encode_kwargs={'normalize_embeddings': False}
60
  )
61
- print(f"Successfully loaded HuggingFace model: {model_name}")
62
  return embeddings
63
  except Exception as e:
64
  print(f"Failed to load {model_name}: {e}")
@@ -79,7 +79,7 @@ class RAGAssistant:
79
  collection_name="code_documentation"
80
  )
81
  except Exception as e:
82
- logger.error(f"Error initializing vector stores: {str(e)}")
83
 
84
  def load_documents(self, files: List[str], assistant_type: str) -> str:
85
  try:
@@ -88,19 +88,24 @@ class RAGAssistant:
88
 
89
  for file_path in files:
90
  print(f"Trying to load: {file_path}")
 
91
  try:
92
  if file_path.lower().endswith('.pdf'):
93
  loader = PyPDFLoader(file_path)
94
  else:
95
  loader = TextLoader(file_path, encoding='utf-8')
 
96
  docs = loader.load()
 
 
 
97
  documents.extend(docs)
98
  except Exception as e:
99
- print(f"Error loading {file_path}: {e}")
100
  continue
101
 
102
  if not documents:
103
- return "No documents could be loaded. Please check your files."
104
 
105
  chunks = self.text_splitter.split_documents(documents)
106
  print(f"Total chunks created: {len(chunks)}")
@@ -115,16 +120,16 @@ class RAGAssistant:
115
  self.code_vectorstore.add_documents(chunks)
116
  self.code_vectorstore.persist()
117
 
118
- return f"Successfully loaded {len(chunks)} chunks from {len(documents)} documents into {assistant_type} assistant."
119
 
120
  except Exception as e:
121
  logger.error(f"Error loading documents: {str(e)}")
122
- return f"Error loading documents: {str(e)}"
123
 
124
  def get_learning_tutor_response(self, question: str) -> str:
125
  try:
126
  if not self.learning_vectorstore:
127
- return "Please upload some learning materials first."
128
 
129
  qa_chain = RetrievalQA.from_chain_type(
130
  llm=self.llm,
@@ -133,14 +138,7 @@ class RAGAssistant:
133
  return_source_documents=True
134
  )
135
 
136
- learning_prompt = f"""
137
- You are an AI learning assistant helping students understand academic concepts.
138
- Based on the provided materials, answer the student's question:
139
-
140
- {question}
141
- """
142
-
143
- result = qa_chain({"query": learning_prompt})
144
  response = result['result']
145
 
146
  if result.get('source_documents'):
@@ -150,15 +148,14 @@ class RAGAssistant:
150
  response += f"- {Path(source).name}\n"
151
 
152
  return response
153
-
154
  except Exception as e:
155
  logger.error(f"Error in learning tutor: {str(e)}")
156
- return f"Error generating response: {str(e)}"
157
 
158
  def get_code_helper_response(self, question: str) -> str:
159
  try:
160
  if not self.code_vectorstore:
161
- return "Please upload some code documentation first."
162
 
163
  qa_chain = RetrievalQA.from_chain_type(
164
  llm=self.llm,
@@ -167,14 +164,7 @@ class RAGAssistant:
167
  return_source_documents=True
168
  )
169
 
170
- code_prompt = f"""
171
- You are a code documentation assistant helping developers with APIs and codebases.
172
- Based on the uploaded documentation, answer this question:
173
-
174
- {question}
175
- """
176
-
177
- result = qa_chain({"query": code_prompt})
178
  response = result['result']
179
 
180
  if result.get('source_documents'):
@@ -184,23 +174,22 @@ class RAGAssistant:
184
  response += f"- {Path(source).name}\n"
185
 
186
  return response
187
-
188
  except Exception as e:
189
  logger.error(f"Error in code helper: {str(e)}")
190
- return f"Error generating response: {str(e)}"
191
 
192
- # Gradio UI Interface
193
  def create_gradio_interface(assistant: RAGAssistant):
194
  def upload_learning_files(files):
195
  if not files:
196
  return "No files uploaded."
197
- file_paths = [f.path for f in files]
198
  return assistant.load_documents(file_paths, "learning")
199
 
200
  def upload_code_files(files):
201
  if not files:
202
  return "No files uploaded."
203
- file_paths = [f.path for f in files]
204
  return assistant.load_documents(file_paths, "code")
205
 
206
  def learning_chat(message, history):
@@ -218,57 +207,53 @@ def create_gradio_interface(assistant: RAGAssistant):
218
  return history, ""
219
 
220
  with gr.Blocks(title="RAG-Based Learning & Code Assistant", theme=gr.themes.Soft()) as demo:
221
- gr.Markdown("# RAG-Based Learning & Code Assistant")
222
- gr.Markdown("Upload documents and get smart, personalized answers.")
223
 
224
  with gr.Tabs():
225
- with gr.TabItem(" Learning Tutor"):
226
- gr.Markdown("### Upload lecture notes or textbooks below:")
227
  with gr.Row():
228
  with gr.Column(scale=1):
229
  learning_files = gr.File(label="Upload Materials", file_count="multiple", file_types=[".pdf", ".txt", ".md"])
230
  learning_upload_btn = gr.Button("Upload", variant="primary")
231
  learning_status = gr.Textbox(label="Upload Status", interactive=False)
232
  with gr.Column(scale=2):
233
- learning_chatbot = gr.Chatbot(label="Tutor Chat", height=400)
234
- learning_input = gr.Textbox(label="Ask a question", placeholder="e.g., What is machine learning?")
235
  learning_submit = gr.Button("Ask", variant="primary")
236
 
237
  learning_upload_btn.click(upload_learning_files, inputs=[learning_files], outputs=[learning_status])
238
  learning_submit.click(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
239
  learning_input.submit(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
240
 
241
- with gr.TabItem("Code Documentation Helper"):
242
- gr.Markdown("### Upload code docs or API guides below:")
243
  with gr.Row():
244
  with gr.Column(scale=1):
245
- code_files = gr.File(label="Upload Docs", file_count="multiple", file_types=[".pdf", ".txt", ".md", ".py", ".js", ".json"])
246
  code_upload_btn = gr.Button("Upload", variant="primary")
247
  code_status = gr.Textbox(label="Upload Status", interactive=False)
248
  with gr.Column(scale=2):
249
  code_chatbot = gr.Chatbot(label="Code Chat", height=400)
250
- code_input = gr.Textbox(label="Ask about the codebase", placeholder="e.g., How does login work?")
251
  code_submit = gr.Button("Ask", variant="primary")
252
 
253
  code_upload_btn.click(upload_code_files, inputs=[code_files], outputs=[code_status])
254
  code_submit.click(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
255
  code_input.submit(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
256
 
257
- gr.Markdown("---")
258
- gr.Markdown("Built with using LangChain, ChromaDB, and Groq API")
259
 
260
  return demo
261
 
262
- # Main Function
263
  def main():
264
  load_dotenv()
265
  groq_api_key = os.getenv("GROQ_API_KEY")
266
  if not groq_api_key:
267
- print("Set your GROQ_API_KEY in the .env file or environment.")
268
  return
269
  assistant = RAGAssistant(groq_api_key)
270
  demo = create_gradio_interface(assistant)
271
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
272
 
273
  if __name__ == "__main__":
274
- main()
 
13
  from sklearn.feature_extraction.text import TfidfVectorizer
14
  from dotenv import load_dotenv
15
 
16
+ # Logger Configuration
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
20
+ # Simple TF-IDF Fallback Embeddings
21
  class SimpleEmbeddings:
22
  def __init__(self):
23
  self.vectorizer = TfidfVectorizer(max_features=384, stop_words='english')
 
34
  return [0.0] * 384
35
  return self.vectorizer.transform([text]).toarray()[0].tolist()
36
 
37
+ # RAG Assistant Class
38
  class RAGAssistant:
39
  def __init__(self, groq_api_key: str):
40
  self.groq_api_key = groq_api_key
 
58
  model_kwargs={'device': 'cpu'},
59
  encode_kwargs={'normalize_embeddings': False}
60
  )
61
+ print(f"Loaded HuggingFace model: {model_name}")
62
  return embeddings
63
  except Exception as e:
64
  print(f"Failed to load {model_name}: {e}")
 
79
  collection_name="code_documentation"
80
  )
81
  except Exception as e:
82
+ logger.error(f"Vector store init error: {str(e)}")
83
 
84
  def load_documents(self, files: List[str], assistant_type: str) -> str:
85
  try:
 
88
 
89
  for file_path in files:
90
  print(f"Trying to load: {file_path}")
91
+ print("File exists?", os.path.exists(file_path))
92
  try:
93
  if file_path.lower().endswith('.pdf'):
94
  loader = PyPDFLoader(file_path)
95
  else:
96
  loader = TextLoader(file_path, encoding='utf-8')
97
+
98
  docs = loader.load()
99
+ print(f"Loaded {len(docs)} docs from: {file_path}")
100
+ for doc in docs[:1]:
101
+ print("Preview:", doc.page_content[:100])
102
  documents.extend(docs)
103
  except Exception as e:
104
+ logger.error(f"Error loading {file_path}: {e}")
105
  continue
106
 
107
  if not documents:
108
+ return "❌ No documents could be loaded. Please check your file type or content."
109
 
110
  chunks = self.text_splitter.split_documents(documents)
111
  print(f"Total chunks created: {len(chunks)}")
 
120
  self.code_vectorstore.add_documents(chunks)
121
  self.code_vectorstore.persist()
122
 
123
+ return f"βœ… Loaded {len(chunks)} chunks from {len(documents)} documents into {assistant_type} assistant."
124
 
125
  except Exception as e:
126
  logger.error(f"Error loading documents: {str(e)}")
127
+ return f"❌ Error loading documents: {str(e)}"
128
 
129
  def get_learning_tutor_response(self, question: str) -> str:
130
  try:
131
  if not self.learning_vectorstore:
132
+ return "⚠️ Please upload some learning materials first."
133
 
134
  qa_chain = RetrievalQA.from_chain_type(
135
  llm=self.llm,
 
138
  return_source_documents=True
139
  )
140
 
141
+ result = qa_chain({"query": question})
 
 
 
 
 
 
 
142
  response = result['result']
143
 
144
  if result.get('source_documents'):
 
148
  response += f"- {Path(source).name}\n"
149
 
150
  return response
 
151
  except Exception as e:
152
  logger.error(f"Error in learning tutor: {str(e)}")
153
+ return f"❌ Error: {str(e)}"
154
 
155
  def get_code_helper_response(self, question: str) -> str:
156
  try:
157
  if not self.code_vectorstore:
158
+ return "⚠️ Please upload some code documentation first."
159
 
160
  qa_chain = RetrievalQA.from_chain_type(
161
  llm=self.llm,
 
164
  return_source_documents=True
165
  )
166
 
167
+ result = qa_chain({"query": question})
 
 
 
 
 
 
 
168
  response = result['result']
169
 
170
  if result.get('source_documents'):
 
174
  response += f"- {Path(source).name}\n"
175
 
176
  return response
 
177
  except Exception as e:
178
  logger.error(f"Error in code helper: {str(e)}")
179
+ return f"❌ Error: {str(e)}"
180
 
181
+ # Gradio UI
182
  def create_gradio_interface(assistant: RAGAssistant):
183
  def upload_learning_files(files):
184
  if not files:
185
  return "No files uploaded."
186
+ file_paths = [f.name for f in files] # ✅ FIXED HERE
187
  return assistant.load_documents(file_paths, "learning")
188
 
189
  def upload_code_files(files):
190
  if not files:
191
  return "No files uploaded."
192
+ file_paths = [f.name for f in files] # ✅ FIXED HERE
193
  return assistant.load_documents(file_paths, "code")
194
 
195
  def learning_chat(message, history):
 
207
  return history, ""
208
 
209
  with gr.Blocks(title="RAG-Based Learning & Code Assistant", theme=gr.themes.Soft()) as demo:
210
+ gr.Markdown("# πŸ“š RAG-Based Learning & Code Assistant")
 
211
 
212
  with gr.Tabs():
213
+ with gr.TabItem("πŸ“˜ Learning Tutor"):
 
214
  with gr.Row():
215
  with gr.Column(scale=1):
216
  learning_files = gr.File(label="Upload Materials", file_count="multiple", file_types=[".pdf", ".txt", ".md"])
217
  learning_upload_btn = gr.Button("Upload", variant="primary")
218
  learning_status = gr.Textbox(label="Upload Status", interactive=False)
219
  with gr.Column(scale=2):
220
+ learning_chatbot = gr.Chatbot(label="Learning Chat", height=400)
221
+ learning_input = gr.Textbox(label="Ask your question", placeholder="e.g. What is overfitting?")
222
  learning_submit = gr.Button("Ask", variant="primary")
223
 
224
  learning_upload_btn.click(upload_learning_files, inputs=[learning_files], outputs=[learning_status])
225
  learning_submit.click(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
226
  learning_input.submit(learning_chat, inputs=[learning_input, learning_chatbot], outputs=[learning_chatbot, learning_input])
227
 
228
+ with gr.TabItem("πŸ’» Code Helper"):
 
229
  with gr.Row():
230
  with gr.Column(scale=1):
231
+ code_files = gr.File(label="Upload Docs", file_count="multiple", file_types=[".pdf", ".txt", ".md", ".py", ".json"])
232
  code_upload_btn = gr.Button("Upload", variant="primary")
233
  code_status = gr.Textbox(label="Upload Status", interactive=False)
234
  with gr.Column(scale=2):
235
  code_chatbot = gr.Chatbot(label="Code Chat", height=400)
236
+ code_input = gr.Textbox(label="Ask question", placeholder="e.g. How to call this API?")
237
  code_submit = gr.Button("Ask", variant="primary")
238
 
239
  code_upload_btn.click(upload_code_files, inputs=[code_files], outputs=[code_status])
240
  code_submit.click(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
241
  code_input.submit(code_chat, inputs=[code_input, code_chatbot], outputs=[code_chatbot, code_input])
242
 
243
+ gr.Markdown("Built with ❀️ using LangChain, ChromaDB, and Groq")
 
244
 
245
  return demo
246
 
247
+ # Main
248
  def main():
249
  load_dotenv()
250
  groq_api_key = os.getenv("GROQ_API_KEY")
251
  if not groq_api_key:
252
+ print("❌ Please set your GROQ_API_KEY in .env or environment.")
253
  return
254
  assistant = RAGAssistant(groq_api_key)
255
  demo = create_gradio_interface(assistant)
256
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
257
 
258
  if __name__ == "__main__":
259
+ main()