IW2025 committed on
Commit
ed9eeda
·
verified ·
1 Parent(s): 0ca62f4

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -63
app.py CHANGED
@@ -83,9 +83,13 @@ class CurriculumChatbot:
83
  # Create QA prompt template
84
  qa_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
85
 
86
- You are a helpful AI programming tutor. Answer questions about programming concepts clearly and educationally. If the question is about curriculum content, use the provided context. If not, provide a general programming answer.
87
 
88
- Make sure to check the curriculum content and answer the question based on the curriculum content if possible. The RAG is not perfect, so if the question is not related to that slide, change it to be related to question.
 
 
 
 
89
 
90
  <|eot_id|><|start_header_id|>user<|end_header_id|>
91
 
@@ -177,7 +181,7 @@ Which slide is most relevant? Return only: "filename.pdf - Page X"
177
  # Calculate relevance score based on similarity
178
  curriculum_relevance_score = len([r for r in results if r.page_content.strip()])
179
 
180
- # Generate LLM answer
181
  if self.qa_chain:
182
  try:
183
  if curriculum_relevance_score > 0:
@@ -201,81 +205,74 @@ Which slide is most relevant? Return only: "filename.pdf - Page X"
201
 
202
  except Exception as e:
203
  print(f"Error generating answer: {e}")
 
204
  if curriculum_relevance_score > 0:
205
- answer = f"Based on the curriculum content:\n\n{results[0].page_content}"
206
  else:
207
- answer = "I'm sorry, I couldn't generate an answer at the moment."
208
  else:
209
- # Fallback without LLM
210
  if curriculum_relevance_score > 0:
211
- answer = f"Most relevant content from the curriculum:\n\n{results[0].page_content}"
212
  else:
213
- answer = "No relevant content found in the curriculum."
214
 
215
- # Get recommended slide
216
- recommended_slide = None
217
- if curriculum_relevance_score > 0 and self.slide_selection_chain:
218
- try:
219
- available_slides = self.get_available_slides_text()
220
- slide_response = self.slide_selection_chain.run(
221
- question=query,
222
- available_slides=available_slides
223
- )
 
 
 
 
 
224
 
225
- # Extract filename and page from response
226
- slide_response = slide_response.strip()
227
- if "<|eot_id|>" in slide_response:
228
- slide_response = slide_response.split("<|eot_id|>")[-1].strip()
229
 
230
- # Parse the response to get filename and page
231
- match = re.search(r'(.+\.pdf)\s*-\s*Page\s*(\d+)', slide_response)
232
- if match:
233
- filename = match.group(1)
234
- page_num = int(match.group(2))
235
- if filename in self.pdf_files:
236
- recommended_slide = self.get_pdf_page_image(self.pdf_files[filename], page_num)
237
- recommended_label = f"{filename} - Page {page_num}"
238
- else:
239
- # Fallback to most relevant result
240
- recommended_slide = self.get_pdf_page_image(
241
- self.pdf_files[results[0].metadata["filename"]],
242
- results[0].metadata["page_number"]
243
- )
244
- recommended_label = f"{results[0].metadata['filename']} - Page {results[0].metadata['page_number']}"
245
- else:
246
- # Fallback to most relevant result
247
- recommended_slide = self.get_pdf_page_image(
248
- self.pdf_files[results[0].metadata["filename"]],
249
- results[0].metadata["page_number"]
250
- )
251
- recommended_label = f"{results[0].metadata['filename']} - Page {results[0].metadata['page_number']}"
252
- except Exception as e:
253
- print(f"Error selecting slide: {e}")
254
- # Fallback to most relevant result
255
- if results:
256
- recommended_slide = self.get_pdf_page_image(
257
- self.pdf_files[results[0].metadata["filename"]],
258
- results[0].metadata["page_number"]
259
- )
260
- recommended_label = f"{results[0].metadata['filename']} - Page {results[0].metadata['page_number']}"
261
-
262
- # Get all slides for navigation
263
- all_slides = self.get_all_slides()
264
 
265
- return answer, recommended_slide, recommended_label if 'recommended_label' in locals() else None, all_slides
266
 
267
  # --- Gradio UI ---
268
  chatbot = CurriculumChatbot()
269
 
270
  def gradio_chat(query):
271
- answer, recommended_slide, recommended_label, all_slides = chatbot.chat(query)
272
 
273
- # Create gallery with recommended slide first, then all slides
274
- if recommended_slide and recommended_label:
275
- gallery_items = [(recommended_slide, f"📌 {recommended_label} (Recommended)")]
276
- gallery_items.extend(all_slides)
277
- else:
278
- gallery_items = all_slides
279
 
280
  return answer, gallery_items
281
 
 
83
  # Create QA prompt template
84
  qa_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
85
 
86
+ You are a helpful AI programming tutor. You MUST ALWAYS provide a clear, educational answer to every question. Never say you cannot answer or that you don't know.
87
 
88
+ If the question is about curriculum content, use the provided context to give a detailed, educational explanation. If the curriculum content doesn't perfectly match the question, adapt your answer to be relevant while using the curriculum information.
89
+
90
+ If the question is not covered in the curriculum, provide a comprehensive general programming answer based on your knowledge.
91
+
92
+ Always be educational, clear, and helpful.
93
 
94
  <|eot_id|><|start_header_id|>user<|end_header_id|>
95
 
 
181
  # Calculate relevance score based on similarity
182
  curriculum_relevance_score = len([r for r in results if r.page_content.strip()])
183
 
184
+ # ALWAYS generate LLM answer (never fallback to raw text)
185
  if self.qa_chain:
186
  try:
187
  if curriculum_relevance_score > 0:
 
205
 
206
  except Exception as e:
207
  print(f"Error generating answer: {e}")
208
+ # Even if LLM fails, try to provide a helpful response
209
  if curriculum_relevance_score > 0:
210
+ answer = f"Based on the curriculum content, here's what I found:\n\n{results[0].page_content}\n\n*Note: I'm having trouble generating a custom answer right now, but here's the relevant curriculum content.*"
211
  else:
212
+ answer = "I'm sorry, I couldn't generate an answer at the moment. Please try rephrasing your question."
213
  else:
214
+ # If no LLM available, still provide helpful response
215
  if curriculum_relevance_score > 0:
216
+ answer = f"Based on the curriculum content:\n\n{results[0].page_content}\n\n*Note: AI generation is not available, but here's the relevant curriculum content.*"
217
  else:
218
+ answer = "I couldn't find relevant content in the curriculum for this question. Please try rephrasing or ask about a different programming topic."
219
 
220
+ # Get the most relevant slide and its neighboring pages
221
+ relevant_slides = []
222
+ if curriculum_relevance_score > 0:
223
+ # Get the most relevant result
224
+ best_result = results[0]
225
+ filename = best_result.metadata["filename"]
226
+ page_number = best_result.metadata["page_number"]
227
+
228
+ # Get the specific PDF and its pages
229
+ if filename in self.pdf_files:
230
+ pdf_path = self.pdf_files[filename]
231
+ doc = fitz.open(pdf_path)
232
+ total_pages = len(doc)
233
+ doc.close()
234
 
235
+ # Get the target page and neighboring pages (2 before, 2 after)
236
+ start_page = max(1, page_number - 2)
237
+ end_page = min(total_pages, page_number + 2)
 
238
 
239
+ for page_num in range(start_page, end_page + 1):
240
+ img = self.get_pdf_page_image(pdf_path, page_num)
241
+ if img:
242
+ if page_num == page_number:
243
+ # Highlight the most relevant page
244
+ label = f"📌 {filename} - Page {page_num} (Most Relevant)"
245
+ else:
246
+ label = f"{filename} - Page {page_num}"
247
+ relevant_slides.append((img, label))
248
+
249
+ recommended_slide = relevant_slides[0][0] if relevant_slides else None
250
+ recommended_label = relevant_slides[0][1] if relevant_slides else None
251
+ else:
252
+ # Fallback if filename not found
253
+ recommended_slide = None
254
+ recommended_label = None
255
+ else:
256
+ # If no curriculum content, show a few slides from different PDFs
257
+ relevant_slides = []
258
+ for filename, pages in list(self.pdf_pages.items())[:3]: # Show first 3 PDFs
259
+ for page_num in list(pages.keys())[:2]: # Show first 2 pages of each
260
+ img = self.get_pdf_page_image(self.pdf_files[filename], page_num)
261
+ if img:
262
+ relevant_slides.append((img, f"{filename} - Page {page_num}"))
263
+ recommended_slide = relevant_slides[0][0] if relevant_slides else None
264
+ recommended_label = relevant_slides[0][1] if relevant_slides else None
 
 
 
 
 
 
 
 
265
 
266
+ return answer, recommended_slide, recommended_label, relevant_slides
267
 
268
  # --- Gradio UI ---
269
  chatbot = CurriculumChatbot()
270
 
271
  def gradio_chat(query):
272
+ answer, recommended_slide, recommended_label, relevant_slides = chatbot.chat(query)
273
 
274
+ # Use the relevant slides (specific PDF with neighboring pages)
275
+ gallery_items = relevant_slides if relevant_slides else []
 
 
 
 
276
 
277
  return answer, gallery_items
278