Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -103,12 +103,11 @@ class CurriculumChatbot:
|
|
| 103 |
self.llm = HuggingFacePipeline(pipeline=pipe)
|
| 104 |
|
| 105 |
# Warm and engaging prompt templates
|
| 106 |
-
qa_template = """
|
| 107 |
|
| 108 |
-
|
| 109 |
-
{filled_context}
|
| 110 |
|
| 111 |
-
|
| 112 |
|
| 113 |
self.qa_prompt = PromptTemplate(
|
| 114 |
input_variables=["question", "filled_context"],
|
|
@@ -131,12 +130,11 @@ Please select the most relevant slide (filename.pdf - Page X) that would best he
|
|
| 131 |
self.slide_selection_chain = self.slide_selection_prompt | self.llm
|
| 132 |
|
| 133 |
# Warm and detailed focused QA template
|
| 134 |
-
focused_qa_template = """
|
| 135 |
|
| 136 |
-
|
| 137 |
-
{slide_content}
|
| 138 |
|
| 139 |
-
|
| 140 |
|
| 141 |
self.focused_qa_prompt = PromptTemplate(
|
| 142 |
input_variables=["question", "slide_content"],
|
|
@@ -145,6 +143,8 @@ Please provide a warm, encouraging answer that directly answers their question a
|
|
| 145 |
self.focused_qa_chain = self.focused_qa_prompt | self.llm
|
| 146 |
|
| 147 |
print("✅ Optimized model loaded successfully!")
|
|
|
|
|
|
|
| 148 |
except Exception as e:
|
| 149 |
print(f"Warning: Could not load optimized model: {e}")
|
| 150 |
print("Falling back to basic search mode...")
|
|
@@ -191,283 +191,72 @@ Please provide a warm, encouraging answer that directly answers their question a
|
|
| 191 |
return "\n".join(slides_text)
|
| 192 |
|
| 193 |
def chat(self, query):
|
| 194 |
-
"""
|
| 195 |
-
# Check cache first for faster responses
|
| 196 |
-
if query in self.response_cache:
|
| 197 |
-
print("✅ Using cached response")
|
| 198 |
-
return self.response_cache[query]
|
| 199 |
|
| 200 |
-
#
|
| 201 |
-
results = self.vector_db.similarity_search(query, k=3)
|
| 202 |
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
if results:
|
| 206 |
-
# Calculate relevance score based on similarity and content relevance
|
| 207 |
-
relevant_results = []
|
| 208 |
-
for result in results:
|
| 209 |
-
content = result.page_content.lower()
|
| 210 |
-
query_terms = query.lower().split()
|
| 211 |
-
|
| 212 |
-
# Check if any query terms appear in the content
|
| 213 |
-
term_matches = sum(1 for term in query_terms if len(term) > 2 and term in content)
|
| 214 |
-
|
| 215 |
-
# Only consider results that have some relevance to the query
|
| 216 |
-
if term_matches > 0 or len(content.strip()) > 50:
|
| 217 |
-
relevant_results.append(result)
|
| 218 |
-
|
| 219 |
-
curriculum_relevance_score = len(relevant_results)
|
| 220 |
-
results = relevant_results # Use only relevant results
|
| 221 |
-
|
| 222 |
-
# Debug: Print what we found
|
| 223 |
-
print(f"Query: {query}")
|
| 224 |
-
print(f"Found {len(results)} relevant results:")
|
| 225 |
-
for i, result in enumerate(results[:3]):
|
| 226 |
-
print(f" {i+1}. {result.metadata['filename']} - Page {result.metadata['page_number']}")
|
| 227 |
-
print(f" Content: {result.page_content[:100]}...")
|
| 228 |
|
| 229 |
-
#
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
# Prepare slide contents for LLM analysis
|
| 235 |
-
slide_contents = []
|
| 236 |
-
for i, result in enumerate(results[:5]): # Top 5 results
|
| 237 |
-
filename = result.metadata["filename"]
|
| 238 |
-
page_num = result.metadata["page_number"]
|
| 239 |
-
content = result.page_content
|
| 240 |
-
slide_contents.append(f"Slide {i+1}: {filename} - Page {page_num}\nContent: {content}\n")
|
| 241 |
-
|
| 242 |
-
slide_contents_text = "\n".join(slide_contents)
|
| 243 |
-
|
| 244 |
-
# Use LLM to select the best slide
|
| 245 |
-
slide_response = self.slide_selection_chain.invoke({
|
| 246 |
-
"question": query,
|
| 247 |
-
"slide_contents": slide_contents_text
|
| 248 |
-
})
|
| 249 |
-
|
| 250 |
-
# Extract filename and page from response
|
| 251 |
-
slide_response = slide_response.strip()
|
| 252 |
-
if "<|eot_id|>" in slide_response:
|
| 253 |
-
slide_response = slide_response.split("<|eot_id|>")[-1].strip()
|
| 254 |
-
|
| 255 |
-
# Parse the response to get filename and page
|
| 256 |
-
match = re.search(r'(.+\.pdf)\s*-\s*Page\s*(\d+)', slide_response)
|
| 257 |
-
if match:
|
| 258 |
-
filename = match.group(1)
|
| 259 |
-
page_num = int(match.group(2))
|
| 260 |
-
|
| 261 |
-
# Find the corresponding result
|
| 262 |
-
for result in results:
|
| 263 |
-
if (result.metadata["filename"] == filename and
|
| 264 |
-
result.metadata["page_number"] == page_num):
|
| 265 |
-
best_result = result
|
| 266 |
-
best_slide_content = result.page_content
|
| 267 |
-
break
|
| 268 |
-
|
| 269 |
-
# If LLM selection failed, fall back to first result
|
| 270 |
-
if not best_result:
|
| 271 |
-
best_result = results[0]
|
| 272 |
-
best_slide_content = results[0].page_content
|
| 273 |
-
else:
|
| 274 |
-
# Fallback to first result if parsing failed
|
| 275 |
-
best_result = results[0]
|
| 276 |
-
best_slide_content = results[0].page_content
|
| 277 |
-
|
| 278 |
-
except Exception as e:
|
| 279 |
-
print(f"Error in LLM slide selection: {e}")
|
| 280 |
-
# Fallback to first result
|
| 281 |
-
best_result = results[0]
|
| 282 |
-
best_slide_content = results[0].page_content
|
| 283 |
-
else:
|
| 284 |
-
# Fallback without LLM
|
| 285 |
-
if curriculum_relevance_score > 0:
|
| 286 |
-
best_result = results[0]
|
| 287 |
-
best_slide_content = results[0].page_content
|
| 288 |
|
| 289 |
-
#
|
| 290 |
-
|
|
|
|
|
|
|
|
|
|
| 291 |
try:
|
|
|
|
|
|
|
| 292 |
answer = self.focused_qa_chain.invoke({
|
| 293 |
"question": query,
|
| 294 |
"slide_content": best_slide_content
|
| 295 |
})
|
| 296 |
|
| 297 |
-
|
| 298 |
-
print(f"LLM Raw Response: {answer[:200]}...")
|
| 299 |
|
| 300 |
# Clean up the answer
|
| 301 |
answer = answer.strip()
|
| 302 |
if "<|eot_id|>" in answer:
|
| 303 |
answer = answer.split("<|eot_id|>")[-1].strip()
|
| 304 |
|
| 305 |
-
# Remove any prompt artifacts
|
| 306 |
-
if answer.startswith("Answer:"):
|
| 307 |
-
answer = answer[7:].strip()
|
| 308 |
-
if answer.startswith("Provide a clear, educational answer based on this slide:"):
|
| 309 |
-
answer = answer[58:].strip()
|
| 310 |
-
|
| 311 |
-
# If LLM response is too short or problematic, show slide content with explanation
|
| 312 |
-
if len(answer.strip()) < 30:
|
| 313 |
-
slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 314 |
-
answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n*Note: Here's the relevant curriculum content to help answer your question.*"
|
| 315 |
-
|
| 316 |
-
except Exception as e:
|
| 317 |
-
print(f"Error generating focused answer: {e}")
|
| 318 |
-
# Show slide content with explanation
|
| 319 |
-
slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 320 |
-
answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n*Note: Here's the relevant curriculum content to help answer your question.*"
|
| 321 |
-
|
| 322 |
-
elif self.qa_chain and not self.fast_mode:
|
| 323 |
-
# Fallback to general LLM if focused chain fails
|
| 324 |
-
try:
|
| 325 |
-
if curriculum_relevance_score > 0:
|
| 326 |
-
context = "\n\n".join([result.page_content for result in results])
|
| 327 |
-
filled_context = f"Here's the relevant curriculum content from the student's course materials:\n{context}\n\nPlease provide a warm, encouraging answer that directly uses this curriculum content to help the student understand the concept."
|
| 328 |
-
else:
|
| 329 |
-
filled_context = "Note: This question is not covered in the current curriculum. Please provide a friendly, general programming answer that encourages the student's curiosity."
|
| 330 |
-
|
| 331 |
-
answer = self.qa_chain.invoke({
|
| 332 |
-
"question": query,
|
| 333 |
-
"filled_context": filled_context
|
| 334 |
-
})
|
| 335 |
-
|
| 336 |
-
# Clean up the answer
|
| 337 |
-
answer = answer.strip()
|
| 338 |
-
if "<|eot_id|>" in answer:
|
| 339 |
-
answer = answer.split("<|eot_id|>")[-1].strip()
|
| 340 |
-
if answer.startswith("Answer:"):
|
| 341 |
-
answer = answer[7:].strip()
|
| 342 |
-
if answer.startswith("Provide a clear, educational answer explaining the concept:"):
|
| 343 |
-
answer = answer[58:].strip()
|
| 344 |
-
|
| 345 |
-
# If answer is too short, show slide content
|
| 346 |
-
if len(answer.strip()) < 30:
|
| 347 |
-
if curriculum_relevance_score > 0:
|
| 348 |
-
slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 349 |
-
answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n*Note: Here's the relevant curriculum content to help answer your question.*"
|
| 350 |
-
else:
|
| 351 |
-
answer = "I'm sorry, I couldn't generate a proper answer right now. Please try rephrasing your question - sometimes a different way of asking helps! 😊"
|
| 352 |
-
|
| 353 |
-
# Add warning if not in curriculum
|
| 354 |
-
if curriculum_relevance_score == 0:
|
| 355 |
-
answer = "💡 **Note: This topic isn't covered in your current curriculum, but here's a helpful answer:**\n\n" + answer
|
| 356 |
-
|
| 357 |
except Exception as e:
|
| 358 |
print(f"Error generating answer: {e}")
|
| 359 |
-
|
| 360 |
-
slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 361 |
-
answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n*Note: Here's the relevant curriculum content to help answer your question.*"
|
| 362 |
-
else:
|
| 363 |
-
answer = "I'm sorry, I couldn't generate an answer at the moment. Please try rephrasing your question - sometimes a different approach helps! 😊"
|
| 364 |
else:
|
| 365 |
-
#
|
| 366 |
-
|
| 367 |
-
slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 368 |
-
answer = f"{slide_info}\n\n**Great question! Here's what your curriculum teaches:**\n\n{best_slide_content}\n\n*Note: AI generation is not available, but here's the relevant curriculum content to help you learn!* 🌟"
|
| 369 |
-
else:
|
| 370 |
-
answer = "I couldn't find any programming-related content in the curriculum for this question. This appears to be about something outside the scope of your programming course. Try asking about programming concepts like variables, loops, functions, or other topics covered in your curriculum! 😊"
|
| 371 |
|
| 372 |
-
#
|
| 373 |
relevant_slides = []
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
best_result = results[0]
|
| 377 |
-
filename = best_result.metadata["filename"]
|
| 378 |
-
page_number = best_result.metadata["page_number"]
|
| 379 |
-
|
| 380 |
-
# Get the specific PDF and its pages
|
| 381 |
-
if filename in self.pdf_files:
|
| 382 |
-
pdf_path = self.pdf_files[filename]
|
| 383 |
-
doc = fitz.open(pdf_path)
|
| 384 |
-
total_pages = len(doc)
|
| 385 |
-
doc.close()
|
| 386 |
-
|
| 387 |
-
# Find the best content page by analyzing all results
|
| 388 |
-
target_page = page_number
|
| 389 |
-
best_content_score = 0
|
| 390 |
-
|
| 391 |
-
# Check all search results for the best content page
|
| 392 |
-
for result in results:
|
| 393 |
-
if result.metadata["filename"] == filename:
|
| 394 |
-
page_num = result.metadata["page_number"]
|
| 395 |
-
page_text = self.pdf_pages[filename].get(page_num, "")
|
| 396 |
-
text_length = len(page_text.strip())
|
| 397 |
-
|
| 398 |
-
# Score based on text length and relevance
|
| 399 |
-
content_score = text_length
|
| 400 |
-
if text_length > 100: # Prefer content pages over title slides
|
| 401 |
-
content_score += 500
|
| 402 |
-
|
| 403 |
-
if content_score > best_content_score:
|
| 404 |
-
best_content_score = content_score
|
| 405 |
-
target_page = page_num
|
| 406 |
-
|
| 407 |
-
# If we still have a title slide, look for better content in the same PDF
|
| 408 |
-
page_text = self.pdf_pages[filename].get(target_page, "")
|
| 409 |
-
if len(page_text.strip()) < 150: # Still a title slide
|
| 410 |
-
# Search for pages with the query terms
|
| 411 |
-
query_terms = query.lower().split()
|
| 412 |
-
best_match_score = 0
|
| 413 |
-
|
| 414 |
-
for page_num in range(1, total_pages + 1):
|
| 415 |
-
if page_num in self.pdf_pages[filename]:
|
| 416 |
-
text = self.pdf_pages[filename][page_num].lower()
|
| 417 |
-
text_length = len(text.strip())
|
| 418 |
-
|
| 419 |
-
# Count how many query terms appear in this page
|
| 420 |
-
match_score = sum(1 for term in query_terms if term in text)
|
| 421 |
-
|
| 422 |
-
# Prefer pages with both query terms and good content
|
| 423 |
-
if match_score > 0 and text_length > 200:
|
| 424 |
-
total_score = match_score * 1000 + text_length
|
| 425 |
-
if total_score > best_match_score:
|
| 426 |
-
best_match_score = total_score
|
| 427 |
-
target_page = page_num
|
| 428 |
-
|
| 429 |
-
# Get the target page and neighboring pages (2 before, 2 after)
|
| 430 |
-
start_page = max(1, target_page - 2)
|
| 431 |
-
end_page = min(total_pages, target_page + 2)
|
| 432 |
-
|
| 433 |
-
# Use a set to track unique slides and avoid duplicates
|
| 434 |
-
seen_slides = set()
|
| 435 |
-
|
| 436 |
-
for page_num in range(start_page, end_page + 1):
|
| 437 |
-
img = self.get_pdf_page_image(pdf_path, page_num)
|
| 438 |
-
if img:
|
| 439 |
-
slide_key = f"{filename}-{page_num}"
|
| 440 |
-
if slide_key not in seen_slides:
|
| 441 |
-
seen_slides.add(slide_key)
|
| 442 |
-
if page_num == target_page:
|
| 443 |
-
# Highlight the most relevant page
|
| 444 |
-
label = f"📌 {filename} - Page {page_num} (Most Relevant)"
|
| 445 |
-
else:
|
| 446 |
-
label = f"{filename} - Page {page_num}"
|
| 447 |
-
relevant_slides.append((img, label))
|
| 448 |
-
|
| 449 |
-
recommended_slide = relevant_slides[0][0] if relevant_slides else None
|
| 450 |
-
recommended_label = relevant_slides[0][1] if relevant_slides else None
|
| 451 |
-
else:
|
| 452 |
-
# Fallback if filename not found
|
| 453 |
-
recommended_slide = None
|
| 454 |
-
recommended_label = None
|
| 455 |
-
else:
|
| 456 |
-
# If no curriculum content, provide a helpful response
|
| 457 |
-
relevant_slides = []
|
| 458 |
-
recommended_slide = None
|
| 459 |
-
recommended_label = None
|
| 460 |
-
|
| 461 |
-
# Cache the response for future use
|
| 462 |
-
self.response_cache[query] = (answer, recommended_slide, recommended_label, relevant_slides)
|
| 463 |
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 469 |
|
| 470 |
-
return answer,
|
| 471 |
|
| 472 |
# --- Gradio UI ---
|
| 473 |
chatbot = CurriculumChatbot(fast_mode=False) # Enable AI mode by default
|
|
|
|
| 103 |
self.llm = HuggingFacePipeline(pipeline=pipe)
|
| 104 |
|
| 105 |
# Warm and engaging prompt templates
|
| 106 |
+
qa_template = """Answer this question: {question}
|
| 107 |
|
| 108 |
+
Using this information: {filled_context}
|
|
|
|
| 109 |
|
| 110 |
+
Provide a helpful, friendly answer."""
|
| 111 |
|
| 112 |
self.qa_prompt = PromptTemplate(
|
| 113 |
input_variables=["question", "filled_context"],
|
|
|
|
| 130 |
self.slide_selection_chain = self.slide_selection_prompt | self.llm
|
| 131 |
|
| 132 |
# Warm and detailed focused QA template
|
| 133 |
+
focused_qa_template = """Answer this question: {question}
|
| 134 |
|
| 135 |
+
Using this information: {slide_content}
|
|
|
|
| 136 |
|
| 137 |
+
Provide a helpful, friendly answer."""
|
| 138 |
|
| 139 |
self.focused_qa_prompt = PromptTemplate(
|
| 140 |
input_variables=["question", "slide_content"],
|
|
|
|
| 143 |
self.focused_qa_chain = self.focused_qa_prompt | self.llm
|
| 144 |
|
| 145 |
print("✅ Optimized model loaded successfully!")
|
| 146 |
+
print(f"🔍 LLM object: {self.llm}")
|
| 147 |
+
print(f"🔍 Focused QA chain: {self.focused_qa_chain}")
|
| 148 |
except Exception as e:
|
| 149 |
print(f"Warning: Could not load optimized model: {e}")
|
| 150 |
print("Falling back to basic search mode...")
|
|
|
|
| 191 |
return "\n".join(slides_text)
|
| 192 |
|
| 193 |
def chat(self, query):
    """Answer a curriculum question and collect the slides around the best match.

    Steps:
      1. Run a similarity search (k=3) over ``self.vector_db``.
      2. Take the top-ranked hit as the best slide and, when
         ``self.focused_qa_chain`` is set and ``self.fast_mode`` is false,
         ask the chain to answer from that slide's text. If the chain is
         unavailable, raises, or yields an empty answer after cleanup, the
         answer is the slide reference plus the raw slide text instead.
      3. Render the best page and up to two pages on either side of it via
         ``self.get_pdf_page_image``.

    Args:
        query: The question text typed by the user.

    Returns:
        A 4-tuple ``(answer, recommended_slide, recommended_label,
        relevant_slides)``. ``relevant_slides`` is a list of
        ``(image, label)`` pairs; ``recommended_slide`` and
        ``recommended_label`` are the first pair of that list, or ``None``
        when no slide could be rendered.
    """
    # 1. Vector Search - Find relevant slides
    results = self.vector_db.similarity_search(query, k=3)
    if not results:
        return "I couldn't find relevant content in the curriculum for this question.", None, None, []

    # Debug: Show what we found
    print(f"Query: {query}")
    print(f"Found {len(results)} relevant slides:")
    for position, result in enumerate(results, start=1):
        print(f" {position}. {result.metadata['filename']} - Page {result.metadata['page_number']}")

    # 2. LLM Check - Analyze the top-ranked slide and generate an answer
    best_result = results[0]
    best_slide_content = best_result.page_content
    filename = best_result.metadata["filename"]
    page_number = best_result.metadata["page_number"]

    # Built once: used when the LLM is disabled, fails, or returns nothing.
    slide_fallback = (
        f"📄 **Slide Reference:** {filename} - Page {page_number}"
        f"\n\n**Slide Content:**\n{best_slide_content}"
    )

    answer = slide_fallback
    if self.focused_qa_chain and not self.fast_mode:
        try:
            print(f"🔍 Calling LLM with question: {query}")

            answer = self.focused_qa_chain.invoke({
                "question": query,
                "slide_content": best_slide_content
            })

            print(f"LLM Response: {answer[:100]}...")

            # Clean up the answer: keep only the text after the last
            # end-of-turn marker, if the model emitted one.
            answer = answer.strip()
            if "<|eot_id|>" in answer:
                answer = answer.split("<|eot_id|>")[-1].strip()

            # Output that ends with the marker (or is blank) would otherwise
            # surface as an empty reply; show the slide text instead.
            if not answer:
                answer = slide_fallback
        except Exception as e:
            # Best-effort boundary: any chain failure degrades to slide text.
            print(f"Error generating answer: {e}")
            answer = slide_fallback

    # 3. Slide Output - Get relevant slides
    relevant_slides = []
    if filename in self.pdf_files:
        pdf_path = self.pdf_files[filename]

        # Only the page count is needed; always release the document handle.
        doc = fitz.open(pdf_path)
        try:
            total_pages = len(doc)
        finally:
            doc.close()

        # Get the target page and neighboring pages (2 before, 2 after).
        # The lower clamp of 1 implies page numbers are 1-based here.
        start_page = max(1, page_number - 2)
        end_page = min(total_pages, page_number + 2)

        for page_num in range(start_page, end_page + 1):
            img = self.get_pdf_page_image(pdf_path, page_num)
            if not img:
                continue  # skip pages that could not be rendered
            if page_num == page_number:
                label = f"📌 {filename} - Page {page_num} (Most Relevant)"
            else:
                label = f"{filename} - Page {page_num}"
            relevant_slides.append((img, label))

    # NOTE(review): the "recommended" slide is the first page of the window,
    # not necessarily the page labelled "Most Relevant" — confirm this is the
    # intended behaviour for the UI.
    if relevant_slides:
        recommended_slide, recommended_label = relevant_slides[0]
    else:
        recommended_slide, recommended_label = None, None

    return answer, recommended_slide, recommended_label, relevant_slides
|
| 260 |
|
| 261 |
# --- Gradio UI ---
|
| 262 |
chatbot = CurriculumChatbot(fast_mode=False) # Enable AI mode by default
|