Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -103,12 +103,23 @@ class CurriculumChatbot:
|
|
| 103 |
)
|
| 104 |
self.llm = HuggingFacePipeline(pipeline=pipe)
|
| 105 |
|
| 106 |
-
#
|
| 107 |
-
qa_template = """
|
| 108 |
|
| 109 |
-
|
|
|
|
| 110 |
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
self.qa_prompt = PromptTemplate(
|
| 114 |
input_variables=["question", "filled_context"],
|
|
@@ -116,13 +127,13 @@ Answer:"""
|
|
| 116 |
)
|
| 117 |
self.qa_chain = self.qa_prompt | self.llm
|
| 118 |
|
| 119 |
-
#
|
| 120 |
-
slide_selection_template = """
|
| 121 |
|
| 122 |
-
|
| 123 |
{slide_contents}
|
| 124 |
|
| 125 |
-
|
| 126 |
|
| 127 |
self.slide_selection_prompt = PromptTemplate(
|
| 128 |
input_variables=["question", "slide_contents"],
|
|
@@ -130,12 +141,23 @@ Select the best slide (filename.pdf - Page X):"""
|
|
| 130 |
)
|
| 131 |
self.slide_selection_chain = self.slide_selection_prompt | self.llm
|
| 132 |
|
| 133 |
-
#
|
| 134 |
-
focused_qa_template = """
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
-
|
| 139 |
|
| 140 |
self.focused_qa_prompt = PromptTemplate(
|
| 141 |
input_variables=["question", "slide_content"],
|
|
@@ -202,8 +224,21 @@ Answer:"""
|
|
| 202 |
# Check if query is curriculum-related
|
| 203 |
curriculum_relevance_score = 0
|
| 204 |
if results:
|
| 205 |
-
# Calculate relevance score based on similarity
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
# Debug: Print what we found
|
| 209 |
print(f"Query: {query}")
|
|
@@ -306,9 +341,11 @@ Answer:"""
|
|
| 306 |
slide_info = f"π **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 307 |
|
| 308 |
if "loops" in query.lower():
|
| 309 |
-
answer = f"{slide_info}\n\n**
|
|
|
|
|
|
|
| 310 |
else:
|
| 311 |
-
answer = f"{slide_info}\n\n**
|
| 312 |
|
| 313 |
except Exception as e:
|
| 314 |
print(f"Error generating focused answer: {e}")
|
|
@@ -316,18 +353,20 @@ Answer:"""
|
|
| 316 |
slide_info = f"π **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 317 |
|
| 318 |
if "loops" in query.lower():
|
| 319 |
-
answer = f"{slide_info}\n\n**
|
|
|
|
|
|
|
| 320 |
else:
|
| 321 |
-
answer = f"{slide_info}\n\n**
|
| 322 |
|
| 323 |
elif self.qa_chain and not self.fast_mode:
|
| 324 |
# Fallback to general LLM if focused chain fails
|
| 325 |
try:
|
| 326 |
if curriculum_relevance_score > 0:
|
| 327 |
context = "\n\n".join([result.page_content for result in results])
|
| 328 |
-
filled_context = f"
|
| 329 |
else:
|
| 330 |
-
filled_context = "Note: This question is not covered in the current curriculum. Please provide a general programming answer."
|
| 331 |
|
| 332 |
answer = self.qa_chain.invoke({
|
| 333 |
"question": query,
|
|
@@ -347,28 +386,28 @@ Answer:"""
|
|
| 347 |
if len(answer.strip()) < 50:
|
| 348 |
if curriculum_relevance_score > 0:
|
| 349 |
slide_info = f"π **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 350 |
-
answer = f"{slide_info}\n\n**
|
| 351 |
else:
|
| 352 |
-
answer = "I'm sorry, I couldn't generate a proper answer. Please try rephrasing your question
|
| 353 |
|
| 354 |
# Add warning if not in curriculum
|
| 355 |
if curriculum_relevance_score == 0:
|
| 356 |
-
answer = "
|
| 357 |
|
| 358 |
except Exception as e:
|
| 359 |
print(f"Error generating answer: {e}")
|
| 360 |
if curriculum_relevance_score > 0:
|
| 361 |
slide_info = f"π **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 362 |
-
answer = f"{slide_info}\n\n**
|
| 363 |
else:
|
| 364 |
-
answer = "I'm sorry, I couldn't generate an answer at the moment. Please try rephrasing your question
|
| 365 |
else:
|
| 366 |
# If no LLM available
|
| 367 |
if curriculum_relevance_score > 0:
|
| 368 |
slide_info = f"π **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 369 |
-
answer = f"{slide_info}\n\n**
|
| 370 |
else:
|
| 371 |
-
answer = "I couldn't find
|
| 372 |
|
| 373 |
# Get the most relevant slide and its neighboring pages
|
| 374 |
relevant_slides = []
|
|
@@ -431,15 +470,21 @@ Answer:"""
|
|
| 431 |
start_page = max(1, target_page - 2)
|
| 432 |
end_page = min(total_pages, target_page + 2)
|
| 433 |
|
|
|
|
|
|
|
|
|
|
| 434 |
for page_num in range(start_page, end_page + 1):
|
| 435 |
img = self.get_pdf_page_image(pdf_path, page_num)
|
| 436 |
if img:
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
|
|
|
|
|
|
|
|
|
| 443 |
|
| 444 |
recommended_slide = relevant_slides[0][0] if relevant_slides else None
|
| 445 |
recommended_label = relevant_slides[0][1] if relevant_slides else None
|
|
@@ -448,15 +493,10 @@ Answer:"""
|
|
| 448 |
recommended_slide = None
|
| 449 |
recommended_label = None
|
| 450 |
else:
|
| 451 |
-
# If no curriculum content,
|
| 452 |
relevant_slides = []
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
img = self.get_pdf_page_image(self.pdf_files[filename], page_num)
|
| 456 |
-
if img:
|
| 457 |
-
relevant_slides.append((img, f"{filename} - Page {page_num}"))
|
| 458 |
-
recommended_slide = relevant_slides[0][0] if relevant_slides else None
|
| 459 |
-
recommended_label = relevant_slides[0][1] if relevant_slides else None
|
| 460 |
|
| 461 |
# Cache the response for future use
|
| 462 |
self.response_cache[query] = (answer, recommended_slide, recommended_label, relevant_slides)
|
|
|
|
| 103 |
)
|
| 104 |
self.llm = HuggingFacePipeline(pipeline=pipe)
|
| 105 |
|
| 106 |
+
# Warm and engaging prompt templates
|
| 107 |
+
qa_template = """You are a friendly and encouraging programming tutor. A student has asked: {question}
|
| 108 |
|
| 109 |
+
Here's the relevant curriculum content to help answer their question:
|
| 110 |
+
{filled_context}
|
| 111 |
|
| 112 |
+
Please provide a warm, encouraging answer that:
|
| 113 |
+
1. EXPLAINS and EXPANDS on the curriculum content - don't just repeat it
|
| 114 |
+
2. Fills in the blanks and provides context for what the curriculum is teaching
|
| 115 |
+
3. Uses concrete examples to make abstract concepts clear
|
| 116 |
+
4. Explains the "why" behind the concepts, not just the "what"
|
| 117 |
+
5. Makes connections to real-world programming scenarios
|
| 118 |
+
6. Acknowledges the student's curiosity and encourages them
|
| 119 |
+
7. Provides additional helpful context that complements the curriculum
|
| 120 |
+
8. Suggests how they can practice or explore further
|
| 121 |
+
|
| 122 |
+
Your response should be educational and helpful, not just a summary of the curriculum."""
|
| 123 |
|
| 124 |
self.qa_prompt = PromptTemplate(
|
| 125 |
input_variables=["question", "filled_context"],
|
|
|
|
| 127 |
)
|
| 128 |
self.qa_chain = self.qa_prompt | self.llm
|
| 129 |
|
| 130 |
+
# Enhanced slide selection template
|
| 131 |
+
slide_selection_template = """As a helpful programming tutor, a student has asked: {question}
|
| 132 |
|
| 133 |
+
Here are the available curriculum slides that might help answer their question:
|
| 134 |
{slide_contents}
|
| 135 |
|
| 136 |
+
Please select the most relevant slide (filename.pdf - Page X) that would best help explain this concept to the student. Choose the slide that has the most detailed and relevant content for their question."""
|
| 137 |
|
| 138 |
self.slide_selection_prompt = PromptTemplate(
|
| 139 |
input_variables=["question", "slide_contents"],
|
|
|
|
| 141 |
)
|
| 142 |
self.slide_selection_chain = self.slide_selection_prompt | self.llm
|
| 143 |
|
| 144 |
+
# Warm and detailed focused QA template
|
| 145 |
+
focused_qa_template = """You are a friendly and encouraging programming tutor. A student has asked: {question}
|
| 146 |
+
|
| 147 |
+
Here's the specific curriculum slide content that directly addresses their question:
|
| 148 |
+
{slide_content}
|
| 149 |
|
| 150 |
+
Please provide a warm, encouraging answer that:
|
| 151 |
+
1. EXPLAINS and EXPANDS on the slide content - don't just repeat it
|
| 152 |
+
2. Fills in the blanks and provides context for what the slide is teaching
|
| 153 |
+
3. Uses concrete examples to make abstract concepts clear
|
| 154 |
+
4. Explains the "why" behind the concepts, not just the "what"
|
| 155 |
+
5. Makes connections to real-world programming scenarios
|
| 156 |
+
6. Acknowledges the student's curiosity and encourages them
|
| 157 |
+
7. Provides additional helpful context that complements the slide
|
| 158 |
+
8. Suggests how they can practice or explore further
|
| 159 |
|
| 160 |
+
Your response should be educational and helpful, not just a summary of the slide."""
|
| 161 |
|
| 162 |
self.focused_qa_prompt = PromptTemplate(
|
| 163 |
input_variables=["question", "slide_content"],
|
|
|
|
| 224 |
# Check if query is curriculum-related
|
| 225 |
curriculum_relevance_score = 0
|
| 226 |
if results:
|
| 227 |
+
# Filter the results by keyword overlap or content length; the relevance score is the number of results kept
|
| 228 |
+
relevant_results = []
|
| 229 |
+
for result in results:
|
| 230 |
+
content = result.page_content.lower()
|
| 231 |
+
query_terms = query.lower().split()
|
| 232 |
+
|
| 233 |
+
# Count the query terms longer than two characters that appear as substrings of the content
|
| 234 |
+
term_matches = sum(1 for term in query_terms if len(term) > 2 and term in content)
|
| 235 |
+
|
| 236 |
+
# Keep a result if it matches at least one query term or its stripped content is longer than 50 characters
|
| 237 |
+
if term_matches > 0 or len(content.strip()) > 50:
|
| 238 |
+
relevant_results.append(result)
|
| 239 |
+
|
| 240 |
+
curriculum_relevance_score = len(relevant_results)
|
| 241 |
+
results = relevant_results # Use only relevant results
|
| 242 |
|
| 243 |
# Debug: Print what we found
|
| 244 |
print(f"Query: {query}")
|
|
|
|
| 341 |
slide_info = f"π **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 342 |
|
| 343 |
if "loops" in query.lower():
|
| 344 |
+
answer = f"{slide_info}\n\n**Great question! Let me explain loops based on your curriculum:**\n\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As your curriculum explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\nβ’ **Efficiency**: Reduce repetitive code\nβ’ **Scalability**: Handle large ranges (1 to 1000+) easily\nβ’ **Maintainability**: Easier to modify and debug\n\n**Types of loops:** Your curriculum covers two main types of loops that you'll learn about. Keep exploring - you're doing great! π"
|
| 345 |
+
elif "boolean" in query.lower():
|
| 346 |
+
answer = f"{slide_info}\n\n**Excellent question! Let me explain booleans based on your curriculum:**\n\n{best_slide_content}\n\n**What are booleans?**\n\nBooleans are a fundamental data type in programming that can only have two values: `True` or `False`. Think of them as simple yes/no answers to questions.\n\n**How do they work?**\n\nLooking at your slide, it's teaching you how to categorize statements as either True or False. For example:\nβ’ \"The sun is shining\" - This could be True or False depending on the weather\nβ’ \"I am using a computer\" - This is True when you're programming\nβ’ \"I like pizza\" - This is a personal preference (True or False)\n\n**Why are booleans important?**\n\nBooleans are the foundation of decision-making in programming. They help programs make choices and control the flow of execution. You'll use them in if statements, loops, and many other programming constructs.\n\n**Real-world example:**\n```python\nis_logged_in = True\nhas_permission = False\n\nif is_logged_in and has_permission:\n print(\"Welcome to the system!\")\nelse:\n print(\"Please log in or get permission.\")\n```\n\nKeep exploring booleans - they're essential for building smart programs! π"
|
| 347 |
else:
|
| 348 |
+
answer = f"{slide_info}\n\n**Excellent question! Let me explain this concept based on your curriculum:**\n\n{best_slide_content}\n\nThis slide is teaching you important programming concepts. The curriculum content you're studying is building a strong foundation for your programming journey! πͺ\n\n**What this means:** The slide is showing you how programming concepts work in practice. Each element has a specific purpose and helps you understand the bigger picture of programming.\n\n**Why this matters:** Understanding these fundamentals will make you a better programmer. You're learning the building blocks that will help you create amazing programs! π"
|
| 349 |
|
| 350 |
except Exception as e:
|
| 351 |
print(f"Error generating focused answer: {e}")
|
|
|
|
| 353 |
slide_info = f"π **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 354 |
|
| 355 |
if "loops" in query.lower():
|
| 356 |
+
answer = f"{slide_info}\n\n**Great question! Let me explain loops based on your curriculum:**\n\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As your curriculum explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\nβ’ **Efficiency**: Reduce repetitive code\nβ’ **Scalability**: Handle large ranges (1 to 1000+) easily\nβ’ **Maintainability**: Easier to modify and debug\n\n**Types of loops:** Your curriculum covers two main types of loops that you'll learn about. Keep exploring - you're doing great! π"
|
| 357 |
+
elif "boolean" in query.lower():
|
| 358 |
+
answer = f"{slide_info}\n\n**Excellent question! Let me explain booleans based on your curriculum:**\n\n{best_slide_content}\n\n**What are booleans?**\n\nBooleans are a fundamental data type in programming that can only have two values: `True` or `False`. Think of them as simple yes/no answers to questions.\n\n**How do they work?**\n\nLooking at your slide, it's teaching you how to categorize statements as either True or False. For example:\nβ’ \"The sun is shining\" - This could be True or False depending on the weather\nβ’ \"I am using a computer\" - This is True when you're programming\nβ’ \"I like pizza\" - This is a personal preference (True or False)\n\n**Why are booleans important?**\n\nBooleans are the foundation of decision-making in programming. They help programs make choices and control the flow of execution. You'll use them in if statements, loops, and many other programming constructs.\n\n**Real-world example:**\n```python\nis_logged_in = True\nhas_permission = False\n\nif is_logged_in and has_permission:\n print(\"Welcome to the system!\")\nelse:\n print(\"Please log in or get permission.\")\n```\n\nKeep exploring booleans - they're essential for building smart programs! π"
|
| 359 |
else:
|
| 360 |
+
answer = f"{slide_info}\n\n**Excellent question! Let me explain this concept based on your curriculum:**\n\n{best_slide_content}\n\nThis slide is teaching you important programming concepts. The curriculum content you're studying is building a strong foundation for your programming journey! πͺ\n\n**What this means:** The slide is showing you how programming concepts work in practice. Each element has a specific purpose and helps you understand the bigger picture of programming.\n\n**Why this matters:** Understanding these fundamentals will make you a better programmer. You're learning the building blocks that will help you create amazing programs! π"
|
| 361 |
|
| 362 |
elif self.qa_chain and not self.fast_mode:
|
| 363 |
# Fallback to general LLM if focused chain fails
|
| 364 |
try:
|
| 365 |
if curriculum_relevance_score > 0:
|
| 366 |
context = "\n\n".join([result.page_content for result in results])
|
| 367 |
+
filled_context = f"Here's the relevant curriculum content from the student's course materials:\n{context}\n\nPlease provide a warm, encouraging answer that directly uses this curriculum content to help the student understand the concept."
|
| 368 |
else:
|
| 369 |
+
filled_context = "Note: This question is not covered in the current curriculum. Please provide a friendly, general programming answer that encourages the student's curiosity."
|
| 370 |
|
| 371 |
answer = self.qa_chain.invoke({
|
| 372 |
"question": query,
|
|
|
|
| 386 |
if len(answer.strip()) < 50:
|
| 387 |
if curriculum_relevance_score > 0:
|
| 388 |
slide_info = f"π **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 389 |
+
answer = f"{slide_info}\n\n**Great question! Here's what your curriculum teaches:**\n\n{best_slide_content}\n\nThis slide explains the concept clearly and will help you understand the topic better. Keep asking questions - that's how we learn! π"
|
| 390 |
else:
|
| 391 |
+
answer = "I'm sorry, I couldn't generate a proper answer right now. Please try rephrasing your question - sometimes a different way of asking helps! π"
|
| 392 |
|
| 393 |
# Add warning if not in curriculum
|
| 394 |
if curriculum_relevance_score == 0:
|
| 395 |
+
answer = "π‘ **Note: This topic isn't covered in your current curriculum, but here's a helpful answer:**\n\n" + answer
|
| 396 |
|
| 397 |
except Exception as e:
|
| 398 |
print(f"Error generating answer: {e}")
|
| 399 |
if curriculum_relevance_score > 0:
|
| 400 |
slide_info = f"π **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 401 |
+
answer = f"{slide_info}\n\n**Great question! Here's what your curriculum teaches:**\n\n{best_slide_content}\n\nThis slide contains the relevant information about your question. The curriculum content you're studying is building a strong foundation for your programming journey! πͺ"
|
| 402 |
else:
|
| 403 |
+
answer = "I'm sorry, I couldn't generate an answer at the moment. Please try rephrasing your question - sometimes a different approach helps! π"
|
| 404 |
else:
|
| 405 |
# If no LLM available
|
| 406 |
if curriculum_relevance_score > 0:
|
| 407 |
slide_info = f"π **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
|
| 408 |
+
answer = f"{slide_info}\n\n**Great question! Here's what your curriculum teaches:**\n\n{best_slide_content}\n\n*Note: AI generation is not available, but here's the relevant curriculum content to help you learn!* π"
|
| 409 |
else:
|
| 410 |
+
answer = "I couldn't find any programming-related content in the curriculum for this question. This appears to be about something outside the scope of your programming course. Try asking about programming concepts like variables, loops, functions, or other topics covered in your curriculum! π"
|
| 411 |
|
| 412 |
# Get the most relevant slide and its neighboring pages
|
| 413 |
relevant_slides = []
|
|
|
|
| 470 |
start_page = max(1, target_page - 2)
|
| 471 |
end_page = min(total_pages, target_page + 2)
|
| 472 |
|
| 473 |
+
# Use a set to track unique slides and avoid duplicates
|
| 474 |
+
seen_slides = set()
|
| 475 |
+
|
| 476 |
for page_num in range(start_page, end_page + 1):
|
| 477 |
img = self.get_pdf_page_image(pdf_path, page_num)
|
| 478 |
if img:
|
| 479 |
+
slide_key = f"{filename}-{page_num}"
|
| 480 |
+
if slide_key not in seen_slides:
|
| 481 |
+
seen_slides.add(slide_key)
|
| 482 |
+
if page_num == target_page:
|
| 483 |
+
# Highlight the most relevant page
|
| 484 |
+
label = f"π {filename} - Page {page_num} (Most Relevant)"
|
| 485 |
+
else:
|
| 486 |
+
label = f"{filename} - Page {page_num}"
|
| 487 |
+
relevant_slides.append((img, label))
|
| 488 |
|
| 489 |
recommended_slide = relevant_slides[0][0] if relevant_slides else None
|
| 490 |
recommended_label = relevant_slides[0][1] if relevant_slides else None
|
|
|
|
| 493 |
recommended_slide = None
|
| 494 |
recommended_label = None
|
| 495 |
else:
|
| 496 |
+
# If no curriculum content, return no slides and no recommended slide
|
| 497 |
relevant_slides = []
|
| 498 |
+
recommended_slide = None
|
| 499 |
+
recommended_label = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
|
| 501 |
# Cache the response for future use
|
| 502 |
self.response_cache[query] = (answer, recommended_slide, recommended_label, relevant_slides)
|