IW2025 committed on
Commit
b911757
·
verified ·
1 Parent(s): 0fbf85b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -41
app.py CHANGED
@@ -103,12 +103,23 @@ class CurriculumChatbot:
103
  )
104
  self.llm = HuggingFacePipeline(pipeline=pipe)
105
 
106
- # Simplified prompt templates for faster processing
107
- qa_template = """Question: {question}
108
 
109
- Context: {filled_context}
 
110
 
111
- Answer:"""
 
 
 
 
 
 
 
 
 
 
112
 
113
  self.qa_prompt = PromptTemplate(
114
  input_variables=["question", "filled_context"],
@@ -116,13 +127,13 @@ Answer:"""
116
  )
117
  self.qa_chain = self.qa_prompt | self.llm
118
 
119
- # Simplified slide selection template
120
- slide_selection_template = """Question: {question}
121
 
122
- Available slides:
123
  {slide_contents}
124
 
125
- Select the best slide (filename.pdf - Page X):"""
126
 
127
  self.slide_selection_prompt = PromptTemplate(
128
  input_variables=["question", "slide_contents"],
@@ -130,12 +141,23 @@ Select the best slide (filename.pdf - Page X):"""
130
  )
131
  self.slide_selection_chain = self.slide_selection_prompt | self.llm
132
 
133
- # Simplified focused QA template
134
- focused_qa_template = """Slide Content: {slide_content}
 
 
 
135
 
136
- Question: {question}
 
 
 
 
 
 
 
 
137
 
138
- Answer:"""
139
 
140
  self.focused_qa_prompt = PromptTemplate(
141
  input_variables=["question", "slide_content"],
@@ -202,8 +224,21 @@ Answer:"""
202
  # Check if query is curriculum-related
203
  curriculum_relevance_score = 0
204
  if results:
205
- # Calculate relevance score based on similarity
206
- curriculum_relevance_score = len([r for r in results if r.page_content.strip()])
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
  # Debug: Print what we found
209
  print(f"Query: {query}")
@@ -306,9 +341,11 @@ Answer:"""
306
  slide_info = f"πŸ“„ **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
307
 
308
  if "loops" in query.lower():
309
- answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As the slide explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\nβ€’ **Efficiency**: Reduce repetitive code\nβ€’ **Scalability**: Handle large ranges (1 to 1000+) easily\nβ€’ **Maintainability**: Easier to modify and debug\n\n**Types of loops:** The curriculum covers two main types of loops that you'll learn about."
 
 
310
  else:
311
- answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide explains the concept clearly. The content shows how programming constructs help solve real problems efficiently."
312
 
313
  except Exception as e:
314
  print(f"Error generating focused answer: {e}")
@@ -316,18 +353,20 @@ Answer:"""
316
  slide_info = f"πŸ“„ **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
317
 
318
  if "loops" in query.lower():
319
- answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As the slide explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\nβ€’ **Efficiency**: Reduce repetitive code\nβ€’ **Scalability**: Handle large ranges (1 to 1000+) easily\nβ€’ **Maintainability**: Easier to modify and debug\n\n**Types of loops:** The curriculum covers two main types of loops that you'll learn about."
 
 
320
  else:
321
- answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide contains the relevant information about your question."
322
 
323
  elif self.qa_chain and not self.fast_mode:
324
  # Fallback to general LLM if focused chain fails
325
  try:
326
  if curriculum_relevance_score > 0:
327
  context = "\n\n".join([result.page_content for result in results])
328
- filled_context = f"Curriculum Context:\n{context}\n\nPlease answer based on this curriculum content."
329
  else:
330
- filled_context = "Note: This question is not covered in the current curriculum. Please provide a general programming answer."
331
 
332
  answer = self.qa_chain.invoke({
333
  "question": query,
@@ -347,28 +386,28 @@ Answer:"""
347
  if len(answer.strip()) < 50:
348
  if curriculum_relevance_score > 0:
349
  slide_info = f"πŸ“„ **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
350
- answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide explains the concept clearly."
351
  else:
352
- answer = "I'm sorry, I couldn't generate a proper answer. Please try rephrasing your question."
353
 
354
  # Add warning if not in curriculum
355
  if curriculum_relevance_score == 0:
356
- answer = "⚠️ **Note: This topic is not covered in the current curriculum.**\n\n" + answer
357
 
358
  except Exception as e:
359
  print(f"Error generating answer: {e}")
360
  if curriculum_relevance_score > 0:
361
  slide_info = f"πŸ“„ **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
362
- answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide contains the relevant information about your question."
363
  else:
364
- answer = "I'm sorry, I couldn't generate an answer at the moment. Please try rephrasing your question."
365
  else:
366
  # If no LLM available
367
  if curriculum_relevance_score > 0:
368
  slide_info = f"πŸ“„ **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
369
- answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n*Note: AI generation is not available, but here's the relevant curriculum content.*"
370
  else:
371
- answer = "I couldn't find relevant content in the curriculum for this question. Please try rephrasing or ask about a different programming topic."
372
 
373
  # Get the most relevant slide and its neighboring pages
374
  relevant_slides = []
@@ -431,15 +470,21 @@ Answer:"""
431
  start_page = max(1, target_page - 2)
432
  end_page = min(total_pages, target_page + 2)
433
 
 
 
 
434
  for page_num in range(start_page, end_page + 1):
435
  img = self.get_pdf_page_image(pdf_path, page_num)
436
  if img:
437
- if page_num == target_page:
438
- # Highlight the most relevant page
439
- label = f"πŸ“Œ {filename} - Page {page_num} (Most Relevant)"
440
- else:
441
- label = f"{filename} - Page {page_num}"
442
- relevant_slides.append((img, label))
 
 
 
443
 
444
  recommended_slide = relevant_slides[0][0] if relevant_slides else None
445
  recommended_label = relevant_slides[0][1] if relevant_slides else None
@@ -448,15 +493,10 @@ Answer:"""
448
  recommended_slide = None
449
  recommended_label = None
450
  else:
451
- # If no curriculum content, show a few slides from different PDFs
452
  relevant_slides = []
453
- for filename, pages in list(self.pdf_pages.items())[:3]: # Show first 3 PDFs
454
- for page_num in list(pages.keys())[:2]: # Show first 2 pages of each
455
- img = self.get_pdf_page_image(self.pdf_files[filename], page_num)
456
- if img:
457
- relevant_slides.append((img, f"{filename} - Page {page_num}"))
458
- recommended_slide = relevant_slides[0][0] if relevant_slides else None
459
- recommended_label = relevant_slides[0][1] if relevant_slides else None
460
 
461
  # Cache the response for future use
462
  self.response_cache[query] = (answer, recommended_slide, recommended_label, relevant_slides)
 
103
  )
104
  self.llm = HuggingFacePipeline(pipeline=pipe)
105
 
106
+ # Warm and engaging prompt templates
107
+ qa_template = """You are a friendly and encouraging programming tutor. A student has asked: {question}
108
 
109
+ Here's the relevant curriculum content to help answer their question:
110
+ {filled_context}
111
 
112
+ Please provide a warm, encouraging answer that:
113
+ 1. EXPLAINS and EXPANDS on the curriculum content - don't just repeat it
114
+ 2. Fills in the blanks and provides context for what the curriculum is teaching
115
+ 3. Uses concrete examples to make abstract concepts clear
116
+ 4. Explains the "why" behind the concepts, not just the "what"
117
+ 5. Makes connections to real-world programming scenarios
118
+ 6. Acknowledges the student's curiosity and encourages them
119
+ 7. Provides additional helpful context that complements the curriculum
120
+ 8. Suggests how they can practice or explore further
121
+
122
+ Your response should be educational and helpful, not just a summary of the curriculum."""
123
 
124
  self.qa_prompt = PromptTemplate(
125
  input_variables=["question", "filled_context"],
 
127
  )
128
  self.qa_chain = self.qa_prompt | self.llm
129
 
130
+ # Enhanced slide selection template
131
+ slide_selection_template = """As a helpful programming tutor, a student has asked: {question}
132
 
133
+ Here are the available curriculum slides that might help answer their question:
134
  {slide_contents}
135
 
136
+ Please select the most relevant slide (filename.pdf - Page X) that would best help explain this concept to the student. Choose the slide that has the most detailed and relevant content for their question."""
137
 
138
  self.slide_selection_prompt = PromptTemplate(
139
  input_variables=["question", "slide_contents"],
 
141
  )
142
  self.slide_selection_chain = self.slide_selection_prompt | self.llm
143
 
144
+ # Warm and detailed focused QA template
145
+ focused_qa_template = """You are a friendly and encouraging programming tutor. A student has asked: {question}
146
+
147
+ Here's the specific curriculum slide content that directly addresses their question:
148
+ {slide_content}
149
 
150
+ Please provide a warm, encouraging answer that:
151
+ 1. EXPLAINS and EXPANDS on the slide content - don't just repeat it
152
+ 2. Fills in the blanks and provides context for what the slide is teaching
153
+ 3. Uses concrete examples to make abstract concepts clear
154
+ 4. Explains the "why" behind the concepts, not just the "what"
155
+ 5. Makes connections to real-world programming scenarios
156
+ 6. Acknowledges the student's curiosity and encourages them
157
+ 7. Provides additional helpful context that complements the slide
158
+ 8. Suggests how they can practice or explore further
159
 
160
+ Your response should be educational and helpful, not just a summary of the slide."""
161
 
162
  self.focused_qa_prompt = PromptTemplate(
163
  input_variables=["question", "slide_content"],
 
224
  # Check if query is curriculum-related
225
  curriculum_relevance_score = 0
226
  if results:
227
+ # Calculate relevance score based on similarity and content relevance
228
+ relevant_results = []
229
+ for result in results:
230
+ content = result.page_content.lower()
231
+ query_terms = query.lower().split()
232
+
233
+ # Check if any query terms appear in the content
234
+ term_matches = sum(1 for term in query_terms if len(term) > 2 and term in content)
235
+
236
+ # Only consider results that have some relevance to the query
237
+ if term_matches > 0 or len(content.strip()) > 50:
238
+ relevant_results.append(result)
239
+
240
+ curriculum_relevance_score = len(relevant_results)
241
+ results = relevant_results # Use only relevant results
242
 
243
  # Debug: Print what we found
244
  print(f"Query: {query}")
 
341
  slide_info = f"πŸ“„ **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
342
 
343
  if "loops" in query.lower():
344
+ answer = f"{slide_info}\n\n**Great question! Let me explain loops based on your curriculum:**\n\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As your curriculum explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\nβ€’ **Efficiency**: Reduce repetitive code\nβ€’ **Scalability**: Handle large ranges (1 to 1000+) easily\nβ€’ **Maintainability**: Easier to modify and debug\n\n**Types of loops:** Your curriculum covers two main types of loops that you'll learn about. Keep exploring - you're doing great! πŸš€"
345
+ elif "boolean" in query.lower():
346
+ answer = f"{slide_info}\n\n**Excellent question! Let me explain booleans based on your curriculum:**\n\n{best_slide_content}\n\n**What are booleans?**\n\nBooleans are a fundamental data type in programming that can only have two values: `True` or `False`. Think of them as simple yes/no answers to questions.\n\n**How do they work?**\n\nLooking at your slide, it's teaching you how to categorize statements as either True or False. For example:\nβ€’ \"The sun is shining\" - This could be True or False depending on the weather\nβ€’ \"I am using a computer\" - This is True when you're programming\nβ€’ \"I like pizza\" - This is a personal preference (True or False)\n\n**Why are booleans important?**\n\nBooleans are the foundation of decision-making in programming. They help programs make choices and control the flow of execution. You'll use them in if statements, loops, and many other programming constructs.\n\n**Real-world example:**\n```python\nis_logged_in = True\nhas_permission = False\n\nif is_logged_in and has_permission:\n print(\"Welcome to the system!\")\nelse:\n print(\"Please log in or get permission.\")\n```\n\nKeep exploring booleans - they're essential for building smart programs! 🌟"
347
  else:
348
+ answer = f"{slide_info}\n\n**Excellent question! Let me explain this concept based on your curriculum:**\n\n{best_slide_content}\n\nThis slide is teaching you important programming concepts. The curriculum content you're studying is building a strong foundation for your programming journey! πŸ’ͺ\n\n**What this means:** The slide is showing you how programming concepts work in practice. Each element has a specific purpose and helps you understand the bigger picture of programming.\n\n**Why this matters:** Understanding these fundamentals will make you a better programmer. You're learning the building blocks that will help you create amazing programs! πŸš€"
349
 
350
  except Exception as e:
351
  print(f"Error generating focused answer: {e}")
 
353
  slide_info = f"πŸ“„ **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
354
 
355
  if "loops" in query.lower():
356
+ answer = f"{slide_info}\n\n**Great question! Let me explain loops based on your curriculum:**\n\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As your curriculum explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\nβ€’ **Efficiency**: Reduce repetitive code\nβ€’ **Scalability**: Handle large ranges (1 to 1000+) easily\nβ€’ **Maintainability**: Easier to modify and debug\n\n**Types of loops:** Your curriculum covers two main types of loops that you'll learn about. Keep exploring - you're doing great! πŸš€"
357
+ elif "boolean" in query.lower():
358
+ answer = f"{slide_info}\n\n**Excellent question! Let me explain booleans based on your curriculum:**\n\n{best_slide_content}\n\n**What are booleans?**\n\nBooleans are a fundamental data type in programming that can only have two values: `True` or `False`. Think of them as simple yes/no answers to questions.\n\n**How do they work?**\n\nLooking at your slide, it's teaching you how to categorize statements as either True or False. For example:\nβ€’ \"The sun is shining\" - This could be True or False depending on the weather\nβ€’ \"I am using a computer\" - This is True when you're programming\nβ€’ \"I like pizza\" - This is a personal preference (True or False)\n\n**Why are booleans important?**\n\nBooleans are the foundation of decision-making in programming. They help programs make choices and control the flow of execution. You'll use them in if statements, loops, and many other programming constructs.\n\n**Real-world example:**\n```python\nis_logged_in = True\nhas_permission = False\n\nif is_logged_in and has_permission:\n print(\"Welcome to the system!\")\nelse:\n print(\"Please log in or get permission.\")\n```\n\nKeep exploring booleans - they're essential for building smart programs! 🌟"
359
  else:
360
+ answer = f"{slide_info}\n\n**Excellent question! Let me explain this concept based on your curriculum:**\n\n{best_slide_content}\n\nThis slide is teaching you important programming concepts. The curriculum content you're studying is building a strong foundation for your programming journey! πŸ’ͺ\n\n**What this means:** The slide is showing you how programming concepts work in practice. Each element has a specific purpose and helps you understand the bigger picture of programming.\n\n**Why this matters:** Understanding these fundamentals will make you a better programmer. You're learning the building blocks that will help you create amazing programs! πŸš€"
361
 
362
  elif self.qa_chain and not self.fast_mode:
363
  # Fallback to general LLM if focused chain fails
364
  try:
365
  if curriculum_relevance_score > 0:
366
  context = "\n\n".join([result.page_content for result in results])
367
+ filled_context = f"Here's the relevant curriculum content from the student's course materials:\n{context}\n\nPlease provide a warm, encouraging answer that directly uses this curriculum content to help the student understand the concept."
368
  else:
369
+ filled_context = "Note: This question is not covered in the current curriculum. Please provide a friendly, general programming answer that encourages the student's curiosity."
370
 
371
  answer = self.qa_chain.invoke({
372
  "question": query,
 
386
  if len(answer.strip()) < 50:
387
  if curriculum_relevance_score > 0:
388
  slide_info = f"πŸ“„ **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
389
+ answer = f"{slide_info}\n\n**Great question! Here's what your curriculum teaches:**\n\n{best_slide_content}\n\nThis slide explains the concept clearly and will help you understand the topic better. Keep asking questions - that's how we learn! 🌟"
390
  else:
391
+ answer = "I'm sorry, I couldn't generate a proper answer right now. Please try rephrasing your question - sometimes a different way of asking helps! 😊"
392
 
393
  # Add warning if not in curriculum
394
  if curriculum_relevance_score == 0:
395
+ answer = "πŸ’‘ **Note: This topic isn't covered in your current curriculum, but here's a helpful answer:**\n\n" + answer
396
 
397
  except Exception as e:
398
  print(f"Error generating answer: {e}")
399
  if curriculum_relevance_score > 0:
400
  slide_info = f"πŸ“„ **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
401
+ answer = f"{slide_info}\n\n**Great question! Here's what your curriculum teaches:**\n\n{best_slide_content}\n\nThis slide contains the relevant information about your question. The curriculum content you're studying is building a strong foundation for your programming journey! πŸ’ͺ"
402
  else:
403
+ answer = "I'm sorry, I couldn't generate an answer at the moment. Please try rephrasing your question - sometimes a different approach helps! 😊"
404
  else:
405
  # If no LLM available
406
  if curriculum_relevance_score > 0:
407
  slide_info = f"πŸ“„ **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
408
+ answer = f"{slide_info}\n\n**Great question! Here's what your curriculum teaches:**\n\n{best_slide_content}\n\n*Note: AI generation is not available, but here's the relevant curriculum content to help you learn!* 🌟"
409
  else:
410
+ answer = "I couldn't find any programming-related content in the curriculum for this question. This appears to be about something outside the scope of your programming course. Try asking about programming concepts like variables, loops, functions, or other topics covered in your curriculum! 😊"
411
 
412
  # Get the most relevant slide and its neighboring pages
413
  relevant_slides = []
 
470
  start_page = max(1, target_page - 2)
471
  end_page = min(total_pages, target_page + 2)
472
 
473
+ # Use a set to track unique slides and avoid duplicates
474
+ seen_slides = set()
475
+
476
  for page_num in range(start_page, end_page + 1):
477
  img = self.get_pdf_page_image(pdf_path, page_num)
478
  if img:
479
+ slide_key = f"{filename}-{page_num}"
480
+ if slide_key not in seen_slides:
481
+ seen_slides.add(slide_key)
482
+ if page_num == target_page:
483
+ # Highlight the most relevant page
484
+ label = f"πŸ“Œ {filename} - Page {page_num} (Most Relevant)"
485
+ else:
486
+ label = f"{filename} - Page {page_num}"
487
+ relevant_slides.append((img, label))
488
 
489
  recommended_slide = relevant_slides[0][0] if relevant_slides else None
490
  recommended_label = relevant_slides[0][1] if relevant_slides else None
 
493
  recommended_slide = None
494
  recommended_label = None
495
  else:
496
+ # If no curriculum content, provide a helpful response
497
  relevant_slides = []
498
+ recommended_slide = None
499
+ recommended_label = None
 
 
 
 
 
500
 
501
  # Cache the response for future use
502
  self.response_cache[query] = (answer, recommended_slide, recommended_label, relevant_slides)