IW2025 committed on
Commit
dbb7b33
·
verified ·
1 Parent(s): 74ee704

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -25
app.py CHANGED
@@ -94,18 +94,26 @@ Provide a clear, educational answer explaining the concept:"""
94
  ))
95
 
96
  # Create slide selection prompt template for DialoGPT
97
- slide_template = """Given this question: {question}
98
 
99
- Available slides:
100
- {available_slides}
 
 
 
101
 
102
- Which slide is most relevant? Return only the filename and page number like this: "filename.pdf - Page X"
 
 
 
 
 
103
 
104
  Answer:"""
105
 
106
  self.slide_selection_chain = LLMChain(llm=self.llm, prompt=PromptTemplate(
107
- input_variables=["question", "available_slides"],
108
- template=slide_template
109
  ))
110
 
111
  # Create focused answer prompt template
@@ -186,19 +194,65 @@ Provide a clear, educational answer based on this slide:"""
186
  print(f" {i+1}. {result.metadata['filename']} - Page {result.metadata['page_number']}")
187
  print(f" Content: {result.page_content[:100]}...")
188
 
189
- # Find the most relevant slide content first
190
  best_slide_content = ""
191
- if curriculum_relevance_score > 0:
192
- # Get the most relevant result
193
- best_result = results[0]
194
- best_slide_content = best_result.page_content
195
-
196
- # If the best slide has little content, try to find a better one
197
- if len(best_slide_content.strip()) < 100:
198
- for result in results[1:]:
199
- if len(result.page_content.strip()) > len(best_slide_content.strip()):
200
- best_slide_content = result.page_content
201
- best_result = result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
  # Generate focused LLM answer using the most relevant slide
204
  if self.focused_qa_chain and curriculum_relevance_score > 0:
@@ -228,18 +282,22 @@ Provide a clear, educational answer based on this slide:"""
228
  "slide content:" in answer.lower()):
229
 
230
  # Generate a proper answer using the slide content
 
 
231
  if "loops" in query.lower():
232
- answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As the slide explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\n• **Efficiency**: Reduce repetitive code\n• **Scalability**: Handle large ranges (1 to 1000+) easily\n• **Maintainability**: Easier to modify and debug\n\n**Types of loops:** The curriculum covers two main types of loops that you'll learn about."
233
  else:
234
- answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\nThis slide explains the concept clearly. The content shows how programming constructs like loops help solve real problems efficiently."
235
 
236
  except Exception as e:
237
  print(f"Error generating focused answer: {e}")
238
  # Generate a proper answer using the slide content
 
 
239
  if "loops" in query.lower():
240
- answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As the slide explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\n• **Efficiency**: Reduce repetitive code\n• **Scalability**: Handle large ranges (1 to 1000+) easily\n• **Maintainability**: Easier to modify and debug\n\n**Types of loops:** The curriculum covers two main types of loops that you'll learn about."
241
  else:
242
- answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\nThis slide contains the relevant information about your question."
243
 
244
  elif self.qa_chain:
245
  # Fallback to general LLM if focused chain fails
@@ -264,7 +322,8 @@ Provide a clear, educational answer based on this slide:"""
264
  # Check if the answer is too short
265
  if len(answer.strip()) < 50:
266
  if curriculum_relevance_score > 0:
267
- answer = f"Based on the curriculum content:\n\n{best_slide_content}\n\nThis slide explains the concept clearly."
 
268
  else:
269
  answer = "I'm sorry, I couldn't generate a proper answer. Please try rephrasing your question."
270
 
@@ -275,13 +334,15 @@ Provide a clear, educational answer based on this slide:"""
275
  except Exception as e:
276
  print(f"Error generating answer: {e}")
277
  if curriculum_relevance_score > 0:
278
- answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\nThis slide contains the relevant information about your question."
 
279
  else:
280
  answer = "I'm sorry, I couldn't generate an answer at the moment. Please try rephrasing your question."
281
  else:
282
  # If no LLM available
283
  if curriculum_relevance_score > 0:
284
- answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\n*Note: AI generation is not available, but here's the relevant curriculum content.*"
 
285
  else:
286
  answer = "I couldn't find relevant content in the curriculum for this question. Please try rephrasing or ask about a different programming topic."
287
 
 
94
  ))
95
 
96
  # Create slide selection prompt template for DialoGPT
97
+ slide_selection_template = """You are an AI that analyzes curriculum slides to find the best one for teaching a concept.
98
 
99
+ Question: {question}
100
+
101
+ Here are the top 5 most relevant slides from the curriculum:
102
+
103
+ {slide_contents}
104
 
105
+ Which slide is the BEST for teaching this concept to a student? Consider:
106
+ - Which slide has the most educational content?
107
+ - Which slide explains the concept most clearly?
108
+ - Which slide would be most helpful for learning?
109
+
110
+ Return ONLY the filename and page number like this: "filename.pdf - Page X"
111
 
112
  Answer:"""
113
 
114
  self.slide_selection_chain = LLMChain(llm=self.llm, prompt=PromptTemplate(
115
+ input_variables=["question", "slide_contents"],
116
+ template=slide_selection_template
117
  ))
118
 
119
  # Create focused answer prompt template
 
194
  print(f" {i+1}. {result.metadata['filename']} - Page {result.metadata['page_number']}")
195
  print(f" Content: {result.page_content[:100]}...")
196
 
197
+ # Use LLM to analyze top 5 slides and select the best one for teaching
198
  best_slide_content = ""
199
+ best_result = None
200
+ if curriculum_relevance_score > 0 and self.slide_selection_chain:
201
+ try:
202
+ # Prepare slide contents for LLM analysis
203
+ slide_contents = []
204
+ for i, result in enumerate(results[:5]): # Top 5 results
205
+ filename = result.metadata["filename"]
206
+ page_num = result.metadata["page_number"]
207
+ content = result.page_content
208
+ slide_contents.append(f"Slide {i+1}: {filename} - Page {page_num}\nContent: {content}\n")
209
+
210
+ slide_contents_text = "\n".join(slide_contents)
211
+
212
+ # Use LLM to select the best slide
213
+ slide_response = self.slide_selection_chain.run(
214
+ question=query,
215
+ slide_contents=slide_contents_text
216
+ )
217
+
218
+ # Extract filename and page from response
219
+ slide_response = slide_response.strip()
220
+ if "<|eot_id|>" in slide_response:
221
+ slide_response = slide_response.split("<|eot_id|>")[-1].strip()
222
+
223
+ # Parse the response to get filename and page
224
+ match = re.search(r'(.+\.pdf)\s*-\s*Page\s*(\d+)', slide_response)
225
+ if match:
226
+ filename = match.group(1)
227
+ page_num = int(match.group(2))
228
+
229
+ # Find the corresponding result
230
+ for result in results:
231
+ if (result.metadata["filename"] == filename and
232
+ result.metadata["page_number"] == page_num):
233
+ best_result = result
234
+ best_slide_content = result.page_content
235
+ break
236
+
237
+ # If LLM selection failed, fall back to first result
238
+ if not best_result:
239
+ best_result = results[0]
240
+ best_slide_content = results[0].page_content
241
+ else:
242
+ # Fallback to first result if parsing failed
243
+ best_result = results[0]
244
+ best_slide_content = results[0].page_content
245
+
246
+ except Exception as e:
247
+ print(f"Error in LLM slide selection: {e}")
248
+ # Fallback to first result
249
+ best_result = results[0]
250
+ best_slide_content = results[0].page_content
251
+ else:
252
+ # Fallback without LLM
253
+ if curriculum_relevance_score > 0:
254
+ best_result = results[0]
255
+ best_slide_content = results[0].page_content
256
 
257
  # Generate focused LLM answer using the most relevant slide
258
  if self.focused_qa_chain and curriculum_relevance_score > 0:
 
282
  "slide content:" in answer.lower()):
283
 
284
  # Generate a proper answer using the slide content
285
+ slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
286
+
287
  if "loops" in query.lower():
288
+ answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As the slide explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\n• **Efficiency**: Reduce repetitive code\n• **Scalability**: Handle large ranges (1 to 1000+) easily\n• **Maintainability**: Easier to modify and debug\n\n**Types of loops:** The curriculum covers two main types of loops that you'll learn about."
289
  else:
290
+ answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide explains the concept clearly. The content shows how programming constructs help solve real problems efficiently."
291
 
292
  except Exception as e:
293
  print(f"Error generating focused answer: {e}")
294
  # Generate a proper answer using the slide content
295
+ slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
296
+
297
  if "loops" in query.lower():
298
+ answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As the slide explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\n• **Efficiency**: Reduce repetitive code\n• **Scalability**: Handle large ranges (1 to 1000+) easily\n• **Maintainability**: Easier to modify and debug\n\n**Types of loops:** The curriculum covers two main types of loops that you'll learn about."
299
  else:
300
+ answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide contains the relevant information about your question."
301
 
302
  elif self.qa_chain:
303
  # Fallback to general LLM if focused chain fails
 
322
  # Check if the answer is too short
323
  if len(answer.strip()) < 50:
324
  if curriculum_relevance_score > 0:
325
+ slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
326
+ answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide explains the concept clearly."
327
  else:
328
  answer = "I'm sorry, I couldn't generate a proper answer. Please try rephrasing your question."
329
 
 
334
  except Exception as e:
335
  print(f"Error generating answer: {e}")
336
  if curriculum_relevance_score > 0:
337
+ slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
338
+ answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide contains the relevant information about your question."
339
  else:
340
  answer = "I'm sorry, I couldn't generate an answer at the moment. Please try rephrasing your question."
341
  else:
342
  # If no LLM available
343
  if curriculum_relevance_score > 0:
344
+ slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
345
+ answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n*Note: AI generation is not available, but here's the relevant curriculum content.*"
346
  else:
347
  answer = "I couldn't find relevant content in the curriculum for this question. Please try rephrasing or ask about a different programming topic."
348