Spaces:

IW2025
/

InclusiveWorldChatbot

Sleeping

App Files Community

IW2025 committed on Jul 27, 2025

Commit

dbb7b33

verified ·

1 Parent(s): 74ee704

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -25

app.py CHANGED Viewed

@@ -94,18 +94,26 @@ Provide a clear, educational answer explaining the concept:"""
             ))
             # Create slide selection prompt template for DialoGPT
-            slide_template = """Given this question: {question}
-Available slides:
-{available_slides}
-Which slide is most relevant? Return only the filename and page number like this: "filename.pdf - Page X"
 Answer:"""
             self.slide_selection_chain = LLMChain(llm=self.llm, prompt=PromptTemplate(
-                input_variables=["question", "available_slides"],
-                template=slide_template
             ))
             # Create focused answer prompt template
@@ -186,19 +194,65 @@ Provide a clear, educational answer based on this slide:"""
                 print(f"  {i+1}. {result.metadata['filename']} - Page {result.metadata['page_number']}")
                 print(f"     Content: {result.page_content[:100]}...")
-        # Find the most relevant slide content first
         best_slide_content = ""
-        if curriculum_relevance_score > 0:
-            # Get the most relevant result
-            best_result = results[0]
-            best_slide_content = best_result.page_content
-            # If the best slide has little content, try to find a better one
-            if len(best_slide_content.strip()) < 100:
-                for result in results[1:]:
-                    if len(result.page_content.strip()) > len(best_slide_content.strip()):
-                        best_slide_content = result.page_content
-                        best_result = result
         # Generate focused LLM answer using the most relevant slide
         if self.focused_qa_chain and curriculum_relevance_score > 0:
@@ -228,18 +282,22 @@ Provide a clear, educational answer based on this slide:"""
                     "slide content:" in answer.lower()):
                     # Generate a proper answer using the slide content
                     if "loops" in query.lower():
-                        answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As the slide explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\n• **Efficiency**: Reduce repetitive code\n• **Scalability**: Handle large ranges (1 to 1000+) easily\n• **Maintainability**: Easier to modify and debug\n\n**Types of loops:** The curriculum covers two main types of loops that you'll learn about."
                     else:
-                        answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\nThis slide explains the concept clearly. The content shows how programming constructs like loops help solve real problems efficiently."
             except Exception as e:
                 print(f"Error generating focused answer: {e}")
                 # Generate a proper answer using the slide content
                 if "loops" in query.lower():
-                    answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As the slide explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\n• **Efficiency**: Reduce repetitive code\n• **Scalability**: Handle large ranges (1 to 1000+) easily\n• **Maintainability**: Easier to modify and debug\n\n**Types of loops:** The curriculum covers two main types of loops that you'll learn about."
                 else:
-                    answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\nThis slide contains the relevant information about your question."
         elif self.qa_chain:
             # Fallback to general LLM if focused chain fails
@@ -264,7 +322,8 @@ Provide a clear, educational answer based on this slide:"""
                 # Check if the answer is too short
                 if len(answer.strip()) < 50:
                     if curriculum_relevance_score > 0:
-                        answer = f"Based on the curriculum content:\n\n{best_slide_content}\n\nThis slide explains the concept clearly."
                     else:
                         answer = "I'm sorry, I couldn't generate a proper answer. Please try rephrasing your question."
@@ -275,13 +334,15 @@ Provide a clear, educational answer based on this slide:"""
             except Exception as e:
                 print(f"Error generating answer: {e}")
                 if curriculum_relevance_score > 0:
-                    answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\nThis slide contains the relevant information about your question."
                 else:
                     answer = "I'm sorry, I couldn't generate an answer at the moment. Please try rephrasing your question."
         else:
             # If no LLM available
             if curriculum_relevance_score > 0:
-                answer = f"Based on the curriculum slide:\n\n{best_slide_content}\n\n*Note: AI generation is not available, but here's the relevant curriculum content.*"
             else:
                 answer = "I couldn't find relevant content in the curriculum for this question. Please try rephrasing or ask about a different programming topic."

             ))
             # Create slide selection prompt template for DialoGPT
+            slide_selection_template = """You are an AI that analyzes curriculum slides to find the best one for teaching a concept.
+Question: {question}
+Here are the top 5 most relevant slides from the curriculum:
+{slide_contents}
+Which slide is the BEST for teaching this concept to a student? Consider:
+- Which slide has the most educational content?
+- Which slide explains the concept most clearly?
+- Which slide would be most helpful for learning?
+Return ONLY the filename and page number like this: "filename.pdf - Page X"
 Answer:"""
             self.slide_selection_chain = LLMChain(llm=self.llm, prompt=PromptTemplate(
+                input_variables=["question", "slide_contents"],
+                template=slide_selection_template
             ))
             # Create focused answer prompt template
                 print(f"  {i+1}. {result.metadata['filename']} - Page {result.metadata['page_number']}")
                 print(f"     Content: {result.page_content[:100]}...")
+        # Use LLM to analyze top 5 slides and select the best one for teaching
         best_slide_content = ""
+        best_result = None
+        if curriculum_relevance_score > 0 and self.slide_selection_chain:
+            try:
+                # Prepare slide contents for LLM analysis
+                slide_contents = []
+                for i, result in enumerate(results[:5]):  # Top 5 results
+                    filename = result.metadata["filename"]
+                    page_num = result.metadata["page_number"]
+                    content = result.page_content
+                    slide_contents.append(f"Slide {i+1}: {filename} - Page {page_num}\nContent: {content}\n")
+                slide_contents_text = "\n".join(slide_contents)
+                # Use LLM to select the best slide
+                slide_response = self.slide_selection_chain.run(
+                    question=query,
+                    slide_contents=slide_contents_text
+                )
+                # Extract filename and page from response
+                slide_response = slide_response.strip()
+                if "<|eot_id|>" in slide_response:
+                    slide_response = slide_response.split("<|eot_id|>")[-1].strip()
+                # Parse the response to get filename and page
+                match = re.search(r'(.+\.pdf)\s*-\s*Page\s*(\d+)', slide_response)
+                if match:
+                    filename = match.group(1)
+                    page_num = int(match.group(2))
+                    # Find the corresponding result
+                    for result in results:
+                        if (result.metadata["filename"] == filename and
+                            result.metadata["page_number"] == page_num):
+                            best_result = result
+                            best_slide_content = result.page_content
+                            break
+                    # If LLM selection failed, fall back to first result
+                    if not best_result:
+                        best_result = results[0]
+                        best_slide_content = results[0].page_content
+                else:
+                    # Fallback to first result if parsing failed
+                    best_result = results[0]
+                    best_slide_content = results[0].page_content
+            except Exception as e:
+                print(f"Error in LLM slide selection: {e}")
+                # Fallback to first result
+                best_result = results[0]
+                best_slide_content = results[0].page_content
+        else:
+            # Fallback without LLM
+            if curriculum_relevance_score > 0:
+                best_result = results[0]
+                best_slide_content = results[0].page_content
         # Generate focused LLM answer using the most relevant slide
         if self.focused_qa_chain and curriculum_relevance_score > 0:
                     "slide content:" in answer.lower()):
                     # Generate a proper answer using the slide content
+                    slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
                     if "loops" in query.lower():
+                        answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As the slide explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\n• **Efficiency**: Reduce repetitive code\n• **Scalability**: Handle large ranges (1 to 1000+) easily\n• **Maintainability**: Easier to modify and debug\n\n**Types of loops:** The curriculum covers two main types of loops that you'll learn about."
                     else:
+                        answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide explains the concept clearly. The content shows how programming constructs help solve real problems efficiently."
             except Exception as e:
                 print(f"Error generating focused answer: {e}")
                 # Generate a proper answer using the slide content
+                slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
                 if "loops" in query.lower():
+                    answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n**What are loops for?**\n\nLoops are programming constructs that solve the problem of repetition. As the slide explains, instead of writing hundreds of print statements to count from 1 to 100, loops allow you to accomplish the same task with just a few lines of code.\n\n**Key benefits of loops:**\n• **Efficiency**: Reduce repetitive code\n• **Scalability**: Handle large ranges (1 to 1000+) easily\n• **Maintainability**: Easier to modify and debug\n\n**Types of loops:** The curriculum covers two main types of loops that you'll learn about."
                 else:
+                    answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide contains the relevant information about your question."
         elif self.qa_chain:
             # Fallback to general LLM if focused chain fails
                 # Check if the answer is too short
                 if len(answer.strip()) < 50:
                     if curriculum_relevance_score > 0:
+                        slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
+                        answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide explains the concept clearly."
                     else:
                         answer = "I'm sorry, I couldn't generate a proper answer. Please try rephrasing your question."
             except Exception as e:
                 print(f"Error generating answer: {e}")
                 if curriculum_relevance_score > 0:
+                    slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
+                    answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\nThis slide contains the relevant information about your question."
                 else:
                     answer = "I'm sorry, I couldn't generate an answer at the moment. Please try rephrasing your question."
         else:
             # If no LLM available
             if curriculum_relevance_score > 0:
+                slide_info = f"📄 **Slide Reference:** {best_result.metadata['filename']} - Page {best_result.metadata['page_number']}"
+                answer = f"{slide_info}\n\n**Slide Content:**\n{best_slide_content}\n\n*Note: AI generation is not available, but here's the relevant curriculum content.*"
             else:
                 answer = "I couldn't find relevant content in the curriculum for this question. Please try rephrasing or ask about a different programming topic."