Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -83,9 +83,13 @@ class CurriculumChatbot:
|
|
| 83 |
# Create QA prompt template
|
| 84 |
qa_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
| 85 |
|
| 86 |
-
You are a helpful AI programming tutor.
|
| 87 |
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
<|eot_id|><|start_header_id|>user<|end_header_id|>
|
| 91 |
|
|
@@ -177,7 +181,7 @@ Which slide is most relevant? Return only: "filename.pdf - Page X"
|
|
| 177 |
# Relevance score = number of retrieved results whose page content is non-empty
|
| 178 |
curriculum_relevance_score = len([r for r in results if r.page_content.strip()])
|
| 179 |
|
| 180 |
-
#
|
| 181 |
if self.qa_chain:
|
| 182 |
try:
|
| 183 |
if curriculum_relevance_score > 0:
|
|
@@ -201,81 +205,74 @@ Which slide is most relevant? Return only: "filename.pdf - Page X"
|
|
| 201 |
|
| 202 |
except Exception as e:
|
| 203 |
print(f"Error generating answer: {e}")
|
|
|
|
| 204 |
if curriculum_relevance_score > 0:
|
| 205 |
-
answer = f"Based on the curriculum content:\n\n{results[0].page_content}"
|
| 206 |
else:
|
| 207 |
-
answer = "I'm sorry, I couldn't generate an answer at the moment."
|
| 208 |
else:
|
| 209 |
-
#
|
| 210 |
if curriculum_relevance_score > 0:
|
| 211 |
-
answer = f"
|
| 212 |
else:
|
| 213 |
-
answer = "
|
| 214 |
|
| 215 |
-
# Get
|
| 216 |
-
|
| 217 |
-
if curriculum_relevance_score > 0
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
-
#
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
slide_response = slide_response.split("<|eot_id|>")[-1].strip()
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
recommended_slide = self.get_pdf_page_image(
|
| 257 |
-
self.pdf_files[results[0].metadata["filename"]],
|
| 258 |
-
results[0].metadata["page_number"]
|
| 259 |
-
)
|
| 260 |
-
recommended_label = f"{results[0].metadata['filename']} - Page {results[0].metadata['page_number']}"
|
| 261 |
-
|
| 262 |
-
# Get all slides for navigation
|
| 263 |
-
all_slides = self.get_all_slides()
|
| 264 |
|
| 265 |
-
return answer, recommended_slide, recommended_label
|
| 266 |
|
| 267 |
# --- Gradio UI ---
|
| 268 |
chatbot = CurriculumChatbot()
|
| 269 |
|
| 270 |
def gradio_chat(query):
|
| 271 |
-
answer, recommended_slide, recommended_label,
|
| 272 |
|
| 273 |
-
#
|
| 274 |
-
if
|
| 275 |
-
gallery_items = [(recommended_slide, f"📌 {recommended_label} (Recommended)")]
|
| 276 |
-
gallery_items.extend(all_slides)
|
| 277 |
-
else:
|
| 278 |
-
gallery_items = all_slides
|
| 279 |
|
| 280 |
return answer, gallery_items
|
| 281 |
|
|
|
|
| 83 |
# Create QA prompt template
|
| 84 |
qa_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
| 85 |
|
| 86 |
+
You are a helpful AI programming tutor. You MUST ALWAYS provide a clear, educational answer to every question. Never say you cannot answer or that you don't know.
|
| 87 |
|
| 88 |
+
If the question is about curriculum content, use the provided context to give a detailed, educational explanation. If the curriculum content doesn't perfectly match the question, adapt your answer to be relevant while using the curriculum information.
|
| 89 |
+
|
| 90 |
+
If the question is not covered in the curriculum, provide a comprehensive general programming answer based on your knowledge.
|
| 91 |
+
|
| 92 |
+
Always be educational, clear, and helpful.
|
| 93 |
|
| 94 |
<|eot_id|><|start_header_id|>user<|end_header_id|>
|
| 95 |
|
|
|
|
| 181 |
# Relevance score = number of retrieved results whose page content is non-empty
|
| 182 |
curriculum_relevance_score = len([r for r in results if r.page_content.strip()])
|
| 183 |
|
| 184 |
+
# Prefer an LLM-generated answer; raw curriculum text is used only as a fallback when generation fails or no LLM is available
|
| 185 |
if self.qa_chain:
|
| 186 |
try:
|
| 187 |
if curriculum_relevance_score > 0:
|
|
|
|
| 205 |
|
| 206 |
except Exception as e:
|
| 207 |
print(f"Error generating answer: {e}")
|
| 208 |
+
# Even if LLM fails, try to provide a helpful response
|
| 209 |
if curriculum_relevance_score > 0:
|
| 210 |
+
answer = f"Based on the curriculum content, here's what I found:\n\n{results[0].page_content}\n\n*Note: I'm having trouble generating a custom answer right now, but here's the relevant curriculum content.*"
|
| 211 |
else:
|
| 212 |
+
answer = "I'm sorry, I couldn't generate an answer at the moment. Please try rephrasing your question."
|
| 213 |
else:
|
| 214 |
+
# If no LLM available, still provide helpful response
|
| 215 |
if curriculum_relevance_score > 0:
|
| 216 |
+
answer = f"Based on the curriculum content:\n\n{results[0].page_content}\n\n*Note: AI generation is not available, but here's the relevant curriculum content.*"
|
| 217 |
else:
|
| 218 |
+
answer = "I couldn't find relevant content in the curriculum for this question. Please try rephrasing or ask about a different programming topic."
|
| 219 |
|
| 220 |
+
# Get the most relevant slide and its neighboring pages
|
| 221 |
+
relevant_slides = []
|
| 222 |
+
if curriculum_relevance_score > 0:
|
| 223 |
+
# Get the most relevant result
|
| 224 |
+
best_result = results[0]
|
| 225 |
+
filename = best_result.metadata["filename"]
|
| 226 |
+
page_number = best_result.metadata["page_number"]
|
| 227 |
+
|
| 228 |
+
# Get the specific PDF and its pages
|
| 229 |
+
if filename in self.pdf_files:
|
| 230 |
+
pdf_path = self.pdf_files[filename]
|
| 231 |
+
doc = fitz.open(pdf_path)
|
| 232 |
+
total_pages = len(doc)
|
| 233 |
+
doc.close()
|
| 234 |
|
| 235 |
+
# Get the target page and neighboring pages (2 before, 2 after)
|
| 236 |
+
start_page = max(1, page_number - 2)
|
| 237 |
+
end_page = min(total_pages, page_number + 2)
|
|
|
|
| 238 |
|
| 239 |
+
for page_num in range(start_page, end_page + 1):
|
| 240 |
+
img = self.get_pdf_page_image(pdf_path, page_num)
|
| 241 |
+
if img:
|
| 242 |
+
if page_num == page_number:
|
| 243 |
+
# Highlight the most relevant page
|
| 244 |
+
label = f"📌 {filename} - Page {page_num} (Most Relevant)"
|
| 245 |
+
else:
|
| 246 |
+
label = f"{filename} - Page {page_num}"
|
| 247 |
+
relevant_slides.append((img, label))
|
| 248 |
+
|
| 249 |
+
recommended_slide = relevant_slides[0][0] if relevant_slides else None
|
| 250 |
+
recommended_label = relevant_slides[0][1] if relevant_slides else None
|
| 251 |
+
else:
|
| 252 |
+
# Fallback if filename not found
|
| 253 |
+
recommended_slide = None
|
| 254 |
+
recommended_label = None
|
| 255 |
+
else:
|
| 256 |
+
# If no curriculum content, show a few slides from different PDFs
|
| 257 |
+
relevant_slides = []
|
| 258 |
+
for filename, pages in list(self.pdf_pages.items())[:3]: # Show first 3 PDFs
|
| 259 |
+
for page_num in list(pages.keys())[:2]: # Show first 2 pages of each
|
| 260 |
+
img = self.get_pdf_page_image(self.pdf_files[filename], page_num)
|
| 261 |
+
if img:
|
| 262 |
+
relevant_slides.append((img, f"{filename} - Page {page_num}"))
|
| 263 |
+
recommended_slide = relevant_slides[0][0] if relevant_slides else None
|
| 264 |
+
recommended_label = relevant_slides[0][1] if relevant_slides else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
|
| 266 |
+
return answer, recommended_slide, recommended_label, relevant_slides
|
| 267 |
|
| 268 |
# --- Gradio UI ---
|
| 269 |
chatbot = CurriculumChatbot()
|
| 270 |
|
| 271 |
def gradio_chat(query):
|
| 272 |
+
answer, recommended_slide, recommended_label, relevant_slides = chatbot.chat(query)
|
| 273 |
|
| 274 |
+
# Show the slides selected by chat(): the best-matching page with its neighbors, or sample pages from the first PDFs when nothing matched
|
| 275 |
+
gallery_items = relevant_slides if relevant_slides else []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
return answer, gallery_items
|
| 278 |
|