Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,11 +5,9 @@ import fitz # PyMuPDF
|
|
| 5 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 6 |
from langchain_community.vectorstores import Chroma
|
| 7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 8 |
-
from langchain_huggingface import HuggingFacePipeline
|
| 9 |
from langchain.prompts import PromptTemplate
|
| 10 |
from langchain.chains import LLMChain
|
| 11 |
-
|
| 12 |
-
import torch
|
| 13 |
import base64
|
| 14 |
from PIL import Image
|
| 15 |
import io
|
|
@@ -71,7 +69,7 @@ class CurriculumChatbot:
|
|
| 71 |
)
|
| 72 |
|
| 73 |
def _setup_llm(self):
|
| 74 |
-
"""Setup LLM with
|
| 75 |
try:
|
| 76 |
# Initialize LLM attributes
|
| 77 |
self.llm = None
|
|
@@ -79,20 +77,30 @@ class CurriculumChatbot:
|
|
| 79 |
self.focused_qa_chain = None
|
| 80 |
self.content_selection_chain = None
|
| 81 |
|
| 82 |
-
# Load
|
| 83 |
-
|
| 84 |
-
"
|
| 85 |
-
model="microsoft/DialoGPT-medium",
|
| 86 |
-
torch_dtype=torch.float16,
|
| 87 |
-
device_map="auto",
|
| 88 |
-
max_length=512,
|
| 89 |
-
do_sample=True,
|
| 90 |
-
temperature=0.7,
|
| 91 |
-
top_p=0.9,
|
| 92 |
-
repetition_penalty=1.1
|
| 93 |
)
|
| 94 |
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
# Create content selection prompt template
|
| 98 |
content_selection_template = """You are an expert curriculum analyst. Your task is to find the most relevant slide for a student's question.
|
|
@@ -118,7 +126,7 @@ Most relevant slide number:"""
|
|
| 118 |
template=content_selection_template
|
| 119 |
))
|
| 120 |
|
| 121 |
-
# Create QA prompt template for
|
| 122 |
qa_template = """You are an expert programming tutor. Your task is to provide a comprehensive, educational answer based on the curriculum content.
|
| 123 |
|
| 124 |
Curriculum Content:
|
|
@@ -133,6 +141,7 @@ Instructions:
|
|
| 133 |
4. Explain the concept step-by-step
|
| 134 |
5. Make sure your answer directly addresses what the student is asking
|
| 135 |
6. If the content is limited, provide additional educational context
|
|
|
|
| 136 |
|
| 137 |
Your detailed answer:"""
|
| 138 |
|
|
@@ -156,6 +165,7 @@ Instructions:
|
|
| 156 |
4. Explain the concept step-by-step
|
| 157 |
5. Make sure your answer directly addresses what the student is asking
|
| 158 |
6. If the slide content is limited, provide additional educational context
|
|
|
|
| 159 |
|
| 160 |
Your detailed answer:"""
|
| 161 |
|
|
@@ -301,16 +311,14 @@ Your detailed answer:"""
|
|
| 301 |
|
| 302 |
print(f"LLM Response: {answer[:200]}...")
|
| 303 |
|
| 304 |
-
# Clean up the answer
|
| 305 |
answer = answer.strip()
|
| 306 |
-
if "<|eot_id|>" in answer:
|
| 307 |
-
answer = answer.split("<|eot_id|>")[-1].strip()
|
| 308 |
|
| 309 |
# Remove any prompt artifacts
|
| 310 |
if answer.startswith("Answer:"):
|
| 311 |
answer = answer[7:].strip()
|
| 312 |
-
if answer.startswith("
|
| 313 |
-
answer = answer[
|
| 314 |
|
| 315 |
# Check if the answer is too short, generic, or poor quality
|
| 316 |
if (len(answer.strip()) < 100 or
|
|
@@ -345,10 +353,8 @@ Your detailed answer:"""
|
|
| 345 |
|
| 346 |
answer = self.qa_chain.run(question=query, filled_context=filled_context)
|
| 347 |
answer = answer.strip()
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
if answer.startswith("Answer:"):
|
| 353 |
answer = answer[7:].strip()
|
| 354 |
if answer.startswith("Provide a clear, educational answer explaining the concept:"):
|
|
|
|
| 5 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 6 |
from langchain_community.vectorstores import Chroma
|
| 7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
|
| 8 |
from langchain.prompts import PromptTemplate
|
| 9 |
from langchain.chains import LLMChain
|
| 10 |
+
import anthropic
|
|
|
|
| 11 |
import base64
|
| 12 |
from PIL import Image
|
| 13 |
import io
|
|
|
|
| 69 |
)
|
| 70 |
|
| 71 |
def _setup_llm(self):
|
| 72 |
+
"""Setup LLM with Claude"""
|
| 73 |
try:
|
| 74 |
# Initialize LLM attributes
|
| 75 |
self.llm = None
|
|
|
|
| 77 |
self.focused_qa_chain = None
|
| 78 |
self.content_selection_chain = None
|
| 79 |
|
| 80 |
+
# Load Claude
|
| 81 |
+
self.anthropic_client = anthropic.Anthropic(
|
| 82 |
+
api_key=os.environ.get("ANTHROPIC_API_KEY")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
)
|
| 84 |
|
| 85 |
+
# Create a custom LLM wrapper that works with LangChain
|
| 86 |
+
class ClaudeLLM:
    """Callable wrapper that sends a single prompt to Claude and returns text.

    The instance is called with a prompt string; the prompt is sent as one
    user message through ``client.messages.create`` and the text of the
    first content block of the response is returned.

    NOTE(review): this is a plain callable, not a LangChain ``LLM`` /
    ``Runnable`` subclass -- confirm that the chains built from it accept
    such an object.

    Args:
        client: Object exposing ``messages.create(...)`` (here the
            ``anthropic.Anthropic`` client).
        model: Model identifier passed to every request.
        max_tokens: Upper bound on generated tokens per request.
        temperature: Sampling temperature per request.
    """

    def __init__(self, client, model="claude-3-5-haiku-20241022",
                 max_tokens=1500, temperature=0.7):
        self.client = client
        # Kept on the instance so callers can tune generation without
        # editing __call__; the defaults match the previous constants.
        self.model = model
        self.max_tokens = max_tokens
        self.temperature = temperature

    def __call__(self, prompt):
        """Return Claude's reply to *prompt*, or a fixed apology on failure.

        Any exception raised while calling the API or reading the response
        is printed and replaced by the apology string, so callers always
        receive a ``str``.
        """
        try:
            response = self.client.messages.create(
                model=self.model,
                max_tokens=self.max_tokens,
                temperature=self.temperature,
                messages=[{"role": "user", "content": prompt}]
            )
            return response.content[0].text
        except Exception as e:
            # Deliberate best-effort: a failure must not crash the chat flow.
            print(f"Error calling Claude: {e}")
            return "I'm sorry, I couldn't generate a response at the moment."
|
| 102 |
+
|
| 103 |
+
self.llm = ClaudeLLM(self.anthropic_client)
|
| 104 |
|
| 105 |
# Create content selection prompt template
|
| 106 |
content_selection_template = """You are an expert curriculum analyst. Your task is to find the most relevant slide for a student's question.
|
|
|
|
| 126 |
template=content_selection_template
|
| 127 |
))
|
| 128 |
|
| 129 |
+
# Create QA prompt template for Claude
|
| 130 |
qa_template = """You are an expert programming tutor. Your task is to provide a comprehensive, educational answer based on the curriculum content.
|
| 131 |
|
| 132 |
Curriculum Content:
|
|
|
|
| 141 |
4. Explain the concept step-by-step
|
| 142 |
5. Make sure your answer directly addresses what the student is asking
|
| 143 |
6. If the content is limited, provide additional educational context
|
| 144 |
+
7. Structure your answer clearly with bullet points or numbered lists when appropriate
|
| 145 |
|
| 146 |
Your detailed answer:"""
|
| 147 |
|
|
|
|
| 165 |
4. Explain the concept step-by-step
|
| 166 |
5. Make sure your answer directly addresses what the student is asking
|
| 167 |
6. If the slide content is limited, provide additional educational context
|
| 168 |
+
7. Structure your answer clearly with bullet points or numbered lists when appropriate
|
| 169 |
|
| 170 |
Your detailed answer:"""
|
| 171 |
|
|
|
|
| 311 |
|
| 312 |
print(f"LLM Response: {answer[:200]}...")
|
| 313 |
|
| 314 |
+
# Clean up the answer (Claude is cleaner, but just in case)
|
| 315 |
answer = answer.strip()
|
|
|
|
|
|
|
| 316 |
|
| 317 |
# Remove any prompt artifacts
|
| 318 |
if answer.startswith("Answer:"):
|
| 319 |
answer = answer[7:].strip()
|
| 320 |
+
if answer.startswith("Your detailed answer:"):
    # Slice by the prefix's real length (21 characters); the previous
    # literal 20 left the trailing ":" at the start of the answer.
    answer = answer[len("Your detailed answer:"):].strip()
|
| 322 |
|
| 323 |
# Check if the answer is too short, generic, or poor quality
|
| 324 |
if (len(answer.strip()) < 100 or
|
|
|
|
| 353 |
|
| 354 |
answer = self.qa_chain.run(question=query, filled_context=filled_context)
|
| 355 |
answer = answer.strip()
|
| 356 |
+
|
| 357 |
+
# Remove any prompt artifacts (Claude is cleaner, but just in case)
|
|
|
|
|
|
|
| 358 |
if answer.startswith("Answer:"):
|
| 359 |
answer = answer[7:].strip()
|
| 360 |
if answer.startswith("Provide a clear, educational answer explaining the concept:"):
|