# Test_Space / app.py — Hugging Face Space entry point (upload by DarkSting, commit c0be5b7)
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch
import os
import json
import re
# ---------------- QUESTION GEN ----------------
# Base model and LoRA adapter used for math MCQ generation.
BASE_MODEL = "Qwen/Qwen2.5-Math-1.5B-Instruct"
ADAPTER_ID_QUESTION = "Chamith2000/qwen2.5-math-mcq-lora"
# Device string used by the inference helpers below when moving input tensors.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# ---------------- LOAD TOKENIZER ----------------
tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL,
    trust_remote_code=True,
    # NOTE(review): device_map is a model-loading kwarg; AutoTokenizer most
    # likely ignores it — confirm and consider removing.
    device_map="auto",
)
# Qwen tokenizers can ship without a pad token; reuse EOS so padding works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# ---------------- LOAD MODEL ----------------
# 4-bit quantization (bf16 compute) so the model fits in limited GPU memory.
loaded_bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
loaded_base = AutoModelForCausalLM.from_pretrained(BASE_MODEL, quantization_config=loaded_bnb, device_map='auto')
# Attach the fine-tuned LoRA adapter on top of the quantized base model.
model = PeftModel.from_pretrained(loaded_base, ADAPTER_ID_QUESTION)
model.eval()  # inference mode (disables dropout)
# ---------------- INFERENCE ----------------
@torch.no_grad()
def infer_question_gen(topic, difficulty, grade):
    """Generate one unique math MCQ for the given topic/difficulty/grade.

    Args:
        topic: subject area for the question (e.g. "fractions").
        difficulty: difficulty label passed straight into the prompt.
        grade: grade level passed straight into the prompt.

    Returns:
        The full decoded model output (prompt + completion) as a string, or
        an error dict when decoding produces no text. The caller is expected
        to extract the JSON payload from the returned text.
    """
    prompt = (
        "### INSTRUCTION\n"
        "Generate ONE UNIQUE math MCQ.\n"
        "The question MUST be different from previous ones.\n"
        # fixed typo in prompt: "mathematcal" -> "mathematical"
        "Understand the mathematical relationship and verify the answer before giving the final answer.\n"
        "### METADATA\n"
        f"Topic: {topic}\n"
        f"Difficulty: {difficulty}\n"
        f"Grade: {grade}\n\n"
        "### OUTPUT (JSON ONLY)\n"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=400,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        repetition_penalty=1.2,
        no_repeat_ngram_size=6,  # discourage verbatim repeats of earlier questions
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )
    text = tokenizer.decode(output[0], skip_special_tokens=True)
    # decode() always returns a str, so the original `text is None` check was
    # dead code; guard against an empty decode instead.
    if not text:
        return {"error": "Model did not return valid JSON"}
    return text
# ===============================================================================
# TEACHER FEEDBACK MODEL
# ===============================================================================
TEACHER_MODEL_PATH = "Qwen/Qwen2.5-3B-Instruct"  # base model
TEACHER_ADAPTER_PATH = "Chamith2000/teachers_guide_and_matching_score"  # LoRA adapter
print("Loading teacher feedback model...")
teacher_tokenizer = AutoTokenizer.from_pretrained(TEACHER_MODEL_PATH)
# Reuse EOS as pad token when the tokenizer ships without one.
if teacher_tokenizer.pad_token is None:
    teacher_tokenizer.pad_token = teacher_tokenizer.eos_token
# NOTE(review): reuses `loaded_bnb`, the 4-bit quantization config created for
# the question-generation model above — intentional sharing, but a rename of
# that config would break this load.
teacher_base = AutoModelForCausalLM.from_pretrained(
    TEACHER_MODEL_PATH,
    quantization_config=loaded_bnb,
    device_map='auto'
)
teacher_model = PeftModel.from_pretrained(teacher_base, TEACHER_ADAPTER_PATH)
teacher_model.eval()  # inference mode (disables dropout)
print("✅ Teacher feedback model loaded!")
@torch.no_grad()
def infer_teacher_guide(data):
    """Analyse teaching effectiveness from a guide plus student signals.

    Builds a time-usage summary from engagement vs. allocated minutes,
    prompts the LoRA-tuned teacher model, and returns the parsed dict
    produced by parse_json_output.
    """
    guide = data.get('teacher_guide', '')
    feedback = data.get('student_feedback', '')
    engagement_time = data.get('student_engagement_time', 0)
    allocated_time = data.get('guide_allocated_time', 0)
    grade = data.get('grade', 4)

    # Flag sessions that ended more than 10 minutes early or ran 10+ over.
    time_diff = allocated_time - engagement_time
    if time_diff > 10:
        time_analysis = (
            f"Student finished {time_diff} minutes early (engaged {engagement_time} min vs "
            f"{allocated_time} min allocated). This suggests possible disengagement, rushing, or giving up."
        )
    elif time_diff < -10:
        time_analysis = (
            f"Student took {abs(time_diff)} minutes longer than allocated (engaged {engagement_time} min vs "
            f"{allocated_time} min allocated). This suggests slow pacing or difficulty understanding."
        )
    else:
        time_analysis = (
            f"Student completed in appropriate time (engaged {engagement_time} min, "
            f"allocated {allocated_time} min)."
        )

    prompt = f"""Analyze teaching effectiveness. Return ONLY JSON.
LESSON INFO:
Grade Level: {grade}
Teacher Guide: {guide}
STUDENT FEEDBACK:
"{feedback}"
TIME ANALYSIS:
{time_analysis}
Analyze and return JSON:
{{
"weak_sections": ["section1", "section2", ...],
"reason": "<detailed analysis>"
}}
OUTPUT:"""

    encoded = teacher_tokenizer(prompt, return_tensors="pt").to(DEVICE)
    generated_ids = teacher_model.generate(
        **encoded,
        max_new_tokens=400,
        temperature=0.05,
        top_p=0.9,
        do_sample=True,
        repetition_penalty=1.15,
        pad_token_id=teacher_tokenizer.pad_token_id,
        eos_token_id=teacher_tokenizer.eos_token_id,
    )
    # Decode only the tokens generated after the prompt.
    completion = teacher_tokenizer.decode(
        generated_ids[0][encoded['input_ids'].shape[1]:],
        skip_special_tokens=True,
    ).strip()
    return parse_json_output(completion)
def parse_json_output(text):
    """Extract {'weak_sections': [...], 'reason': str} from raw model text.

    Tries, in order:
      1. Parsing the first '{' .. last '}' span as JSON.
      2. Regex extraction of the two expected fields from malformed output.
      3. A structured error dict carrying the first 500 chars of raw output.
    """
    # Attempt 1: the model usually emits (mostly) valid JSON somewhere in the text.
    start = text.find('{')
    end = text.rfind('}') + 1
    if start != -1 and end > start:
        try:
            return json.loads(text[start:end])
        except json.JSONDecodeError:
            # Was a bare `except:` (swallowed even KeyboardInterrupt); only a
            # decode failure should fall through to the regex salvage below.
            pass
    # Attempt 2: salvage the individual fields from broken JSON.
    weak_sections = []
    reason = ""
    sections_match = re.search(r'"weak_sections"\s*:\s*\[(.*?)\]', text, re.DOTALL)
    if sections_match:
        weak_sections = re.findall(r'"([^"]+)"', sections_match.group(1))
    reason_match = re.search(r'"reason"\s*:\s*"([^"]*)"', text, re.DOTALL)
    if reason_match:
        reason = reason_match.group(1)
    if weak_sections or reason:
        return {'weak_sections': weak_sections, 'reason': reason}
    # Attempt 3: nothing parseable — report failure with a sample of the output.
    return {'weak_sections': [], 'reason': 'Failed to parse model output', 'raw_output': text[:500]}
# ===============================================================================
# QUESTION-TOPIC MATCHING MODEL
# ===============================================================================
MATCHING_MODEL_PATH = "Qwen/Qwen2.5-1.5B-Instruct"  # base model
MATCHING_ADAPTER_PATH = "Chamith2000/answer_evaluate"  # LoRA adapter
# Rubric text interpolated verbatim into the matching prompt; the four score
# values here must stay in sync with the snapping lists in parse_matching_json.
RUBRIC = """SCORING RUBRIC:
1.0 Perfect match — the question fully and directly assesses the stated topic
0.75 Slight mismatch — the question is closely related but targets a slightly different concept
0.5 Partial mismatch — the question is from a related area but tests a different skill
0.0 No match — the question is completely unrelated to the stated topic"""
print("Loading question-topic matching model...")
matching_tokenizer = AutoTokenizer.from_pretrained(MATCHING_MODEL_PATH)
# Reuse EOS as pad token when the tokenizer ships without one.
if matching_tokenizer.pad_token is None:
    matching_tokenizer.pad_token = matching_tokenizer.eos_token
# 4-bit NF4 double-quantization with bf16 compute for memory-efficient inference.
bnb_config_question_topic = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
matching_base = AutoModelForCausalLM.from_pretrained(
    MATCHING_MODEL_PATH,
    quantization_config=bnb_config_question_topic,
    device_map='auto'
)
matching_model = PeftModel.from_pretrained(matching_base, MATCHING_ADAPTER_PATH)
matching_model.eval()  # inference mode
print("✅ Question-topic matching model loaded!")
def build_matching_prompt(grade: int, topic: str, question: str) -> str:
    """Build the reviewer prompt asking the model to score topic/question fit.

    The module-level RUBRIC constant is interpolated verbatim, and the model
    is instructed to reply with JSON containing 'matching_score' and
    'improvements' (parsed downstream by parse_matching_json).
    """
    return f"""### ROLE
You are a strict educational content reviewer for elementary school mathematics (Grades 3-5).
### TASK
Evaluate whether the given math question correctly matches the stated topic. Determine if the question directly tests the topic's core concept.
### INPUT
Grade Level: {grade}
Topic: {topic}
Question: {question}
{RUBRIC}
### OUTPUT FORMAT
Return ONLY valid JSON:
- "matching_score": one of [1.0, 0.75, 0.5, 0.0]
- "improvements": empty list [] when score is 1.0; otherwise, 1-2 specific reasons explaining what the question actually tests and why it doesn't match the topic
### RESPONSE
"""
def parse_matching_json(text):
    """Parse the matching model's output into {'matching_score', 'improvements'}.

    The score is snapped to the nearest rubric value (0.0, 0.5, 0.75, 1.0).
    Falls back to regex extraction when the JSON is malformed, then to a
    zero-score error dict carrying the raw output.
    """
    valid_scores = [0.0, 0.5, 0.75, 1.0]  # rubric values; keep in sync with RUBRIC
    # Attempt 1: parse the first '{' .. last '}' span as JSON.
    start = text.find('{')
    end = text.rfind('}') + 1
    if start != -1 and end > start:
        try:
            parsed = json.loads(text[start:end])
            if "matching_score" in parsed and "improvements" in parsed:
                score = float(parsed["matching_score"])
                # Snap any free-form score to the closest rubric value.
                parsed["matching_score"] = min(valid_scores, key=lambda v: abs(v - score))
                if not isinstance(parsed["improvements"], list):
                    parsed["improvements"] = []
                return parsed
        except (json.JSONDecodeError, TypeError, ValueError):
            # Was a bare `except:`; only decode/convert failures should fall
            # through to the regex salvage below.
            pass
    # Attempt 2: pull the score and any long quoted strings out of broken JSON.
    try:
        score_match = re.search(r'"matching_score"\s*:\s*([\d.]+)', text)
        if score_match:
            raw_score = float(score_match.group(1))
            snapped_score = min(valid_scores, key=lambda v: abs(v - raw_score))
            # Quoted strings of 10+ chars are treated as improvement items.
            items_match = re.findall(r'"([^"]{10,})"', text)
            improvements = [i for i in items_match if i != str(raw_score)][:2]
            return {"matching_score": snapped_score, "improvements": improvements}
    except ValueError:
        pass  # e.g. "1.2.3" fails float()
    # Attempt 3: nothing parseable — report failure with the raw output.
    return {
        "matching_score": 0.0,
        "improvements": ["Failed to parse model output."],
        "raw_output": text
    }
@torch.no_grad()
def evaluate_question_topic_match(data):
    """Score how well a generated question matches its stated topic.

    Expects keys in `data`: 'topic', 'question' (required) and 'grade'
    (default 3). Returns the dict produced by parse_matching_json, or an
    error dict when a required field is missing.
    """
    grade = data.get('grade', 3)
    topic = data.get('topic', '')
    question = data.get('question', '')
    if not topic or not question:
        return {"error": "Missing required fields: 'topic' and 'question'"}
    prompt = build_matching_prompt(grade, topic, question)
    inputs = matching_tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512
    ).to(DEVICE)
    # Greedy decoding for determinism. The original passed temperature=0.01
    # alongside do_sample=False; sampling params are ignored (and trigger
    # transformers warnings) in greedy mode, so it is omitted here.
    outputs = matching_model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=False,
        repetition_penalty=1.1,
        pad_token_id=matching_tokenizer.eos_token_id,
        eos_token_id=matching_tokenizer.eos_token_id,
    )
    # Decode only the tokens generated after the prompt.
    generated = matching_tokenizer.decode(
        outputs[0][inputs['input_ids'].shape[1]:],
        skip_special_tokens=True
    ).strip()
    return parse_matching_json(generated)
# ===============================================================================
# STUDENT FEEDBACK ANALYSER MODEL
# ===============================================================================
FEEDBACK_MODEL_PATH = "Qwen/Qwen2-1.5B-Instruct"  # base model
FEEDBACK_ADAPTER_PATH = "Chamith2000/Video_Feedback"  # LoRA adapter
# Closed vocabularies used by parse_feedback_json to validate model output.
VALID_ISSUES = {"audio", "confusion", "difficulty", "pacing", "positive"}
VALID_SEVERITIES = {"high", "medium", "low"}
print("Loading student feedback analyser model...")
feedback_tokenizer = AutoTokenizer.from_pretrained(
    FEEDBACK_MODEL_PATH,
    trust_remote_code=True,
    padding_side="right",
)
# Reuse EOS as pad token when the tokenizer ships without one.
if feedback_tokenizer.pad_token is None:
    feedback_tokenizer.pad_token = feedback_tokenizer.eos_token
# 4-bit NF4 double-quantization; fp16 compute here (the other models use bf16).
bnb_config_feedback = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
feedback_base = AutoModelForCausalLM.from_pretrained(
    FEEDBACK_MODEL_PATH,
    quantization_config=bnb_config_feedback,
    device_map='auto',
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
feedback_model = PeftModel.from_pretrained(feedback_base, FEEDBACK_ADAPTER_PATH)
feedback_model.eval()  # inference mode
print("✅ Student feedback analyser model loaded!")
def build_feedback_prompt(text: str, grade: int, lesson: str, video_content: str) -> str:
    """Render the chat-template prompt for the feedback analyser model."""
    system_msg = (
        "You are an educational feedback analyser for a mathematics e-learning platform. "
        "You analyse student feedback and return ONLY a valid JSON object — no explanation, "
        "no markdown, no extra text. Just the raw JSON."
    )
    user_msg = (
        f"Analyse the student feedback below and return a JSON object.\n\n"
        f"Student details:\n"
        f" Grade : {grade}\n"
        f" Lesson : {lesson}\n"
        f" Video context: {video_content}\n\n"
        f"Student feedback: \"{text}\"\n\n"
        f"Return JSON with exactly these keys:\n"
        f" issue : one of [audio, confusion, difficulty, pacing, positive]\n"
        f" severity : one of [high, medium, low]\n"
        f" recommendation : a specific actionable instruction for the teacher\n"
        f" confidence : your confidence as a float between 0.0 and 1.0\n\n"
        f"JSON:"
    )
    chat = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_msg},
    ]
    # Let the tokenizer's chat template add the model-specific role markers
    # and the generation prompt suffix.
    return feedback_tokenizer.apply_chat_template(
        chat,
        tokenize=False,
        add_generation_prompt=True,
    )
def parse_feedback_json(text: str) -> dict:
    """Parse and validate the model's JSON output for feedback analysis.

    Strips markdown fences, parses JSON (falling back to the first
    '{' .. last '}' span), then coerces issue/severity/confidence into
    their valid ranges, substituting safe defaults where needed.
    """
    candidate = text.strip()
    # Drop any markdown code-fence lines the model may have wrapped around the JSON.
    if candidate.startswith("```"):
        candidate = "\n".join(
            line for line in candidate.split("\n") if not line.startswith("```")
        )
    # First try the whole cleaned text, then fall back to brace extraction.
    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        brace_open = candidate.find('{')
        brace_close = candidate.rfind('}') + 1
        if brace_open == -1 or brace_close <= brace_open:
            return {
                "error": "No JSON object found in model output",
                "raw_output": text[:500],
            }
        try:
            parsed = json.loads(candidate[brace_open:brace_close])
        except json.JSONDecodeError:
            return {
                "error": "Failed to parse model output as JSON",
                "raw_output": text[:500],
            }
    # Clamp confidence into [0.0, 1.0]; non-numeric values become 0.0.
    try:
        confidence = max(0.0, min(1.0, float(parsed.get("confidence", 0.0))))
    except (TypeError, ValueError):
        confidence = 0.0
    # Normalise labels and substitute safe defaults for anything off-vocabulary.
    issue = str(parsed.get("issue", "")).lower().strip()
    if issue not in VALID_ISSUES:
        issue = "confusion"
    severity = str(parsed.get("severity", "")).lower().strip()
    if severity not in VALID_SEVERITIES:
        severity = "medium"
    return {
        "issue": issue,
        "severity": severity,
        "recommendation": parsed.get("recommendation", ""),
        "confidence": confidence,
    }
@torch.no_grad()
def infer_student_feedback(data: dict) -> dict:
    """Run inference with the student feedback analyser model.

    Required keys in `data`: 'text', 'lesson', 'video_content'; optional
    'grade' (default 3). Returns the validated dict from
    parse_feedback_json, or an error dict when a required field is missing.
    """
    # `or ""` guards against an explicit JSON null, which data.get("text", "")
    # would pass through and .strip() would then crash on.
    text = (data.get("text") or "").strip()
    grade = data.get("grade", 3)
    lesson = data.get("lesson", "")
    video_content = data.get("video_content", "")
    if not text:
        return {"error": "Missing required field: 'text'"}
    if not lesson:
        return {"error": "Missing required field: 'lesson'"}
    if not video_content:
        return {"error": "Missing required field: 'video_content'"}
    prompt = build_feedback_prompt(text, grade, lesson, video_content)
    inputs = feedback_tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(DEVICE)
    # Greedy decoding for deterministic classification. The original passed
    # temperature=1.0 alongside do_sample=False; sampling params are ignored
    # (and trigger transformers warnings) in greedy mode, so it is omitted.
    outputs = feedback_model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=False,
        repetition_penalty=1.1,
        pad_token_id=feedback_tokenizer.eos_token_id,
        eos_token_id=feedback_tokenizer.eos_token_id,
    )
    # Decode only the tokens generated after the prompt.
    generated = feedback_tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True,
    ).strip()
    return parse_feedback_json(generated)
# ===============================================================================
# FLASK APP
# ===============================================================================
app = Flask(__name__)

@app.route("/generate", methods=["POST"])
def generate():
    """Single dispatch endpoint: route the request to a model by 'model_type'.

    Returns the chosen handler's JSON response, or a 400 error for a missing/
    invalid body or an unknown model type.
    """
    # silent=True returns None on a missing or malformed JSON body instead of
    # raising; the original unguarded get_json() led to a 500 (None.get) here.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({"error": "Invalid or missing JSON body"}), 400
    calling_type = data.get('model_type')
    if calling_type == 'teacher':
        return teacher_guide(data.get('teacher_data'))
    elif calling_type == 'question':
        return question_gen(data)
    elif calling_type == 'analyze':
        return analyze_gen(data.get('teacher_data'))
    elif calling_type == 'feedback':
        return student_feedback(data.get('feedback_data'))
    return jsonify({"error": "Invalid model type. Use 'teacher', 'question', 'analyze', or 'feedback'"}), 400
def question_gen(data):
    """Generate math question and return raw model output."""
    if not data:
        return jsonify({"error": "Invalid JSON body"}), 400
    topic = data.get("topic")
    difficulty = data.get("difficulty")
    grade = data.get("grade")
    # All three fields are required (De Morgan of the original chained `or`s).
    if not (topic and difficulty and grade):
        return jsonify({"error": "Missing 'topic', 'difficulty', or 'grade'"}), 400
    return jsonify({"output": infer_question_gen(topic, difficulty, grade)})
def analyze_gen(data):
    """Evaluate question-topic match and return raw model output."""
    if data:
        return jsonify({"output": evaluate_question_topic_match(data)})
    return jsonify({"error": "Invalid JSON body"}), 400
def teacher_guide(data):
    """Analyze teacher feedback."""
    if not data:
        return jsonify({"error": "Missing 'prompt'"}), 400
    # Renamed local: the original called this `prompt`, but it holds the
    # analysis result returned by the model, not a prompt.
    analysis = infer_teacher_guide(data)
    return jsonify({"output": analysis})
def student_feedback(data):
    """Analyse student video feedback and classify issue, severity, and recommendation."""
    if data:
        return jsonify({"output": infer_student_feedback(data)})
    return jsonify({"error": "Missing 'feedback_data'"}), 400
if __name__ == "__main__":
    # Startup banner listing the dispatch values accepted by POST /generate.
    banner = "=" * 70
    endpoint_lines = [
        "\nEndpoints:",
        " POST /generate",
        " - model_type: 'question' → math question generation",
        " - model_type: 'analyze' → question-topic match scoring",
        " - model_type: 'teacher' → teaching feedback analysis",
        " - model_type: 'feedback' → student video feedback analysis",
    ]
    print("\n" + banner)
    print("SERVER READY!")
    print(banner)
    for line in endpoint_lines:
        print(line)
    print(banner + "\n")
    # 0.0.0.0:7860 is the standard bind for a Hugging Face Space.
    app.run(host="0.0.0.0", port=7860)