from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch
import os
import json
import re

# ---------------- QUESTION GEN ----------------
BASE_MODEL = "Qwen/Qwen2.5-Math-1.5B-Instruct"
ADAPTER_ID_QUESTION = "Chamith2000/qwen2.5-math-mcq-lora"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ---------------- LOAD TOKENIZER ----------------
# FIX: `device_map` is a model-loading kwarg, not a tokenizer kwarg — removed.
tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL,
    trust_remote_code=True,
)
if tokenizer.pad_token is None:
    # Qwen tokenizers may ship without a pad token; reuse EOS so padding works.
    tokenizer.pad_token = tokenizer.eos_token

# ---------------- LOAD MODEL ----------------
# 4-bit quantized base model (bfloat16 compute) with the MCQ LoRA adapter on top.
# NOTE: `loaded_bnb` is reused by the teacher feedback model further down the file.
loaded_bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
loaded_base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL, quantization_config=loaded_bnb, device_map='auto'
)
model = PeftModel.from_pretrained(loaded_base, ADAPTER_ID_QUESTION)
model.eval()


# ---------------- INFERENCE ----------------
@torch.no_grad()
def infer_question_gen(topic, difficulty, grade):
    """Generate one math MCQ for the given topic/difficulty/grade.

    Returns the raw decoded model text (the prompt is included in the
    decode, since the full output sequence is decoded — callers receive
    it verbatim and parse downstream).
    """
    # FIX: corrected "mathematcal" -> "mathematical" in the instruction text.
    prompt = (
        "### INSTRUCTION\n"
        "Generate ONE UNIQUE math MCQ.\n"
        "The question MUST be different from previous ones.\n"
        "Understand the mathematical relationship and verify the answer before giving the final answer.\n"
        "### METADATA\n"
        f"Topic: {topic}\n"
        f"Difficulty: {difficulty}\n"
        f"Grade: {grade}\n\n"
        "### OUTPUT (JSON ONLY)\n"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=400,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        repetition_penalty=1.2,
        no_repeat_ngram_size=6,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )
    # FIX: removed the unreachable `if text is None` branch — tokenizer.decode
    # always returns a str, and the branch returned a dict where callers
    # expect a str.
    return tokenizer.decode(output[0], skip_special_tokens=True)


# ===============================================================================
# TEACHER FEEDBACK MODEL
#
# ===============================================================================
TEACHER_MODEL_PATH = "Qwen/Qwen2.5-3B-Instruct"
TEACHER_ADAPTER_PATH = "Chamith2000/teachers_guide_and_matching_score"

print("Loading teacher feedback model...")
teacher_tokenizer = AutoTokenizer.from_pretrained(TEACHER_MODEL_PATH)
if teacher_tokenizer.pad_token is None:
    teacher_tokenizer.pad_token = teacher_tokenizer.eos_token

# Reuses the 4-bit quantization config (`loaded_bnb`) created for the
# question-generation model above.
teacher_base = AutoModelForCausalLM.from_pretrained(
    TEACHER_MODEL_PATH,
    quantization_config=loaded_bnb,
    device_map='auto'
)
teacher_model = PeftModel.from_pretrained(teacher_base, TEACHER_ADAPTER_PATH)
teacher_model.eval()
print("✅ Teacher feedback model loaded!")


@torch.no_grad()
def infer_teacher_guide(data):
    """Analyze teaching effectiveness from a guide + student feedback + timing.

    `data` keys read: teacher_guide, student_feedback,
    student_engagement_time, guide_allocated_time, grade.
    Returns the parsed JSON dict from `parse_json_output`.
    """
    teacher_guide = data.get('teacher_guide', '')
    student_feedback = data.get('student_feedback', '')
    engagement_time = data.get('student_engagement_time', 0)
    allocated_time = data.get('guide_allocated_time', 0)
    grade = data.get('grade', 4)

    # Positive diff => student finished early; negative => student overran.
    # A 10-minute band around the allocation counts as "appropriate".
    time_diff = allocated_time - engagement_time
    if time_diff > 10:
        time_analysis = f"Student finished {time_diff} minutes early (engaged {engagement_time} min vs {allocated_time} min allocated). This suggests possible disengagement, rushing, or giving up."
    elif time_diff < -10:
        time_analysis = f"Student took {abs(time_diff)} minutes longer than allocated (engaged {engagement_time} min vs {allocated_time} min allocated). This suggests slow pacing or difficulty understanding."
    else:
        time_analysis = f"Student completed in appropriate time (engaged {engagement_time} min, allocated {allocated_time} min)."

    prompt = f"""Analyze teaching effectiveness. Return ONLY JSON.

LESSON INFO:
Grade Level: {grade}
Teacher Guide: {teacher_guide}

STUDENT FEEDBACK: "{student_feedback}"

TIME ANALYSIS: {time_analysis}

Analyze and return JSON:
{{
  "weak_sections": ["section1", "section2", ...],
  "reason": ""
}}

OUTPUT:"""
    inputs = teacher_tokenizer(prompt, return_tensors="pt").to(DEVICE)
    outputs = teacher_model.generate(
        **inputs,
        max_new_tokens=400,
        temperature=0.05,  # near-greedy sampling for stable JSON output
        top_p=0.9,
        do_sample=True,
        repetition_penalty=1.15,
        pad_token_id=teacher_tokenizer.pad_token_id,
        eos_token_id=teacher_tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens (slice off the prompt).
    generated = teacher_tokenizer.decode(
        outputs[0][inputs['input_ids'].shape[1]:],
        skip_special_tokens=True
    ).strip()
    return parse_json_output(generated)


def parse_json_output(text):
    """Best-effort parse of the teacher model's output into a dict.

    Tries a direct brace-extraction + json.loads first, then falls back to
    regex field extraction, then to a structured failure dict.
    """
    try:
        start = text.find('{')
        end = text.rfind('}') + 1
        if start != -1 and end > start:
            return json.loads(text[start:end])
    # FIX: bare `except:` also caught SystemExit/KeyboardInterrupt — narrowed.
    except Exception:
        pass
    try:
        weak_sections = []
        reason = ""
        sections_match = re.search(r'"weak_sections"\s*:\s*\[(.*?)\]', text, re.DOTALL)
        if sections_match:
            weak_sections = re.findall(r'"([^"]+)"', sections_match.group(1))
        reason_match = re.search(r'"reason"\s*:\s*"([^"]*)"', text, re.DOTALL)
        if reason_match:
            reason = reason_match.group(1)
        if weak_sections or reason:
            return {'weak_sections': weak_sections, 'reason': reason}
    except Exception:
        pass
    return {'weak_sections': [], 'reason': 'Failed to parse model output', 'raw_output': text[:500]}


# ===============================================================================
# QUESTION-TOPIC MATCHING MODEL
# ===============================================================================
MATCHING_MODEL_PATH = "Qwen/Qwen2.5-1.5B-Instruct"
MATCHING_ADAPTER_PATH = "Chamith2000/answer_evaluate"

RUBRIC = """SCORING RUBRIC:
1.0 Perfect match — the question fully and directly assesses the stated topic
0.75 Slight mismatch — the question is closely related but targets a slightly different concept
0.5 Partial mismatch — the question is from a related area but tests a different skill
0.0 No match — the question is completely unrelated to the stated topic"""

print("Loading question-topic matching model...")
matching_tokenizer = AutoTokenizer.from_pretrained(MATCHING_MODEL_PATH)
if matching_tokenizer.pad_token is None:
    matching_tokenizer.pad_token = matching_tokenizer.eos_token

bnb_config_question_topic = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
matching_base = AutoModelForCausalLM.from_pretrained(
    MATCHING_MODEL_PATH,
    quantization_config=bnb_config_question_topic,
    device_map='auto'
)
matching_model = PeftModel.from_pretrained(matching_base, MATCHING_ADAPTER_PATH)
matching_model.eval()
print("✅ Question-topic matching model loaded!")


def build_matching_prompt(grade: int, topic: str, question: str) -> str:
    """Build the reviewer prompt asking for a matching_score + improvements JSON."""
    return f"""### ROLE
You are a strict educational content reviewer for elementary school mathematics (Grades 3-5).

### TASK
Evaluate whether the given math question correctly matches the stated topic.
Determine if the question directly tests the topic's core concept.

### INPUT
Grade Level: {grade}
Topic: {topic}
Question: {question}

{RUBRIC}

### OUTPUT FORMAT
Return ONLY valid JSON:
- "matching_score": one of [1.0, 0.75, 0.5, 0.0]
- "improvements": empty list [] when score is 1.0; otherwise, 1-2 specific reasons explaining what the question actually tests and why it doesn't match the topic

### RESPONSE
"""


def parse_matching_json(text):
    """Parse the matching model's output; snap the score to the rubric values.

    Falls back to regex extraction, then to a score-0.0 failure dict with the
    raw output attached for debugging.
    """
    try:
        start = text.find('{')
        end = text.rfind('}') + 1
        if start != -1 and end > start:
            parsed = json.loads(text[start:end])
            if "matching_score" in parsed and "improvements" in parsed:
                # Snap arbitrary model scores to the nearest valid rubric value.
                score = float(parsed["matching_score"])
                valid_scores = [0.0, 0.5, 0.75, 1.0]
                parsed["matching_score"] = min(valid_scores, key=lambda x: abs(x - score))
                parsed["improvements"] = parsed["improvements"] if isinstance(parsed["improvements"], list) else []
                return parsed
    # FIX: bare `except:` narrowed to Exception (was swallowing interrupts).
    except Exception:
        pass
    try:
        score_match = re.search(r'"matching_score"\s*:\s*([\d.]+)', text)
        # Strings of 10+ chars are assumed to be improvement sentences.
        items_match = re.findall(r'"([^"]{10,})"', text)
        if score_match:
            raw_score = float(score_match.group(1))
            valid_scores = [0.0, 0.5, 0.75, 1.0]
            snapped_score = min(valid_scores, key=lambda x: abs(x - raw_score))
            improvements = [i for i in items_match if i != str(raw_score)][:2]
            return {"matching_score": snapped_score, "improvements": improvements}
    except Exception:
        pass
    return {
        "matching_score": 0.0,
        "improvements": ["Failed to parse model output."],
        "raw_output": text
    }


@torch.no_grad()
def evaluate_question_topic_match(data):
    """Score how well `data['question']` matches `data['topic']` (greedy decode)."""
    grade = data.get('grade', 3)
    topic = data.get('topic', '')
    question = data.get('question', '')
    if not topic or not question:
        return {"error": "Missing required fields: 'topic' and 'question'"}

    prompt = build_matching_prompt(grade, topic, question)
    inputs = matching_tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=512
    ).to(DEVICE)
    # FIX: dropped `temperature=0.01` — it is ignored (with a warning) when
    # do_sample=False; greedy decoding behavior is unchanged.
    outputs = matching_model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=False,
        repetition_penalty=1.1,
        pad_token_id=matching_tokenizer.eos_token_id,
        eos_token_id=matching_tokenizer.eos_token_id,
    )
    generated = matching_tokenizer.decode(
        outputs[0][inputs['input_ids'].shape[1]:],
        skip_special_tokens=True
    ).strip()
    return parse_matching_json(generated)


# ===============================================================================
# STUDENT FEEDBACK ANALYSER MODEL
# ===============================================================================
FEEDBACK_MODEL_PATH = "Qwen/Qwen2-1.5B-Instruct"
FEEDBACK_ADAPTER_PATH = "Chamith2000/Video_Feedback"

# Closed vocabularies the parser validates against (invalid values get defaults).
VALID_ISSUES = {"audio", "confusion", "difficulty", "pacing", "positive"}
VALID_SEVERITIES = {"high", "medium", "low"}

print("Loading student feedback analyser model...")
feedback_tokenizer = AutoTokenizer.from_pretrained(
    FEEDBACK_MODEL_PATH,
    trust_remote_code=True,
    padding_side="right",
)
if feedback_tokenizer.pad_token is None:
    feedback_tokenizer.pad_token = feedback_tokenizer.eos_token

bnb_config_feedback = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
feedback_base = AutoModelForCausalLM.from_pretrained(
    FEEDBACK_MODEL_PATH,
    quantization_config=bnb_config_feedback,
    device_map='auto',
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
feedback_model = PeftModel.from_pretrained(feedback_base, FEEDBACK_ADAPTER_PATH)
feedback_model.eval()
print("✅ Student feedback analyser model loaded!")


def build_feedback_prompt(text: str, grade: int, lesson: str, video_content: str) -> str:
    """Build the chat-template prompt asking for an issue/severity/recommendation JSON."""
    messages = [
        {
            "role": "system",
            "content": (
                "You are an educational feedback analyser for a mathematics e-learning platform. "
                "You analyse student feedback and return ONLY a valid JSON object — no explanation, "
                "no markdown, no extra text. Just the raw JSON."
            ),
        },
        {
            "role": "user",
            "content": (
                f"Analyse the student feedback below and return a JSON object.\n\n"
                f"Student details:\n"
                f"  Grade        : {grade}\n"
                f"  Lesson       : {lesson}\n"
                f"  Video context: {video_content}\n\n"
                f"Student feedback: \"{text}\"\n\n"
                f"Return JSON with exactly these keys:\n"
                f"  issue          : one of [audio, confusion, difficulty, pacing, positive]\n"
                f"  severity       : one of [high, medium, low]\n"
                f"  recommendation : a specific actionable instruction for the teacher\n"
                f"  confidence     : your confidence as a float between 0.0 and 1.0\n\n"
                f"JSON:"
            ),
        },
    ]
    return feedback_tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )


def parse_feedback_json(text: str) -> dict:
    """Parse and validate the model's JSON output for feedback analysis."""
    clean = text.strip()

    # Strip markdown code fences if present
    if clean.startswith("```"):
        lines = clean.split("\n")
        clean = "\n".join(l for l in lines if not l.startswith("```"))

    # Try direct parse
    try:
        parsed = json.loads(clean)
    except json.JSONDecodeError:
        # Fall back to brace extraction
        start = clean.find('{')
        end = clean.rfind('}') + 1
        if start != -1 and end > start:
            try:
                parsed = json.loads(clean[start:end])
            except json.JSONDecodeError:
                return {
                    "error": "Failed to parse model output as JSON",
                    "raw_output": text[:500],
                }
        else:
            return {
                "error": "No JSON object found in model output",
                "raw_output": text[:500],
            }

    # Normalise and validate fields
    issue = str(parsed.get("issue", "")).lower().strip()
    severity = str(parsed.get("severity", "")).lower().strip()
    rec = parsed.get("recommendation", "")
    raw_conf = parsed.get("confidence", 0.0)
    try:
        confidence = float(raw_conf)
        confidence = max(0.0, min(1.0, confidence))
    except (TypeError, ValueError):
        confidence = 0.0

    if issue not in VALID_ISSUES:
        issue = "confusion"    # safe default
    if severity not in VALID_SEVERITIES:
        severity = "medium"    # safe default

    return {
        "issue": issue,
        "severity": severity,
        "recommendation": rec,
        "confidence": confidence,
    }
@torch.no_grad()
def infer_student_feedback(data: dict) -> dict:
    """Run inference with the student feedback analyser model.

    Required keys: text, lesson, video_content; optional: grade (default 3).
    Returns the validated dict from `parse_feedback_json`, or an error dict.
    """
    text = data.get("text", "").strip()
    grade = data.get("grade", 3)
    lesson = data.get("lesson", "")
    video_content = data.get("video_content", "")

    if not text:
        return {"error": "Missing required field: 'text'"}
    if not lesson:
        return {"error": "Missing required field: 'lesson'"}
    if not video_content:
        return {"error": "Missing required field: 'video_content'"}

    prompt = build_feedback_prompt(text, grade, lesson, video_content)
    inputs = feedback_tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(DEVICE)
    # FIX: dropped `temperature=1.0` — it is ignored (with a warning) when
    # do_sample=False; greedy decoding behavior is unchanged.
    outputs = feedback_model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=False,
        repetition_penalty=1.1,
        pad_token_id=feedback_tokenizer.eos_token_id,
        eos_token_id=feedback_tokenizer.eos_token_id,
    )
    generated = feedback_tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True,
    ).strip()
    return parse_feedback_json(generated)


# ===============================================================================
# FLASK APP
# ===============================================================================
app = Flask(__name__)


@app.route("/generate", methods=["POST"])
def generate():
    """Single entry point: dispatch on `model_type` to one of the four models."""
    # FIX: request.get_json() returns None for a missing/malformed JSON body,
    # which previously crashed on data.get(...) with a 500. silent=True plus an
    # explicit dict check turns that into a clean 400.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Invalid JSON body"}), 400

    calling_type = data.get('model_type')
    if calling_type == 'teacher':
        return teacher_guide(data.get('teacher_data'))
    elif calling_type == 'question':
        return question_gen(data)
    elif calling_type == 'analyze':
        return analyze_gen(data.get('teacher_data'))
    elif calling_type == 'feedback':
        return student_feedback(data.get('feedback_data'))
    return jsonify({"error": "Invalid model type. Use 'teacher', 'question', 'analyze', or 'feedback'"}), 400


def question_gen(data):
    """Generate math question and return raw model output."""
    if not data:
        return jsonify({"error": "Invalid JSON body"}), 400
    topic = data.get("topic")
    difficulty = data.get("difficulty")
    grade = data.get("grade")
    if not topic or not difficulty or not grade:
        return jsonify({"error": "Missing 'topic', 'difficulty', or 'grade'"}), 400
    raw_text = infer_question_gen(topic, difficulty, grade)
    return jsonify({"output": raw_text})


def analyze_gen(data):
    """Evaluate question-topic match and return raw model output."""
    if not data:
        return jsonify({"error": "Invalid JSON body"}), 400
    result = evaluate_question_topic_match(data)
    return jsonify({"output": result})


def teacher_guide(data):
    """Analyze teacher feedback."""
    if not data:
        return jsonify({"error": "Missing 'prompt'"}), 400
    prompt = infer_teacher_guide(data)
    return jsonify({"output": prompt})


def student_feedback(data):
    """Analyse student video feedback and classify issue, severity, and recommendation."""
    if not data:
        return jsonify({"error": "Missing 'feedback_data'"}), 400
    result = infer_student_feedback(data)
    return jsonify({"output": result})


if __name__ == "__main__":
    print("\n" + "=" * 70)
    print("SERVER READY!")
    print("=" * 70)
    print("\nEndpoints:")
    print("  POST /generate")
    print("    - model_type: 'question' → math question generation")
    print("    - model_type: 'analyze'  → question-topic match scoring")
    print("    - model_type: 'teacher'  → teaching feedback analysis")
    print("    - model_type: 'feedback' → student video feedback analysis")
    print("=" * 70 + "\n")
    app.run(host="0.0.0.0", port=7860)