from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch
import os
import json
import re

# ---------------- QUESTION GEN ----------------
BASE_MODEL = "Qwen/Qwen2.5-Math-1.5B-Instruct"
ADAPTER_ID_QUESTION = "Chamith2000/qwen2.5-math-mcq-lora"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ---------------- LOAD TOKENIZER ----------------
# FIX: `device_map` is a model-loading kwarg, not a tokenizer kwarg — removed.
tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL,
    trust_remote_code=True,
)
if tokenizer.pad_token is None:
    # Qwen tokenizers may ship without a pad token; reuse EOS so padding works.
    tokenizer.pad_token = tokenizer.eos_token

# ---------------- LOAD MODEL ----------------
# 4-bit quantized base model (bfloat16 compute) with the MCQ LoRA adapter on top.
# NOTE: `loaded_bnb` is reused by the teacher feedback model further down the file.
loaded_bnb = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
loaded_base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL, quantization_config=loaded_bnb, device_map='auto'
)
model = PeftModel.from_pretrained(loaded_base, ADAPTER_ID_QUESTION)
model.eval()


# ---------------- INFERENCE ----------------
@torch.no_grad()
def infer_question_gen(topic, difficulty, grade):
    """Generate one math MCQ for the given topic/difficulty/grade.

    Returns the raw decoded model text (the prompt is included in the
    decode, since the full output sequence is decoded — callers receive
    it verbatim and parse downstream).
    """
    # FIX: corrected "mathematcal" -> "mathematical" in the instruction text.
    prompt = (
        "### INSTRUCTION\n"
        "Generate ONE UNIQUE math MCQ.\n"
        "The question MUST be different from previous ones.\n"
        "Understand the mathematical relationship and verify the answer before giving the final answer.\n"
        "### METADATA\n"
        f"Topic: {topic}\n"
        f"Difficulty: {difficulty}\n"
        f"Grade: {grade}\n\n"
        "### OUTPUT (JSON ONLY)\n"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=400,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        repetition_penalty=1.2,
        no_repeat_ngram_size=6,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )
    # FIX: removed the unreachable `if text is None` branch — tokenizer.decode
    # always returns a str, and the branch returned a dict where callers
    # expect a str.
    return tokenizer.decode(output[0], skip_special_tokens=True)


# ===============================================================================
# TEACHER FEEDBACK MODEL
#
# ===============================================================================
TEACHER_MODEL_PATH = "Qwen/Qwen2.5-3B-Instruct"
TEACHER_ADAPTER_PATH = "Chamith2000/teachers_guide_and_matching_score"

print("Loading teacher feedback model...")
teacher_tokenizer = AutoTokenizer.from_pretrained(TEACHER_MODEL_PATH)
if teacher_tokenizer.pad_token is None:
    teacher_tokenizer.pad_token = teacher_tokenizer.eos_token

# Reuses the 4-bit quantization config (`loaded_bnb`) created for the
# question-generation model above.
teacher_base = AutoModelForCausalLM.from_pretrained(
    TEACHER_MODEL_PATH,
    quantization_config=loaded_bnb,
    device_map='auto'
)
teacher_model = PeftModel.from_pretrained(teacher_base, TEACHER_ADAPTER_PATH)
teacher_model.eval()
print("✅ Teacher feedback model loaded!")


@torch.no_grad()
def infer_teacher_guide(data):
    """Analyze teaching effectiveness from a guide + student feedback + timing.

    `data` keys read: teacher_guide, student_feedback,
    student_engagement_time, guide_allocated_time, grade.
    Returns the parsed JSON dict from `parse_json_output`.
    """
    teacher_guide = data.get('teacher_guide', '')
    student_feedback = data.get('student_feedback', '')
    engagement_time = data.get('student_engagement_time', 0)
    allocated_time = data.get('guide_allocated_time', 0)
    grade = data.get('grade', 4)

    # Positive diff => student finished early; negative => student overran.
    # A 10-minute band around the allocation counts as "appropriate".
    time_diff = allocated_time - engagement_time
    if time_diff > 10:
        time_analysis = f"Student finished {time_diff} minutes early (engaged {engagement_time} min vs {allocated_time} min allocated). This suggests possible disengagement, rushing, or giving up."
    elif time_diff < -10:
        time_analysis = f"Student took {abs(time_diff)} minutes longer than allocated (engaged {engagement_time} min vs {allocated_time} min allocated). This suggests slow pacing or difficulty understanding."
    else:
        time_analysis = f"Student completed in appropriate time (engaged {engagement_time} min, allocated {allocated_time} min)."

    prompt = f"""Analyze teaching effectiveness. Return ONLY JSON.

LESSON INFO:
Grade Level: {grade}
Teacher Guide: {teacher_guide}

STUDENT FEEDBACK: "{student_feedback}"

TIME ANALYSIS: {time_analysis}

Analyze and return JSON:
{{
  "weak_sections": ["section1", "section2", ...],
  "reason": ""
}}

OUTPUT:"""
    inputs = teacher_tokenizer(prompt, return_tensors="pt").to(DEVICE)
    outputs = teacher_model.generate(
        **inputs,
        max_new_tokens=400,
        temperature=0.05,  # near-greedy sampling for stable JSON output
        top_p=0.9,
        do_sample=True,
        repetition_penalty=1.15,
        pad_token_id=teacher_tokenizer.pad_token_id,
        eos_token_id=teacher_tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens (slice off the prompt).
    generated = teacher_tokenizer.decode(
        outputs[0][inputs['input_ids'].shape[1]:],
        skip_special_tokens=True
    ).strip()
    return parse_json_output(generated)


def parse_json_output(text):
    """Best-effort parse of the teacher model's output into a dict.

    Tries a direct brace-extraction + json.loads first, then falls back to
    regex field extraction, then to a structured failure dict.
    """
    try:
        start = text.find('{')
        end = text.rfind('}') + 1
        if start != -1 and end > start:
            return json.loads(text[start:end])
    # FIX: bare `except:` also caught SystemExit/KeyboardInterrupt — narrowed.
    except Exception:
        pass
    try:
        weak_sections = []
        reason = ""
        sections_match = re.search(r'"weak_sections"\s*:\s*\[(.*?)\]', text, re.DOTALL)
        if sections_match:
            weak_sections = re.findall(r'"([^"]+)"', sections_match.group(1))
        reason_match = re.search(r'"reason"\s*:\s*"([^"]*)"', text, re.DOTALL)
        if reason_match:
            reason = reason_match.group(1)
        if weak_sections or reason:
            return {'weak_sections': weak_sections, 'reason': reason}
    except Exception:
        pass
    return {'weak_sections': [], 'reason': 'Failed to parse model output', 'raw_output': text[:500]}


# ===============================================================================
# QUESTION-TOPIC MATCHING MODEL
# ===============================================================================
MATCHING_MODEL_PATH = "Qwen/Qwen2.5-1.5B-Instruct"
MATCHING_ADAPTER_PATH = "Chamith2000/answer_evaluate"

RUBRIC = """SCORING RUBRIC:
1.0 Perfect match — the question fully and directly assesses the stated topic
0.75 Slight mismatch — the question is closely related but targets a slightly different concept
0.5 Partial mismatch — the question is from a related area but tests a different skill
0.0 No match — the question is completely unrelated to the stated topic"""

print("Loading question-topic matching model...")
matching_tokenizer = AutoTokenizer.from_pretrained(MATCHING_MODEL_PATH)
if matching_tokenizer.pad_token is None:
    matching_tokenizer.pad_token = matching_tokenizer.eos_token

bnb_config_question_topic = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
matching_base = AutoModelForCausalLM.from_pretrained(
    MATCHING_MODEL_PATH,
    quantization_config=bnb_config_question_topic,
    device_map='auto'
)
matching_model = PeftModel.from_pretrained(matching_base, MATCHING_ADAPTER_PATH)
matching_model.eval()
print("✅ Question-topic matching model loaded!")


def build_matching_prompt(grade: int, topic: str, question: str) -> str:
    """Build the reviewer prompt asking for a matching_score + improvements JSON."""
    return f"""### ROLE
You are a strict educational content reviewer for elementary school mathematics (Grades 3-5).

### TASK
Evaluate whether the given math question correctly matches the stated topic.
Determine if the question directly tests the topic's core concept.

### INPUT
Grade Level: {grade}
Topic: {topic}
Question: {question}

{RUBRIC}

### OUTPUT FORMAT
Return ONLY valid JSON:
- "matching_score": one of [1.0, 0.75, 0.5, 0.0]
- "improvements": empty list [] when score is 1.0; otherwise, 1-2 specific reasons explaining what the question actually tests and why it doesn't match the topic

### RESPONSE
"""


def parse_matching_json(text):
    """Parse the matching model's output; snap the score to the rubric values.

    Falls back to regex extraction, then to a score-0.0 failure dict with the
    raw output attached for debugging.
    """
    try:
        start = text.find('{')
        end = text.rfind('}') + 1
        if start != -1 and end > start:
            parsed = json.loads(text[start:end])
            if "matching_score" in parsed and "improvements" in parsed:
                # Snap arbitrary model scores to the nearest valid rubric value.
                score = float(parsed["matching_score"])
                valid_scores = [0.0, 0.5, 0.75, 1.0]
                parsed["matching_score"] = min(valid_scores, key=lambda x: abs(x - score))
                parsed["improvements"] = parsed["improvements"] if isinstance(parsed["improvements"], list) else []
                return parsed
    # FIX: bare `except:` narrowed to Exception (was swallowing interrupts).
    except Exception:
        pass
    try:
        score_match = re.search(r'"matching_score"\s*:\s*([\d.]+)', text)
        # Strings of 10+ chars are assumed to be improvement sentences.
        items_match = re.findall(r'"([^"]{10,})"', text)
        if score_match:
            raw_score = float(score_match.group(1))
            valid_scores = [0.0, 0.5, 0.75, 1.0]
            snapped_score = min(valid_scores, key=lambda x: abs(x - raw_score))
            improvements = [i for i in items_match if i != str(raw_score)][:2]
            return {"matching_score": snapped_score, "improvements": improvements}
    except Exception:
        pass
    return {
        "matching_score": 0.0,
        "improvements": ["Failed to parse model output."],
        "raw_output": text
    }


@torch.no_grad()
def evaluate_question_topic_match(data):
    """Score how well `data['question']` matches `data['topic']` (greedy decode)."""
    grade = data.get('grade', 3)
    topic = data.get('topic', '')
    question = data.get('question', '')
    if not topic or not question:
        return {"error": "Missing required fields: 'topic' and 'question'"}

    prompt = build_matching_prompt(grade, topic, question)
    inputs = matching_tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=512
    ).to(DEVICE)
    # FIX: dropped `temperature=0.01` — it is ignored (with a warning) when
    # do_sample=False; greedy decoding behavior is unchanged.
    outputs = matching_model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=False,
        repetition_penalty=1.1,
        pad_token_id=matching_tokenizer.eos_token_id,
        eos_token_id=matching_tokenizer.eos_token_id,
    )
    generated = matching_tokenizer.decode(
        outputs[0][inputs['input_ids'].shape[1]:],
        skip_special_tokens=True
    ).strip()
    return parse_matching_json(generated)


# ===============================================================================
# STUDENT FEEDBACK ANALYSER MODEL
# ===============================================================================
FEEDBACK_MODEL_PATH = "Qwen/Qwen2-1.5B-Instruct"
FEEDBACK_ADAPTER_PATH = "Chamith2000/Video_Feedback"

# Closed vocabularies the parser validates against (invalid values get defaults).
VALID_ISSUES = {"audio", "confusion", "difficulty", "pacing", "positive"}
VALID_SEVERITIES = {"high", "medium", "low"}

print("Loading student feedback analyser model...")
feedback_tokenizer = AutoTokenizer.from_pretrained(
    FEEDBACK_MODEL_PATH,
    trust_remote_code=True,
    padding_side="right",
)
if feedback_tokenizer.pad_token is None:
    feedback_tokenizer.pad_token = feedback_tokenizer.eos_token

bnb_config_feedback = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
feedback_base = AutoModelForCausalLM.from_pretrained(
    FEEDBACK_MODEL_PATH,
    quantization_config=bnb_config_feedback,
    device_map='auto',
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
feedback_model = PeftModel.from_pretrained(feedback_base, FEEDBACK_ADAPTER_PATH)
feedback_model.eval()
print("✅ Student feedback analyser model loaded!")


def build_feedback_prompt(text: str, grade: int, lesson: str, video_content: str) -> str:
    """Build the chat-template prompt asking for an issue/severity/recommendation JSON."""
    messages = [
        {
            "role": "system",
            "content": (
                "You are an educational feedback analyser for a mathematics e-learning platform. "
                "You analyse student feedback and return ONLY a valid JSON object — no explanation, "
                "no markdown, no extra text. Just the raw JSON."
            ),
        },
        {
            "role": "user",
            "content": (
                f"Analyse the student feedback below and return a JSON object.\n\n"
                f"Student details:\n"
                f"  Grade        : {grade}\n"
                f"  Lesson       : {lesson}\n"
                f"  Video context: {video_content}\n\n"
                f"Student feedback: \"{text}\"\n\n"
                f"Return JSON with exactly these keys:\n"
                f"  issue          : one of [audio, confusion, difficulty, pacing, positive]\n"
                f"  severity       : one of [high, medium, low]\n"
                f"  recommendation : a specific actionable instruction for the teacher\n"
                f"  confidence     : your confidence as a float between 0.0 and 1.0\n\n"
                f"JSON:"
            ),
        },
    ]
    return feedback_tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )


def parse_feedback_json(text: str) -> dict:
    """Parse and validate the model's JSON output for feedback analysis."""
    clean = text.strip()

    # Strip markdown code fences if present
    if clean.startswith("```"):
        lines = clean.split("\n")
        clean = "\n".join(l for l in lines if not l.startswith("```"))

    # Try direct parse
    try:
        parsed = json.loads(clean)
    except json.JSONDecodeError:
        # Fall back to brace extraction
        start = clean.find('{')
        end = clean.rfind('}') + 1
        if start != -1 and end > start:
            try:
                parsed = json.loads(clean[start:end])
            except json.JSONDecodeError:
                return {
                    "error": "Failed to parse model output as JSON",
                    "raw_output": text[:500],
                }
        else:
            return {
                "error": "No JSON object found in model output",
                "raw_output": text[:500],
            }

    # Normalise and validate fields
    issue = str(parsed.get("issue", "")).lower().strip()
    severity = str(parsed.get("severity", "")).lower().strip()
    rec = parsed.get("recommendation", "")
    raw_conf = parsed.get("confidence", 0.0)
    try:
        confidence = float(raw_conf)
        confidence = max(0.0, min(1.0, confidence))
    except (TypeError, ValueError):
        confidence = 0.0

    if issue not in VALID_ISSUES:
        issue = "confusion"    # safe default
    if severity not in VALID_SEVERITIES:
        severity = "medium"    # safe default

    return {
        "issue": issue,
        "severity": severity,
        "recommendation": rec,
        "confidence": confidence,
    }
@torch.no_grad()
def infer_student_feedback(data: dict) -> dict:
    """Run inference with the student feedback analyser model.

    Required keys: text, lesson, video_content; optional: grade (default 3).
    Returns the validated dict from `parse_feedback_json`, or an error dict.
    """
    text = data.get("text", "").strip()
    grade = data.get("grade", 3)
    lesson = data.get("lesson", "")
    video_content = data.get("video_content", "")

    if not text:
        return {"error": "Missing required field: 'text'"}
    if not lesson:
        return {"error": "Missing required field: 'lesson'"}
    if not video_content:
        return {"error": "Missing required field: 'video_content'"}

    prompt = build_feedback_prompt(text, grade, lesson, video_content)
    inputs = feedback_tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(DEVICE)
    # FIX: dropped `temperature=1.0` — it is ignored (with a warning) when
    # do_sample=False; greedy decoding behavior is unchanged.
    outputs = feedback_model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=False,
        repetition_penalty=1.1,
        pad_token_id=feedback_tokenizer.eos_token_id,
        eos_token_id=feedback_tokenizer.eos_token_id,
    )
    generated = feedback_tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True,
    ).strip()
    return parse_feedback_json(generated)


# ===============================================================================
# FLASK APP
# ===============================================================================
app = Flask(__name__)


@app.route("/generate", methods=["POST"])
def generate():
    """Single entry point: dispatch on `model_type` to one of the four models."""
    # FIX: request.get_json() returns None for a missing/malformed JSON body,
    # which previously crashed on data.get(...) with a 500. silent=True plus an
    # explicit dict check turns that into a clean 400.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Invalid JSON body"}), 400

    calling_type = data.get('model_type')
    if calling_type == 'teacher':
        return teacher_guide(data.get('teacher_data'))
    elif calling_type == 'question':
        return question_gen(data)
    elif calling_type == 'analyze':
        return analyze_gen(data.get('teacher_data'))
    elif calling_type == 'feedback':
        return student_feedback(data.get('feedback_data'))
    return jsonify({"error": "Invalid model type. Use 'teacher', 'question', 'analyze', or 'feedback'"}), 400


def question_gen(data):
    """Generate math question and return raw model output."""
    if not data:
        return jsonify({"error": "Invalid JSON body"}), 400
    topic = data.get("topic")
    difficulty = data.get("difficulty")
    grade = data.get("grade")
    if not topic or not difficulty or not grade:
        return jsonify({"error": "Missing 'topic', 'difficulty', or 'grade'"}), 400
    raw_text = infer_question_gen(topic, difficulty, grade)
    return jsonify({"output": raw_text})


def analyze_gen(data):
    """Evaluate question-topic match and return raw model output."""
    if not data:
        return jsonify({"error": "Invalid JSON body"}), 400
    result = evaluate_question_topic_match(data)
    return jsonify({"output": result})


def teacher_guide(data):
    """Analyze teacher feedback."""
    if not data:
        return jsonify({"error": "Missing 'prompt'"}), 400
    prompt = infer_teacher_guide(data)
    return jsonify({"output": prompt})


def student_feedback(data):
    """Analyse student video feedback and classify issue, severity, and recommendation."""
    if not data:
        return jsonify({"error": "Missing 'feedback_data'"}), 400
    result = infer_student_feedback(data)
    return jsonify({"output": result})


if __name__ == "__main__":
    print("\n" + "=" * 70)
    print("SERVER READY!")
    print("=" * 70)
    print("\nEndpoints:")
    print("  POST /generate")
    print("    - model_type: 'question' → math question generation")
    print("    - model_type: 'analyze'  → question-topic match scoring")
    print("    - model_type: 'teacher'  → teaching feedback analysis")
    print("    - model_type: 'feedback' → student video feedback analysis")
    print("=" * 70 + "\n")
    app.run(host="0.0.0.0", port=7860)