"""
core/evaluator.py — Evaluate student answers and provide structured feedback.

Responsibility:
    Compare the student's answer against both the original question and the
    source chunk, then return actionable feedback that includes:
      - Whether the answer is correct / partially correct / incorrect.
      - What the student got right.
      - What is missing or imprecise.
      - A brief model answer for reference.

The LLM acts as a tutor, not just a judge, so feedback is constructive
and encourages deeper understanding rather than simply flagging errors.

Public API:
    evaluate_answer(question: str, chunk: str, student_answer: str, language: str = "English") -> str
"""

from model.llm import get_llm
from core.lang import ensure_english

# Prompt template for the tutor-style evaluation. It is filled in with
# str.format() and carries three placeholders: {chunk}, {question}, {answer}.
# Any literal brace added to this text must be doubled ({{ / }}), otherwise
# str.format() will treat it as a replacement field.
# The backslash right after the opening quotes suppresses the leading newline.
_PROMPT_EN = """\
You are a patient and constructive university tutor.
IMPORTANT: Write your ENTIRE response in English — even if the source material is in another language. Translate everything; do NOT use the source language.

Source material:
{chunk}

Question asked to the student:
{question}

Student's answer:
{answer}

Evaluate using this EXACT 4-section structure — all sections are REQUIRED:
1. Verdict: Correct / Partially correct / Incorrect
2. What was good: Even if the answer is wrong or empty, find something positive to say (e.g., "You attempted the question" or identify any partially correct element). This section is MANDATORY — never skip it.
3. What was missing or imprecise: describe what the student got wrong or omitted.
4. Model answer: Write a concise 2-4 sentence answer IN YOUR OWN WORDS in English. Do NOT copy or quote the source text directly — synthesize it.

Be encouraging and specific. Write in English only — do not use the source language."""


def evaluate_answer(question: str, chunk: str, student_answer: str, language: str = "English") -> str:
    """Build the tutor prompt and return the model's feedback text.

    The source chunk is stripped and passed through ``ensure_english``
    before being placed in the prompt; the question and the student's
    answer are inserted as given, minus surrounding whitespace.

    Args:
        question: The question that was put to the student.
        chunk: Source material the question was based on.
        student_answer: The student's free-text answer.
        language: Accepted but not read by this function; the prompt
            template always requests English feedback.

    Returns:
        The generated feedback with leading/trailing whitespace removed.
    """
    model = get_llm()

    source_text = ensure_english(chunk.strip())
    asked = question.strip()
    given = student_answer.strip()
    filled_prompt = _PROMPT_EN.format(chunk=source_text, question=asked, answer=given)

    # The 320-token budget is sized for the 4-section feedback and is kept
    # small so that CPU (llama.cpp) latency stays inside the UI timeout.
    feedback = model.generate(filled_prompt, max_new_tokens=320, temperature=0.4)
    return feedback.strip()