Spaces:

ayushsaun
/

AutoGrader

Running

File size: 2,494 Bytes

b340140

import json
import re
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Short, user-facing model names mapped to the model identifiers that
# load_pipeline() passes to `from_pretrained`.
MODEL_MAP = {
    "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct",
    "Mistral-7B-Instruct": "mistralai/Mistral-7B-Instruct-v0.2",
}

# Cache of already-constructed pipelines, keyed by the short model name, so
# each model is only loaded once per process (populated by load_pipeline()).
_pipelines = {}

def load_pipeline(model_name):
    """Return the text-generation pipeline for ``model_name``, building it once.

    The first request for a given name loads the tokenizer and the causal LM
    on CPU, wraps them in a sampling text-generation pipeline and stores the
    result in the module-level ``_pipelines`` cache. Later requests return the
    cached object.

    Args:
        model_name: A key of ``MODEL_MAP``.

    Returns:
        The cached or newly built pipeline object.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODEL_MAP``.
    """
    try:
        return _pipelines[model_name]
    except KeyError:
        pass  # Not built yet; construct it below.

    checkpoint = MODEL_MAP[model_name]

    # Tokenizer is loaded before the model weights, as in the original flow.
    tok = AutoTokenizer.from_pretrained(checkpoint)
    lm = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        device_map="cpu",
        torch_dtype="auto",
    )

    # Default generation settings applied to every call of the pipeline.
    sampling = {
        "max_new_tokens": 600,
        "temperature": 0.5,
        "top_p": 0.9,
        "do_sample": True,
    }
    generator = pipeline("text-generation", model=lm, tokenizer=tok, **sampling)

    _pipelines[model_name] = generator
    return generator


def extract_json(text):
    """Return the first JSON object embedded in ``text``, or ``None``.

    The text is scanned left to right; at every ``{`` a JSON decode is
    attempted and the first object that parses is returned. Unlike a greedy
    "first ``{`` to last ``}``" match, this still succeeds when the object is
    followed by extra prose, stray braces or further objects.

    Args:
        text: Arbitrary text (typically raw model output) that may contain a
            JSON object somewhere inside it.

    Returns:
        The decoded ``dict`` of the first parseable JSON object, or ``None``
        if ``text`` is not a string or contains no parseable object.
    """
    if not isinstance(text, str):
        return None

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one value starting at `start` and ignores
            # whatever trails it, so surrounding noise is harmless.
            parsed, _end = decoder.raw_decode(text, start)
        except (ValueError, RecursionError):
            # Not valid JSON from this brace; try the next candidate.
            start = text.find("{", start + 1)
        else:
            return parsed
    return None


def grade_submission(
    model_name,
    question_paper,
    rubric,
    student_answer,
    grading_instruction
):
    """Grade a student submission with a two-stage LLM prompt chain.

    Stage 1 asks the model to summarise the student's key ideas as JSON.
    Stage 2 feeds that summary, together with the question paper, rubric and
    grading instruction, into a grading prompt that must answer with JSON.

    Args:
        model_name: Name passed to ``load_pipeline`` to select the model.
        question_paper: Text of the questions being graded.
        rubric: Text of the marking rubric.
        student_answer: Text of the student's submission.
        grading_instruction: Additional instructions for the grader.

    Returns:
        A JSON string (indented by 2). On success it is whatever JSON object
        the model produced for the grading prompt; if no JSON object could be
        extracted it is ``{"error": "Failed to generate valid grading output"}``.
    """
    pipe = load_pipeline(model_name)

    understanding_prompt = f"""
Read the student submission and extract the key ideas and steps used to answer the questions.

Student Submission:
{student_answer}

Output STRICT JSON:
{{
  "key_points": "concise summary of the student's approach and ideas"
}}
"""

    # return_full_text=False: by default the pipeline echoes the prompt in
    # "generated_text". The prompt itself contains a JSON template (and the
    # student's text), which would otherwise be picked up by extract_json
    # instead of the model's actual answer.
    understanding_raw = pipe(
        understanding_prompt, return_full_text=False
    )[0]["generated_text"]
    understanding = extract_json(understanding_raw)

    # Fall back to a placeholder when extraction failed or the model's JSON
    # does not carry the expected key (previously a KeyError).
    key_points = None
    if isinstance(understanding, dict):
        key_points = understanding.get("key_points")
    if key_points is None:
        key_points = "Unable to reliably extract"

    grading_prompt = f"""
You are an academic autograder.

Question Paper:
{question_paper}

Rubric:
{rubric}

Grading Instruction:
{grading_instruction}

Student Key Points:
{key_points}

Rules:
- Follow the rubric
- Award marks for logically correct alternative solutions
- Do not penalize different notation or ordering
- Grade only what is requested
- Be fair and consistent

Output STRICT JSON ONLY:
{{
  "total_marks": number,
  "per_question": {{
    "Q1": number,
    "Q2": number
  }},
  "reasoning": "short justification"
}}
"""

    grading_raw = pipe(
        grading_prompt, return_full_text=False
    )[0]["generated_text"]
    grading = extract_json(grading_raw)

    if grading is None:
        return json.dumps({
            "error": "Failed to generate valid grading output"
        }, indent=2)

    return json.dumps(grading, indent=2)