ayushsaun commited on
Commit
b340140
·
1 Parent(s): 39445dc

Initial AutoGrader setup

Browse files
Files changed (5) hide show
  1. .gitattributes +0 -35
  2. README.md +62 -7
  3. app.py +52 -0
  4. main.py +115 -0
  5. requirements.txt +4 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,14 +1,69 @@
1
  ---
2
  title: AutoGrader
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 6.4.0
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
- short_description: CPU-only LLM autograder
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: AutoGrader
3
+ emoji: 🧠
4
+ colorFrom: indigo
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
+ # AutoGrader
13
+
14
+ AutoGrader is a **CPU-only, LLM-based academic grading system** that evaluates student submissions using a provided **question paper and rubric**, while also awarding marks for **logically correct alternative solutions**.
15
+
16
+ ## Key Features
17
+
18
+ - Runs entirely on **CPU** (Hugging Face Spaces compatible)
19
+ - **Rubric-aware** grading with flexibility for alternative correct answers
20
+ - **Prompt-controlled evaluation** (e.g. grade only Q2, grade Q2 & Q4, custom marks)
21
+ - **Multiple model options** (user-selectable)
22
+ - **Non-deterministic grading** via controlled sampling (temperature/top-p)
23
+ - **Structured JSON output** for reliable parsing
24
+ - Works via **Hugging Face API** (can be called from Kaggle or other platforms)
25
+
26
+ ## Supported Models
27
+
28
+ - Phi-3-mini (fast, CPU-friendly)
29
+ - Mistral-7B-Instruct (higher quality, slower on CPU)
30
+
31
+ Model weights are **not stored in this repository** and are automatically downloaded from the Hugging Face Hub at runtime.
32
+
33
+ ## How It Works
34
+
35
+ 1. The student submission is first analyzed to extract key ideas.
36
+ 2. A second evaluation step grades the answer using the rubric and grading instructions.
37
+ 3. Marks are assigned fairly, even for solutions not explicitly listed in the rubric.
38
+ 4. Output is returned as **strict JSON**.
39
+
40
+ ## Input Fields
41
+
42
+ - **Question Paper**
43
+ - **Rubric**
44
+ - **Grading Instruction**
45
+ Example:
46
+ `Grade only Question 2 out of 20 marks`
47
+ - **Student Submission**
48
+
49
+ ## Output
50
+
51
+ A structured JSON containing:
52
+ - Total marks
53
+ - Per-question marks
54
+ - Short justification
55
+
56
+ ## Limitations
57
+
58
+ - CPU-only inference means **higher latency** for larger models
59
+ - LLM-based grading is **not fully deterministic**
60
+ - Designed for academic assistance, not high-stakes automated grading without review
61
+
62
+ ## License & Usage
63
+
64
+ This project uses open-source models from the Hugging Face Hub.
65
+ Please ensure model licenses are respected when deploying or redistributing.
66
+
67
+ ---
68
+
69
+ Built for flexible, research-oriented automated assessment.
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from main import grade_submission, MODEL_MAP

# Gradio front end for the AutoGrader: collects the grading inputs and shows
# the JSON string produced by main.grade_submission().
with gr.Blocks() as demo:
    gr.Markdown("## AutoGrader (CPU-only, Rubric-Aware, Flexible)")

    # Model choices come from main.MODEL_MAP so the UI stays in sync with
    # the backends actually supported.
    model = gr.Dropdown(
        choices=list(MODEL_MAP.keys()),
        value="Phi-3-mini",
        label="Select Model"
    )

    question_paper = gr.Textbox(
        label="Question Paper",
        lines=5
    )

    rubric = gr.Textbox(
        label="Rubric",
        lines=5
    )

    grading_instruction = gr.Textbox(
        label="Grading Instruction (e.g. 'Grade only Q2 out of 20')",
        lines=2
    )

    student_answer = gr.Textbox(
        label="Student Submission",
        lines=6
    )

    output = gr.Textbox(
        label="Grading Output (JSON)",
        lines=12
    )

    grade = gr.Button("Grade")

    # Input order must match the signature of main.grade_submission(
    # model_name, question_paper, rubric, student_answer, grading_instruction).
    grade.click(
        fn=grade_submission,
        inputs=[
            model,
            question_paper,
            rubric,
            student_answer,
            grading_instruction
        ],
        outputs=output
    )

# Launch only when executed as a script (which is how Hugging Face Spaces
# runs app.py); importing this module elsewhere no longer starts a server.
if __name__ == "__main__":
    demo.launch()
main.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import re
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Display name (shown in the UI dropdown) -> Hugging Face Hub model id.
# Weights are not stored in this repo; they are downloaded at runtime.
MODEL_MAP = {
    "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct",
    "Mistral-7B-Instruct": "mistralai/Mistral-7B-Instruct-v0.2",
}

# Cache of constructed text-generation pipelines, keyed by display name,
# so each model is loaded from the Hub at most once per process.
_pipelines = {}
11
+
12
def load_pipeline(model_name):
    """Return a cached CPU text-generation pipeline for *model_name*.

    *model_name* must be a key of MODEL_MAP. On first use the weights are
    downloaded from the Hugging Face Hub and the constructed pipeline is
    memoized in _pipelines; later calls are cache hits.
    """
    if model_name in _pipelines:
        return _pipelines[model_name]

    model_id = MODEL_MAP[model_name]
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="cpu",
        torch_dtype="auto"
    )

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=600,
        temperature=0.5,
        top_p=0.9,
        do_sample=True,
        # Return only the newly generated text. By default the pipeline
        # echoes the prompt, and extract_json() in grade_submission() would
        # then greedily match the JSON *template* embedded in the prompt
        # instead of the model's actual answer.
        return_full_text=False
    )

    _pipelines[model_name] = pipe
    return pipe
36
+
37
+
38
def extract_json(text):
    """Extract and parse the first ``{...}`` span found in *text*.

    Returns the decoded object, or None when *text* contains no braces or
    the captured span is not valid JSON. The regex is greedy, so it spans
    from the first ``{`` to the last ``}`` — the outermost object when the
    model emits a single JSON document.
    """
    match = re.search(r"\{[\s\S]*\}", text)
    if not match:
        return None
    try:
        return json.loads(match.group())
    # Narrowed from a bare `except:`, which would also swallow
    # KeyboardInterrupt/SystemExit and hide unrelated bugs.
    except json.JSONDecodeError:
        return None
46
+
47
+
48
def grade_submission(
    model_name,
    question_paper,
    rubric,
    student_answer,
    grading_instruction
):
    """Grade a student submission against a question paper and rubric.

    Runs two LLM passes: (1) summarize the submission's key ideas, then
    (2) grade those key points under the rubric and grading instruction.

    All parameters are plain strings except *model_name*, which must be a
    key of MODEL_MAP.

    Returns a pretty-printed JSON string with total marks, per-question
    marks, and a short justification — or an {"error": ...} JSON string
    when the model's grading output could not be parsed.
    """
    pipe = load_pipeline(model_name)

    # Pass 1: condense the raw submission into key points so the grading
    # prompt stays short and focused.
    understanding_prompt = f"""
Read the student submission and extract the key ideas and steps used to answer the questions.

Student Submission:
{student_answer}

Output STRICT JSON:
{{
"key_points": "concise summary of the student's approach and ideas"
}}
"""

    understanding_raw = pipe(understanding_prompt)[0]["generated_text"]
    understanding = extract_json(understanding_raw)
    # Fall back to a sentinel summary rather than failing the whole request
    # when pass-1 output is not parseable JSON.
    if understanding is None:
        understanding = {"key_points": "Unable to reliably extract"}

    # Pass 2: grade the extracted key points. The doubled braces render as
    # literal JSON braces in the f-string template below.
    grading_prompt = f"""
You are an academic autograder.

Question Paper:
{question_paper}

Rubric:
{rubric}

Grading Instruction:
{grading_instruction}

Student Key Points:
{understanding["key_points"]}

Rules:
- Follow the rubric
- Award marks for logically correct alternative solutions
- Do not penalize different notation or ordering
- Grade only what is requested
- Be fair and consistent

Output STRICT JSON ONLY:
{{
"total_marks": number,
"per_question": {{
"Q1": number,
"Q2": number
}},
"reasoning": "short justification"
}}
"""

    grading_raw = pipe(grading_prompt)[0]["generated_text"]
    grading = extract_json(grading_raw)

    # Unlike pass 1, a parse failure here is fatal for the request: there is
    # no sensible fallback for the marks themselves.
    if grading is None:
        return json.dumps({
            "error": "Failed to generate valid grading output"
        }, indent=2)

    return json.dumps(grading, indent=2)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ accelerate
4
+ gradio