atz21 committed on
Commit
1674b1c
·
verified ·
1 Parent(s): eea77c8

3 step process

Browse files
Files changed (1) hide show
  1. app.py +156 -126
app.py CHANGED
@@ -1,140 +1,158 @@
1
  import os
2
  import gradio as gr
3
  import google.generativeai as genai
4
- from reportlab.platypus import SimpleDocTemplate, Paragraph
5
- from reportlab.lib.styles import getSampleStyleSheet
6
- from reportlab.lib.pagesizes import A4
7
 
8
  # -------------------- CONFIG --------------------
9
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
10
 
11
  # ---------- PROMPTS ----------
12
- TRANSCRIPTION_PROMPT = """Your Role: You are an expert technical transcriber specializing in mathematical and scientific documents. Your mission is to convert handwritten solutions from a provided image or PDF into a clean, accurate, and logically structured Markdown format.
13
- Primary Objective: Preserve the author's intended solution path while filtering out all mistakes, corrections, and extraneous marks. The final output must be perfectly formatted and easy to follow.
14
- Core Instructions:
15
- Hierarchical Structure:
16
- Identify all questions and subquestions based on their numbering (e.g., 1. a), ### i)).
17
- Use ## for main questions (e.g., ## Question 1).
18
- Use ### for subquestions (e.g., ### a), ### i)).
19
- If a question number appears out of its logical sequence, transcribe it with the label provided in the source.
20
- What to Exclude (Content Filtration):
21
- Mistakes: Completely ignore and do not transcribe any number, variable, or expression that has been struck through, scribbled over, or crossed out. Transcribe only the corrected, final version.
22
- Extraneous Marks: Do not include any doodles, underlines (unless part of a fraction), or stray marks not relevant to the solution.
23
- Crucial Distinction: Cancellations vs. Step Cuts:
24
- Term Cancellation: This is a valid mathematical step where terms cancel each other out (e.g., +2x and -2x, or a term divided by itself).
25
- Action: Transcribe the step where the cancellation occurs. Immediately after that line, add a concise, bracketed note explaining what was cancelled.
26
- Step Cut: This is when the author skips intermediate algebraic or arithmetic steps (e.g., jumping from 2b = 2 directly to b = 1).
27
- Action: Transcribe the steps exactly as they appear. Do not invent or add the missing steps. The logical jump in the transcribed output serves to represent the step cut.
28
- Formatting and Special Cases:
29
- Equations: Enclose all mathematical equations and multi-line calculations in Markdown code blocks for clarity and proper rendering.
30
- Illegibility: If a specific word or number is impossible to read, use the placeholder [illegible].
31
- Graphs: Do not attempt to recreate graphs. Instead, describe them textually. Note the type of curve (e.g., parabola, polynomial) and list any labeled key points like intercepts, vertices, or asymptotes."""
32
- GRADING_PROMPT = """Instructions to Examiners:
33
- Abbreviations:
34
- - M: Marks for correct Method.
35
- - A: Marks for Answer or Accuracy (often depends on preceding M mark).
36
- - R: Marks for clear Reasoning.
37
- - AG: Answer given in the question; no marks awarded.
38
- - FT: Follow Through; award marks for correct method/answer using incorrect earlier results.
39
- Marking Rules:
40
- 1. Always follow the markscheme annotations (M1, A2, etc.).
41
- 2. M marks must be earned before dependent A marks are awarded (no M0 followed by A1 unless explicitly allowed).
42
- 3. If M and A marks are on the same line (e.g., M1A1), M is for the method attempt, A is for correct values.
43
- 4. Multiple A marks on the same line are awarded independently unless otherwise noted.
44
- 5. Do not split M2, A3, etc. unless instructed.
45
- 6. "Show that" responses do not need to restate the AG line unless noted.
46
- 7. Once a correct answer is seen, ignore further incorrect working unless it affects a later part (then apply FT as appropriate).
47
- 8. Do not award the final A mark if an incorrect approximation is used in the same part.
48
- Error Avoidance:
49
- - **No incorrect mark allocation:** Do not award marks unless they are explicitly justified by the markscheme.
50
- - **No misclassification of errors:** Distinguish correctly between "Conceptual Errors" and "Silly Mistakes."
51
- - **Follow markscheme logic exactly:** Especially regarding when to withhold accuracy marks if method marks are not earned."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # ---------- HELPER: Save to PDF ----------
def save_as_pdf(text, filename="output.pdf"):
    """Write plain text to an A4 PDF, one paragraph per input line.

    Args:
        text: Text to render; it is split on newlines and each line becomes
            its own paragraph in the "Normal" sample style.
        filename: Path of the PDF file to create.

    Returns:
        ``filename``, unchanged, so callers can hand it straight to a
        download component.
    """
    # Function-scope import keeps the new dependency local to this helper.
    from xml.sax.saxutils import escape

    styles = getSampleStyleSheet()
    doc = SimpleDocTemplate(filename, pagesize=A4)
    # Paragraph interprets its input as XML-like markup, so a literal "<" or
    # "&" (common in transcribed maths such as "x < 3") must be escaped or
    # the build fails / drops text.
    story = [Paragraph(escape(line), styles["Normal"]) for line in text.split("\n")]
    doc.build(story)
    return filename
60
 
61
- # ---------- HELPER: Safe Generate with Retry ----------
62
def _collect_stream_text(model, inputs):
    """Run one streaming request and return the joined text of its non-empty chunks."""
    pieces = []
    for chunk in model.generate_content(inputs, stream=True):
        # Chunks without parts have no text to read, so skip them.
        if chunk.candidates and chunk.candidates[0].content.parts:
            pieces.append(chunk.text)
    return "".join(pieces)


def safe_generate(model, inputs, fallback_prompt=None):
    """Generate content, retrying when the first response comes back empty.

    The first attempt is a normal (non-streaming) request. If it yields no
    content parts, the same inputs are retried in streaming mode; if that is
    also empty and ``fallback_prompt`` is given, a final streaming attempt
    replaces the first input (the prompt) with ``fallback_prompt``.

    Args:
        model: Object exposing ``generate_content(inputs, stream=...)``.
        inputs: Sequence whose first element is the prompt.
        fallback_prompt: Optional simpler prompt used for the last retry.

    Returns:
        ``(text, None)`` on success, or ``(None, error_message)`` when every
        attempt is empty or an exception is raised.
    """
    try:
        resp = model.generate_content(inputs)
        cand = resp.candidates[0] if resp.candidates else None

        if cand and cand.content and cand.content.parts:
            return resp.text, None

        reason = getattr(cand, "finish_reason", "None")

        # Retry on ANY empty first response. The previous check compared the
        # finish reason to the string "1", which never matches the value the
        # SDK reports, so neither retry could ever run.
        attempts = [inputs]
        if fallback_prompt:
            attempts.append([fallback_prompt] + list(inputs[1:]))

        for attempt in attempts:
            text = _collect_stream_text(model, attempt)
            if text:
                return text, None

        return None, f"❌ Empty/blocked response. finish_reason={reason}, safety_ratings={getattr(cand, 'safety_ratings', None)}"

    except Exception as e:
        # Best-effort boundary: callers expect an error string, not a raise.
        return None, f"❌ Exception: {e}"
96
-
97
- # ---------- COMMON SAFETY SETTINGS ----------
98
# One entry per harm category, each with its blocking threshold set to
# "BLOCK_NONE"; passed to the model constructors as ``safety_settings``.
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in (
        "HARM_CATEGORY_DANGEROUS_CONTENT",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_HARASSMENT",
    )
]
104
-
105
- # ---------- STEP 1: TRANSCRIPTION ----------
106
def transcribe(ans_file):
    """Transcribe the uploaded answer sheet and save the result as a PDF.

    Args:
        ans_file: Filesystem path of the uploaded answer sheet; falsy when
            nothing has been uploaded.

    Returns:
        ``(transcription, pdf_path)`` on success, or
        ``(error_message, None)`` when the upload is missing, generation
        fails, or any step raises.
    """
    # Fail fast with a readable message instead of letting a missing upload
    # surface as an opaque exception from the upload call.
    if not ans_file:
        return "❌ Error during transcription: no answer sheet was uploaded.", None

    try:
        ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
        model = genai.GenerativeModel(
            "gemini-2.5-pro",
            generation_config={"temperature": 0},
            safety_settings=safety_settings,
        )

        transcription, error = safe_generate(
            model,
            [TRANSCRIPTION_PROMPT, ans_uploaded],
            fallback_prompt="Convert the PDF into structured plain text with questions separated.",
        )
        if error:
            return error, None

        pdf_path = save_as_pdf(transcription, "transcription.pdf")
        return transcription, pdf_path
    except Exception as e:
        # UI boundary: report the failure in the output box rather than raising.
        return f"❌ Error during transcription: {e}", None
123
 
124
- # ---------- STEP 2: GRADING ----------
125
def grade(qp_file, ms_file, transcription):
    """Grade a transcribed answer sheet against the question paper and mark scheme.

    Args:
        qp_file: Filesystem path of the uploaded question paper.
        ms_file: Filesystem path of the uploaded mark scheme.
        transcription: Text of the student's transcribed answers.

    Returns:
        ``(grading, pdf_path)`` on success, or ``(error_message, None)`` when
        an input is missing, generation fails, or any step raises.
    """
    # Validate inputs up front so the user gets an actionable message rather
    # than an exception string from the upload or generation call.
    if not qp_file or not ms_file:
        return "❌ Error during grading: upload both the question paper and the mark scheme.", None
    if not transcription or not transcription.strip():
        return "❌ Error during grading: no transcription to grade; run Step 1 first.", None

    try:
        qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
        ms_uploaded = genai.upload_file(path=ms_file, display_name="Marking Scheme")
        model = genai.GenerativeModel(
            "gemini-2.5-pro",
            generation_config={"temperature": 0},
            safety_settings=safety_settings,
        )

        grading, error = safe_generate(
            model,
            [GRADING_PROMPT, qp_uploaded, ms_uploaded, transcription],
            fallback_prompt="Grade the answers according to the marking scheme. Show marks step by step.",
        )
        if error:
            return error, None

        pdf_path = save_as_pdf(grading, "grading.pdf")
        return grading, pdf_path
    except Exception as e:
        # UI boundary: report the failure in the output box rather than raising.
        return f"❌ Error during grading: {e}", None
143
 
144
  # ---------- GRADIO APP ----------
145
- with gr.Blocks(title="LeadIB AI Grading") as demo:
146
- gr.Markdown("## LeadIB AI Grading\nUpload exam documents to transcribe and grade student answers step by step.")
147
 
148
  with gr.Row():
149
  qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")
150
  ms_file = gr.File(label="Upload Mark Scheme (PDF)", type="filepath")
151
  ans_file = gr.File(label="Upload Student Answer Sheet (PDF)", type="filepath")
152
 
153
- # Step 1: Transcription
154
- transcribe_btn = gr.Button("Step 1: Transcribe Answer Sheet")
155
  with gr.Row():
156
- transcription_out = gr.Textbox(label="📄 Student Transcription", lines=20)
157
- transcription_pdf = gr.File(label="⬇️ Download Transcription (PDF)")
158
 
159
- # Step 2: Grading
160
- grade_btn = gr.Button("Step 2: Grade the Student")
 
 
 
 
 
 
161
  with gr.Row():
162
  grading_out = gr.Textbox(label="✅ Grading Report (Step-by-Step)", lines=20)
163
- grading_pdf = gr.File(label="⬇️ Download Grading (PDF)")
164
 
165
  # Button Logic
166
- transcribe_btn.click(
167
- fn=transcribe,
168
  inputs=[ans_file],
169
- outputs=[transcription_out, transcription_pdf],
 
 
 
 
 
 
170
  show_progress=True
171
  )
172
  grade_btn.click(
173
  fn=grade,
174
- inputs=[qp_file, ms_file, transcription_out],
175
  outputs=[grading_out, grading_pdf],
176
  show_progress=True
177
  )
 
1
  import os
2
  import gradio as gr
3
  import google.generativeai as genai
4
+ from markdown_pdf import MarkdownPdf, Section
 
 
5
 
6
  # -------------------- CONFIG --------------------
7
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
8
 
9
  # ---------- PROMPTS ----------
10
+
11
+ # Student transcription prompt
12
+ TRANSCRIPTION_PROMPT = """Your Role: You are an expert technical transcriber specializing in mathematical and scientific documents.
13
+ Your mission is to convert handwritten solutions from a provided image or PDF into a clean, accurate, and logically structured Markdown format.
14
+
15
+ Instructions:
16
+ - Use ## for questions, ### for subquestions.
17
+ - Transcribe only the corrected, final version of the solution (ignore scribbles, cancellations, mistakes).
18
+ - Keep mathematical expressions in fenced code blocks.
19
+ - If something is illegible, use [illegible].
20
+ - Do not recreate graphs, only describe them.
21
+ """
22
+
23
+ # Markscheme transcription prompt
24
+ MARKSCHEME_TRANSCRIPTION_PROMPT = """Your Role: You are an expert transcriber.
25
+ Convert the official marking scheme from the provided PDF into clean, structured Markdown.
26
+
27
+ Instructions:
28
+ - Preserve all structure (questions, subquestions).
29
+ - Keep M, A, R annotations exactly as written.
30
+ - Represent alternative methods clearly (METHOD 1, METHOD 2, etc.).
31
+ - Preserve any accuracy requirements.
32
+ - Format in Markdown using ## and ### for hierarchy.
33
+ - Use code blocks for equations.
34
+ """
35
+
36
+ # Grading prompt with rules + red highlighting
37
+ GRADING_PROMPT = """You are an official examiner. Use the following grading rules strictly.
38
+
39
+ Abbreviations:
40
+ - M: Marks awarded for attempting to use a correct Method.
41
+ - A: Marks awarded for an Answer or for Accuracy; often dependent on preceding M marks.
42
+ - R: Marks awarded for clear Reasoning.
43
+ - AG: Answer given in the question and so no marks are awarded.
44
+ - FT: Follow through. The practice of awarding marks, despite candidate errors in previous parts, for their correct methods/answers using incorrect results.
45
+
46
+ --------------------------------------------
47
+ ## 1. General
48
+ Award marks using the annotations as noted in the markscheme (e.g., M1, A2).
49
+
50
+ ## 2. Method and Answer/Accuracy marks
51
+ - Do not automatically award full marks for a correct answer; all working must be checked.
52
+ - It is generally not possible to award M0 followed by A1.
53
+ - Where M and A marks are noted on the same line (M1A1), M is for method, A is for accuracy.
54
+ - Multiple A marks can be independent.
55
+
56
+ ## 3. Implied marks
57
+ Implied marks (M1) can only be awarded if correct work is seen or implied.
58
+
59
+ ## 4. Follow through (FT) marks
60
+ - Award FT if an earlier wrong answer is used consistently later.
61
+ - Do not award FT if the result contradicts the question (e.g., probability > 1).
62
+
63
+ ## 5. Mis-read (MR)
64
+ - Penalize once if the candidate misreads a value.
65
+ - Award other marks as appropriate.
66
+
67
+ ## 6. Alternative methods
68
+ - Accept valid alternatives unless "Hence" forbids it.
69
+
70
+ ## 7. Alternative forms
71
+ - Accept equivalent numeric/algebraic forms unless specified otherwise.
72
+
73
+ ## 8. Format and accuracy of answers
74
+ - Use correct accuracy (3 s.f. if not specified).
75
+ - Arithmetic and algebra should be simplified.
76
+
77
+ ## 9. Presentation of candidate work
78
+ - Ignore crossed-out work unless indicated.
79
+ - Mark only the first solution unless candidate specifies otherwise.
80
+
81
+ --------------------------------------------
82
+
83
+ ### OUTPUT FORMAT
84
+ Produce a GitHub-flavored Markdown table with 3 columns:
85
+
86
+ | Student wrote | Marks Awarded | Reason |
87
+ |---------------|---------------|--------|
88
+
89
+ Special Formatting Rule:
90
+ - Whenever a mark is lost (M0, A0, R0 etc.), wrap it in red using: `<span style="color:red">M0</span>`.
91
+ - Keep awarded marks (M1, A1, etc.) in plain text.
92
+ - If mixed (e.g., M1A0A1), only highlight the lost marks (`A0`).
93
+
94
+ After the table, provide:
95
+ ### Summary & Final Mark
96
+ - Total marks obtained vs total available
97
+ - Any FT (follow-through) applied
98
+ - Classification of errors (Conceptual, Silly mistake, Misread, etc.)
99
+ """
100
 
101
  # ---------- HELPER: Save to PDF ----------
def save_as_pdf(text, filename="output.pdf"):
    """Render Markdown text into a single-section PDF file.

    Args:
        text: Markdown source to render.
        filename: Path of the PDF file to create.

    Returns:
        ``filename``, unchanged, so callers can hand it straight to a
        download component.

    Raises:
        ValueError: If ``text`` is not a string or contains only whitespace.
    """
    # Callers pass the model output straight through, which can be None or
    # blank; reject that here with a clear message instead of failing deep
    # inside the renderer.
    if not isinstance(text, str) or not text.strip():
        raise ValueError("no text to write to the PDF")

    pdf = MarkdownPdf()
    pdf.add_section(Section(text, toc=False))
    pdf.save(filename)
    return filename
107
 
108
+ # ---------- STEP 1: TRANSCRIBE STUDENT ----------
109
def transcribe_student(ans_file):
    """Transcribe the student's answer sheet to Markdown and save it as a PDF.

    Args:
        ans_file: Filesystem path of the uploaded answer sheet; falsy when
            nothing has been uploaded.

    Returns:
        ``(transcription, pdf_path)`` on success, or
        ``(error_message, None)`` when the upload is missing, the model
        returns no text, or any step raises.
    """
    if not ans_file:
        return "❌ Error during transcription: no answer sheet was uploaded.", None

    try:
        ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
        model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})

        resp = model.generate_content([TRANSCRIPTION_PROMPT, ans_uploaded])
        # Read the candidate parts directly. `resp.text` raises ValueError on
        # an empty/blocked response, which a getattr() default cannot absorb,
        # and indexing parts[0] alone would drop any further parts.
        parts = resp.candidates[0].content.parts if resp.candidates else []
        transcription = "".join(getattr(part, "text", "") or "" for part in parts)
        if not transcription.strip():
            return "❌ Error during transcription: the model returned no text.", None

        pdf_path = save_as_pdf(transcription, "student_transcription.pdf")
        return transcription, pdf_path
    except Exception as e:
        # UI boundary: report the failure in the output box rather than raising.
        return f"❌ Error during transcription: {e}", None
123
 
124
+ # ---------- STEP 2: TRANSCRIBE MARKSCHEME ----------
125
def transcribe_ms(ms_file):
    """Transcribe the official mark scheme to Markdown and save it as a PDF.

    Args:
        ms_file: Filesystem path of the uploaded mark scheme; falsy when
            nothing has been uploaded.

    Returns:
        ``(ms_transcription, pdf_path)`` on success, or
        ``(error_message, None)`` when the upload is missing, the model
        returns no text, or any step raises.
    """
    if not ms_file:
        return "❌ Error during MS transcription: no mark scheme was uploaded.", None

    try:
        ms_uploaded = genai.upload_file(path=ms_file, display_name="Markscheme")
        model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})

        resp = model.generate_content([MARKSCHEME_TRANSCRIPTION_PROMPT, ms_uploaded])
        # Read the candidate parts directly. `resp.text` raises ValueError on
        # an empty/blocked response, which a getattr() default cannot absorb,
        # and indexing parts[0] alone would drop any further parts.
        parts = resp.candidates[0].content.parts if resp.candidates else []
        ms_transcription = "".join(getattr(part, "text", "") or "" for part in parts)
        if not ms_transcription.strip():
            return "❌ Error during MS transcription: the model returned no text.", None

        pdf_path = save_as_pdf(ms_transcription, "ms_transcription.pdf")
        return ms_transcription, pdf_path
    except Exception as e:
        # UI boundary: report the failure in the output box rather than raising.
        return f"❌ Error during MS transcription: {e}", None
139
+
140
+ # ---------- STEP 3: GRADING ----------
141
def grade(qp_file, ms_transcription, student_transcription):
    """Grade the student transcription against the transcribed mark scheme.

    Args:
        qp_file: Filesystem path of the uploaded question paper.
        ms_transcription: Markdown text of the mark scheme.
        student_transcription: Markdown text of the student's answers.

    Returns:
        ``(grading, pdf_path)`` on success, or ``(error_message, None)`` when
        an input is missing or unusable, the model returns no text, or any
        step raises.
    """
    if not qp_file:
        return "❌ Error during grading: no question paper was uploaded.", None

    for label, value in (("markscheme", ms_transcription), ("student", student_transcription)):
        if not value or not value.strip():
            return f"❌ Error during grading: the {label} transcription is empty; run the earlier steps first.", None
        # The transcription steps report failures as text starting with "❌";
        # refuse to grade such a message as if it were a real transcription.
        if value.lstrip().startswith("❌"):
            return f"❌ Error during grading: the {label} transcription step failed; re-run it before grading.", None

    try:
        qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
        model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})

        response = model.generate_content([
            GRADING_PROMPT,
            qp_uploaded,
            "### Markscheme Transcription:\n" + ms_transcription,
            "### Student Transcription:\n" + student_transcription,
        ])

        # Read the candidate parts directly. `response.text` raises ValueError
        # on an empty/blocked response, which a getattr() default cannot
        # absorb, and indexing parts[0] alone would drop any further parts.
        parts = response.candidates[0].content.parts if response.candidates else []
        grading = "".join(getattr(part, "text", "") or "" for part in parts)
        if not grading.strip():
            return "❌ Error during grading: the model returned no text.", None

        pdf_path = save_as_pdf(grading, "grading.pdf")
        return grading, pdf_path
    except Exception as e:
        # UI boundary: report the failure in the output box rather than raising.
        return f"❌ Error during grading: {e}", None
161
 
162
  # ---------- GRADIO APP ----------
163
+ with gr.Blocks(title="LeadIB AI Grading (3-Step)") as demo:
164
+ gr.Markdown("## LeadIB AI Grading (3-Step)\nUpload exam documents to transcribe and grade step by step.")
165
 
166
  with gr.Row():
167
  qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")
168
  ms_file = gr.File(label="Upload Mark Scheme (PDF)", type="filepath")
169
  ans_file = gr.File(label="Upload Student Answer Sheet (PDF)", type="filepath")
170
 
171
+ # Step 1: Transcribe Student
172
+ transcribe_student_btn = gr.Button("Step 1: Transcribe Student Answer Sheet")
173
  with gr.Row():
174
+ student_out = gr.Textbox(label="📄 Student Transcription", lines=20)
175
+ student_pdf = gr.File(label="⬇️ Download Student Transcription (PDF)")
176
 
177
+ # Step 2: Transcribe Markscheme
178
+ transcribe_ms_btn = gr.Button("Step 2: Transcribe Markscheme")
179
+ with gr.Row():
180
+ ms_out = gr.Textbox(label="📄 Markscheme Transcription", lines=20)
181
+ ms_pdf = gr.File(label="⬇️ Download Markscheme Transcription (PDF)")
182
+
183
+ # Step 3: Grading
184
+ grade_btn = gr.Button("Step 3: Grade the Student")
185
  with gr.Row():
186
  grading_out = gr.Textbox(label="✅ Grading Report (Step-by-Step)", lines=20)
187
+ grading_pdf = gr.File(label="⬇️ Download Grading Report (PDF)")
188
 
189
  # Button Logic
190
+ transcribe_student_btn.click(
191
+ fn=transcribe_student,
192
  inputs=[ans_file],
193
+ outputs=[student_out, student_pdf],
194
+ show_progress=True
195
+ )
196
+ transcribe_ms_btn.click(
197
+ fn=transcribe_ms,
198
+ inputs=[ms_file],
199
+ outputs=[ms_out, ms_pdf],
200
  show_progress=True
201
  )
202
  grade_btn.click(
203
  fn=grade,
204
+ inputs=[qp_file, ms_out, student_out],
205
  outputs=[grading_out, grading_pdf],
206
  show_progress=True
207
  )