atz21 committed on
Commit
b9e41b9
·
verified ·
1 Parent(s): 6c41c7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -192
app.py CHANGED
@@ -1,50 +1,97 @@
1
- # app.py
2
- # Gradio app for transcription + grading using Google Gemini
3
- # Author: generated for your notebook logic (adapted and sanitized)
4
-
5
  import os
6
- import tempfile
7
  import io
8
- import traceback
9
- import gradio as gr
10
-
11
- try:
12
- import google.generativeai as genai
13
- except Exception as e:
14
- genai = None
15
 
16
- # ---- Configuration ----
17
- # IMPORTANT: Do NOT hardcode your API key here.
18
- # Set environment variable GEMINI_API_KEY in Hugging Face Spaces Secrets.
19
- GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", None)
20
- if GEMINI_API_KEY:
21
- if genai is not None:
22
- genai.configure(api_key=GEMINI_API_KEY)
23
- else:
24
- # genai may be None if package not installed; Gradio UI will show an error if user tries to run
25
- pass
26
-
27
- # ---- Long instructions copied-from-notebook (transcription) ----
28
- TRANSCRIPTION_INSTRUCTIONS = """
 
 
 
 
 
29
  Persona:
30
  You are an expert transcriptionist specializing in scientific and mathematical documents. Your primary goal is to convert handwritten mathematical work into a perfectly formatted, machine-readable Markdown document using LaTeX for all mathematical notation.
 
31
  Core Task:
32
  Your task is to transcribe the provided handwritten student solutions into a single, clean Markdown string.
 
33
  Key Directives & Rules:
34
  Absolute Fidelity: Transcribe exactly what is written. Do NOT correct mathematical errors, logical fallacies, or spelling mistakes. Your role is purely that of a scribe, not a grader or editor.
 
35
  LaTeX for All Math: All mathematical content—including single variables, numbers in equations, fractions, exponents, roots, and symbols—must be enclosed in LaTeX delimiters. Use inline $ ... $ for math within text and block $$ ... $$ for standalone equations.
 
36
  Handle Strikethroughs: Completely ignore and omit any text, numbers, or expressions that have been struck through by the student. Do not include them in the final output.
 
37
  Preserve Structure:
38
  Use Markdown bolding (e.g., **1.**, **2a.**) to clearly separate each question or sub-part.
39
  Maintain the vertical, step-by-step flow of the student's derivations. For multi-line aligned equations, use the \\begin{align*} ... \\end{align*} environment within a $$ ... $$ block.
 
40
  Handle Ambiguity: If a character or symbol is genuinely illegible or ambiguous, make your best interpretation and enclose it in square brackets. For example, if a variable could be u or v, write [u?].
 
41
  Output Format:
42
  The final output must be a single Markdown string.
43
  Ensure all LaTeX renders correctly and the structure is clean and readable.
44
- """
45
 
46
- # ---- Grading system instructions (as in notebook) ----
47
- GRADING_INSTRUCTIONS = """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  Instructions to Examiners:
49
  Abbreviations:
50
  - M: Marks for correct Method.
@@ -64,178 +111,111 @@ Marking Rules:
64
  8. Do not award the final A mark if an incorrect approximation is used in the same part.
65
 
66
  Error Avoidance:
67
- - No incorrect mark allocation: Do not award marks unless they are explicitly justified by the markscheme.
68
- - No misclassification of errors: Distinguish correctly between "Conceptual Errors" and "Silly Mistakes."
69
- - Follow markscheme logic exactly: Especially regarding when to withhold accuracy marks if method marks are not earned.
70
  """
71
-
72
- # ---- Helper functions ----
73
- def ensure_genai_available():
74
- if genai is None:
75
- raise RuntimeError("google-generativeai package is not available. Make sure it's in requirements.txt.")
76
- if not GEMINI_API_KEY:
77
- raise RuntimeError("GEMINI_API_KEY not set. Set it in environment/secrets before running the app.")
78
-
79
- def _save_temp_file(uploaded_file) -> str:
80
- """
81
- uploaded_file is a file-like object provided by Gradio (temp file path).
82
- Returns a path to a saved temp file we can pass to genai.upload_file.
83
- """
84
- if uploaded_file is None:
85
- raise ValueError("No file provided.")
86
- # Gradio gives a dict with 'name' and 'data' in some modes; but usually it's a path
87
- # Attempt to handle multiple types robustly
88
- if isinstance(uploaded_file, str):
89
- return uploaded_file # already a path
90
- # Otherwise write bytes to a temp file
91
- data = None
92
- try:
93
- # uploaded_file may be a file-like with .read()
94
- data = uploaded_file.read()
95
- except Exception:
96
- # uploaded_file may be a tuple returned by gr.File: (name, data)
97
- try:
98
- data = uploaded_file[0].read()
99
- except Exception:
100
- raise
101
- fd, path = tempfile.mkstemp(suffix=".pdf")
102
- os.close(fd)
103
- with open(path, "wb") as f:
104
- f.write(data)
105
- return path
106
-
107
- def upload_file_to_gemini(local_path, display_name="file"):
108
- """
109
- Upload a local file path to Gemini using genai.upload_file and return the file object (as returned).
110
- """
111
- ensure_genai_available()
112
- # The API used in original notebook: genai.upload_file(path=...)
113
- # We'll use the same call and return the object
114
- try:
115
- file_obj = genai.upload_file(path=local_path, display_name=display_name)
116
- return file_obj
117
- except Exception as e:
118
- # Surface the error
119
- raise RuntimeError(f"Failed to upload file to Gemini: {e}")
120
-
121
- def call_gemini_generate(inputs_list):
122
- """
123
- Call Gemini generative model with the provided inputs list (strings and/or uploaded file objects).
124
- Returns the textual content (tries several extraction methods).
125
- """
126
- ensure_genai_available()
127
- try:
128
- model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
129
- response = model.generate_content(inputs_list)
130
- text = getattr(response, "text", None)
131
- if not text:
132
- # try legacy path
133
- if hasattr(response, "candidates") and response.candidates:
134
- # drill into candidates
135
- try:
136
- text = response.candidates[0].content.parts[0].text
137
- except Exception:
138
- text = str(response.candidates[0])
139
- if not text:
140
- text = str(response)
141
- return text
142
- except Exception as e:
143
- raise RuntimeError(f"Gemini generation failed: {e}")
144
-
145
- # ---- Core operations ----
146
- def transcribe_answer_sheet(answersheet_file):
147
- """
148
- Save the uploaded answersheet, upload to Gemini, and request transcription.
149
- Returns the transcription string.
150
- """
151
- try:
152
- ensure_genai_available()
153
- except Exception as e:
154
- return f"ERROR: {e}"
155
-
156
- try:
157
- local_ans_path = _save_temp_file(answersheet_file)
158
- uploaded_ans = upload_file_to_gemini(local_ans_path, display_name="Answer Sheet")
159
- # Call Gemini to transcribe (instructions + uploaded file)
160
- response_text = call_gemini_generate([TRANSCRIPTION_INSTRUCTIONS, uploaded_ans])
161
- return response_text
162
- except Exception as e:
163
- tb = traceback.format_exc()
164
- return f"Transcription failed: {e}\n\n{tb}"
165
-
166
- def grade_answer(qp_file, ms_file, transcription_text):
167
- """
168
- Upload QP and MS, then call Gemini with grading instructions + the transcription to obtain grading output.
169
- """
170
- try:
171
- ensure_genai_available()
172
- except Exception as e:
173
- return f"ERROR: {e}"
174
-
175
- if transcription_text is None or transcription_text.strip() == "":
176
- return "ERROR: Empty transcription. Please run transcription first or provide transcription text."
177
-
178
- try:
179
- local_qp = _save_temp_file(qp_file)
180
- local_ms = _save_temp_file(ms_file)
181
- uploaded_qp = upload_file_to_gemini(local_qp, display_name="Question Paper")
182
- uploaded_ms = upload_file_to_gemini(local_ms, display_name="Marking Scheme")
183
-
184
- # Build the prompt combining grading instructions + strict rules (as in the notebook)
185
- grading_prompt = (
186
- "You are an official examiner. Use the following grading system and rules to assess the answers:\n\n"
187
- + GRADING_INSTRUCTIONS
188
- + "\n\nYour output must:\n"
189
  "1. Apply marks exactly as per the markscheme.\n"
190
  "2. Justify each awarded or withheld mark with reference to the grading rules.\n"
191
  "3. Identify and classify all errors accurately (Conceptual Error, Silly Mistake, or None).\n"
192
  "4. Follow the dependency between M and A marks strictly.\n"
193
  "5. Avoid giving marks that the markscheme does not allow.\n"
194
- "6. Provide a step-by-step reasoning for each mark awarded or withheld, explaining your thought process clearly.\n"
195
- )
196
-
197
- response_text = call_gemini_generate([grading_prompt, uploaded_qp, uploaded_ms, transcription_text])
198
- return response_text
 
 
 
 
 
 
 
 
 
 
199
  except Exception as e:
200
- tb = traceback.format_exc()
201
- return f"Grading failed: {e}\n\n{tb}"
202
-
203
- # ---- Gradio UI ----
204
- with gr.Blocks(title="Exam Transcription & Grading (Gemini)") as demo:
205
- gr.Markdown(
206
- """
207
- # Exam Transcription & Grading
208
- Upload three PDFs: Question Paper, Marking Scheme, and Answer Sheet.
209
- Click **Transcribe** to get a LaTeX-friendly Markdown transcription of the student's handwritten answers.
210
- Click **Grade** to apply the marking scheme to the transcription and get a detailed grading justification.
211
- **Important:** set `GEMINI_API_KEY` in environment/secrets before using.
212
- """
213
- )
214
-
215
  with gr.Row():
216
- qp_in = gr.File(label="Question Paper (PDF)", file_count="single", type="file")
217
- ms_in = gr.File(label="Marking Scheme (PDF)", file_count="single", type="file")
218
- ans_in = gr.File(label="Answer Sheet (PDF)", file_count="single", type="file")
219
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  with gr.Row():
221
- transcribe_btn = gr.Button("Transcribe Answer Sheet")
222
- grade_btn = gr.Button("Grade (use existing transcription)")
223
-
224
- transcription_out = gr.Textbox(label="Transcription (Markdown + LaTeX)", lines=20)
225
- grading_out = gr.Textbox(label="Grading Result + Justification", lines=20)
226
-
227
- # Wire buttons
228
- transcribe_btn.click(fn=transcribe_answer_sheet, inputs=[ans_in], outputs=[transcription_out])
229
- # Grade uses QP, MS and transcription textbox as inputs
230
- grade_btn.click(fn=grade_answer, inputs=[qp_in, ms_in, transcription_out], outputs=[grading_out])
231
-
232
- # Provide quick example text area for transcription override (optional)
233
- gr.Markdown("If you already have a prepared transcription (or want to edit before grading), paste it below and click Grade.")
234
- transcription_manual = gr.Textbox(label="Optional: Edit/Provide Transcription (overrides auto)", lines=8)
235
- grade_with_manual_btn = gr.Button("Grade Using Provided Transcription")
236
- grade_with_manual_btn.click(fn=grade_answer, inputs=[qp_in, ms_in, transcription_manual], outputs=[grading_out])
237
-
238
- gr.Markdown("⚠️ Note: This app depends on Google Gemini `google-generativeai` SDK and a valid `GEMINI_API_KEY` environment variable.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
  if __name__ == "__main__":
241
- demo.launch()
 
1
+ import gradio as gr
 
 
 
2
  import os
 
3
  import io
4
+ from google import generativeai as genai
 
 
 
 
 
 
5
 
6
+ def process_exam_papers(question_paper, marking_scheme, answer_sheet, api_key):
7
+ """
8
+ Process uploaded exam papers and return transcription and grading
9
+ """
10
+ if not api_key:
11
+ return "Please provide a valid Gemini API key.", ""
12
+
13
+ try:
14
+ # Configure Gemini API
15
+ genai.configure(api_key=api_key)
16
+
17
+ # Upload files to Gemini
18
+ qp_file = genai.upload_file(path=question_paper.name, display_name="Question Paper")
19
+ ms_file = genai.upload_file(path=marking_scheme.name, display_name="Marking Scheme")
20
+ ans_file = genai.upload_file(path=answer_sheet.name, display_name="Answer Sheet")
21
+
22
+ # Transcription instructions
23
+ transcription_instructions = """
24
  Persona:
25
  You are an expert transcriptionist specializing in scientific and mathematical documents. Your primary goal is to convert handwritten mathematical work into a perfectly formatted, machine-readable Markdown document using LaTeX for all mathematical notation.
26
+
27
  Core Task:
28
  Your task is to transcribe the provided handwritten student solutions into a single, clean Markdown string.
29
+
30
  Key Directives & Rules:
31
  Absolute Fidelity: Transcribe exactly what is written. Do NOT correct mathematical errors, logical fallacies, or spelling mistakes. Your role is purely that of a scribe, not a grader or editor.
32
+
33
  LaTeX for All Math: All mathematical content—including single variables, numbers in equations, fractions, exponents, roots, and symbols—must be enclosed in LaTeX delimiters. Use inline $ ... $ for math within text and block $$ ... $$ for standalone equations.
34
+
35
  Handle Strikethroughs: Completely ignore and omit any text, numbers, or expressions that have been struck through by the student. Do not include them in the final output.
36
+
37
  Preserve Structure:
38
  Use Markdown bolding (e.g., **1.**, **2a.**) to clearly separate each question or sub-part.
39
  Maintain the vertical, step-by-step flow of the student's derivations. For multi-line aligned equations, use the \\begin{align*} ... \\end{align*} environment within a $$ ... $$ block.
40
+
41
  Handle Ambiguity: If a character or symbol is genuinely illegible or ambiguous, make your best interpretation and enclose it in square brackets. For example, if a variable could be u or v, write [u?].
42
+
43
  Output Format:
44
  The final output must be a single Markdown string.
45
  Ensure all LaTeX renders correctly and the structure is clean and readable.
 
46
 
47
+ Comprehensive Example:
48
+ If the student's handwritten work for a question looks like this:
49
+ 7. Find the value of y.
50
+ y = (x² + 3) / 2
51
+ for x = 3
52
+ y = (3² + 3) / 2
53
+ y = (6+3) / 2
54
+ y = (9 + 3) / 2
55
+ y = 12 / 2
56
+ y = 6
57
+
58
+ Your expected output should be:
59
+ **7.**
60
+
61
+ Find the value of y.
62
+ $$
63
+ y = \\frac{x^2 + 3}{2}
64
+ $$
65
+ for $x = 3$
66
+ $$
67
+ \\begin{align*}
68
+ y &= \\frac{3^2 + 3}{2} \\\\
69
+ y &= \\frac{9 + 3}{2} \\\\
70
+ y &= \\frac{12}{2} \\\\
71
+ y &= 6
72
+ \\end{align*}
73
+ $$
74
+ """
75
+
76
+ # Initialize Gemini model for transcription
77
+ model = genai.GenerativeModel(
78
+ "gemini-2.5-pro",
79
+ generation_config={"temperature": 0}
80
+ )
81
+
82
+ # Generate transcription
83
+ response = model.generate_content([
84
+ transcription_instructions,
85
+ ans_file
86
+ ])
87
+
88
+ # Extract transcription safely
89
+ student_transcription = getattr(response, "text", None)
90
+ if not student_transcription:
91
+ student_transcription = response.candidates[0].content.parts[0].text
92
+
93
+ # Grading system instructions
94
+ grading_system = """
95
  Instructions to Examiners:
96
  Abbreviations:
97
  - M: Marks for correct Method.
 
111
  8. Do not award the final A mark if an incorrect approximation is used in the same part.
112
 
113
  Error Avoidance:
114
+ - **No incorrect mark allocation:** Do not award marks unless they are explicitly justified by the markscheme.
115
+ - **No misclassification of errors:** Distinguish correctly between "Conceptual Errors" and "Silly Mistakes."
116
+ - **Follow markscheme logic exactly:** Especially regarding when to withhold accuracy marks if method marks are not earned.
117
  """
118
+
119
+ # Generate grading
120
+ grading_response = model.generate_content([
121
+ f"You are an official examiner. Use the following grading system and rules to assess the answers:\n\n{grading_system}\n\n"
122
+ "Your output must:\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  "1. Apply marks exactly as per the markscheme.\n"
124
  "2. Justify each awarded or withheld mark with reference to the grading rules.\n"
125
  "3. Identify and classify all errors accurately (Conceptual Error, Silly Mistake, or None).\n"
126
  "4. Follow the dependency between M and A marks strictly.\n"
127
  "5. Avoid giving marks that the markscheme does not allow.\n"
128
+ "6. Provide a step-by-step reasoning for each mark awarded or withheld, explaining your thought process clearly.\n",
129
+ qp_file,
130
+ ms_file,
131
+ student_transcription
132
+ ])
133
+
134
+ # Extract grading safely
135
+ grading_text = getattr(grading_response, "text", None)
136
+ if not grading_text and grading_response.candidates:
137
+ grading_text = grading_response.candidates[0].content.parts[0].text
138
+ elif not grading_text:
139
+ grading_text = "No Response"
140
+
141
+ return student_transcription, grading_text
142
+
143
  except Exception as e:
144
+ return f"Error processing files: {str(e)}", ""
145
+
146
+ # Create Gradio interface
147
+ with gr.Blocks(title="Exam Paper Grading System", theme=gr.themes.Soft()) as demo:
148
+ gr.Markdown("""
149
+ # 📚 Automated Exam Paper Grading System
150
+
151
+ Upload your question paper, marking scheme, and answer sheet to get automated transcription and grading using Google's Gemini AI.
152
+ """)
153
+
 
 
 
 
 
154
  with gr.Row():
155
+ with gr.Column():
156
+ gr.Markdown("### 📋 Upload Files")
157
+ api_key = gr.Textbox(
158
+ label="Gemini API Key",
159
+ placeholder="Enter your Google Gemini API key",
160
+ type="password"
161
+ )
162
+ question_paper = gr.File(
163
+ label="Question Paper (PDF)",
164
+ file_types=[".pdf"]
165
+ )
166
+ marking_scheme = gr.File(
167
+ label="Marking Scheme (PDF)",
168
+ file_types=[".pdf"]
169
+ )
170
+ answer_sheet = gr.File(
171
+ label="Answer Sheet (PDF)",
172
+ file_types=[".pdf"]
173
+ )
174
+
175
+ process_btn = gr.Button(
176
+ "🚀 Process Papers",
177
+ variant="primary",
178
+ size="lg"
179
+ )
180
+
181
  with gr.Row():
182
+ with gr.Column():
183
+ gr.Markdown("### 📝 Student Answer Transcription")
184
+ transcription_output = gr.Textbox(
185
+ label="Transcribed Answers",
186
+ lines=15,
187
+ max_lines=25,
188
+ show_copy_button=True
189
+ )
190
+
191
+ with gr.Column():
192
+ gr.Markdown("### ✅ Grading Results")
193
+ grading_output = gr.Textbox(
194
+ label="Detailed Grading",
195
+ lines=15,
196
+ max_lines=25,
197
+ show_copy_button=True
198
+ )
199
+
200
+ # Set up the processing function
201
+ process_btn.click(
202
+ fn=process_exam_papers,
203
+ inputs=[question_paper, marking_scheme, answer_sheet, api_key],
204
+ outputs=[transcription_output, grading_output]
205
+ )
206
+
207
+ gr.Markdown("""
208
+ ### 📖 How to Use:
209
+ 1. **Get a Gemini API Key**: Visit [Google AI Studio](https://makersuite.google.com/app/apikey) to get your free API key
210
+ 2. **Upload PDFs**: Upload your question paper, marking scheme, and student answer sheet
211
+ 3. **Process**: Click the "Process Papers" button to get transcription and grading
212
+ 4. **Review**: Check the transcribed answers and detailed grading results
213
+
214
+ ### ⚠️ Notes:
215
+ - All uploaded files are processed securely and not stored permanently
216
+ - The system transcribes exactly what's written (including errors) for accurate grading
217
+ - LaTeX mathematical notation is automatically formatted for clarity
218
+ """)
219
 
220
  if __name__ == "__main__":
221
+ demo.launch()