Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -28,7 +28,7 @@ INPUT: This file is a PDF that first contains the Question Paper and immediately
|
|
| 28 |
TASK:
|
| 29 |
1. Transcribe EXACTLY all the questions FIRST (with their total marks).
|
| 30 |
2. After ALL questions, transcribe the Markscheme exactly, preserving M/A/R notation in brackets.
|
| 31 |
-
3. Always number the questions sequentially (Question 1, Question 2, Question
|
| 32 |
4. After the markscheme, DETECT and FLAG all questions in the markscheme where a graph/diagram is expected. For each, output the question number and the page number in the format below.
|
| 33 |
FORMAT:
|
| 34 |
==== PAPER TOTAL MARKS ====
|
|
@@ -263,33 +263,38 @@ AS:
|
|
| 263 |
|
| 264 |
# Robust parsing functions for graph detection
|
| 265 |
|
| 266 |
-
def extract_graph_questions_from_ms(
|
| 267 |
-
"""
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
for line in
|
| 275 |
-
|
| 276 |
-
if
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
"""
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
|
| 294 |
def extract_marks_from_grading(grading_text):
|
| 295 |
"""
|
|
|
|
| 28 |
TASK:
|
| 29 |
1. Transcribe EXACTLY all the questions FIRST (with their total marks).
|
| 30 |
2. After ALL questions, transcribe the Markscheme exactly, preserving M/A/R notation in brackets.
|
| 31 |
+
3. Always number the questions sequentially (Question 1, Question 2, Question 3a,Question 3b …) **in the order they appear in the PDF**, even if the PDF shows a different number or leaves it blank. Do NOT skip or leave Question: blank. Never start a question other than question 1 ( even if it is labelled in pdf as 8 name it 1)
|
| 32 |
4. After the markscheme, DETECT and FLAG all questions in the markscheme where a graph/diagram is expected. For each, output the question number and the page number in the format below.
|
| 33 |
FORMAT:
|
| 34 |
==== PAPER TOTAL MARKS ====
|
|
|
|
| 263 |
|
| 264 |
# Robust parsing functions for graph detection
|
| 265 |
|
| 266 |
+
def extract_graph_questions_from_ms(text: str):
    """Extract graph-expected questions and page numbers from an MS transcript.

    The transcript is searched for a delimited block of the form::

        ==== GRAPH EXPECTED QUESTIONS ====
        - Question 3a → Page 5
        - Question 7 -> Page 12
        ==== END GRAPH EXPECTED ====

    Only lines inside that block that look like
    ``- Question <id> <arrow> Page <number>`` are used; every other line is
    ignored. The arrow may be the Unicode ``→`` or an ASCII variant
    (``->``, ``-->``, ``=>``).

    Args:
        text: Markscheme transcript. ``None`` or an empty string yields an
            empty result instead of raising.

    Returns:
        A dict mapping each question identifier (string, e.g. ``"3a"``) to
        its page number (``int``). Empty when the block is missing or holds
        no well-formed entries. If an identifier is listed more than once,
        the last occurrence wins.
    """
    if not text:
        return {}

    # Non-breaking spaces and tabs would defeat the whitespace handling of
    # the line-level matching below, so normalise them to plain spaces first.
    clean_text = text.replace("\u00A0", " ").replace("\t", " ")
    block_match = re.search(
        r"==== GRAPH EXPECTED QUESTIONS ====\s*(.*?)\s*==== END GRAPH EXPECTED ====",
        clean_text,
        re.S,
    )
    if block_match is None:
        return {}

    # Accept ASCII arrows next to the Unicode one: the transcript is
    # generated text and does not always reproduce "→" verbatim.
    entry_re = re.compile(
        r"- Question\s+([\dA-Za-z.()]+)\s*(?:→|-+>|=+>)\s*Page\s*(\d+)"
    )

    graph_dict = {}
    for raw_line in block_match.group(1).splitlines():
        entry = entry_re.match(raw_line.strip())
        if entry:
            q_id, page = entry.groups()
            graph_dict[q_id] = int(page)
    return graph_dict
|
| 282 |
+
|
| 283 |
+
def extract_graph_answers_from_as(text: str):
    """Extract graph answers and page numbers from an AS transcript.

    The transcript is searched for a delimited block of the form::

        ==== GRAPH FOUND ANSWERS ====
        - Answer 3a → Page 5
        - Answer 7 -> Page 12
        ==== END GRAPH FOUND ====

    Only lines inside that block that look like
    ``- Answer <id> <arrow> Page <number>`` are used; every other line is
    ignored. The arrow may be the Unicode ``→`` or an ASCII variant
    (``->``, ``-->``, ``=>``).

    Args:
        text: Answer-sheet transcript. ``None`` or an empty string yields an
            empty result instead of raising.

    Returns:
        A dict mapping each answer identifier (string, e.g. ``"3a"``) to its
        page number (``int``). Empty when the block is missing or holds no
        well-formed entries. If an identifier is listed more than once, the
        last occurrence wins.
    """
    if not text:
        return {}

    # Non-breaking spaces and tabs would defeat the whitespace handling of
    # the line-level matching below, so normalise them to plain spaces first.
    clean_text = text.replace("\u00A0", " ").replace("\t", " ")
    block_match = re.search(
        r"==== GRAPH FOUND ANSWERS ====\s*(.*?)\s*==== END GRAPH FOUND ====",
        clean_text,
        re.S,
    )
    if block_match is None:
        return {}

    # Accept ASCII arrows next to the Unicode one: the transcript is
    # generated text and does not always reproduce "→" verbatim.
    entry_re = re.compile(
        r"- Answer\s+([\dA-Za-z.()]+)\s*(?:→|-+>|=+>)\s*Page\s*(\d+)"
    )

    graph_dict = {}
    for raw_line in block_match.group(1).splitlines():
        entry = entry_re.match(raw_line.strip())
        if entry:
            ans_id, page = entry.groups()
            graph_dict[ans_id] = int(page)
    return graph_dict
|
| 298 |
|
| 299 |
def extract_marks_from_grading(grading_text):
|
| 300 |
"""
|