TRIAL

Sleeping

App Files Files Community

atz21 committed on Sep 28, 2025

Commit

5268488

verified ·

1 Parent(s): a5a195e

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -28

app.py CHANGED Viewed

@@ -28,7 +28,7 @@ INPUT: This file is a PDF that first contains the Question Paper and immediately
 TASK:
 1. Transcribe EXACTLY all the questions FIRST (with their total marks).
 2. After ALL questions, transcribe the Markscheme exactly, preserving M/A/R notation in brackets.
-3. Always number the questions sequentially (Question 1, Question 2, Question 3, …) **in the order they appear in the PDF**, even if the PDF shows a different number or leaves it blank. Do NOT skip or leave Question: blank. Never start a question other than question 1 ( even if it is labelled in pdf as 8 name it 1)
 4. After the markscheme, DETECT and FLAG all questions in the markscheme where a graph/diagram is expected. For each, output the question number and the page number in the format below.
 FORMAT:
 ==== PAPER TOTAL MARKS ====
@@ -263,33 +263,38 @@ AS:
 # Robust parsing functions for graph detection
-def extract_graph_questions_from_ms(ms_text):
-    """
-    Parse LLM output for Markscheme to extract questions/pages where a graph is expected.
-    Returns dict: {question_number: ms_page_number}
-    """
-    matches = re.findall(r"==== GRAPH EXPECTED QUESTIONS ====\\s*Graph expected in:(.*?)==== END GRAPH EXPECTED ====" , ms_text, re.DOTALL)
-    mapping = {}
-    if matches:
-        for line in matches[0].splitlines():
-            m = re.match(r"-\s*Question\s*(\d+)\s*[\u2192\-\:]\s*Page\s*(\d+)", line.strip())
-            if m:
-                mapping[int(m.group(1))] = int(m.group(2))
-    return mapping
-def extract_graph_answers_from_as(as_text):
-    """
-    Parse LLM output for Answer Sheet to extract answers/pages where a graph was found.
-    Returns dict: {answer_number: as_page_number}
-    """
-    matches = re.findall(r"==== GRAPH FOUND ANSWERS ====\\s*Graph found in:(.*?)==== END GRAPH FOUND ====" , as_text, re.DOTALL)
-    mapping = {}
-    if matches:
-        for line in matches[0].splitlines():
-            m = re.match(r"-\s*Answer\s*(\d+)\s*[\u2192\-\:]\s*Page\s*(\d+)", line.strip())
-            if m:
-                mapping[int(m.group(1))] = int(m.group(2))
-    return mapping
 def extract_marks_from_grading(grading_text):
     """

 TASK:
 1. Transcribe EXACTLY all the questions FIRST (with their total marks).
 2. After ALL questions, transcribe the Markscheme exactly, preserving M/A/R notation in brackets.
+3. Always number the questions sequentially (Question 1, Question 2, Question 3a,Question 3b …) **in the order they appear in the PDF**, even if the PDF shows a different number or leaves it blank. Do NOT skip or leave Question: blank. Never start a question other than question 1 ( even if it is labelled in pdf as 8 name it 1)
 4. After the markscheme, DETECT and FLAG all questions in the markscheme where a graph/diagram is expected. For each, output the question number and the page number in the format below.
 FORMAT:
 ==== PAPER TOTAL MARKS ====
 # Robust parsing functions for graph detection
def extract_graph_questions_from_ms(text: str):
    """Extract graph questions and page numbers from an MS transcript.

    Scans the section delimited by ``==== GRAPH EXPECTED QUESTIONS ====`` and
    ``==== END GRAPH EXPECTED ====`` and reads every bullet of the form
    ``- Question <id> → Page <n>`` found inside it. Lines in the section that
    do not have that shape (headers, blank lines, stray text) are ignored.

    Args:
        text: Transcript text to scan. ``None`` or an empty string yields an
            empty result instead of raising.

    Returns:
        A dict mapping each question identifier exactly as written (a string
        such as ``"3"``, ``"3a"`` or ``"2(b)"``) to its page number as an
        ``int``. Empty when the section is absent or has no parsable entries.
    """
    if not text:
        return {}

    # Non-breaking spaces and tabs would defeat the marker/bullet matching.
    clean_text = text.replace("\u00A0", " ").replace("\t", " ")
    section = re.search(
        r"==== GRAPH EXPECTED QUESTIONS ====\s*(.*?)\s*==== END GRAPH EXPECTED ====",
        clean_text,
        re.S,
    )
    if section is None:
        return {}

    # Generated text does not reliably reproduce the arrow: accept "→", "->",
    # "=>", a plain/en/em dash or ":" as the separator, any spacing after the
    # bullet dash, and any letter case for "Question"/"Page".
    entry = re.compile(
        r"-\s*Question\s+([\dA-Za-z.()]+)\s*(?:→|->|=>|[-\u2013\u2014:])\s*Page\s*(\d+)",
        re.IGNORECASE,
    )

    graph_dict = {}
    for line in section.group(1).splitlines():
        found = entry.match(line.strip())
        if found:
            q_id, page = found.groups()
            graph_dict[q_id] = int(page)
    return graph_dict
def extract_graph_answers_from_as(text: str):
    """Extract graph answers and page numbers from an AS transcript.

    Scans the section delimited by ``==== GRAPH FOUND ANSWERS ====`` and
    ``==== END GRAPH FOUND ====`` and reads every bullet of the form
    ``- Answer <id> → Page <n>`` found inside it. Lines in the section that
    do not have that shape (headers, blank lines, stray text) are ignored.

    Args:
        text: Transcript text to scan. ``None`` or an empty string yields an
            empty result instead of raising.

    Returns:
        A dict mapping each answer identifier exactly as written (a string
        such as ``"3"``, ``"3a"`` or ``"2(b)"``) to its page number as an
        ``int``. Empty when the section is absent or has no parsable entries.
    """
    if not text:
        return {}

    # Non-breaking spaces and tabs would defeat the marker/bullet matching.
    clean_text = text.replace("\u00A0", " ").replace("\t", " ")
    section = re.search(
        r"==== GRAPH FOUND ANSWERS ====\s*(.*?)\s*==== END GRAPH FOUND ====",
        clean_text,
        re.S,
    )
    if section is None:
        return {}

    # Generated text does not reliably reproduce the arrow: accept "→", "->",
    # "=>", a plain/en/em dash or ":" as the separator, any spacing after the
    # bullet dash, and any letter case for "Answer"/"Page".
    entry = re.compile(
        r"-\s*Answer\s+([\dA-Za-z.()]+)\s*(?:→|->|=>|[-\u2013\u2014:])\s*Page\s*(\d+)",
        re.IGNORECASE,
    )

    graph_dict = {}
    for line in section.group(1).splitlines():
        found = entry.match(line.strip())
        if found:
            ans_id, page = found.groups()
            graph_dict[ans_id] = int(page)
    return graph_dict
 def extract_marks_from_grading(grading_text):
     """