TRIAL

Sleeping

App Files Files Community

atz21 committed on Sep 27, 2025

Commit

b248fb0

verified ·

1 Parent(s): 6941b48

Update app.py

Browse files

Files changed (1) hide show

app.py +306 -152

app.py CHANGED Viewed

@@ -14,11 +14,11 @@ import cv2
 import numpy as np
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from PyPDF2 import PdfReader, PdfWriter
 # ---------------- CONFIG ----------------
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 GRID_ROWS, GRID_COLS = 20, 14
 # ---------------- PROMPTS ----------------
 PROMPTS = {
     "QP_MS_TRANSCRIPTION" : {
@@ -55,7 +55,8 @@ Answer 2 :
 """
 }
 ,
     "GRADING_PROMPT": {
         "role": "system",
         "content": """Developer: You are an official examiner. Apply the following grading rules precisely.
@@ -74,7 +75,7 @@ Answer 2 :
 4. Accept valid equivalent forms unless otherwise specified.
 5. Apply FT where appropriate.
 6. Use proper notation: M1A0, A1, etc.
-7. Any lost mark: use red `<span style="color:red">M0</span>` and make Reason red.
 ---
 ## Output Format
 Produce two sections per question/sub-question, following this structure:
@@ -99,28 +100,28 @@ NOTES:
 """
     }
 }
 # ---------------- HELPERS ----------------
 def save_as_pdf(text, filename="output.pdf"):
     pdf = MarkdownPdf()
     pdf.add_section(Section(text, toc=False))
     pdf.save(filename)
     return filename
 def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
     if output_path is None:
         base, ext = os.path.splitext(input_path)
         output_path = f"{base}_compressed{ext}"
     try:
         size = os.path.getsize(input_path)
     except Exception:
         return input_path
     if size <= max_size:
         print(f"ℹ️ Not compressing {input_path} ({size/1024/1024:.2f} MB <= {max_size/1024/1024} MB)")
         return input_path
     print(f"🔎 Compressing {input_path} ({size/1024/1024:.2f} MB) -> {output_path}")
     try:
         gs_cmd = [
@@ -141,8 +142,11 @@ def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
     except Exception as e:
         print("❌ Compression error:", e)
         return input_path
 def create_model():
     try:
         print("⚡ Attempting to use gemini-2.5-pro model")
         model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
@@ -158,7 +162,7 @@ def create_model():
     except Exception as e:
         print("❌ Failed to create any Gemini model:", e)
         raise
 def merge_pdfs(paths, output_path):
     writer = PdfWriter()
     for p in paths:
@@ -168,8 +172,12 @@ def merge_pdfs(paths, output_path):
     with open(output_path, "wb") as f:
         writer.write(f)
     return output_path
 def gemini_generate_content(model, prompt_text, file_upload_obj=None, image_obj=None):
     inputs = [prompt_text]
     if file_upload_obj:
         inputs.append(file_upload_obj)
@@ -184,9 +192,14 @@ def gemini_generate_content(model, prompt_text, file_upload_obj=None, image_obj=
         raw_text = str(response)
     print("📥 Received response (chars):", len(raw_text))
     return raw_text
 # ---------------- PARSERS ----------------
 def extract_question_ids_from_qpms(text):
     print("🔎 Extracting question IDs from QP+MS transcript using regex...")
     ids = []
     for m in re.finditer(r"(?im)^\s*Question\s*:\s*([0-9]+(?:(?:\.[a-zA-Z0-9]+)+|(?:\([a-zA-Z0-9]+\))+|[a-zA-Z])*)\b", text):
@@ -196,7 +209,8 @@ def extract_question_ids_from_qpms(text):
         print(f"✅ Extracted {len(ids)} question IDs.")
         print("IDs:", ids)
         return ids
     for m in re.finditer(r"(?m)^\s*([0-9]+(?:(?:\.[a-zA-Z0-9]+)+|(?:\([a-zA-Z0-9]+\))+|[a-zA-Z])*)\s*[\.\):\-]\s", text):
         qid = m.group(1).strip()
         ids.append(qid)
@@ -206,8 +220,11 @@ def extract_question_ids_from_qpms(text):
     else:
         print("⚠️ No question IDs extracted; will send NA placeholder.")
     return ids
 def build_as_prompt_with_expected_ids(expected_ids):
     if not expected_ids:
         ids_block = "{NA}"
     else:
@@ -230,20 +247,25 @@ AS:
 <transcribed answer or placeholder>
 """
     return prompt
 def extract_marks_from_grading(grading_text):
     print("🔎 Extracting awarded marks from grading output...")
     grading_json = {"grading": []}
     question_blocks = re.split(r"##\s*Question\s+", grading_text)
     for block in question_blocks[1:]:
         first_line = block.strip().splitlines()[0].strip() if block.strip().splitlines() else ""
-        q_id_match = re.match(r"([0-9]+(?:[a-zA-Z]|\([^\)]+\)|(?:\.[a-zA-Z0-9]+))*)", first_line)
         if not q_id_match:
             q_id = first_line.split()[0] if first_line else ""
         else:
             q_id = q_id_match.group(1).strip()
         awarded = re.findall(r"\b(M\d+|A\d+|R\d+|M0|A0|R0)\b", block)
-        # 🔴 Change 1: DO NOT deduplicate, keep all marks in sequence
         grading_json["grading"].append({
             "question": q_id,
             "marks_awarded": awarded
@@ -251,34 +273,31 @@ def extract_marks_from_grading(grading_text):
     print("✅ Extracted grading marks for", len(grading_json["grading"]), "question blocks.")
     print(json.dumps(grading_json, indent=2))
     return grading_json
 # ---------------- MAPPING/IMPRINT HELPERS ----------------
-def ask_gemini_for_mapping_for_page(model, image_path, grading_json, rows=GRID_ROWS, cols=GRID_COLS, expected_ids=None):
-    if not expected_ids:
-        ids_block = "{NA}"
-    else:
         ids_block = "{\n" + "\n".join(expected_ids) + "\n}"
-    prompt = f"""
-You are an exam marker. Your role is to identify where each question begins on the page.
-The page is divided into a {rows} x {cols} grid. Each cell has a RUNNING NUMBER label (1..{rows*cols}).
-The only questions you should spot are listed here:
-{ids_block}
-For each question in the grading JSON, return the cell NUMBER where the FIRST STEP of that question begins.
-IMPORTANT RULES:
-- Do not place marks inside another question's answer area.
-- Prefer placing the marks in a BLANK cell immediately to the RIGHT of the answer step. If no blank cell is available to the right, then place in a blank cell to the LEFT.
-- Never place marks above or below the answer.
-- If you find something like Q4.i but above it you see "ii)", interpret it as belonging to Q3.ii instead.
-Return JSON only, like:
-[{{"question": "1.a", "cell_number": 15}}, ...]
-Grading JSON:
-{json.dumps(grading_json, indent=2)}
 """
     print(f"📡 Sending mapping request for image {image_path} to Gemini...")
     img = Image.open(image_path)
@@ -307,115 +326,250 @@ Grading JSON:
                 pass
         print("⚠️ Failed to parse mapping JSON for", image_path)
         return []
-# ---------------- IMPRINTING ----------------
-def imprint_marks_using_mapping(image_path, mapping, output_path, rows=GRID_ROWS, cols=GRID_COLS):
-    print(f"🖊️ Imprinting marks on {image_path} -> {output_path}")
-    img = cv2.imread(image_path)
-    h, w, _ = img.shape
-    cell_h, cell_w = h // rows, w // cols
-    for entry in mapping:
-        try:
-            q = entry["question"]
-            cell_num = int(entry["cell_number"])
-            awarded = entry.get("marks_awarded", [])
-            row = (cell_num - 1) // cols
-            col = (cell_num - 1) % cols
-            x = col * cell_w + 5
-            y = row * cell_h + 20
-            mark_text = f"{q}: {' '.join(awarded)}"
-            cv2.putText(img, mark_text, (x, y),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
-        except Exception as e:
-            print("⚠️ Imprint error for entry:", entry, "|", e)
-    cv2.imwrite(output_path, img)
-    return output_path
-# ---------------- MAIN PIPELINE ----------------
-def align_and_grade_pipeline(qp_ms_pdf, as_pdf):
-    model = create_model()
-    # Step 1: Transcribe QP + MS
-    print("📄 Transcribing QP+MS PDF...")
-    qpms_text = gemini_generate_content(model, PROMPTS["QP_MS_TRANSCRIPTION"]["content"], file_upload_obj=qp_ms_pdf)
-    # Step 2: Extract IDs
-    expected_ids = extract_question_ids_from_qpms(qpms_text)
-    # Step 3: Transcribe AS
-    print("📄 Transcribing Answer Sheet PDF...")
-    as_prompt = build_as_prompt_with_expected_ids(expected_ids)
-    as_text = gemini_generate_content(model, as_prompt, file_upload_obj=as_pdf)
-    # Step 4: Grade
-    grading_prompt = PROMPTS["GRADING_PROMPT"]["content"] + "\n\n" + \
-                     "QP+MS Transcript:\n" + qpms_text + "\n\nAS Transcript:\n" + as_text
-    grading_text = gemini_generate_content(model, grading_prompt)
-    # Step 5: Extract marks JSON
-    grading_json = extract_marks_from_grading(grading_text)
-    # Step 6: Convert AS to images
-    images = convert_from_path(as_pdf, dpi=200)
-    temp_dir = tempfile.mkdtemp()
-    image_paths = []
-    for i, img in enumerate(images):
-        img_path = os.path.join(temp_dir, f"page_{i+1}.png")
-        img.save(img_path, "PNG")
-        image_paths.append(img_path)
-    # Step 7: Mapping for each page
-    mappings = []
-    for img_path in image_paths:
-        mapping = ask_gemini_for_mapping_for_page(model, img_path, grading_json,
-                                                 rows=GRID_ROWS, cols=GRID_COLS,
-                                                 expected_ids=expected_ids)
-        # Merge awarded marks into mapping
-        for entry in mapping:
-            for g in grading_json["grading"]:
-                if g["question"] == entry["question"]:
-                    entry["marks_awarded"] = g["marks_awarded"]
-        mappings.append((img_path, mapping))
-    # Step 8: Imprint marks
-    imprinted_paths = []
-    for img_path, mapping in mappings:
-        out_path = img_path.replace(".png", "_imprinted.png")
-        imprint_marks_using_mapping(img_path, mapping, out_path)
-        imprinted_paths.append(out_path)
-    # Step 9: Convert to PDF
-    output_pdf = os.path.join(temp_dir, "final_output.pdf")
-    with open(output_pdf, "wb") as f:
-        f.write(img2pdf.convert(imprinted_paths))
-    compressed_pdf = compress_pdf(output_pdf)
-    return grading_text, compressed_pdf
-# ---------------- GRADIO UI ----------------
-def run_gradio():
-    with gr.Blocks() as demo:
-        gr.Markdown("# 📘 Automated Exam Grader (QP + MS + AS)")
-        with gr.Row():
-            qpms_file = gr.File(label="Upload Question Paper + Markscheme PDF", file_types=[".pdf"])
-            as_file = gr.File(label="Upload Student Answer Sheet PDF", file_types=[".pdf"])
-        run_btn = gr.Button("Run Alignment + Grading")
-        grading_output = gr.Textbox(label="Grading Report (Markdown)", lines=20)
-        final_pdf = gr.File(label="Download Final Imprinted PDF")
-        def process(qpms_pdf, as_pdf):
-            grading_text, pdf_path = align_and_grade_pipeline(qpms_pdf, as_pdf)
-            return grading_text, pdf_path
-        run_btn.click(process, inputs=[qpms_file, as_file], outputs=[grading_output, final_pdf])
-    demo.launch()
-if __name__ == "__main__":
-    run_gradio()

 import numpy as np
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from PyPDF2 import PdfReader, PdfWriter
 # ---------------- CONFIG ----------------
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 GRID_ROWS, GRID_COLS = 20, 14
 # ---------------- PROMPTS ----------------
 PROMPTS = {
     "QP_MS_TRANSCRIPTION" : {
 """
 }
 ,
+    # GRADING_PROMPT unchanged except we will print steps around calling it
     "GRADING_PROMPT": {
         "role": "system",
         "content": """Developer: You are an official examiner. Apply the following grading rules precisely.
 4. Accept valid equivalent forms unless otherwise specified.
 5. Apply FT where appropriate.
 6. Use proper notation: M1A0, A1, etc.
+7. Any lost mark: use red `<span style=\"color:red\">M0</span>` and make Reason red.
 ---
 ## Output Format
 Produce two sections per question/sub-question, following this structure:
 """
     }
 }
 # ---------------- HELPERS ----------------
 def save_as_pdf(text, filename="output.pdf"):
     """
     Render `text` as a single section (no table of contents) of a MarkdownPdf
     document, save it to `filename`, and return `filename`.
     """
     document = MarkdownPdf()
     body_section = Section(text, toc=False)
     document.add_section(body_section)
     document.save(filename)
     return filename
 def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
     if output_path is None:
         base, ext = os.path.splitext(input_path)
         output_path = f"{base}_compressed{ext}"
     try:
         size = os.path.getsize(input_path)
     except Exception:
         return input_path
     if size <= max_size:
         print(f"ℹ️ Not compressing {input_path} ({size/1024/1024:.2f} MB <= {max_size/1024/1024} MB)")
         return input_path
     print(f"🔎 Compressing {input_path} ({size/1024/1024:.2f} MB) -> {output_path}")
     try:
         gs_cmd = [
     except Exception as e:
         print("❌ Compression error:", e)
         return input_path
 def create_model():
+    """
+    Create the Gemini model and print which model is selected.
+    """
     try:
         print("⚡ Attempting to use gemini-2.5-pro model")
         model = genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
     except Exception as e:
         print("❌ Failed to create any Gemini model:", e)
         raise
 def merge_pdfs(paths, output_path):
     writer = PdfWriter()
     for p in paths:
     with open(output_path, "wb") as f:
         writer.write(f)
     return output_path
 def gemini_generate_content(model, prompt_text, file_upload_obj=None, image_obj=None):
+    """
+    Send prompt_text and optionally an uploaded file (or an image object) to the model.
+    Returns textual response and prints progress.
+    """
     inputs = [prompt_text]
     if file_upload_obj:
         inputs.append(file_upload_obj)
         raw_text = str(response)
     print("📥 Received response (chars):", len(raw_text))
     return raw_text
 # ---------------- PARSERS ----------------
 def extract_question_ids_from_qpms(text):
+    """
+    Extract question IDs from QP+MS transcript output.
+    We expect the QP+MS prompt to produce lines like 'Question: <id>'
+    Return a list of IDs in order of appearance, including duplicates.
+    """
     print("🔎 Extracting question IDs from QP+MS transcript using regex...")
     ids = []
     for m in re.finditer(r"(?im)^\s*Question\s*:\s*([0-9]+(?:(?:\.[a-zA-Z0-9]+)+|(?:\([a-zA-Z0-9]+\))+|[a-zA-Z])*)\b", text):
         print(f"✅ Extracted {len(ids)} question IDs.")
         print("IDs:", ids)
         return ids
+    # fallback scans
     for m in re.finditer(r"(?m)^\s*([0-9]+(?:(?:\.[a-zA-Z0-9]+)+|(?:\([a-zA-Z0-9]+\))+|[a-zA-Z])*)\s*[\.\):\-]\s", text):
         qid = m.group(1).strip()
         ids.append(qid)
     else:
         print("⚠️ No question IDs extracted; will send NA placeholder.")
     return ids
 def build_as_prompt_with_expected_ids(expected_ids):
+    """
+    Construct the AS transcription prompt injecting the expected IDs block.
+    """
     if not expected_ids:
         ids_block = "{NA}"
     else:
 <transcribed answer or placeholder>
 """
     return prompt
 def extract_marks_from_grading(grading_text):
+    """
+    Parse the grading markdown produced by the GRADING_PROMPT and extract marks per question.
+    Returns dict: {"grading": [{"question": "1.a", "marks_awarded": ["M1","A1"]}, ...]}
+    Preserves all marks in order, including duplicates.
+    """
     print("🔎 Extracting awarded marks from grading output...")
     grading_json = {"grading": []}
     question_blocks = re.split(r"##\s*Question\s+", grading_text)
     for block in question_blocks[1:]:
         first_line = block.strip().splitlines()[0].strip() if block.strip().splitlines() else ""
+        q_id_match = re.match(r"([0-9]+(?:[a-zA-Z]|\([^)]+\)|(?:\.[a-zA-Z0-9]+))*)", first_line)
         if not q_id_match:
             q_id = first_line.split()[0] if first_line else ""
         else:
             q_id = q_id_match.group(1).strip()
         awarded = re.findall(r"\b(M\d+|A\d+|R\d+|M0|A0|R0)\b", block)
         grading_json["grading"].append({
             "question": q_id,
             "marks_awarded": awarded
     print("✅ Extracted grading marks for", len(grading_json["grading"]), "question blocks.")
     print(json.dumps(grading_json, indent=2))
     return grading_json
 # ---------------- MAPPING/IMPRINT HELPERS ----------------
+def ask_gemini_for_mapping_for_page(model, image_path, grading_json, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
+    """
+    Send a single page image along with the grading_json and expected_ids; LLM should return JSON mapping.
+    """
+    ids_block = "{NA}"
+    if expected_ids:
         ids_block = "{\n" + "\n".join(expected_ids) + "\n}"
+    prompt = f"""
+You are an exam marker. Your role is to identify where each question begins on the page.
+The page is divided into a {rows} x {cols} grid. Each cell has a RUNNING NUMBER label (1..{rows*cols}).
+For each question in the grading JSON, return the cell NUMBER where the FIRST STEP of that question begins.
+IMPORTANT: Only spot and return cell numbers for the following question IDs (one per line):
+{ids_block}
+If you see a sub-question (e.g., ii) above a main question (e.g., Q4), infer it belongs to the previous question (e.g., Q3.ii).
+- Do not place marks inside another question's answer area.
+- Prefer placing the marks in a BLANK cell immediately to the RIGHT of the answer step. If no blank cell is available to the right, then place in a blank cell to the LEFT.
+- Never place marks above or below the answer.
+- If a question starts on a previous page, you may omit it for this page.
+Return JSON only, like:
+[{{"question": "1.a", "cell_number": 15}}, ...]
+Grading JSON:
+{json.dumps(grading_json, indent=2)}
 """
     print(f"📡 Sending mapping request for image {image_path} to Gemini...")
     img = Image.open(image_path)
                 pass
         print("⚠️ Failed to parse mapping JSON for", image_path)
         return []
+def imprint_marks_using_mapping(pdf_path, grading_json, output_pdf, model, expected_ids=None, rows=GRID_ROWS, cols=GRID_COLS):
+    """
+    Convert PDF to images, create grid-numbered images for sending to Gemini,
+    send all page images in parallel to Gemini for mapping, then annotate and produce imprinted PDF.
+    """
+    print("📄 Converting answer PDF to images for imprinting...")
+    pages = convert_from_path(pdf_path, dpi=200)
+    annotated_page_paths = []
+    temp_grid_images = []
+    for p_index, page in enumerate(pages):
+        img = page.convert("RGB")
+        w, h = img.size
+        cell_w, cell_h = w / cols, h / rows
+        draw = ImageDraw.Draw(img)
+        try:
+            num_font = ImageFont.truetype("arial.ttf", 16)
+        except Exception:
+            num_font = ImageFont.load_default()
+        cell_num = 1
+        for r in range(rows):
+            for c in range(cols):
+                x = int(c * cell_w + cell_w / 2)
+                y = int(r * cell_h + cell_h / 2)
+                text = str(cell_num)
+                bbox = draw.textbbox((0, 0), text, font=num_font)
+                tw = bbox[2] - bbox[0]
+                th = bbox[3] - bbox[1]
+                draw.text((x - tw/2, y - th/2), text, fill="black", font=num_font)
+                cell_num += 1
+        temp_path = f"page_{p_index+1}_grid.png"
+        img.save(temp_path, "PNG")
+        temp_grid_images.append(temp_path)
+        print("🛰 Created grid image:", temp_path)
+    # Send all grid images in parallel to Gemini to get mappings
+    print("📡 Sending all page images to Gemini in parallel for mapping...")
+    mappings_per_page = {}
+    model_local = model
+    with ThreadPoolExecutor(max_workers=min(8, len(temp_grid_images))) as ex:
+        futures = {ex.submit(ask_gemini_for_mapping_for_page, model_local, img_path, grading_json, expected_ids, rows, cols): idx
+                   for idx, img_path in enumerate(temp_grid_images)}
+        for fut in as_completed(futures):
+            idx = futures[fut]
+            try:
+                mapping = fut.result()
+            except Exception as e:
+                print("⚠️ Mapping request failed for page", idx, e)
+                mapping = []
+            mappings_per_page[idx] = mapping
+    # Annotate original pages according to returned mappings
+    print("🖊 Annotating pages with marks...")
+    for p_index, page in enumerate(pages):
+        page_img = page.convert("RGB")
+        img_cv = np.array(page_img)
+        img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
+        h, w, _ = img_cv.shape
+        cell_w_px, cell_h_px = w / cols, h / rows
+        mapping = mappings_per_page.get(p_index, [])
+        occupied = set()
+        for item in mapping:
+            qid = item.get("question")
+            cell_number = item.get("cell_number")
+            if qid is None or cell_number is None:
+                continue
+            marks_list = next((g["marks_awarded"] for g in grading_json.get("grading", []) if g["question"] == qid), [])
+            if not marks_list:
+                marks_list = next((g["marks_awarded"] for g in grading_json.get("grading", [])
+                                   if g["question"].lower() == (qid or "").lower()), [])
+            marks_text = ",".join(marks_list) if marks_list else "?"
+            row = (cell_number - 1) // cols
+            col = (cell_number - 1) % cols
+            candidates = []
+            if col + 1 < cols:
+                candidates.append((row, col + 1))
+            candidates.append((row, col))
+            if col - 1 >= 0:
+                candidates.append((row, col - 1))
+            chosen = None
+            for (r_c, c_c) in candidates:
+                cell_id = r_c * cols + c_c + 1
+                if cell_id not in occupied:
+                    chosen = (r_c, c_c)
+                    occupied.add(cell_id)
+                    break
+            if chosen is None:
+                chosen = (row, col)
+            r_c, c_c = chosen
+            x_c = int((c_c + 1) * cell_w_px - cell_w_px * 0.1)
+            y_c = int((r_c + 0.5) * cell_h_px)
+            font_scale = max(0.6, min(1.6, cell_h_px / 60.0))
+            thickness = max(1, int(font_scale * 2))
+            cv2.putText(img_cv, marks_text, (x_c, y_c), cv2.FONT_HERSHEY_SIMPLEX,
+                        font_scale, (0, 0, 255), thickness, cv2.LINE_AA)
+        annotated_path = f"annotated_page_{p_index+1}.png"
+        cv2.imwrite(annotated_path, img_cv)
+        annotated_page_paths.append(annotated_path)
+        print("✅ Annotated page saved:", annotated_path)
+    with open(output_pdf, "wb") as f:
+        f.write(img2pdf.convert(annotated_page_paths))
+    compressed = compress_pdf(output_pdf)
+    print("📑 Imprinted PDF saved to:", compressed)
+    return compressed
+# ---------------- MAIN PIPELINE ----------------
+def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
+    """
+    Run the full grading flow for one student and log every stage to the console.
+    Stages, in order:
+      - pass each input PDF through compress_pdf;
+      - merge question paper + markscheme into "<qp stem>_merged_qp_ms.pdf";
+      - upload the merged PDF and the answer sheet with genai.upload_file;
+      - transcribe QP+MS, extract the question IDs from that transcript (["NA"] if none);
+      - transcribe the answer sheet with a prompt that lists those IDs;
+      - grade both transcripts with GRADING_PROMPT and save the result as
+        "<answer stem>_graded.pdf";
+      - extract the awarded marks and, when `imprint` is true, imprint them onto the
+        answer sheet as "<answer stem>_imprinted.pdf".
+    Debug copies of each intermediate result (debug_qpms_transcript.txt,
+    debug_as_transcript.txt, debug_grading.md, debug_grading_json.json) are written with
+    relative paths, i.e. into the current working directory.
+    Args:
+        qp_path: path of the question-paper PDF.
+        ms_path: path of the markscheme PDF.
+        ans_path: path of the student's answer-sheet PDF.
+        imprint: when true, also produce the imprinted answer sheet.
+    Returns:
+        (qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path);
+        imprinted_pdf_path is None when `imprint` is false.
+        Any exception is caught, printed, and reported as
+        ("❌ Error: <message>", None, None, None, None) instead of being raised.
+    """
+    try:
+        print("🔁 Starting pipeline...")
+        # Compress the inputs first; from here on the *_path names refer to whatever
+        # compress_pdf returned.
+        qp_path = compress_pdf(qp_path)
+        ms_path = compress_pdf(ms_path)
+        ans_path = compress_pdf(ans_path)
+        # Merge QP + MS so they can be transcribed in a single request
+        merged_qpms_path = os.path.splitext(qp_path)[0] + "_merged_qp_ms.pdf"
+        merge_pdfs([qp_path, ms_path], merged_qpms_path)
+        print("📎 Merged QP + MS ->", merged_qpms_path)
+        # Upload files to Gemini
+        print("🔼 Uploading files to Gemini...")
+        merged_uploaded = genai.upload_file(path=merged_qpms_path, display_name="QP+MS (merged)")
+        ans_uploaded = genai.upload_file(path=ans_path, display_name="Answer Sheet")
+        print("✅ Upload complete.")
+        # Create the model (create_model prints which one it selected)
+        model = create_model()
+        # QP+MS transcription comes first because the AS prompt depends on its question IDs
+        print("1.i) Transcribing QP+MS (questions first, then full markscheme)...")
+        qpms_prompt = PROMPTS["QP_MS_TRANSCRIPTION"]["content"]
+        qpms_text = gemini_generate_content(model, qpms_prompt, file_upload_obj=merged_uploaded)
+        print("📄 QP+MS transcription received. Saving debug file: debug_qpms_transcript.txt")
+        with open("debug_qpms_transcript.txt", "w", encoding="utf-8") as f:
+            f.write(qpms_text)
+        # Extract the question IDs from the QP+MS transcript; "NA" stands in when none are found
+        extracted_ids = extract_question_ids_from_qpms(qpms_text)
+        if not extracted_ids:
+            extracted_ids = ["NA"]
+        # Build the AS prompt with the extracted IDs and transcribe the answer sheet
+        print("1.ii) Building AS transcription prompt with expected question IDs and sending to Gemini...")
+        as_prompt = build_as_prompt_with_expected_ids(extracted_ids)
+        as_text = gemini_generate_content(model, as_prompt, file_upload_obj=ans_uploaded)
+        print("📝 AS transcription received. Saving debug file: debug_as_transcript.txt")
+        with open("debug_as_transcript.txt", "w", encoding="utf-8") as f:
+            f.write(as_text)
+        # Grading: both transcripts go into one text-only request, fenced by BEGIN/END markers
+        print("2) Preparing grading input and sending to Gemini for grading...")
+        grading_input = (
+            "=== QP+MS TRANSCRIPT BEGIN ===\n"
+            + qpms_text
+            + "\n=== QP+MS TRANSCRIPT END ===\n\n"
+            + "=== ANSWER SHEET TRANSCRIPT BEGIN ===\n"
+            + as_text
+            + "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
+        )
+        grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
+        grading_text = gemini_generate_content(model, grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input)
+        print("🧾 Grading output received. Saving debug file: debug_grading.md")
+        with open("debug_grading.md", "w", encoding="utf-8") as f:
+            f.write(grading_text)
+        # Save grading PDF, named after the answer sheet as returned by compress_pdf
+        # NOTE(review): if compress_pdf produced a new file, the stem presumably carries
+        # its "_compressed" suffix -- confirm against compress_pdf.
+        base_name = os.path.splitext(os.path.basename(ans_path))[0]
+        grading_pdf_path = save_as_pdf(grading_text, f"{base_name}_graded.pdf")
+        print("📄 Grading PDF saved:", grading_pdf_path)
+        # Extract per-question marks; they are only consumed by the optional imprint step
+        grading_json = extract_marks_from_grading(grading_text)
+        with open("debug_grading_json.json", "w", encoding="utf-8") as f:
+            json.dump(grading_json, f, indent=2, ensure_ascii=False)
+        print("🔧 Grading marks extraction complete.")
+        imprinted_pdf_path = None
+        if imprint:
+            print("✍ Imprint option enabled. Starting imprinting process (parallel mapping requests)...")
+            imprinted_pdf_path = f"{base_name}_imprinted.pdf"
+            # The helper returns the path actually written, which replaces the requested one
+            imprinted_pdf_path = imprint_marks_using_mapping(ans_path, grading_json, imprinted_pdf_path, model, extracted_ids)
+            print("✅ Imprinting finished. Imprinted PDF at:", imprinted_pdf_path)
+        print("🏁 Pipeline finished successfully.")
+        return qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path
+    except Exception as e:
+        # Top-level boundary: the error text travels in the first slot of the result tuple
+        print("❌ Pipeline error:", e)
+        return f"❌ Error: {e}", None, None, None, None
+# ---------------- GRADIO UI ----------------
+with gr.Blocks(title="LeadIB AI Grading (Final Flow — Verbose)") as demo:
+    gr.Markdown("## 📘 LeadIB AI Grading — Final Flow\nUpload **Question Paper**, **Markscheme**, and **Student Answer Sheet**.\nFlow: merge QP+MS -> transcribe QP+MS (questions first, full markscheme) -> extract IDs -> transcribe AS with expected IDs -> grade -> (optional) imprint. Console prints show progress.")
+    with gr.Row():
+        qp_file = gr.File(label="📄 Upload Question Paper (PDF)")
+        ms_file = gr.File(label="📄 Upload Markscheme (PDF)")
+        ans_file = gr.File(label="📝 Upload Student Answer Sheet (PDF)")
+    imprint_toggle = gr.Checkbox(label="✍ Imprint Marks on Student Answer Sheet", value=False)
+    run_button = gr.Button("🚀 Run Pipeline")
+    with gr.Row():
+        qpms_box = gr.Textbox(label="📑 QP+MS Transcript", lines=12)
+        as_box = gr.Textbox(label="📝 AS Transcript", lines=12)
+    grading_output_box = gr.Textbox(label="🧾 Grading (Markdown)", lines=20)
+    grading_pdf_file = gr.File(label="📥 Download Grading PDF")
+    imprint_pdf_file = gr.File(label="📥 Download Imprinted PDF (Optional)")
+    def run_pipeline(qp_file_obj, ms_file_obj, ans_file_obj, imprint_flag):
+        qp_path = qp_file_obj.name
+        ms_path = ms_file_obj.name
+        ans_path = ans_file_obj.name
+        qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path = align_and_grade_pipeline(
+            qp_path, ms_path, ans_path, imprint=imprint_flag
+        )
+        return qpms_text or "", as_text or "", grading_text or "", grading_pdf_path, imprinted_pdf_path
+    run_button.click(
+        fn=run_pipeline,
+        inputs=[qp_file, ms_file, ans_file, imprint_toggle],
+        outputs=[qpms_box, as_box, grading_output_box, grading_pdf_file, imprint_pdf_file]
+    )
+if __name__ == "__main__":
+    demo.launch()