neurolearn

Sleeping

App Files Community

atz21 committed on Sep 27, 2025

Commit

ca54958

verified ·

1 Parent(s): 46d2a1f

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -180

app.py CHANGED Viewed

@@ -151,23 +151,13 @@ def gemini_generate_content(model, prompt_text, file_upload_obj=None, image_obj=
 # ---------------- PARSERS ----------------
 def extract_question_ids_from_qpms(text):
-    """
-    Extract question IDs in the order they appear.
-    NOTE: do NOT deduplicate — keep multiple occurrences as they are in the transcript.
-    """
     ids = []
-    # first attempt: explicit "Question :" lines
     for m in re.finditer(r"(?im)^\s*Question\s*:\s*([0-9]+(?:[a-zA-Z0-9\.\(\)]+)*)\b", text):
-        qid = m.group(1).strip()
-        ids.append(qid)
-    # fallback: lines starting with numbering like "1." or "2)" etc.
-    for m in re.finditer(r"(?m)^\s*([0-9]+(?:[a-zA-Z0-9\.\(\)]+)*)\s*[\.\):\-]\s", text):
-        qid = m.group(1).strip()
-        ids.append(qid)
-    # If nothing found, record "NA" once
     if not ids:
-        ids = ["NA"]
-    return ids
 def build_as_prompt_with_expected_ids(expected_ids):
     ids_block = "{\n" + "\n".join(expected_ids) + "\n}" if expected_ids else "{NA}"
@@ -187,54 +177,41 @@ AS:
     return prompt
 def extract_marks_from_grading_exact(grading_text):
-    """
-    Extract grading marks in the order they appear and keep duplicates.
-    Output JSON with grading list preserving sequence (no deduplication).
-    """
     grading_json = {"grading": []}
-    # split by question blocks by heading "## Question "
     question_blocks = re.split(r"##\s*Question\s+", grading_text)
     for block in question_blocks[1:]:
-        # try to get the ID from the first line (robust)
         first_line = block.strip().splitlines()[0].strip() if block.strip().splitlines() else ""
         q_id_match = re.match(r"([0-9]+(?:[a-zA-Z]|\([^\)]+\)|(?:\.[a-zA-Z0-9]+))*)", first_line)
         q_id = q_id_match.group(1).strip() if q_id_match else first_line.split()[0] if first_line else ""
-        # find all mark tokens in order and preserve duplicates
         awarded = re.findall(r"\b(M\d+|A\d+|R\d+|M0|A0|R0)\b", block)
         grading_json["grading"].append({"question": q_id, "marks_awarded": awarded})
     return grading_json
 # ---------------- IMPRINT ----------------
-def ask_gemini_for_mapping_for_page_v2(model, image_path, grading_json, question_scheme, ids_block, rows=GRID_ROWS, cols=GRID_COLS):
-    """
-    Ask Gemini to map expected question IDs (ids_block) to grid cells on a single page image.
-    The prompt explicitly passes the expected IDs block and instructs the model to interpret
-    mislabelled steps (e.g., ii) above Q4 -> interpret as previous question's subpart).
-    """
     prompt = f"""
 You are an exam marker. Identify where each question begins on this page.
 The page has {rows}x{cols} grid (cells 1..{rows*cols}).
-These are the QUESTIONS YOU MUST SPOT (expected IDs):
-{ids_block}
-Question scheme (authoritative full QP+MS text):
 {question_scheme}
 Grading JSON:
 {json.dumps(grading_json, indent=2)}
-Important instructions:
-- Only return cell numbers for the expected question IDs listed above.
-- If you detect a fragment like "ii)" above a later question heading (for example: you find "Q4" on the page and above it you see "ii)"), interpret that fragment as belonging to the previous question (e.g., "Q3.ii"). In other words: if a subpart appears spatially above a heading for the next question, reassign it to the previous question's appropriate subpart.
-- Return the earliest cell number where the student's first written step (the start of that answer) appears.
-- Prefer a blank cell immediately to the RIGHT of detected starting cell for placing marks; if not available, prefer LEFT.
-- Avoid placing marks inside another question's answer area where possible.
-- Only include questions that actually appear on this page.
-Return JSON only in the format:
-[{"question":"1.a","cell_number":15}, ...]
 """
-    # Attach image plus prompt to Gemini
     img = Image.open(image_path)
     response = model.generate_content([prompt, img])
     raw_text = getattr(response, "text", None)
@@ -242,47 +219,20 @@ Return JSON only in the format:
         raw_text = response.candidates[0].content.parts[0].text
     if not raw_text:
         raw_text = str(response)
-    # attempt to parse JSON array from model output
     try:
         start = raw_text.index('[')
         end = raw_text.rindex(']') + 1
         return json.loads(raw_text[start:end])
     except Exception:
-        # Best-effort: try to extract lines like {"question":"1.a","cell_number":15}
-        try:
-            lines = re.findall(r'\{[^}]*\}', raw_text)
-            parsed = [json.loads(l) for l in lines]
-            return parsed
-        except Exception:
-            return []
-def imprint_marks_using_mapping_v2(pdf_path, grading_json, output_pdf, question_scheme, model, ids_block, rows=GRID_ROWS, cols=GRID_COLS):
-    """
-    Imprint marks onto a PDF using mapping returned by Gemini.
-    Key changes:
-    - Use the PDF's original mediabox (width_pt, height_pt) and render pages at 72 DPI,
-      so that 1 pixel == 1 point and no scaling occurs.
-    - Create annotated images at native page size and recreate PDF using those exact dimensions.
-    - Print progress/log steps.
-    """
-    print("[IMPRINT] Reading PDF and preparing page sizes...")
     reader = PdfReader(pdf_path)
-    # Use first page mediabox as canonical (works if pages share same size). For multi-size PDFs,
-    # we will read each page size when processing that page.
-    pages_info = []
-    for p_index, p in enumerate(reader.pages):
-        width_pt = float(p.mediabox.width)
-        height_pt = float(p.mediabox.height)
-        pages_info.append({"index": p_index, "width_pt": width_pt, "height_pt": height_pt})
-    # Render pages at 72 DPI so pixel dimensions == points (1 pt = 1 px).
-    # This avoids any rescaling.
-    print("[IMPRINT] Converting PDF pages to images at 72 DPI (1 px == 1 point)...")
-    pages = convert_from_path(pdf_path, dpi=72)
     annotated_page_paths = []
     temp_grid_images = []
-    # Create grid overlays (for debugging/visual confirmation) and save images used for mapping
     for p_index, page_img in enumerate(pages):
         img = page_img.convert("RGB")
         draw = ImageDraw.Draw(img)
@@ -291,123 +241,71 @@ def imprint_marks_using_mapping_v2(pdf_path, grading_json, output_pdf, question_
         except:
             font = ImageFont.load_default()
-        cols_local = cols
-        rows_local = rows
-        cell_w = img.width / cols_local
-        cell_h = img.height / rows_local
         cell_num = 1
-        for r in range(rows_local):
-            for c in range(cols_local):
                 x = int(c * cell_w + cell_w / 2)
                 y = int(r * cell_h + cell_h / 2)
                 bbox = draw.textbbox((0,0), str(cell_num), font=font)
                 draw.text((x - (bbox[2]-bbox[0])/2, y - (bbox[3]-bbox[1])/2), str(cell_num), fill="black", font=font)
-                cell_num += 1
         grid_path = f"page_{p_index+1}_grid.png"
         img.save(grid_path, "PNG")
         temp_grid_images.append(grid_path)
-        print(f"[IMPRINT] Grid image saved: {grid_path} (pixels: {img.width}x{img.height})")
-    # Ask Gemini (concurrently) to map question starts to cells
-    print("[IMPRINT] Sending grid images to Gemini to obtain cell mappings...")
     mappings_per_page = {}
-    with ThreadPoolExecutor(max_workers=min(8, len(temp_grid_images))) as ex:
-        futures = {ex.submit(ask_gemini_for_mapping_for_page_v2, model, img_path, grading_json, question_scheme, ids_block, rows, cols): idx
-                   for idx, img_path in enumerate(temp_grid_images)}
         for fut in as_completed(futures):
             idx = futures[fut]
             try:
                 mapping_result = fut.result()
                 mappings_per_page[idx] = mapping_result
-                print(f"[IMPRINT] Mapping received for page {idx+1}: {mapping_result}")
             except Exception as e:
                 mappings_per_page[idx] = []
-                print(f"[IMPRINT] Mapping failed for page {idx+1}: {e}")
-    # Now annotate pages with marks text using the mapping results
-    print("[IMPRINT] Annotating pages with marks...")
     for p_index, page_img in enumerate(pages):
         img_cv = np.array(page_img.convert("RGB"))
         img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
         h, w, _ = img_cv.shape
-        cell_w_px, cell_h_px = w / cols, h / rows
         mapping = mappings_per_page.get(p_index, [])
         occupied = set()
         for item in mapping:
             qid = item.get("question")
             cell_number = item.get("cell_number")
-            if qid is None or cell_number is None:
-                continue
-            marks_list = next((g["marks_awarded"] for g in grading_json.get("grading", []) if g["question"] == qid), [])
             marks_text = ",".join(marks_list) if marks_list else "?"
-            # compute canonical cell row/col
-            row = (cell_number - 1) // cols
-            col = (cell_number - 1) % cols
-            # candidate placements (prefer right, then same, then left)
             candidates = []
-            if col + 1 < cols: candidates.append((row, col + 1))
-            candidates.append((row, col))
-            if col - 1 >= 0: candidates.append((row, col - 1))
-            chosen = next(((r, c) for r, c in candidates if (r * cols + c + 1) not in occupied), (row, col))
-            occupied.add(chosen[0] * cols + chosen[1] + 1)
-            x_c = int((chosen[1] + 0.5) * cell_w_px)
-            y_c = int((chosen[0] + 0.5) * cell_h_px)
-            font_scale = max(0.6, min(1.6, cell_h_px / 60))
-            thickness = max(1, int(font_scale * 2))
-            cv2.putText(img_cv, marks_text, (x_c, y_c), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 255), thickness)
-            print(f"[IMPRINT] Placed marks '{marks_text}' for '{qid}' at page {p_index+1} cell {cell_number} -> pixel ({x_c},{y_c})")
         annotated_path = f"annotated_page_{p_index+1}.png"
         cv2.imwrite(annotated_path, img_cv)
         annotated_page_paths.append(annotated_path)
-        print(f"[IMPRINT] Annotated page saved: {annotated_path}")
-    # Recreate PDF using img2pdf with the original page dimensions (points).
-    # Since we rendered at 72 DPI, pixels == points and using layout_fun with (width_pt, height_pt) will preserve size.
-    print("[IMPRINT] Recreating PDF from annotated pages with original page sizes...")
-    layout_sizes = []
-    for p_info in pages_info:
-        layout_sizes.append((p_info["width_pt"], p_info["height_pt"]))
-    # If every page has same mediabox, img2pdf.get_layout_fun can be given that size; otherwise fallback to a per-image function.
-    try:
-        # We will use the mediabox of the first page for layout function if single size, else create per-image layout
-        unique_sizes = { (p["width_pt"], p["height_pt"]) for p in pages_info }
-        if len(unique_sizes) == 1:
-            w_pt, h_pt = pages_info[0]["width_pt"], pages_info[0]["height_pt"]
-            with open(output_pdf, "wb") as f:
-                f.write(img2pdf.convert(annotated_page_paths, layout_fun=img2pdf.get_layout_fun((w_pt, h_pt))))
-        else:
-            # per-page layout: build a custom layout function for each image based on index
-            # img2pdf allows layout_fun that takes (img_width_px, img_height_px, px_density) but easier approach:
-            # create PDF by converting each annotated PNG individually to single-page PDF with proper size and then merge
-            per_page_pdfs = []
-            for idx, ann_path in enumerate(annotated_page_paths):
-                w_pt = pages_info[idx]["width_pt"]
-                h_pt = pages_info[idx]["height_pt"]
-                single_pdf = f"single_{idx+1}.pdf"
-                with open(single_pdf, "wb") as f:
-                    f.write(img2pdf.convert(ann_path, layout_fun=img2pdf.get_layout_fun((w_pt, h_pt))))
-                per_page_pdfs.append(single_pdf)
-            # merge them
-            merge_pdfs(per_page_pdfs, output_pdf)
-            # cleanup single_page temp pdfs
-            for p in per_page_pdfs:
-                try:
-                    os.remove(p)
-                except:
-                    pass
-    except Exception as e:
-        print(f"[IMPRINT] Failed to create imprinted PDF with original sizes: {e}")
-        # fallback: create naive pdf (may be resized)
-        with open(output_pdf, "wb") as f:
-            f.write(img2pdf.convert(annotated_page_paths))
-    print(f"[IMPRINT] Imprinted PDF created: {output_pdf}")
-    # Optionally compress result
-    compressed = compress_pdf(output_pdf)
-    if compressed != output_pdf:
-        print(f"[IMPRINT] Compressed imprinted PDF saved as: {compressed}")
-        return compressed
-    return output_pdf
 # ---------------- PIPELINE ----------------
 def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
@@ -415,7 +313,7 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
     ms_path = compress_pdf(ms_path)
     ans_path = compress_pdf(ans_path)
-    merged_qpms_path = os.path.splitext(qp_path)[0] + "_merged_qp_ms.pdf"
     merge_pdfs([qp_path, ms_path], merged_qpms_path)
     merged_uploaded = genai.upload_file(path=merged_qpms_path, display_name="QP+MS (merged)")
@@ -424,42 +322,29 @@ def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
     model = create_model()
     qpms_prompt = PROMPTS["QP_MS_TRANSCRIPTION"]["content"]
-    print("[STEP] Requesting QP+MS transcription from Gemini...")
     qpms_text = gemini_generate_content(model, qpms_prompt, file_upload_obj=merged_uploaded)
-    print("[STEP] QP+MS transcription received.")
     extracted_ids = extract_question_ids_from_qpms(qpms_text)
-    print(f"[STEP] Extracted question IDs (in order, duplicates preserved): {extracted_ids}")
     as_prompt = build_as_prompt_with_expected_ids(extracted_ids)
-    print("[STEP] Requesting AS transcription from Gemini (using expected IDs block)...")
     as_text = gemini_generate_content(model, as_prompt, file_upload_obj=ans_uploaded)
-    print("[STEP] AS transcription received.")
     grading_input = (
-        "=== QP+MS TRANSCRIPT BEGIN ===\n" + qpms_text +
-        "\n=== QP+MS TRANSCRIPT END ===\n\n" +
-        "=== ANSWER SHEET TRANSCRIPT BEGIN ===\n" + as_text +
         "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
     )
     grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
-    print("[STEP] Sending transcripts to Gemini for grading...")
-    grading_text = gemini_generate_content(model, grading_prompt_system + "\n\nPlease grade the following transcripts:\n" + grading_input)
-    print("[STEP] Grading received from Gemini.")
-    grading_pdf_path = save_as_pdf(grading_text, os.path.splitext(os.path.basename(ans_path))[0] + "_graded.pdf")
     grading_json = extract_marks_from_grading_exact(grading_text)
-    print(f"[STEP] Extracted grading JSON (duplicates preserved): {json.dumps(grading_json, indent=2)}")
     imprinted_pdf_path = None
     if imprint:
         question_scheme = qpms_text
-        imprinted_pdf_path = os.path.splitext(os.path.basename(ans_path))[0] + "_imprinted.pdf"
-        # Build ids_block to pass to ask_gemini_for_mapping_for_page_v2
-        ids_block = "{\n" + "\n".join(extracted_ids) + "\n}"
-        print("[IMPRINT] Starting imprinting with ids_block and question scheme...")
-        imprinted_pdf_path = imprint_marks_using_mapping_v2(ans_path, grading_json, imprinted_pdf_path, question_scheme, model, ids_block)
-        print(f"[IMPRINT] Completed imprinting. File: {imprinted_pdf_path}")
     return qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path

 # ---------------- PARSERS ----------------
 def extract_question_ids_from_qpms(text):
     ids = []
     for m in re.finditer(r"(?im)^\s*Question\s*:\s*([0-9]+(?:[a-zA-Z0-9\.\(\)]+)*)\b", text):
+        ids.append(m.group(1).strip())
     if not ids:
+        for m in re.finditer(r"(?m)^\s*([0-9]+(?:[a-zA-Z0-9\.\(\)]+)*)\s*[\.\):\-]\s", text):
+            ids.append(m.group(1).strip())
+    return ids if ids else ["NA"]
 def build_as_prompt_with_expected_ids(expected_ids):
     ids_block = "{\n" + "\n".join(expected_ids) + "\n}" if expected_ids else "{NA}"
     return prompt
 def extract_marks_from_grading_exact(grading_text):
     grading_json = {"grading": []}
     question_blocks = re.split(r"##\s*Question\s+", grading_text)
     for block in question_blocks[1:]:
         first_line = block.strip().splitlines()[0].strip() if block.strip().splitlines() else ""
         q_id_match = re.match(r"([0-9]+(?:[a-zA-Z]|\([^\)]+\)|(?:\.[a-zA-Z0-9]+))*)", first_line)
         q_id = q_id_match.group(1).strip() if q_id_match else first_line.split()[0] if first_line else ""
         awarded = re.findall(r"\b(M\d+|A\d+|R\d+|M0|A0|R0)\b", block)
         grading_json["grading"].append({"question": q_id, "marks_awarded": awarded})
     return grading_json
 # ---------------- IMPRINT ----------------
+def ask_gemini_for_mapping_for_page_v2(model, image_path, grading_json, question_scheme, expected_ids, rows=GRID_ROWS, cols=GRID_COLS):
+    ids_block = "{\n" + "\n".join(expected_ids) + "\n}" if expected_ids else "{NA}"
     prompt = f"""
 You are an exam marker. Identify where each question begins on this page.
 The page has {rows}x{cols} grid (cells 1..{rows*cols}).
+Authoritative question scheme:
 {question_scheme}
+Expected IDs (spot only these):
+{ids_block}
 Grading JSON:
 {json.dumps(grading_json, indent=2)}
+Instructions:
+- Return cell number where first step begins for each question.
+- Only include questions on this page.
+- Handle mislabelled steps: e.g., Q4.i above Q4 may belong to Q3.ii.
+- Avoid placing marks inside another question's answer area.
+- Prefer blank cell to the RIGHT, else LEFT.
+- Never above or below the answer.
+- Return JSON only, like:
+[{{"question":"1.a","cell_number":15}}, ...]
 """
     img = Image.open(image_path)
     response = model.generate_content([prompt, img])
     raw_text = getattr(response, "text", None)
         raw_text = response.candidates[0].content.parts[0].text
     if not raw_text:
         raw_text = str(response)
     try:
         start = raw_text.index('[')
         end = raw_text.rindex(']') + 1
         return json.loads(raw_text[start:end])
     except Exception:
+        return []
+def imprint_marks_using_mapping_v2(pdf_path, grading_json, output_pdf, question_scheme, expected_ids, model, rows=GRID_ROWS, cols=GRID_COLS):
     reader = PdfReader(pdf_path)
     annotated_page_paths = []
+    pages = convert_from_path(pdf_path)  # keep original size
     temp_grid_images = []
     for p_index, page_img in enumerate(pages):
         img = page_img.convert("RGB")
         draw = ImageDraw.Draw(img)
         except:
             font = ImageFont.load_default()
+        cell_w = img.width / cols
+        cell_h = img.height / rows
         cell_num = 1
+        for r in range(rows):
+            for c in range(cols):
                 x = int(c * cell_w + cell_w / 2)
                 y = int(r * cell_h + cell_h / 2)
                 bbox = draw.textbbox((0,0), str(cell_num), font=font)
                 draw.text((x - (bbox[2]-bbox[0])/2, y - (bbox[3]-bbox[1])/2), str(cell_num), fill="black", font=font)
+                cell_num +=1
         grid_path = f"page_{p_index+1}_grid.png"
         img.save(grid_path, "PNG")
         temp_grid_images.append(grid_path)
     mappings_per_page = {}
+    with ThreadPoolExecutor(max_workers=min(8,len(temp_grid_images))) as ex:
+        futures = {
+            ex.submit(
+                ask_gemini_for_mapping_for_page_v2, model, img_path, grading_json, question_scheme, expected_ids, rows, cols
+            ): idx for idx,img_path in enumerate(temp_grid_images)
+        }
         for fut in as_completed(futures):
             idx = futures[fut]
             try:
                 mapping_result = fut.result()
                 mappings_per_page[idx] = mapping_result
+                print(f"[IMPRINT] Mapping received for page {idx+1}: {repr(mapping_result)}")
             except Exception as e:
                 mappings_per_page[idx] = []
+                print(f"[IMPRINT] Mapping failed for page {idx+1}: {repr(e)}")
     for p_index, page_img in enumerate(pages):
         img_cv = np.array(page_img.convert("RGB"))
         img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
         h, w, _ = img_cv.shape
+        cell_w_px, cell_h_px = w/cols, h/rows
         mapping = mappings_per_page.get(p_index, [])
         occupied = set()
         for item in mapping:
             qid = item.get("question")
             cell_number = item.get("cell_number")
+            if qid is None or cell_number is None: continue
+            marks_list = next((g["marks_awarded"] for g in grading_json.get("grading", []) if g["question"]==qid), [])
             marks_text = ",".join(marks_list) if marks_list else "?"
+            row = (cell_number-1)//cols
+            col = (cell_number-1)%cols
             candidates = []
+            if col+1<cols: candidates.append((row,col+1))
+            candidates.append((row,col))
+            if col-1>=0: candidates.append((row,col-1))
+            chosen = next(((r,c) for r,c in candidates if (r*cols+c+1) not in occupied), (row,col))
+            occupied.add(chosen[0]*cols+chosen[1]+1)
+            x_c = int((chosen[1]+0.5)*cell_w_px)
+            y_c = int((chosen[0]+0.5)*cell_h_px)
+            font_scale = max(0.6,min(1.6,cell_h_px/60))
+            thickness = max(1,int(font_scale*2))
+            cv2.putText(img_cv, marks_text, (x_c,y_c), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0,0,255), thickness)
         annotated_path = f"annotated_page_{p_index+1}.png"
         cv2.imwrite(annotated_path, img_cv)
         annotated_page_paths.append(annotated_path)
+    with open(output_pdf,"wb") as f:
+        f.write(img2pdf.convert(annotated_page_paths))
+    return compress_pdf(output_pdf)
 # ---------------- PIPELINE ----------------
 def align_and_grade_pipeline(qp_path, ms_path, ans_path, imprint=False):
     ms_path = compress_pdf(ms_path)
     ans_path = compress_pdf(ans_path)
+    merged_qpms_path = os.path.splitext(qp_path)[0]+"_merged_qp_ms.pdf"
     merge_pdfs([qp_path, ms_path], merged_qpms_path)
     merged_uploaded = genai.upload_file(path=merged_qpms_path, display_name="QP+MS (merged)")
     model = create_model()
     qpms_prompt = PROMPTS["QP_MS_TRANSCRIPTION"]["content"]
     qpms_text = gemini_generate_content(model, qpms_prompt, file_upload_obj=merged_uploaded)
     extracted_ids = extract_question_ids_from_qpms(qpms_text)
     as_prompt = build_as_prompt_with_expected_ids(extracted_ids)
     as_text = gemini_generate_content(model, as_prompt, file_upload_obj=ans_uploaded)
     grading_input = (
+        "=== QP+MS TRANSCRIPT BEGIN ===\n"+qpms_text+
+        "\n=== QP+MS TRANSCRIPT END ===\n\n"+
+        "=== ANSWER SHEET TRANSCRIPT BEGIN ===\n"+as_text+
         "\n=== ANSWER SHEET TRANSCRIPT END ===\n"
     )
     grading_prompt_system = PROMPTS["GRADING_PROMPT"]["content"]
+    grading_text = gemini_generate_content(model, grading_prompt_system+"\n\nPlease grade the following transcripts:\n"+grading_input)
+    grading_pdf_path = save_as_pdf(grading_text, os.path.splitext(os.path.basename(ans_path))[0]+"_graded.pdf")
     grading_json = extract_marks_from_grading_exact(grading_text)
     imprinted_pdf_path = None
     if imprint:
         question_scheme = qpms_text
+        imprinted_pdf_path = os.path.splitext(os.path.basename(ans_path))[0]+"_imprinted.pdf"
+        imprinted_pdf_path = imprint_marks_using_mapping_v2(ans_path, grading_json, imprinted_pdf_path, question_scheme, extracted_ids, model)
     return qpms_text, as_text, grading_text, grading_pdf_path, imprinted_pdf_path