neurolearn

Sleeping

App Files Files Community

atz21 committed on Sep 15, 2025

Commit

954c18d

verified ·

1 Parent(s): 3d4baa8

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -15

app.py CHANGED Viewed

@@ -33,15 +33,6 @@ Each object must have exactly these keys:
 - Preserve math inside fenced code ```...```.
 - If diagram/graph missing, write "[Graph omitted]".
 - Do not add extra commentary outside JSON.
-## Example
-[
-  {
-    "question_number": "1",
-    "qp": "Expand (1+x)^3",
-    "ms": "M1 for binomial expansion, A1 for coefficients, A1 for final form",
-    "as": "```x^3 + 3x^2 + 3x + 1```"
-  }
-]
 """
     },
     "GRADING_PROMPT": {
@@ -70,8 +61,6 @@ Each object must have exactly these keys:
    - Each row = one markable step/point, in order.
    - For blanks: “(no answer)” with marks lost.
 2. After the table, write ONLY one line for total marks in the form:  Final Marks: X / Y
-⚠️ Do NOT include summaries, error classifications, or extra commentary.
-Only table + final marks line.
 """
     }
 }
@@ -81,6 +70,7 @@ genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 # ---------- HELPER: Save to PDF ----------
 def save_as_pdf(text, filename="output.pdf"):
     pdf = MarkdownPdf()
     pdf.add_section(Section(text, toc=False))
     pdf.save(filename)
@@ -88,14 +78,17 @@ def save_as_pdf(text, filename="output.pdf"):
 # ---------- HELPER: Compress PDF ----------
 def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
     if output_path is None:
         base, ext = os.path.splitext(input_path)
         output_path = f"{base}_compressed{ext}"
     if os.path.getsize(input_path) <= max_size:
         return input_path
     try:
         gs_cmd = [
             "gs", "-sDEVICE=pdfwrite",
             "-dCompatibilityLevel=1.4",
@@ -105,17 +98,22 @@ def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
         ]
         subprocess.run(gs_cmd, check=True)
         if os.path.getsize(output_path) <= max_size:
             return output_path
         else:
             return input_path
-    except Exception:
         return input_path
 # ---------- HELPER: Create Model with Fallback ----------
 def create_model():
     try:
         return genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
     except Exception:
         return genai.GenerativeModel("gemini-2.5-flash", generation_config={"temperature": 0})
 # ---------- HELPER: Clean JSON Output ----------
@@ -131,12 +129,16 @@ def clean_json_output(raw_text: str) -> str:
 # ---------- PIPELINE: ALIGN + GRADE ----------
 def align_and_grade(qp_file, ms_file, ans_file, imprint=False):
     try:
         # Step 0: Compress
         qp_file = compress_pdf(qp_file, "qp_compressed.pdf")
         ms_file = compress_pdf(ms_file, "ms_compressed.pdf")
         ans_file = compress_pdf(ans_file, "ans_compressed.pdf")
         # Step 1: Uploads
         qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
         ms_uploaded = genai.upload_file(path=ms_file, display_name="Markscheme")
         ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
@@ -144,6 +146,7 @@ def align_and_grade(qp_file, ms_file, ans_file, imprint=False):
         model = create_model()
         # Step 2: Alignment
         resp = model.generate_content([
             PROMPTS["ALIGNMENT_PROMPT"]["content"],
             qp_uploaded,
@@ -156,10 +159,14 @@ def align_and_grade(qp_file, ms_file, ans_file, imprint=False):
         aligned_json = clean_json_output(aligned_json)
         questions = json.loads(aligned_json)
         # Step 3: Grading
         def grade_one(idx_q):
             idx, q = idx_q
             q_json = json.dumps(q, indent=2)
             response = model.generate_content([
                 PROMPTS["GRADING_PROMPT"]["content"],
@@ -175,13 +182,13 @@ def align_and_grade(qp_file, ms_file, ans_file, imprint=False):
         results.sort(key=lambda x: x[0])
         # Step 4: Build report
         grading_sections = []
         grading_json = {"grading": []}
         for _, qnum, grading_piece in results:
             section = f"## Question {qnum}\n\n{grading_piece}"
             grading_sections.append(section)
-            # Extract marks list
             marks_list = re.findall(r"(M[01]|A[0-9]|R[01])", grading_piece)
             grading_json["grading"].append({"question": qnum, "marks_awarded": marks_list})
@@ -191,22 +198,25 @@ def align_and_grade(qp_file, ms_file, ans_file, imprint=False):
         imprint_pdf_path = None
         if imprint:
             imprint_pdf_path = imprint_marks(ans_file, grading_json, model)
         return json.dumps(questions, indent=2), grading_report, grading_pdf_path, imprint_pdf_path
     except Exception as e:
         traceback.print_exc()
         return f"❌ Error: {e}", None, None, None
 # ---------- PIPELINE: IMPRINT MARKS ----------
 def imprint_marks(ans_pdf, grading_json, model, grid_rows=20, grid_cols=14):
     output_dir = "grid_pages"
     os.makedirs(output_dir, exist_ok=True)
     pages = convert_from_path(ans_pdf, dpi=200)
     page_images = []
-    # Create grid images
     for i, page in enumerate(pages):
         img_path = os.path.join(output_dir, f"page_{i+1}_grid.png")
         img = page.convert("RGB")
@@ -232,10 +242,11 @@ def imprint_marks(ans_pdf, grading_json, model, grid_rows=20, grid_cols=14):
                 cell_num += 1
         img.save(img_path, "PNG")
         page_images.append(img_path)
     annotated_pages = []
     for idx, page in enumerate(pages):
-        # Ask Gemini for mapping
         prompt = f"""
 You are an exam marker. The page is divided into a {grid_rows} x {grid_cols} grid with numbered cells.
 Return JSON: [{{"question": "1(a)", "cell_number": 15}}, ...]
@@ -246,6 +257,7 @@ Grading JSON:
         mapping_text = getattr(response, "text", "")
         match = re.search(r'\[.*\]', mapping_text, re.DOTALL)
         mapping = json.loads(match.group(0)) if match else []
         # Annotate
         img = np.array(page.convert("RGB"))
@@ -270,10 +282,12 @@ Grading JSON:
         annotated_path = os.path.join(output_dir, f"annotated_{idx+1}.png")
         cv2.imwrite(annotated_path, img)
         annotated_pages.append(annotated_path)
     output_pdf = "answer_sheet_with_marks.pdf"
     with open(output_pdf, "wb") as f:
         f.write(img2pdf.convert(annotated_pages))
     return output_pdf
 # ---------- GRADIO APP ----------

 - Preserve math inside fenced code ```...```.
 - If diagram/graph missing, write "[Graph omitted]".
 - Do not add extra commentary outside JSON.
 """
     },
     "GRADING_PROMPT": {
    - Each row = one markable step/point, in order.
    - For blanks: “(no answer)” with marks lost.
 2. After the table, write ONLY one line for total marks in the form:  Final Marks: X / Y
 """
     }
 }
 # ---------- HELPER: Save to PDF ----------
 def save_as_pdf(text, filename="output.pdf"):
+    print(f"📄 Saving grading report to PDF → {filename}")
     pdf = MarkdownPdf()
     pdf.add_section(Section(text, toc=False))
     pdf.save(filename)
 # ---------- HELPER: Compress PDF ----------
 def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
+    print(f"🗜️ Checking if compression needed for {input_path}...")
     if output_path is None:
         base, ext = os.path.splitext(input_path)
         output_path = f"{base}_compressed{ext}"
     if os.path.getsize(input_path) <= max_size:
+        print("✅ No compression needed")
         return input_path
     try:
+        print(f"⚡ Compressing {input_path} → {output_path}")
         gs_cmd = [
             "gs", "-sDEVICE=pdfwrite",
             "-dCompatibilityLevel=1.4",
         ]
         subprocess.run(gs_cmd, check=True)
         if os.path.getsize(output_path) <= max_size:
+            print("✅ Compression successful")
             return output_path
         else:
+            print("⚠️ Compression didn’t shrink enough, using original")
             return input_path
+    except Exception as e:
+        print(f"❌ Compression failed: {e}")
         return input_path
 # ---------- HELPER: Create Model with Fallback ----------
 def create_model():
     try:
+        print("⚡ Using gemini-2.5-pro model")
         return genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
     except Exception:
+        print("⚡ Falling back to gemini-2.5-flash model")
         return genai.GenerativeModel("gemini-2.5-flash", generation_config={"temperature": 0})
 # ---------- HELPER: Clean JSON Output ----------
 # ---------- PIPELINE: ALIGN + GRADE ----------
 def align_and_grade(qp_file, ms_file, ans_file, imprint=False):
     try:
+        print("\n🚀 Starting alignment + grading pipeline")
         # Step 0: Compress
+        print("🔍 Step 0: Compressing PDFs...")
         qp_file = compress_pdf(qp_file, "qp_compressed.pdf")
         ms_file = compress_pdf(ms_file, "ms_compressed.pdf")
         ans_file = compress_pdf(ans_file, "ans_compressed.pdf")
         # Step 1: Uploads
+        print("📤 Step 1: Uploading PDFs to Gemini...")
         qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
         ms_uploaded = genai.upload_file(path=ms_file, display_name="Markscheme")
         ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
         model = create_model()
         # Step 2: Alignment
+        print("🧩 Step 2: Aligning QP, MS, and AS...")
         resp = model.generate_content([
             PROMPTS["ALIGNMENT_PROMPT"]["content"],
             qp_uploaded,
         aligned_json = clean_json_output(aligned_json)
         questions = json.loads(aligned_json)
+        print(f"✅ Parsed JSON with {len(questions)} questions")
         # Step 3: Grading
+        print("📝 Step 3: Grading each question...")
         def grade_one(idx_q):
             idx, q = idx_q
+            print(f"   ➡️ Grading Question {q['question_number']}")
             q_json = json.dumps(q, indent=2)
             response = model.generate_content([
                 PROMPTS["GRADING_PROMPT"]["content"],
         results.sort(key=lambda x: x[0])
         # Step 4: Build report
+        print("📊 Step 4: Building grading report...")
         grading_sections = []
         grading_json = {"grading": []}
         for _, qnum, grading_piece in results:
             section = f"## Question {qnum}\n\n{grading_piece}"
             grading_sections.append(section)
             marks_list = re.findall(r"(M[01]|A[0-9]|R[01])", grading_piece)
             grading_json["grading"].append({"question": qnum, "marks_awarded": marks_list})
         imprint_pdf_path = None
         if imprint:
+            print("✍ Step 5: Imprinting marks onto answer sheet...")
             imprint_pdf_path = imprint_marks(ans_file, grading_json, model)
+        print("✅ Pipeline completed successfully")
         return json.dumps(questions, indent=2), grading_report, grading_pdf_path, imprint_pdf_path
     except Exception as e:
+        print("❌ Fatal error in pipeline")
         traceback.print_exc()
         return f"❌ Error: {e}", None, None, None
 # ---------- PIPELINE: IMPRINT MARKS ----------
 def imprint_marks(ans_pdf, grading_json, model, grid_rows=20, grid_cols=14):
+    print("📄 Converting answer sheet to images with grid...")
     output_dir = "grid_pages"
     os.makedirs(output_dir, exist_ok=True)
     pages = convert_from_path(ans_pdf, dpi=200)
     page_images = []
     for i, page in enumerate(pages):
         img_path = os.path.join(output_dir, f"page_{i+1}_grid.png")
         img = page.convert("RGB")
                 cell_num += 1
         img.save(img_path, "PNG")
         page_images.append(img_path)
+    print("✅ Grid images prepared")
     annotated_pages = []
     for idx, page in enumerate(pages):
+        print(f"🔎 Asking Gemini for mapping on page {idx+1}...")
         prompt = f"""
 You are an exam marker. The page is divided into a {grid_rows} x {grid_cols} grid with numbered cells.
 Return JSON: [{{"question": "1(a)", "cell_number": 15}}, ...]
         mapping_text = getattr(response, "text", "")
         match = re.search(r'\[.*\]', mapping_text, re.DOTALL)
         mapping = json.loads(match.group(0)) if match else []
+        print(f"   ↪ Gemini returned {len(mapping)} mappings")
         # Annotate
         img = np.array(page.convert("RGB"))
         annotated_path = os.path.join(output_dir, f"annotated_{idx+1}.png")
         cv2.imwrite(annotated_path, img)
         annotated_pages.append(annotated_path)
+        print(f"🖊 Marks imprinted for page {idx+1}")
     output_pdf = "answer_sheet_with_marks.pdf"
     with open(output_pdf, "wb") as f:
         f.write(img2pdf.convert(annotated_pages))
+    print(f"✅ Final imprinted PDF saved: {output_pdf}")
     return output_pdf
 # ---------- GRADIO APP ----------