image returned pdf
Browse files
app.py
CHANGED
|
@@ -10,15 +10,11 @@ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
from PIL import Image, ImageOps, ImageFilter
|
| 12 |
import pytesseract
|
| 13 |
-
<<<<<<< HEAD
|
| 14 |
import os
|
| 15 |
|
| 16 |
# Serve static files from outputs directory
|
| 17 |
from fastapi.staticfiles import StaticFiles
|
| 18 |
from fastapi.responses import FileResponse
|
| 19 |
-
=======
|
| 20 |
-
|
| 21 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 22 |
from dotenv import load_dotenv
|
| 23 |
load_dotenv()
|
| 24 |
|
|
@@ -34,7 +30,6 @@ except Exception:
|
|
| 34 |
PdfReader = None
|
| 35 |
|
| 36 |
try:
|
| 37 |
-
<<<<<<< HEAD
|
| 38 |
from reportlab.pdfgen import canvas
|
| 39 |
from reportlab.lib.pagesizes import letter
|
| 40 |
from reportlab.lib import colors
|
|
@@ -45,8 +40,6 @@ except Exception as e:
|
|
| 45 |
print(f"[WARN] reportlab import failed: {e}")
|
| 46 |
|
| 47 |
try:
|
| 48 |
-
=======
|
| 49 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 50 |
from pdf2image import convert_from_bytes # requires poppler
|
| 51 |
except Exception:
|
| 52 |
convert_from_bytes = None
|
|
@@ -62,7 +55,6 @@ except Exception as e:
|
|
| 62 |
genai = None
|
| 63 |
print(f"[WARN] google-genai import failed: {e}")
|
| 64 |
|
| 65 |
-
<<<<<<< HEAD
|
| 66 |
# ✅ Google Cloud Vision SDK (for better handwritten OCR)
|
| 67 |
try:
|
| 68 |
from google.cloud import vision
|
|
@@ -119,13 +111,6 @@ def debug_env():
|
|
| 119 |
"num_keys": len(GOOGLE_API_KEYS),
|
| 120 |
"has_openai_key": bool(os.getenv("OPENAI_API_KEY")),
|
| 121 |
}
|
| 122 |
-
=======
|
| 123 |
-
|
| 124 |
-
# =========================================================
|
| 125 |
-
# ✅ FASTAPI APP INSTANCE
|
| 126 |
-
# =========================================================
|
| 127 |
-
app = FastAPI()
|
| 128 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 129 |
app.add_middleware(
|
| 130 |
CORSMiddleware,
|
| 131 |
allow_origins=["*"],
|
|
@@ -134,33 +119,20 @@ app.add_middleware(
|
|
| 134 |
allow_headers=["*"],
|
| 135 |
)
|
| 136 |
|
| 137 |
-
<<<<<<< HEAD
|
| 138 |
|
| 139 |
|
| 140 |
-
=======
|
| 141 |
-
# =========================================================
|
| 142 |
-
# ✅ TESSERACT PATH
|
| 143 |
-
# =========================================================
|
| 144 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 145 |
# Tell pytesseract where the tesseract binary lives.
# An optional TESSERACT_CMD environment variable takes precedence so that
# non-standard installs do not require a code change; when it is unset or
# blank the original per-platform defaults are used unchanged.
if os.name == "nt":
    _default_tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
else:
    _default_tesseract_cmd = "/usr/bin/tesseract"

pytesseract.pytesseract.tesseract_cmd = (
    (os.getenv("TESSERACT_CMD") or "").strip() or _default_tesseract_cmd
)
|
| 149 |
|
| 150 |
|
| 151 |
-
<<<<<<< HEAD
|
| 152 |
|
| 153 |
-
=======
|
| 154 |
-
# =========================================================
|
| 155 |
-
# ✅ ERP CONFIG
|
| 156 |
-
# =========================================================
|
| 157 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 158 |
ERP_BASE = os.getenv("ERP_BASE", "https://erp.triz.co.in/lms_data")
|
| 159 |
STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student/")
|
| 160 |
ERP_TOKEN = os.getenv("ERP_TOKEN", "")
|
| 161 |
|
| 162 |
|
| 163 |
-
<<<<<<< HEAD
|
| 164 |
def get_public_base_url() -> str:
|
| 165 |
"""
|
| 166 |
Returns the public base URL of this server.
|
|
@@ -279,58 +251,27 @@ def _init_gemini_client(key_index: int = 0) -> None:
|
|
| 279 |
return
|
| 280 |
|
| 281 |
api_key = GOOGLE_API_KEYS[key_index]
|
| 282 |
-
=======
|
| 283 |
-
# =========================================================
|
| 284 |
-
# ✅ GEMINI CONFIG
|
| 285 |
-
# =========================================================
|
| 286 |
-
GOOGLE_API_KEY = (os.getenv("GOOGLE_API_KEY") or "").strip()
|
| 287 |
-
GEMINI_MODEL = (os.getenv("GEMINI_MODEL", "models/gemini-2.0-flash") or "").strip()
|
| 288 |
-
if GEMINI_MODEL and not GEMINI_MODEL.startswith("models/"):
|
| 289 |
-
GEMINI_MODEL = "models/" + GEMINI_MODEL
|
| 290 |
-
|
| 291 |
-
gemini_client = None
|
| 292 |
-
GEMINI_LAST_ERROR = ""
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
def _init_gemini_client() -> None:
|
| 296 |
-
global gemini_client, GEMINI_LAST_ERROR
|
| 297 |
-
|
| 298 |
-
if gemini_client is not None:
|
| 299 |
-
return
|
| 300 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 301 |
|
| 302 |
if not genai:
|
| 303 |
GEMINI_LAST_ERROR = "google-genai not installed / import failed"
|
| 304 |
gemini_client = None
|
| 305 |
return
|
| 306 |
|
| 307 |
-
<<<<<<< HEAD
|
| 308 |
if not api_key:
|
| 309 |
GEMINI_LAST_ERROR = f"GOOGLE_API_KEY_{key_index + 1} not set"
|
| 310 |
-
=======
|
| 311 |
-
if not GOOGLE_API_KEY:
|
| 312 |
-
GEMINI_LAST_ERROR = "GOOGLE_API_KEY not set"
|
| 313 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 314 |
gemini_client = None
|
| 315 |
return
|
| 316 |
|
| 317 |
try:
|
| 318 |
-
<<<<<<< HEAD
|
| 319 |
gemini_client = genai.Client(api_key=api_key)
|
| 320 |
GEMINI_LAST_ERROR = ""
|
| 321 |
print(f"[INFO] Gemini client initialized with key #{key_index + 1}")
|
| 322 |
-
=======
|
| 323 |
-
gemini_client = genai.Client(api_key=GOOGLE_API_KEY)
|
| 324 |
-
GEMINI_LAST_ERROR = ""
|
| 325 |
-
print("[INFO] Gemini client initialized")
|
| 326 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 327 |
except Exception as e:
|
| 328 |
gemini_client = None
|
| 329 |
GEMINI_LAST_ERROR = str(e)
|
| 330 |
print(f"[WARN] Gemini init failed: {GEMINI_LAST_ERROR}")
|
| 331 |
|
| 332 |
|
| 333 |
-
<<<<<<< HEAD
|
| 334 |
def _is_rate_limit_error(error_msg: str) -> bool:
|
| 335 |
"""Check if the error is a rate limit error (429) or service unavailable (503)."""
|
| 336 |
if not error_msg:
|
|
@@ -373,9 +314,6 @@ def _rotate_to_next_key() -> bool:
|
|
| 373 |
|
| 374 |
|
| 375 |
_init_gemini_client(0)
|
| 376 |
-
=======
|
| 377 |
-
_init_gemini_client()
|
| 378 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 379 |
|
| 380 |
|
| 381 |
def parse_gemini_error(error_msg: str) -> dict:
|
|
@@ -391,7 +329,6 @@ def parse_gemini_error(error_msg: str) -> dict:
|
|
| 391 |
return {"ok": False, "error_type": "GEMINI_ERROR", "message": msg}
|
| 392 |
|
| 393 |
|
| 394 |
-
<<<<<<< HEAD
|
| 395 |
|
| 396 |
def extract_qid_from_prompt(prompt: str, erp_row: dict = None) -> str:
|
| 397 |
"""
|
|
@@ -438,32 +375,22 @@ def extract_qid_from_prompt(prompt: str, erp_row: dict = None) -> str:
|
|
| 438 |
return "Q1"
|
| 439 |
|
| 440 |
|
| 441 |
-
=======
|
| 442 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 443 |
def generate_gemini_response(
|
| 444 |
prompt: str,
|
| 445 |
system_prompt: str = "",
|
| 446 |
max_tokens: int = 650,
|
| 447 |
temperature: float = 0.3,
|
| 448 |
) -> str:
|
| 449 |
-
<<<<<<< HEAD
|
| 450 |
global GEMINI_LAST_ERROR, gemini_client, rate_limited_keys
|
| 451 |
-
=======
|
| 452 |
-
global GEMINI_LAST_ERROR
|
| 453 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 454 |
|
| 455 |
if gemini_client is None:
|
| 456 |
if not GEMINI_LAST_ERROR:
|
| 457 |
GEMINI_LAST_ERROR = "Gemini client not initialized"
|
| 458 |
-
<<<<<<< HEAD
|
| 459 |
# Try to reinitialize if we have keys available
|
| 460 |
if GOOGLE_API_KEYS and current_key_index not in rate_limited_keys:
|
| 461 |
_init_gemini_client(current_key_index)
|
| 462 |
if gemini_client is None:
|
| 463 |
return ""
|
| 464 |
-
=======
|
| 465 |
-
return ""
|
| 466 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 467 |
|
| 468 |
try:
|
| 469 |
contents = []
|
|
@@ -481,7 +408,6 @@ def generate_gemini_response(
|
|
| 481 |
GEMINI_LAST_ERROR = ""
|
| 482 |
return text
|
| 483 |
except Exception as e:
|
| 484 |
-
<<<<<<< HEAD
|
| 485 |
error_msg = str(e)
|
| 486 |
print(f"[ERROR] Gemini call failed: {error_msg}")
|
| 487 |
|
|
@@ -493,10 +419,6 @@ def generate_gemini_response(
|
|
| 493 |
return generate_gemini_response(prompt, system_prompt, max_tokens, temperature)
|
| 494 |
|
| 495 |
GEMINI_LAST_ERROR = error_msg
|
| 496 |
-
=======
|
| 497 |
-
GEMINI_LAST_ERROR = str(e)
|
| 498 |
-
print(f"[ERROR] Gemini call failed: {GEMINI_LAST_ERROR}")
|
| 499 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 500 |
return ""
|
| 501 |
|
| 502 |
import time
|
|
@@ -530,13 +452,7 @@ def cheap_overlap_score(student_text: str, prompt: str) -> int:
|
|
| 530 |
return int(round(min(0.6, overlap) * 100)) # cap at 60
|
| 531 |
|
| 532 |
|
| 533 |
-
<<<<<<< HEAD
|
| 534 |
|
| 535 |
-
=======
|
| 536 |
-
# =========================================================
|
| 537 |
-
# ✅ SMALL UTILS
|
| 538 |
-
# =========================================================
|
| 539 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 540 |
def _norm(s: str) -> str:
|
| 541 |
return re.sub(r"\s+", " ", (s or "").strip().lower())
|
| 542 |
|
|
@@ -571,7 +487,6 @@ def level_policy(student_level: str) -> dict:
|
|
| 571 |
return {"w_sim": 0.6, "w_cov": 0.4, "verified": 75, "partial": 55, "kp_thr": 0.20}
|
| 572 |
|
| 573 |
|
| 574 |
-
<<<<<<< HEAD
|
| 575 |
def mcq_partial_credit(student_level: str) -> dict:
|
| 576 |
"""
|
| 577 |
Returns partial credit percentage for MCQ questions based on student level.
|
|
@@ -592,8 +507,6 @@ def mcq_partial_credit(student_level: str) -> dict:
|
|
| 592 |
return {"credit_per_question": 75, "passing_threshold": 75}
|
| 593 |
|
| 594 |
|
| 595 |
-
=======
|
| 596 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 597 |
def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: float) -> Tuple[List[str], List[str], float]:
|
| 598 |
covered, missing = [], []
|
| 599 |
for kp in key_points:
|
|
@@ -611,15 +524,8 @@ def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: fl
|
|
| 611 |
return covered, missing, coverage
|
| 612 |
|
| 613 |
|
| 614 |
-
<<<<<<< HEAD
|
| 615 |
|
| 616 |
def infer_question_type_from_prompt(prompt: str, student_text: str = "") -> str:
|
| 617 |
-
=======
|
| 618 |
-
# =========================================================
|
| 619 |
-
# ✅ QUESTION TYPE INFERENCE + MCQ PARSING
|
| 620 |
-
# =========================================================
|
| 621 |
-
def infer_question_type_from_prompt(prompt: str) -> str:
|
| 622 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 623 |
p = _norm(prompt)
|
| 624 |
|
| 625 |
# Explicit markers - check for (mcq) first since it's common in parentheses
|
|
@@ -628,7 +534,6 @@ def infer_question_type_from_prompt(prompt: str) -> str:
|
|
| 628 |
if re.search(r"\btype\s*:\s*narrative\b", p) or re.search(r"\bquestion_type\s*:\s*narrative\b", p):
|
| 629 |
return "narrative"
|
| 630 |
|
| 631 |
-
<<<<<<< HEAD
|
| 632 |
# Heuristic: options A/B/C/D exist in prompt -> likely MCQ
|
| 633 |
if re.search(r"\b(a|b|c|d)\s*[\)\.]\s+", p) or "option a" in p or "option b" in p:
|
| 634 |
return "mcq"
|
|
@@ -646,11 +551,6 @@ def infer_question_type_from_prompt(prompt: str) -> str:
|
|
| 646 |
# If answer starts with A. or B. etc.
|
| 647 |
if re.search(r"^[a-d]\.\s+", s.strip()):
|
| 648 |
return "mcq"
|
| 649 |
-
=======
|
| 650 |
-
# Heuristic: options A/B/C/D exist -> likely MCQ
|
| 651 |
-
if re.search(r"\b(a|b|c|d)\s*[\)\.]\s+", p) or "option a" in p or "option b" in p:
|
| 652 |
-
return "mcq"
|
| 653 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 654 |
|
| 655 |
return "narrative"
|
| 656 |
|
|
@@ -715,7 +615,6 @@ def parse_questions_from_prompt(prompt: str) -> List[Dict[str, Any]]:
|
|
| 715 |
|
| 716 |
# Check for correct answer (for MCQ)
|
| 717 |
if current_type == 'mcq':
|
| 718 |
-
<<<<<<< HEAD
|
| 719 |
# First check: is this line "Correct Answer(s):" with nothing after it?
|
| 720 |
# If so, we need to look for the answer on the next line
|
| 721 |
if re.search(r'^correct\s*answer\s*\(?s\)?\s*[:\.]?\s*$', line, re.IGNORECASE):
|
|
@@ -748,12 +647,6 @@ def parse_questions_from_prompt(prompt: str) -> List[Dict[str, Any]]:
|
|
| 748 |
else:
|
| 749 |
# Try to extract first letter
|
| 750 |
current_correct = correct_text[0].upper() if correct_text else None
|
| 751 |
-
=======
|
| 752 |
-
# Look for "Correct Answer(s):" or "Correct:" or "Answer:"
|
| 753 |
-
correct_match = re.search(r'(?:Correct\s*(?:Answer)?|Answer)[:.]\s*(?:[A-D]\.?\s*)?(.+)', line, re.IGNORECASE)
|
| 754 |
-
if correct_match and not current_correct:
|
| 755 |
-
current_correct = correct_match.group(1).strip()
|
| 756 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 757 |
|
| 758 |
# Don't forget the last question
|
| 759 |
if current_q is not None:
|
|
@@ -767,11 +660,7 @@ def parse_questions_from_prompt(prompt: str) -> List[Dict[str, Any]]:
|
|
| 767 |
# If no questions parsed, fall back to old behavior
|
| 768 |
if not questions:
|
| 769 |
qtype = infer_question_type_from_prompt(prompt)
|
| 770 |
-
<<<<<<< HEAD
|
| 771 |
return [{'qid': extract_qid_from_prompt(prompt), 'type': qtype, 'question': prompt, 'correct_answer': None}]
|
| 772 |
-
=======
|
| 773 |
-
return [{'qid': 'Q1', 'type': qtype, 'question': prompt, 'correct_answer': None}]
|
| 774 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 775 |
|
| 776 |
return questions
|
| 777 |
|
|
@@ -803,7 +692,6 @@ def extract_mcq_choice(text: str) -> str:
|
|
| 803 |
return ""
|
| 804 |
|
| 805 |
|
| 806 |
-
<<<<<<< HEAD
|
| 807 |
def extract_mcq_answers_with_qid(text: str) -> Dict[str, str]:
|
| 808 |
"""
|
| 809 |
Extract MCQ answers WITH question numbers from student text.
|
|
@@ -859,8 +747,6 @@ def extract_mcq_answers_with_qid(text: str) -> Dict[str, str]:
|
|
| 859 |
return results
|
| 860 |
|
| 861 |
|
| 862 |
-
=======
|
| 863 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 864 |
def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
| 865 |
"""
|
| 866 |
This is IMPORTANT:
|
|
@@ -868,7 +754,6 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 868 |
- Correct: B
|
| 869 |
- Answer: C
|
| 870 |
- correct_option: D
|
| 871 |
-
<<<<<<< HEAD
|
| 872 |
- Correct Answer(s): A. Devdatta
|
| 873 |
or JSON: {"correct_option":"B"}
|
| 874 |
|
|
@@ -877,9 +762,6 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 877 |
- "Correct Answer(s): A. Devdatta"
|
| 878 |
- "Correct: B"
|
| 879 |
- "Answer: C"
|
| 880 |
-
=======
|
| 881 |
-
or JSON: {"correct_option":"B"}
|
| 882 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 883 |
"""
|
| 884 |
p = (prompt or "").strip()
|
| 885 |
if not p:
|
|
@@ -896,7 +778,6 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 896 |
except Exception:
|
| 897 |
pass
|
| 898 |
|
| 899 |
-
<<<<<<< HEAD
|
| 900 |
# Text prompt support - new format: "Correct Answer(s): A. Devdatta" or "Correct Answer: B"
|
| 901 |
t = _norm(p)
|
| 902 |
|
|
@@ -919,10 +800,6 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 919 |
return m1c.group(1)
|
| 920 |
|
| 921 |
# Pattern 2: "Correct: A" or "Answer: B" (original pattern)
|
| 922 |
-
=======
|
| 923 |
-
# Text prompt support
|
| 924 |
-
t = _norm(p)
|
| 925 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 926 |
m = re.search(r"\b(correct|answer|ans)\s*[:\-]?\s*\(?\s*([a-d])\s*\)?\b", t)
|
| 927 |
if m:
|
| 928 |
return m.group(2)
|
|
@@ -930,13 +807,7 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 930 |
return ""
|
| 931 |
|
| 932 |
|
| 933 |
-
<<<<<<< HEAD
|
| 934 |
|
| 935 |
-
=======
|
| 936 |
-
# =========================================================
|
| 937 |
-
# ✅ ERP HELPERS
|
| 938 |
-
# =========================================================
|
| 939 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 940 |
def _erp_get(params: dict) -> list:
|
| 941 |
headers = {}
|
| 942 |
if ERP_TOKEN:
|
|
@@ -968,7 +839,6 @@ def fetch_student_level_from_erp(row: Dict[str, Any]) -> str:
|
|
| 968 |
return "Medium"
|
| 969 |
|
| 970 |
|
| 971 |
-
<<<<<<< HEAD
|
| 972 |
|
| 973 |
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 974 |
"""
|
|
@@ -1037,25 +907,6 @@ def _extract_text_google_vision(image_bytes: bytes) -> str:
|
|
| 1037 |
return ""
|
| 1038 |
|
| 1039 |
|
| 1040 |
-
=======
|
| 1041 |
-
# =========================================================
|
| 1042 |
-
# ✅ OCR + TEXT EXTRACTION
|
| 1043 |
-
# =========================================================
|
| 1044 |
-
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 1045 |
-
img = img.convert("L")
|
| 1046 |
-
img = ImageOps.autocontrast(img)
|
| 1047 |
-
|
| 1048 |
-
w, h = img.size
|
| 1049 |
-
if max(w, h) < 1600:
|
| 1050 |
-
scale = 1600 / max(w, h)
|
| 1051 |
-
img = img.resize((int(w * scale), int(h * scale)))
|
| 1052 |
-
|
| 1053 |
-
img = img.filter(ImageFilter.SHARPEN)
|
| 1054 |
-
img = img.point(lambda p: 255 if p > 170 else 0)
|
| 1055 |
-
return img
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1059 |
def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> str:
|
| 1060 |
if not image_bytes or len(image_bytes) < 50:
|
| 1061 |
raise HTTPException(status_code=400, detail=f"Invalid file: '{filename}' - empty/too small")
|
|
@@ -1072,7 +923,6 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
|
|
| 1072 |
head = image_bytes[:12]
|
| 1073 |
raise HTTPException(status_code=400, detail=f"Invalid image format: '{filename}' (header={head})")
|
| 1074 |
|
| 1075 |
-
<<<<<<< HEAD
|
| 1076 |
# First try Google Cloud Vision (better for handwriting)
|
| 1077 |
if vision_client:
|
| 1078 |
gv_text = _extract_text_google_vision(image_bytes)
|
|
@@ -1080,8 +930,6 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
|
|
| 1080 |
return _clean_extracted_text(gv_text)
|
| 1081 |
|
| 1082 |
# Fallback to Tesseract with improved preprocessing
|
| 1083 |
-
=======
|
| 1084 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1085 |
try:
|
| 1086 |
img = Image.open(io.BytesIO(image_bytes))
|
| 1087 |
except Exception as e:
|
|
@@ -1089,7 +937,6 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
|
|
| 1089 |
|
| 1090 |
img = _preprocess_for_ocr(img)
|
| 1091 |
|
| 1092 |
-
<<<<<<< HEAD
|
| 1093 |
# Try multiple OCR configurations for better handwritten recognition
|
| 1094 |
ocr_configs = [
|
| 1095 |
"--oem 3 --psm 6", # Default
|
|
@@ -1118,16 +965,6 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
|
|
| 1118 |
raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
|
| 1119 |
|
| 1120 |
text = (best_text or "").strip()
|
| 1121 |
-
=======
|
| 1122 |
-
try:
|
| 1123 |
-
text = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6")
|
| 1124 |
-
except pytesseract.TesseractNotFoundError:
|
| 1125 |
-
raise HTTPException(status_code=500, detail="Tesseract OCR not found. Install it / fix path.")
|
| 1126 |
-
except Exception as e:
|
| 1127 |
-
raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
|
| 1128 |
-
|
| 1129 |
-
text = (text or "").strip()
|
| 1130 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1131 |
text = re.sub(r"[ \t]+", " ", text)
|
| 1132 |
return text
|
| 1133 |
|
|
@@ -1179,7 +1016,6 @@ def extract_text_from_pdf(pdf_bytes: bytes, filename: str = "unknown.pdf") -> Di
|
|
| 1179 |
return {"text": extracted, "used_ocr": False, "needs_ocr": True}
|
| 1180 |
try:
|
| 1181 |
used_ocr = True
|
| 1182 |
-
<<<<<<< HEAD
|
| 1183 |
# Higher DPI for better handwritten OCR
|
| 1184 |
pages = convert_from_bytes(pdf_bytes, dpi=300)
|
| 1185 |
page_texts = []
|
|
@@ -1205,23 +1041,12 @@ def extract_text_from_pdf(pdf_bytes: bytes, filename: str = "unknown.pdf") -> Di
|
|
| 1205 |
if img:
|
| 1206 |
img = _preprocess_for_ocr(img)
|
| 1207 |
extracted = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6") or ""
|
| 1208 |
-
=======
|
| 1209 |
-
pages = convert_from_bytes(pdf_bytes, dpi=250)
|
| 1210 |
-
page_texts = []
|
| 1211 |
-
for img in pages:
|
| 1212 |
-
img = _preprocess_for_ocr(img)
|
| 1213 |
-
t = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6") or ""
|
| 1214 |
-
if t.strip():
|
| 1215 |
-
page_texts.append(t)
|
| 1216 |
-
extracted = _clean_extracted_text("\n\n".join(page_texts))
|
| 1217 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1218 |
except Exception as e:
|
| 1219 |
return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": True, "ocr_error": str(e)}
|
| 1220 |
|
| 1221 |
return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": False}
|
| 1222 |
|
| 1223 |
|
| 1224 |
-
<<<<<<< HEAD
|
| 1225 |
def get_question_positions_from_pdf(pdf_bytes: bytes) -> Dict[int, List[Dict]]:
|
| 1226 |
"""
|
| 1227 |
Detect question number positions in a PDF.
|
|
@@ -1494,8 +1319,6 @@ def create_annotated_pdf(
|
|
| 1494 |
print(f"[ERROR] Failed to create annotated PDF: {e}")
|
| 1495 |
return original_pdf_bytes
|
| 1496 |
|
| 1497 |
-
=======
|
| 1498 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1499 |
async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
|
| 1500 |
filename = getattr(file, "filename", "") or "upload"
|
| 1501 |
content_type = (getattr(file, "content_type", "") or "").lower()
|
|
@@ -1545,13 +1368,7 @@ async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
|
|
| 1545 |
|
| 1546 |
|
| 1547 |
|
| 1548 |
-
<<<<<<< HEAD
|
| 1549 |
|
| 1550 |
-
=======
|
| 1551 |
-
# =========================================================
|
| 1552 |
-
# ✅ ROUTES
|
| 1553 |
-
# =========================================================
|
| 1554 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1555 |
@app.get("/health")
def health():
    """Liveness endpoint: always returns ``{"status": "ok"}``."""
    return {"status": "ok"}
|
|
@@ -1560,7 +1377,6 @@ def health():
|
|
| 1560 |
@app.get("/health/llm")
|
| 1561 |
def health_llm():
|
| 1562 |
return {
|
| 1563 |
-
<<<<<<< HEAD
|
| 1564 |
"ok": bool(gemini_client) and bool(GOOGLE_API_KEYS),
|
| 1565 |
"gemini": {
|
| 1566 |
"sdk_import_ok": genai is not None,
|
|
@@ -1568,12 +1384,6 @@ def health_llm():
|
|
| 1568 |
"num_keys_configured": len(GOOGLE_API_KEYS),
|
| 1569 |
"current_key_index": current_key_index + 1 if GOOGLE_API_KEYS else 0,
|
| 1570 |
"rate_limited_keys": list(rate_limited_keys),
|
| 1571 |
-
=======
|
| 1572 |
-
"ok": bool(gemini_client) and bool(GOOGLE_API_KEY),
|
| 1573 |
-
"gemini": {
|
| 1574 |
-
"sdk_import_ok": genai is not None,
|
| 1575 |
-
"configured": bool(GOOGLE_API_KEY),
|
| 1576 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1577 |
"client_ready": gemini_client is not None,
|
| 1578 |
"model": GEMINI_MODEL,
|
| 1579 |
"last_error": GEMINI_LAST_ERROR if GEMINI_LAST_ERROR else None,
|
|
@@ -1581,7 +1391,6 @@ def health_llm():
|
|
| 1581 |
}
|
| 1582 |
|
| 1583 |
|
| 1584 |
-
<<<<<<< HEAD
|
| 1585 |
@app.get("/homework/annotated-url/{homework_id}/{student_id}")
|
| 1586 |
async def get_annotated_pdf_url(
|
| 1587 |
homework_id: int,
|
|
@@ -1994,13 +1803,10 @@ def build_per_question_results(
|
|
| 1994 |
return ai_evaluate_per_question(prompt, student_text, student_level)
|
| 1995 |
|
| 1996 |
|
| 1997 |
-
=======
|
| 1998 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1999 |
@app.post("/homework/validate")
|
| 2000 |
async def homework_validate(
|
| 2001 |
student_id: int = Form(...),
|
| 2002 |
homework_id: int = Form(...),
|
| 2003 |
-
<<<<<<< HEAD
|
| 2004 |
student_file: UploadFile = File(...),
|
| 2005 |
):
|
| 2006 |
# 0) Fetch ERP record -> get all fields automatically
|
|
@@ -2030,32 +1836,118 @@ async def homework_validate(
|
|
| 2030 |
if final_question_type not in ("mcq", "narrative", "mixed"):
|
| 2031 |
final_question_type = infer_question_type_from_prompt(prompt, student_text)
|
| 2032 |
|
| 2033 |
-
=======
|
| 2034 |
-
sub_institute_id: int = Form(...),
|
| 2035 |
-
syear: str = Form(...),
|
| 2036 |
-
prompt: str = Form(...),
|
| 2037 |
-
student_file: UploadFile = File(...),
|
| 2038 |
-
):
|
| 2039 |
-
# 0) Fetch ERP record -> get student_level automatically
|
| 2040 |
-
erp_row = fetch_student_record(homework_id, student_id)
|
| 2041 |
-
student_level = fetch_student_level_from_erp(erp_row)
|
| 2042 |
-
policy = level_policy(student_level)
|
| 2043 |
-
|
| 2044 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2045 |
# 1) Infer question_type from prompt automatically (NO EXTRA FIELD)
|
| 2046 |
# Try to parse mixed questions first
|
| 2047 |
parsed_questions = parse_questions_from_prompt(prompt)
|
| 2048 |
has_mcq = any(q.get('type') == 'mcq' for q in parsed_questions)
|
| 2049 |
has_narrative = any(q.get('type') == 'narrative' for q in parsed_questions)
|
| 2050 |
|
| 2051 |
-
|
| 2052 |
-
|
| 2053 |
-
is_pdf_submission
|
| 2054 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2055 |
# Initialize annotated PDF filename
|
| 2056 |
annotated_pdf_filename = None
|
| 2057 |
annotated_pdf_url = None
|
| 2058 |
-
|
| 2059 |
# Function to save annotated PDF — returns (filename, public_url)
|
| 2060 |
def save_annotated_pdf(pdf_bytes, hw_id, stud_id, results, score, stat, lvl, qtype="mcq"):
|
| 2061 |
if not pdf_bytes or len(pdf_bytes) < 100:
|
|
@@ -2066,16 +1958,22 @@ async def homework_validate(
|
|
| 2066 |
ts = int(time.time())
|
| 2067 |
filename = f"marked_{hw_id}_{stud_id}_{ts}.pdf"
|
| 2068 |
filepath = os.path.join(outputs_dir, filename)
|
| 2069 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2070 |
annotated = create_annotated_pdf(
|
| 2071 |
-
original_pdf_bytes=
|
| 2072 |
mcq_results=results,
|
| 2073 |
match_percentage=score,
|
| 2074 |
status=stat,
|
| 2075 |
student_level=lvl,
|
| 2076 |
question_type=qtype
|
| 2077 |
)
|
| 2078 |
-
|
| 2079 |
with open(filepath, "wb") as f:
|
| 2080 |
f.write(annotated)
|
| 2081 |
return filename, build_pdf_url(filename)
|
|
@@ -2086,36 +1984,17 @@ async def homework_validate(
|
|
| 2086 |
MIN_WORDS = 3 if final_question_type == "mcq" else 8
|
| 2087 |
if len(student_text.split()) < MIN_WORDS:
|
| 2088 |
# Save annotated PDF even for unreadable (with status shown)
|
| 2089 |
-
if
|
| 2090 |
# Show circle mark for unreadable
|
| 2091 |
unreadable_result = [{'qid': extract_qid_from_prompt(prompt, erp_row), 'correct': None, 'chosen': 'Unreadable', 'correct_answer': 'N/A'}]
|
| 2092 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2093 |
original_file_bytes, homework_id, student_id, unreadable_result, 0, "Unreadable", student_level
|
| 2094 |
)
|
| 2095 |
-
=======
|
| 2096 |
-
# Determine overall question type for backwards compatibility
|
| 2097 |
-
if has_mcq and has_narrative:
|
| 2098 |
-
question_type = "mixed"
|
| 2099 |
-
elif has_mcq:
|
| 2100 |
-
question_type = "mcq"
|
| 2101 |
-
elif has_narrative:
|
| 2102 |
-
question_type = "narrative"
|
| 2103 |
-
else:
|
| 2104 |
-
question_type = infer_question_type_from_prompt(prompt)
|
| 2105 |
-
|
| 2106 |
-
# 2) Extract student text
|
| 2107 |
-
student_info = await extract_text_from_upload(student_file)
|
| 2108 |
-
student_text = (student_info.get("text") or "").strip()
|
| 2109 |
-
|
| 2110 |
-
MIN_WORDS = 3 if question_type == "mcq" else 8
|
| 2111 |
-
if len(student_text.split()) < MIN_WORDS:
|
| 2112 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2113 |
return {
|
| 2114 |
"student_id": student_id,
|
| 2115 |
"homework_id": homework_id,
|
| 2116 |
"sub_institute_id": sub_institute_id,
|
| 2117 |
"syear": syear,
|
| 2118 |
-
<<<<<<< HEAD
|
| 2119 |
"question_type": final_question_type,
|
| 2120 |
"student_level": student_level,
|
| 2121 |
"status": "Unreadable",
|
|
@@ -2126,36 +2005,22 @@ async def homework_validate(
|
|
| 2126 |
"llm_used": False,
|
| 2127 |
"question_marks": make_question_marks([]),
|
| 2128 |
"annotated_pdf": annotated_pdf_filename,
|
| 2129 |
-
=======
|
| 2130 |
-
"question_type": question_type,
|
| 2131 |
-
"student_level": student_level,
|
| 2132 |
-
"status": "Unreadable",
|
| 2133 |
-
"match_percentage": 0,
|
| 2134 |
-
"ai_generated_remark": None,
|
| 2135 |
-
"rule_based_remark": "Answer text could not be read clearly. Please upload a clearer file.",
|
| 2136 |
-
"student_extracted_text": student_text,
|
| 2137 |
-
"llm_used": False,
|
| 2138 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2139 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2140 |
}
|
| 2141 |
|
| 2142 |
if student_info.get("needs_ocr") and not student_text:
|
| 2143 |
-
<<<<<<< HEAD
|
| 2144 |
# Save annotated PDF even for unreadable (with status shown)
|
| 2145 |
-
if
|
| 2146 |
# Show circle mark for scanned PDF that needs OCR
|
| 2147 |
ocr_result = [{'qid': extract_qid_from_prompt(prompt, erp_row), 'correct': None, 'chosen': 'Needs OCR', 'correct_answer': 'N/A'}]
|
| 2148 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2149 |
original_file_bytes, homework_id, student_id, ocr_result, 0, "Unreadable", student_level
|
| 2150 |
)
|
| 2151 |
-
=======
|
| 2152 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2153 |
return {
|
| 2154 |
"student_id": student_id,
|
| 2155 |
"homework_id": homework_id,
|
| 2156 |
"sub_institute_id": sub_institute_id,
|
| 2157 |
"syear": syear,
|
| 2158 |
-
<<<<<<< HEAD
|
| 2159 |
"question_type": final_question_type,
|
| 2160 |
"student_level": student_level,
|
| 2161 |
"status": "Unreadable",
|
|
@@ -2171,28 +2036,10 @@ async def homework_validate(
|
|
| 2171 |
|
| 2172 |
|
| 2173 |
if final_question_type == "mixed":
|
| 2174 |
-
=======
|
| 2175 |
-
"question_type": question_type,
|
| 2176 |
-
"student_level": student_level,
|
| 2177 |
-
"status": "Unreadable",
|
| 2178 |
-
"match_percentage": 0,
|
| 2179 |
-
"ai_generated_remark": None,
|
| 2180 |
-
"rule_based_remark": "This PDF looks scanned. OCR is required (install pdf2image + poppler) or upload a clearer file.",
|
| 2181 |
-
"student_extracted_text": student_text,
|
| 2182 |
-
"llm_used": False,
|
| 2183 |
-
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2184 |
-
}
|
| 2185 |
-
|
| 2186 |
-
# =========================================================
|
| 2187 |
-
# ✅ MIXED QUESTION TYPES CHECK (MCQ + Narrative)
|
| 2188 |
-
# =========================================================
|
| 2189 |
-
if question_type == "mixed":
|
| 2190 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2191 |
# Process each question type separately and combine results
|
| 2192 |
mcq_results = []
|
| 2193 |
narrative_results = []
|
| 2194 |
|
| 2195 |
-
<<<<<<< HEAD
|
| 2196 |
# Extract ALL MCQ answers from student text with question numbers
|
| 2197 |
student_answers_by_qid = extract_mcq_answers_with_qid(student_text)
|
| 2198 |
|
|
@@ -2209,21 +2056,11 @@ async def homework_validate(
|
|
| 2209 |
if not chosen:
|
| 2210 |
chosen = extract_mcq_choice(student_text)
|
| 2211 |
|
| 2212 |
-
=======
|
| 2213 |
-
# Extract MCQ answers from student text for each MCQ question
|
| 2214 |
-
for q in parsed_questions:
|
| 2215 |
-
if q.get('type') == 'mcq':
|
| 2216 |
-
# Try to find answer for this specific question in student's text
|
| 2217 |
-
# Use the question text to help locate the answer
|
| 2218 |
-
q_text = q.get('question', '')
|
| 2219 |
-
chosen = extract_mcq_choice(student_text)
|
| 2220 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2221 |
correct = q.get('correct_answer') or extract_correct_mcq_from_prompt(q.get('question', ''))
|
| 2222 |
|
| 2223 |
if correct and chosen:
|
| 2224 |
is_correct = (chosen.lower().strip() == correct.lower().strip())
|
| 2225 |
mcq_results.append({
|
| 2226 |
-
<<<<<<< HEAD
|
| 2227 |
'qid': qid,
|
| 2228 |
'correct': is_correct,
|
| 2229 |
'chosen': chosen,
|
|
@@ -2238,12 +2075,6 @@ async def homework_validate(
|
|
| 2238 |
'chosen': '',
|
| 2239 |
'correct_answer': correct,
|
| 2240 |
'unattempted': True
|
| 2241 |
-
=======
|
| 2242 |
-
'qid': q.get('qid'),
|
| 2243 |
-
'correct': is_correct,
|
| 2244 |
-
'chosen': chosen,
|
| 2245 |
-
'correct_answer': correct
|
| 2246 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2247 |
})
|
| 2248 |
|
| 2249 |
# For narrative questions, use AI to generate reference
|
|
@@ -2300,7 +2131,6 @@ async def homework_validate(
|
|
| 2300 |
except Exception as e:
|
| 2301 |
narrative_results = {'error': str(e)}
|
| 2302 |
|
| 2303 |
-
<<<<<<< HEAD
|
| 2304 |
# Calculate combined score with level-based partial credit for MCQ
|
| 2305 |
total_mcq = len(mcq_results)
|
| 2306 |
correct_mcq = sum(1 for r in mcq_results if r.get('correct'))
|
|
@@ -2312,12 +2142,6 @@ async def homework_validate(
|
|
| 2312 |
|
| 2313 |
# Calculate MCQ score based on level (not just binary correct/incorrect)
|
| 2314 |
mcq_score = (correct_mcq * credit_per_q) / max(1, total_mcq)
|
| 2315 |
-
=======
|
| 2316 |
-
# Calculate combined score
|
| 2317 |
-
total_mcq = len(mcq_results)
|
| 2318 |
-
correct_mcq = sum(1 for r in mcq_results if r.get('correct'))
|
| 2319 |
-
mcq_score = (correct_mcq / total_mcq * 100) if total_mcq > 0 else 0
|
| 2320 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2321 |
|
| 2322 |
narrative_score = narrative_results.get('match_percentage', 0) if narrative_results else 0
|
| 2323 |
|
|
@@ -2339,15 +2163,12 @@ async def homework_validate(
|
|
| 2339 |
else:
|
| 2340 |
status = "Needs Review"
|
| 2341 |
|
| 2342 |
-
<<<<<<< HEAD
|
| 2343 |
# Save annotated PDF
|
| 2344 |
-
if
|
| 2345 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2346 |
original_file_bytes, homework_id, student_id, mcq_results, final_score, status, student_level
|
| 2347 |
)
|
| 2348 |
|
| 2349 |
-
=======
|
| 2350 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2351 |
return {
|
| 2352 |
"student_id": student_id,
|
| 2353 |
"homework_id": homework_id,
|
|
@@ -2357,18 +2178,12 @@ async def homework_validate(
|
|
| 2357 |
"student_level": student_level,
|
| 2358 |
"status": status,
|
| 2359 |
"match_percentage": final_score,
|
| 2360 |
-
<<<<<<< HEAD
|
| 2361 |
"submission_remarks": None,
|
| 2362 |
"rule_based_remark": f"MCQ: {correct_mcq}/{total_mcq} correct. Narrative score: {narrative_score}%. (Level: {student_level}, Credit per Q: {credit_per_q}%)",
|
| 2363 |
-
=======
|
| 2364 |
-
"ai_generated_remark": None,
|
| 2365 |
-
"rule_based_remark": f"MCQ: {correct_mcq}/{total_mcq} correct. Narrative score: {narrative_score}%.",
|
| 2366 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2367 |
"llm_used": bool(narrative_results and 'error' not in narrative_results),
|
| 2368 |
"student_extracted_text": student_text,
|
| 2369 |
"mcq_results": mcq_results,
|
| 2370 |
"narrative_results": narrative_results,
|
| 2371 |
-
<<<<<<< HEAD
|
| 2372 |
"question_marks": make_question_marks(mcq_results),
|
| 2373 |
"annotated_pdf": annotated_pdf_filename,
|
| 2374 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
|
@@ -2466,7 +2281,7 @@ async def homework_validate(
|
|
| 2466 |
status = "Verified" if match_percentage >= passing_threshold else "Needs Review"
|
| 2467 |
|
| 2468 |
# Save annotated PDF
|
| 2469 |
-
if
|
| 2470 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2471 |
original_file_bytes, homework_id, student_id, mcq_results, match_percentage, status, student_level
|
| 2472 |
)
|
|
@@ -2492,7 +2307,7 @@ async def homework_validate(
|
|
| 2492 |
else:
|
| 2493 |
# No correct answers in prompt - return needs review with extracted answers
|
| 2494 |
# Save annotated PDF with circle mark
|
| 2495 |
-
if
|
| 2496 |
no_answer_result = [{'qid': extract_qid_from_prompt(prompt, erp_row), 'correct': None, 'chosen': 'No Answer Key', 'correct_answer': 'N/A'}]
|
| 2497 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2498 |
original_file_bytes, homework_id, student_id, no_answer_result, 0, "Needs Review", student_level
|
|
@@ -2520,19 +2335,11 @@ async def homework_validate(
|
|
| 2520 |
pass # Will continue to narrative handling
|
| 2521 |
elif not correct:
|
| 2522 |
# Save annotated PDF with circle mark
|
| 2523 |
-
if
|
| 2524 |
no_correct_result = [{'qid': extract_qid_from_prompt(prompt, erp_row), 'correct': None, 'chosen': 'Not Found', 'correct_answer': 'N/A'}]
|
| 2525 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2526 |
original_file_bytes, homework_id, student_id, no_correct_result, 0, "Needs Review", student_level
|
| 2527 |
)
|
| 2528 |
-
=======
|
| 2529 |
-
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2530 |
-
}
|
| 2531 |
-
correct = extract_correct_mcq_from_prompt(prompt)
|
| 2532 |
-
chosen = extract_mcq_choice(student_text)
|
| 2533 |
-
|
| 2534 |
-
if not correct:
|
| 2535 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2536 |
return {
|
| 2537 |
"student_id": student_id,
|
| 2538 |
"homework_id": homework_id,
|
|
@@ -2542,7 +2349,6 @@ async def homework_validate(
|
|
| 2542 |
"student_level": student_level,
|
| 2543 |
"status": "Needs Review",
|
| 2544 |
"match_percentage": 0,
|
| 2545 |
-
<<<<<<< HEAD
|
| 2546 |
"submission_remarks": None,
|
| 2547 |
"rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
|
| 2548 |
"student_extracted_text": student_text,
|
|
@@ -2554,22 +2360,11 @@ async def homework_validate(
|
|
| 2554 |
}
|
| 2555 |
elif not chosen:
|
| 2556 |
# Save annotated PDF with circle mark
|
| 2557 |
-
if
|
| 2558 |
no_chosen_result = [{'qid': extract_qid_from_prompt(prompt, erp_row), 'correct': None, 'chosen': 'Not Detected', 'correct_answer': correct or 'N/A'}]
|
| 2559 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2560 |
original_file_bytes, homework_id, student_id, no_chosen_result, 0, "Needs Review", student_level
|
| 2561 |
)
|
| 2562 |
-
=======
|
| 2563 |
-
"ai_generated_remark": None,
|
| 2564 |
-
"rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
|
| 2565 |
-
"student_extracted_text": student_text,
|
| 2566 |
-
"llm_used": False,
|
| 2567 |
-
"debug": {"correct": correct, "chosen": chosen},
|
| 2568 |
-
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2569 |
-
}
|
| 2570 |
-
|
| 2571 |
-
if not chosen:
|
| 2572 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2573 |
return {
|
| 2574 |
"student_id": student_id,
|
| 2575 |
"homework_id": homework_id,
|
|
@@ -2579,24 +2374,16 @@ async def homework_validate(
|
|
| 2579 |
"student_level": student_level,
|
| 2580 |
"status": "Needs Review",
|
| 2581 |
"match_percentage": 0,
|
| 2582 |
-
<<<<<<< HEAD
|
| 2583 |
"submission_remarks": None,
|
| 2584 |
"rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
|
| 2585 |
"student_extracted_text": student_text,
|
| 2586 |
"llm_used": False,
|
| 2587 |
"question_marks": make_question_marks([]),
|
| 2588 |
"annotated_pdf": annotated_pdf_filename,
|
| 2589 |
-
=======
|
| 2590 |
-
"ai_generated_remark": None,
|
| 2591 |
-
"rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
|
| 2592 |
-
"student_extracted_text": student_text,
|
| 2593 |
-
"llm_used": False,
|
| 2594 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2595 |
"debug": {"correct": correct, "chosen": chosen},
|
| 2596 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2597 |
}
|
| 2598 |
|
| 2599 |
-
<<<<<<< HEAD
|
| 2600 |
# Only process MCQ validation if not redirecting to narrative
|
| 2601 |
if not redirect_to_narrative:
|
| 2602 |
is_correct = (chosen == correct)
|
|
@@ -2615,7 +2402,7 @@ async def homework_validate(
|
|
| 2615 |
# Save annotated PDF
|
| 2616 |
_qid = extract_qid_from_prompt(prompt, erp_row)
|
| 2617 |
mcq_results_single = [{'qid': _qid, 'correct': is_correct, 'chosen': chosen, 'correct_answer': correct}]
|
| 2618 |
-
if
|
| 2619 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2620 |
original_file_bytes, homework_id, student_id, mcq_results_single, match_percentage, status, student_level
|
| 2621 |
)
|
|
@@ -2642,34 +2429,10 @@ async def homework_validate(
|
|
| 2642 |
|
| 2643 |
if gemini_client is None:
|
| 2644 |
# Save annotated PDF
|
| 2645 |
-
if
|
| 2646 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2647 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2648 |
)
|
| 2649 |
-
=======
|
| 2650 |
-
is_correct = (chosen == correct)
|
| 2651 |
-
return {
|
| 2652 |
-
"student_id": student_id,
|
| 2653 |
-
"homework_id": homework_id,
|
| 2654 |
-
"sub_institute_id": sub_institute_id,
|
| 2655 |
-
"syear": syear,
|
| 2656 |
-
"question_type": "mcq",
|
| 2657 |
-
"student_level": student_level,
|
| 2658 |
-
"status": "Verified" if is_correct else "Needs Review",
|
| 2659 |
-
"match_percentage": 100 if is_correct else 0,
|
| 2660 |
-
"ai_generated_remark": None,
|
| 2661 |
-
"rule_based_remark": "Correct." if is_correct else f"Incorrect. Expected {correct.upper()}, got {chosen.upper()}.",
|
| 2662 |
-
"student_extracted_text": student_text,
|
| 2663 |
-
"llm_used": False,
|
| 2664 |
-
"debug": {"correct": correct, "chosen": chosen},
|
| 2665 |
-
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2666 |
-
}
|
| 2667 |
-
|
| 2668 |
-
# =========================================================
|
| 2669 |
-
# ✅ NARRATIVE CHECK (Gemini generates reference)
|
| 2670 |
-
# =========================================================
|
| 2671 |
-
if gemini_client is None:
|
| 2672 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2673 |
return {
|
| 2674 |
"student_id": student_id,
|
| 2675 |
"homework_id": homework_id,
|
|
@@ -2679,20 +2442,13 @@ async def homework_validate(
|
|
| 2679 |
"student_level": student_level,
|
| 2680 |
"status": "Needs Review",
|
| 2681 |
"match_percentage": 0,
|
| 2682 |
-
<<<<<<< HEAD
|
| 2683 |
"submission_remarks": None,
|
| 2684 |
-
=======
|
| 2685 |
-
"ai_generated_remark": None,
|
| 2686 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2687 |
"rule_based_remark": "Gemini not configured. Check /health/llm.",
|
| 2688 |
"llm_used": False,
|
| 2689 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 2690 |
"student_extracted_text": student_text,
|
| 2691 |
-
<<<<<<< HEAD
|
| 2692 |
"question_marks": make_question_marks([]),
|
| 2693 |
"annotated_pdf": annotated_pdf_filename,
|
| 2694 |
-
=======
|
| 2695 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2696 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2697 |
}
|
| 2698 |
|
|
@@ -2713,14 +2469,11 @@ async def homework_validate(
|
|
| 2713 |
)
|
| 2714 |
|
| 2715 |
if not response_text:
|
| 2716 |
-
<<<<<<< HEAD
|
| 2717 |
# Save annotated PDF
|
| 2718 |
-
if
|
| 2719 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2720 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2721 |
)
|
| 2722 |
-
=======
|
| 2723 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2724 |
return {
|
| 2725 |
"student_id": student_id,
|
| 2726 |
"homework_id": homework_id,
|
|
@@ -2730,20 +2483,13 @@ async def homework_validate(
|
|
| 2730 |
"student_level": student_level,
|
| 2731 |
"status": "Needs Review",
|
| 2732 |
"match_percentage": 0,
|
| 2733 |
-
<<<<<<< HEAD
|
| 2734 |
"submission_remarks": None,
|
| 2735 |
-
=======
|
| 2736 |
-
"ai_generated_remark": None,
|
| 2737 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2738 |
"rule_based_remark": "Gemini failed. Check /health/llm.",
|
| 2739 |
"llm_used": False,
|
| 2740 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 2741 |
"student_extracted_text": student_text,
|
| 2742 |
-
<<<<<<< HEAD
|
| 2743 |
"question_marks": make_question_marks([]),
|
| 2744 |
"annotated_pdf": annotated_pdf_filename,
|
| 2745 |
-
=======
|
| 2746 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2747 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2748 |
}
|
| 2749 |
|
|
@@ -2751,14 +2497,11 @@ async def homework_validate(
|
|
| 2751 |
m = re.search(r"\{.*\}", response_text, flags=re.S)
|
| 2752 |
payload = json.loads(m.group(0) if m else response_text)
|
| 2753 |
except Exception as e:
|
| 2754 |
-
<<<<<<< HEAD
|
| 2755 |
# Save annotated PDF
|
| 2756 |
-
if
|
| 2757 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2758 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2759 |
)
|
| 2760 |
-
=======
|
| 2761 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2762 |
return {
|
| 2763 |
"student_id": student_id,
|
| 2764 |
"homework_id": homework_id,
|
|
@@ -2768,20 +2511,13 @@ async def homework_validate(
|
|
| 2768 |
"student_level": student_level,
|
| 2769 |
"status": "Needs Review",
|
| 2770 |
"match_percentage": 0,
|
| 2771 |
-
<<<<<<< HEAD
|
| 2772 |
"submission_remarks": None,
|
| 2773 |
-
=======
|
| 2774 |
-
"ai_generated_remark": None,
|
| 2775 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2776 |
"rule_based_remark": "Gemini returned non-JSON output.",
|
| 2777 |
"llm_used": False,
|
| 2778 |
"llm_error": {"ok": False, "error_type": "GEMINI_BAD_JSON", "message": str(e), "raw": response_text[:800]},
|
| 2779 |
"student_extracted_text": student_text,
|
| 2780 |
-
<<<<<<< HEAD
|
| 2781 |
"question_marks": make_question_marks([]),
|
| 2782 |
"annotated_pdf": annotated_pdf_filename,
|
| 2783 |
-
=======
|
| 2784 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2785 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2786 |
}
|
| 2787 |
|
|
@@ -2792,14 +2528,11 @@ async def homework_validate(
|
|
| 2792 |
key_points = [str(x).strip() for x in key_points if str(x).strip()]
|
| 2793 |
|
| 2794 |
if not ai_reference_answer:
|
| 2795 |
-
<<<<<<< HEAD
|
| 2796 |
# Save annotated PDF
|
| 2797 |
-
if
|
| 2798 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2799 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2800 |
)
|
| 2801 |
-
=======
|
| 2802 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2803 |
return {
|
| 2804 |
"student_id": student_id,
|
| 2805 |
"homework_id": homework_id,
|
|
@@ -2809,19 +2542,12 @@ async def homework_validate(
|
|
| 2809 |
"student_level": student_level,
|
| 2810 |
"status": "Needs Review",
|
| 2811 |
"match_percentage": 0,
|
| 2812 |
-
<<<<<<< HEAD
|
| 2813 |
"submission_remarks": None,
|
| 2814 |
"rule_based_remark": "AI returned empty reference answer.",
|
| 2815 |
"llm_used": True,
|
| 2816 |
"student_extracted_text": student_text,
|
| 2817 |
"question_marks": make_question_marks([]),
|
| 2818 |
"annotated_pdf": annotated_pdf_filename,
|
| 2819 |
-
=======
|
| 2820 |
-
"ai_generated_remark": None,
|
| 2821 |
-
"rule_based_remark": "AI returned empty reference answer.",
|
| 2822 |
-
"llm_used": True,
|
| 2823 |
-
"student_extracted_text": student_text,
|
| 2824 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2825 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2826 |
}
|
| 2827 |
|
|
@@ -2852,11 +2578,7 @@ async def homework_validate(
|
|
| 2852 |
f"{remark_prompt}"
|
| 2853 |
)
|
| 2854 |
|
| 2855 |
-
<<<<<<< HEAD
|
| 2856 |
submission_remark = generate_gemini_response(
|
| 2857 |
-
=======
|
| 2858 |
-
ai_generated_remark = generate_gemini_response(
|
| 2859 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2860 |
prompt=resp2_prompt,
|
| 2861 |
system_prompt="You are a strict, helpful teacher. Be concise and factual.",
|
| 2862 |
max_tokens=140,
|
|
@@ -2864,17 +2586,10 @@ async def homework_validate(
|
|
| 2864 |
)
|
| 2865 |
|
| 2866 |
rule_based_remark = None
|
| 2867 |
-
<<<<<<< HEAD
|
| 2868 |
remark_llm_used = bool(submission_remark)
|
| 2869 |
remark_llm_error = None if submission_remark else (GEMINI_LAST_ERROR or "Unknown LLM error")
|
| 2870 |
|
| 2871 |
if not submission_remark:
|
| 2872 |
-
=======
|
| 2873 |
-
remark_llm_used = bool(ai_generated_remark)
|
| 2874 |
-
remark_llm_error = None if ai_generated_remark else (GEMINI_LAST_ERROR or "Unknown LLM error")
|
| 2875 |
-
|
| 2876 |
-
if not ai_generated_remark:
|
| 2877 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2878 |
if status == "Verified":
|
| 2879 |
rule_based_remark = "Homework matches the expected answer well. Good coverage of the key ideas."
|
| 2880 |
elif status == "Partial":
|
|
@@ -2882,7 +2597,6 @@ async def homework_validate(
|
|
| 2882 |
else:
|
| 2883 |
rule_based_remark = "Homework does not match the expected answer enough. Please review the topic and resubmit with clearer, complete points."
|
| 2884 |
|
| 2885 |
-
<<<<<<< HEAD
|
| 2886 |
# Save annotated PDF — evaluate EACH question individually against student text
|
| 2887 |
per_question_results = build_per_question_results(
|
| 2888 |
prompt, student_text, status, match_pct,
|
|
@@ -2891,13 +2605,11 @@ async def homework_validate(
|
|
| 2891 |
policy=policy,
|
| 2892 |
student_level=student_level,
|
| 2893 |
)
|
| 2894 |
-
if
|
| 2895 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2896 |
original_file_bytes, homework_id, student_id, per_question_results, match_pct, status, student_level, "narrative"
|
| 2897 |
)
|
| 2898 |
|
| 2899 |
-
=======
|
| 2900 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2901 |
return {
|
| 2902 |
"student_id": student_id,
|
| 2903 |
"homework_id": homework_id,
|
|
@@ -2907,11 +2619,7 @@ async def homework_validate(
|
|
| 2907 |
"student_level": student_level,
|
| 2908 |
"status": status,
|
| 2909 |
"match_percentage": match_pct,
|
| 2910 |
-
<<<<<<< HEAD
|
| 2911 |
"submission_remarks": submission_remark if submission_remark else None,
|
| 2912 |
-
=======
|
| 2913 |
-
"ai_generated_remark": ai_generated_remark if ai_generated_remark else None,
|
| 2914 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2915 |
"rule_based_remark": rule_based_remark,
|
| 2916 |
"llm_used": True,
|
| 2917 |
"remark_llm_used": remark_llm_used,
|
|
@@ -2921,21 +2629,15 @@ async def homework_validate(
|
|
| 2921 |
"key_points": key_points,
|
| 2922 |
"key_points_covered": covered,
|
| 2923 |
"key_points_missing": missing,
|
| 2924 |
-
<<<<<<< HEAD
|
| 2925 |
"question_marks": make_question_marks(per_question_results),
|
| 2926 |
"annotated_pdf": annotated_pdf_filename,
|
| 2927 |
-
=======
|
| 2928 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2929 |
"debug": {
|
| 2930 |
"similarity": sim,
|
| 2931 |
"coverage": coverage,
|
| 2932 |
"policy": policy,
|
| 2933 |
-
<<<<<<< HEAD
|
| 2934 |
"per_question_results": per_question_results,
|
| 2935 |
"erp_row_fields": list(erp_row.keys()) if erp_row else [],
|
| 2936 |
"erp_student_level_raw": erp_row.get("student_level") or erp_row.get("level") or erp_row.get("difficulty") or erp_row.get("difficulty_level"),
|
| 2937 |
-
=======
|
| 2938 |
-
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2939 |
},
|
| 2940 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2941 |
}
|
|
|
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
from PIL import Image, ImageOps, ImageFilter
|
| 12 |
import pytesseract
|
|
|
|
| 13 |
import os
|
| 14 |
|
| 15 |
# Serve static files from outputs directory
|
| 16 |
from fastapi.staticfiles import StaticFiles
|
| 17 |
from fastapi.responses import FileResponse
|
|
|
|
|
|
|
|
|
|
| 18 |
from dotenv import load_dotenv
|
| 19 |
load_dotenv()
|
| 20 |
|
|
|
|
| 30 |
PdfReader = None
|
| 31 |
|
| 32 |
try:
|
|
|
|
| 33 |
from reportlab.pdfgen import canvas
|
| 34 |
from reportlab.lib.pagesizes import letter
|
| 35 |
from reportlab.lib import colors
|
|
|
|
| 40 |
print(f"[WARN] reportlab import failed: {e}")
|
| 41 |
|
| 42 |
try:
|
|
|
|
|
|
|
| 43 |
from pdf2image import convert_from_bytes # requires poppler
|
| 44 |
except Exception:
|
| 45 |
convert_from_bytes = None
|
|
|
|
| 55 |
genai = None
|
| 56 |
print(f"[WARN] google-genai import failed: {e}")
|
| 57 |
|
|
|
|
| 58 |
# ✅ Google Cloud Vision SDK (for better handwritten OCR)
|
| 59 |
try:
|
| 60 |
from google.cloud import vision
|
|
|
|
| 111 |
"num_keys": len(GOOGLE_API_KEYS),
|
| 112 |
"has_openai_key": bool(os.getenv("OPENAI_API_KEY")),
|
| 113 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
app.add_middleware(
|
| 115 |
CORSMiddleware,
|
| 116 |
allow_origins=["*"],
|
|
|
|
| 119 |
allow_headers=["*"],
|
| 120 |
)
|
| 121 |
|
|
|
|
| 122 |
|
| 123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
if os.name == "nt":
|
| 125 |
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
|
| 126 |
else:
|
| 127 |
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
|
| 128 |
|
| 129 |
|
|
|
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
ERP_BASE = os.getenv("ERP_BASE", "https://erp.triz.co.in/lms_data")
|
| 132 |
STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student/")
|
| 133 |
ERP_TOKEN = os.getenv("ERP_TOKEN", "")
|
| 134 |
|
| 135 |
|
|
|
|
| 136 |
def get_public_base_url() -> str:
|
| 137 |
"""
|
| 138 |
Returns the public base URL of this server.
|
|
|
|
| 251 |
return
|
| 252 |
|
| 253 |
api_key = GOOGLE_API_KEYS[key_index]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
|
| 255 |
if not genai:
|
| 256 |
GEMINI_LAST_ERROR = "google-genai not installed / import failed"
|
| 257 |
gemini_client = None
|
| 258 |
return
|
| 259 |
|
|
|
|
| 260 |
if not api_key:
|
| 261 |
GEMINI_LAST_ERROR = f"GOOGLE_API_KEY_{key_index + 1} not set"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
gemini_client = None
|
| 263 |
return
|
| 264 |
|
| 265 |
try:
|
|
|
|
| 266 |
gemini_client = genai.Client(api_key=api_key)
|
| 267 |
GEMINI_LAST_ERROR = ""
|
| 268 |
print(f"[INFO] Gemini client initialized with key #{key_index + 1}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
except Exception as e:
|
| 270 |
gemini_client = None
|
| 271 |
GEMINI_LAST_ERROR = str(e)
|
| 272 |
print(f"[WARN] Gemini init failed: {GEMINI_LAST_ERROR}")
|
| 273 |
|
| 274 |
|
|
|
|
| 275 |
def _is_rate_limit_error(error_msg: str) -> bool:
|
| 276 |
"""Check if the error is a rate limit error (429) or service unavailable (503)."""
|
| 277 |
if not error_msg:
|
|
|
|
| 314 |
|
| 315 |
|
| 316 |
_init_gemini_client(0)
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
|
| 319 |
def parse_gemini_error(error_msg: str) -> dict:
|
|
|
|
| 329 |
return {"ok": False, "error_type": "GEMINI_ERROR", "message": msg}
|
| 330 |
|
| 331 |
|
|
|
|
| 332 |
|
| 333 |
def extract_qid_from_prompt(prompt: str, erp_row: dict = None) -> str:
|
| 334 |
"""
|
|
|
|
| 375 |
return "Q1"
|
| 376 |
|
| 377 |
|
|
|
|
|
|
|
| 378 |
def generate_gemini_response(
|
| 379 |
prompt: str,
|
| 380 |
system_prompt: str = "",
|
| 381 |
max_tokens: int = 650,
|
| 382 |
temperature: float = 0.3,
|
| 383 |
) -> str:
|
|
|
|
| 384 |
global GEMINI_LAST_ERROR, gemini_client, rate_limited_keys
|
|
|
|
|
|
|
|
|
|
| 385 |
|
| 386 |
if gemini_client is None:
|
| 387 |
if not GEMINI_LAST_ERROR:
|
| 388 |
GEMINI_LAST_ERROR = "Gemini client not initialized"
|
|
|
|
| 389 |
# Try to reinitialize if we have keys available
|
| 390 |
if GOOGLE_API_KEYS and current_key_index not in rate_limited_keys:
|
| 391 |
_init_gemini_client(current_key_index)
|
| 392 |
if gemini_client is None:
|
| 393 |
return ""
|
|
|
|
|
|
|
|
|
|
| 394 |
|
| 395 |
try:
|
| 396 |
contents = []
|
|
|
|
| 408 |
GEMINI_LAST_ERROR = ""
|
| 409 |
return text
|
| 410 |
except Exception as e:
|
|
|
|
| 411 |
error_msg = str(e)
|
| 412 |
print(f"[ERROR] Gemini call failed: {error_msg}")
|
| 413 |
|
|
|
|
| 419 |
return generate_gemini_response(prompt, system_prompt, max_tokens, temperature)
|
| 420 |
|
| 421 |
GEMINI_LAST_ERROR = error_msg
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
return ""
|
| 423 |
|
| 424 |
import time
|
|
|
|
| 452 |
return int(round(min(0.6, overlap) * 100)) # cap at 60
|
| 453 |
|
| 454 |
|
|
|
|
| 455 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 456 |
def _norm(s: str) -> str:
|
| 457 |
return re.sub(r"\s+", " ", (s or "").strip().lower())
|
| 458 |
|
|
|
|
| 487 |
return {"w_sim": 0.6, "w_cov": 0.4, "verified": 75, "partial": 55, "kp_thr": 0.20}
|
| 488 |
|
| 489 |
|
|
|
|
| 490 |
def mcq_partial_credit(student_level: str) -> dict:
|
| 491 |
"""
|
| 492 |
Returns partial credit percentage for MCQ questions based on student level.
|
|
|
|
| 507 |
return {"credit_per_question": 75, "passing_threshold": 75}
|
| 508 |
|
| 509 |
|
|
|
|
|
|
|
| 510 |
def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: float) -> Tuple[List[str], List[str], float]:
|
| 511 |
covered, missing = [], []
|
| 512 |
for kp in key_points:
|
|
|
|
| 524 |
return covered, missing, coverage
|
| 525 |
|
| 526 |
|
|
|
|
| 527 |
|
| 528 |
def infer_question_type_from_prompt(prompt: str, student_text: str = "") -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
p = _norm(prompt)
|
| 530 |
|
| 531 |
# Explicit markers - check for (mcq) first since it's common in parentheses
|
|
|
|
| 534 |
if re.search(r"\btype\s*:\s*narrative\b", p) or re.search(r"\bquestion_type\s*:\s*narrative\b", p):
|
| 535 |
return "narrative"
|
| 536 |
|
|
|
|
| 537 |
# Heuristic: options A/B/C/D exist in prompt -> likely MCQ
|
| 538 |
if re.search(r"\b(a|b|c|d)\s*[\)\.]\s+", p) or "option a" in p or "option b" in p:
|
| 539 |
return "mcq"
|
|
|
|
| 551 |
# If answer starts with A. or B. etc.
|
| 552 |
if re.search(r"^[a-d]\.\s+", s.strip()):
|
| 553 |
return "mcq"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
|
| 555 |
return "narrative"
|
| 556 |
|
|
|
|
| 615 |
|
| 616 |
# Check for correct answer (for MCQ)
|
| 617 |
if current_type == 'mcq':
|
|
|
|
| 618 |
# First check: is this line "Correct Answer(s):" with nothing after it?
|
| 619 |
# If so, we need to look for the answer on the next line
|
| 620 |
if re.search(r'^correct\s*answer\s*\(?s\)?\s*[:\.]?\s*$', line, re.IGNORECASE):
|
|
|
|
| 647 |
else:
|
| 648 |
# Try to extract first letter
|
| 649 |
current_correct = correct_text[0].upper() if correct_text else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 650 |
|
| 651 |
# Don't forget the last question
|
| 652 |
if current_q is not None:
|
|
|
|
| 660 |
# If no questions parsed, fall back to old behavior
|
| 661 |
if not questions:
|
| 662 |
qtype = infer_question_type_from_prompt(prompt)
|
|
|
|
| 663 |
return [{'qid': extract_qid_from_prompt(prompt), 'type': qtype, 'question': prompt, 'correct_answer': None}]
|
|
|
|
|
|
|
|
|
|
| 664 |
|
| 665 |
return questions
|
| 666 |
|
|
|
|
| 692 |
return ""
|
| 693 |
|
| 694 |
|
|
|
|
| 695 |
def extract_mcq_answers_with_qid(text: str) -> Dict[str, str]:
|
| 696 |
"""
|
| 697 |
Extract MCQ answers WITH question numbers from student text.
|
|
|
|
| 747 |
return results
|
| 748 |
|
| 749 |
|
|
|
|
|
|
|
| 750 |
def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
| 751 |
"""
|
| 752 |
This is IMPORTANT:
|
|
|
|
| 754 |
- Correct: B
|
| 755 |
- Answer: C
|
| 756 |
- correct_option: D
|
|
|
|
| 757 |
- Correct Answer(s): A. Devdatta
|
| 758 |
or JSON: {"correct_option":"B"}
|
| 759 |
|
|
|
|
| 762 |
- "Correct Answer(s): A. Devdatta"
|
| 763 |
- "Correct: B"
|
| 764 |
- "Answer: C"
|
|
|
|
|
|
|
|
|
|
| 765 |
"""
|
| 766 |
p = (prompt or "").strip()
|
| 767 |
if not p:
|
|
|
|
| 778 |
except Exception:
|
| 779 |
pass
|
| 780 |
|
|
|
|
| 781 |
# Text prompt support - new format: "Correct Answer(s): A. Devdatta" or "Correct Answer: B"
|
| 782 |
t = _norm(p)
|
| 783 |
|
|
|
|
| 800 |
return m1c.group(1)
|
| 801 |
|
| 802 |
# Pattern 2: "Correct: A" or "Answer: B" (original pattern)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 803 |
m = re.search(r"\b(correct|answer|ans)\s*[:\-]?\s*\(?\s*([a-d])\s*\)?\b", t)
|
| 804 |
if m:
|
| 805 |
return m.group(2)
|
|
|
|
| 807 |
return ""
|
| 808 |
|
| 809 |
|
|
|
|
| 810 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 811 |
def _erp_get(params: dict) -> list:
|
| 812 |
headers = {}
|
| 813 |
if ERP_TOKEN:
|
|
|
|
| 839 |
return "Medium"
|
| 840 |
|
| 841 |
|
|
|
|
| 842 |
|
| 843 |
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 844 |
"""
|
|
|
|
| 907 |
return ""
|
| 908 |
|
| 909 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 910 |
def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> str:
|
| 911 |
if not image_bytes or len(image_bytes) < 50:
|
| 912 |
raise HTTPException(status_code=400, detail=f"Invalid file: '{filename}' - empty/too small")
|
|
|
|
| 923 |
head = image_bytes[:12]
|
| 924 |
raise HTTPException(status_code=400, detail=f"Invalid image format: '{filename}' (header={head})")
|
| 925 |
|
|
|
|
| 926 |
# First try Google Cloud Vision (better for handwriting)
|
| 927 |
if vision_client:
|
| 928 |
gv_text = _extract_text_google_vision(image_bytes)
|
|
|
|
| 930 |
return _clean_extracted_text(gv_text)
|
| 931 |
|
| 932 |
# Fallback to Tesseract with improved preprocessing
|
|
|
|
|
|
|
| 933 |
try:
|
| 934 |
img = Image.open(io.BytesIO(image_bytes))
|
| 935 |
except Exception as e:
|
|
|
|
| 937 |
|
| 938 |
img = _preprocess_for_ocr(img)
|
| 939 |
|
|
|
|
| 940 |
# Try multiple OCR configurations for better handwritten recognition
|
| 941 |
ocr_configs = [
|
| 942 |
"--oem 3 --psm 6", # Default
|
|
|
|
| 965 |
raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
|
| 966 |
|
| 967 |
text = (best_text or "").strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 968 |
text = re.sub(r"[ \t]+", " ", text)
|
| 969 |
return text
|
| 970 |
|
|
|
|
| 1016 |
return {"text": extracted, "used_ocr": False, "needs_ocr": True}
|
| 1017 |
try:
|
| 1018 |
used_ocr = True
|
|
|
|
| 1019 |
# Higher DPI for better handwritten OCR
|
| 1020 |
pages = convert_from_bytes(pdf_bytes, dpi=300)
|
| 1021 |
page_texts = []
|
|
|
|
| 1041 |
if img:
|
| 1042 |
img = _preprocess_for_ocr(img)
|
| 1043 |
extracted = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6") or ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1044 |
except Exception as e:
|
| 1045 |
return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": True, "ocr_error": str(e)}
|
| 1046 |
|
| 1047 |
return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": False}
|
| 1048 |
|
| 1049 |
|
|
|
|
| 1050 |
def get_question_positions_from_pdf(pdf_bytes: bytes) -> Dict[int, List[Dict]]:
|
| 1051 |
"""
|
| 1052 |
Detect question number positions in a PDF.
|
|
|
|
| 1319 |
print(f"[ERROR] Failed to create annotated PDF: {e}")
|
| 1320 |
return original_pdf_bytes
|
| 1321 |
|
|
|
|
|
|
|
| 1322 |
async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
|
| 1323 |
filename = getattr(file, "filename", "") or "upload"
|
| 1324 |
content_type = (getattr(file, "content_type", "") or "").lower()
|
|
|
|
| 1368 |
|
| 1369 |
|
| 1370 |
|
|
|
|
| 1371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1372 |
@app.get("/health")
|
| 1373 |
def health():
|
| 1374 |
return {"status": "ok"}
|
|
|
|
| 1377 |
@app.get("/health/llm")
|
| 1378 |
def health_llm():
|
| 1379 |
return {
|
|
|
|
| 1380 |
"ok": bool(gemini_client) and bool(GOOGLE_API_KEYS),
|
| 1381 |
"gemini": {
|
| 1382 |
"sdk_import_ok": genai is not None,
|
|
|
|
| 1384 |
"num_keys_configured": len(GOOGLE_API_KEYS),
|
| 1385 |
"current_key_index": current_key_index + 1 if GOOGLE_API_KEYS else 0,
|
| 1386 |
"rate_limited_keys": list(rate_limited_keys),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1387 |
"client_ready": gemini_client is not None,
|
| 1388 |
"model": GEMINI_MODEL,
|
| 1389 |
"last_error": GEMINI_LAST_ERROR if GEMINI_LAST_ERROR else None,
|
|
|
|
| 1391 |
}
|
| 1392 |
|
| 1393 |
|
|
|
|
| 1394 |
@app.get("/homework/annotated-url/{homework_id}/{student_id}")
|
| 1395 |
async def get_annotated_pdf_url(
|
| 1396 |
homework_id: int,
|
|
|
|
| 1803 |
return ai_evaluate_per_question(prompt, student_text, student_level)
|
| 1804 |
|
| 1805 |
|
|
|
|
|
|
|
| 1806 |
@app.post("/homework/validate")
|
| 1807 |
async def homework_validate(
|
| 1808 |
student_id: int = Form(...),
|
| 1809 |
homework_id: int = Form(...),
|
|
|
|
| 1810 |
student_file: UploadFile = File(...),
|
| 1811 |
):
|
| 1812 |
# 0) Fetch ERP record -> get all fields automatically
|
|
|
|
| 1836 |
if final_question_type not in ("mcq", "narrative", "mixed"):
|
| 1837 |
final_question_type = infer_question_type_from_prompt(prompt, student_text)
|
| 1838 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1839 |
# 1) Infer question_type from prompt automatically (NO EXTRA FIELD)
|
| 1840 |
# Try to parse mixed questions first
|
| 1841 |
parsed_questions = parse_questions_from_prompt(prompt)
|
| 1842 |
has_mcq = any(q.get('type') == 'mcq' for q in parsed_questions)
|
| 1843 |
has_narrative = any(q.get('type') == 'narrative' for q in parsed_questions)
|
| 1844 |
|
| 1845 |
+
# Detect submission kind
|
| 1846 |
+
submission_kind = student_info.get("kind", "") # "pdf", "image", "docx", etc.
|
| 1847 |
+
is_pdf_submission = submission_kind == "pdf"
|
| 1848 |
+
is_image_submission = submission_kind == "image" or submission_kind == "unknown_as_image"
|
| 1849 |
+
is_docx_submission = submission_kind == "docx"
|
| 1850 |
+
can_annotate = is_pdf_submission or is_image_submission or is_docx_submission
|
| 1851 |
+
|
| 1852 |
+
# ── Converters: image/docx → PDF bytes so create_annotated_pdf can process them ──
|
| 1853 |
+
def _image_bytes_to_pdf(img_bytes: bytes) -> bytes:
|
| 1854 |
+
"""Wrap a raw image inside a single-page PDF using reportlab."""
|
| 1855 |
+
try:
|
| 1856 |
+
from reportlab.pdfgen import canvas as rl_canvas
|
| 1857 |
+
from reportlab.lib.utils import ImageReader
|
| 1858 |
+
from PIL import Image as PILImage
|
| 1859 |
+
import io as _io
|
| 1860 |
+
img = PILImage.open(_io.BytesIO(img_bytes))
|
| 1861 |
+
iw, ih = img.size
|
| 1862 |
+
buf = _io.BytesIO()
|
| 1863 |
+
c = rl_canvas.Canvas(buf, pagesize=(iw, ih))
|
| 1864 |
+
c.drawImage(ImageReader(img), 0, 0, iw, ih)
|
| 1865 |
+
c.save()
|
| 1866 |
+
buf.seek(0)
|
| 1867 |
+
return buf.read()
|
| 1868 |
+
except Exception as e:
|
| 1869 |
+
print(f"[WARN] _image_bytes_to_pdf failed: {e}")
|
| 1870 |
+
return b""
|
| 1871 |
+
|
| 1872 |
+
def _docx_bytes_to_pdf(docx_bytes: bytes) -> bytes:
|
| 1873 |
+
"""
|
| 1874 |
+
Convert DOCX → PDF.
|
| 1875 |
+
Tries LibreOffice (soffice) first — available in most Linux envs.
|
| 1876 |
+
Falls back to building a simple reportlab PDF with the extracted text.
|
| 1877 |
+
"""
|
| 1878 |
+
import subprocess, tempfile, shutil, os as _os, io as _io
|
| 1879 |
+
# Try LibreOffice
|
| 1880 |
+
try:
|
| 1881 |
+
with tempfile.TemporaryDirectory() as tmpdir:
|
| 1882 |
+
docx_path = _os.path.join(tmpdir, "input.docx")
|
| 1883 |
+
with open(docx_path, "wb") as f:
|
| 1884 |
+
f.write(docx_bytes)
|
| 1885 |
+
result = subprocess.run(
|
| 1886 |
+
["soffice", "--headless", "--convert-to", "pdf", "--outdir", tmpdir, docx_path],
|
| 1887 |
+
timeout=30, capture_output=True
|
| 1888 |
+
)
|
| 1889 |
+
pdf_path = docx_path.replace(".docx", ".pdf")
|
| 1890 |
+
if _os.path.exists(pdf_path):
|
| 1891 |
+
with open(pdf_path, "rb") as f:
|
| 1892 |
+
return f.read()
|
| 1893 |
+
except Exception as e:
|
| 1894 |
+
print(f"[WARN] LibreOffice docx→pdf failed: {e}")
|
| 1895 |
+
|
| 1896 |
+
# Fallback: extract text and build a simple PDF with reportlab
|
| 1897 |
+
try:
|
| 1898 |
+
from reportlab.pdfgen import canvas as rl_canvas
|
| 1899 |
+
from reportlab.lib.pagesizes import A4
|
| 1900 |
+
from docx import Document as DocxDoc
|
| 1901 |
+
doc = DocxDoc(_io.BytesIO(docx_bytes))
|
| 1902 |
+
text_lines = [p.text for p in doc.paragraphs if p.text.strip()]
|
| 1903 |
+
buf = _io.BytesIO()
|
| 1904 |
+
page_w, page_h = A4
|
| 1905 |
+
c = rl_canvas.Canvas(buf, pagesize=A4)
|
| 1906 |
+
c.setFont("Helvetica", 11)
|
| 1907 |
+
y = page_h - 50
|
| 1908 |
+
for line in text_lines:
|
| 1909 |
+
# Word-wrap long lines
|
| 1910 |
+
while len(line) > 90:
|
| 1911 |
+
c.drawString(40, y, line[:90])
|
| 1912 |
+
line = line[90:]
|
| 1913 |
+
y -= 16
|
| 1914 |
+
if y < 50:
|
| 1915 |
+
c.showPage()
|
| 1916 |
+
c.setFont("Helvetica", 11)
|
| 1917 |
+
y = page_h - 50
|
| 1918 |
+
c.drawString(40, y, line)
|
| 1919 |
+
y -= 16
|
| 1920 |
+
if y < 50:
|
| 1921 |
+
c.showPage()
|
| 1922 |
+
c.setFont("Helvetica", 11)
|
| 1923 |
+
y = page_h - 50
|
| 1924 |
+
c.save()
|
| 1925 |
+
buf.seek(0)
|
| 1926 |
+
return buf.read()
|
| 1927 |
+
except Exception as e:
|
| 1928 |
+
print(f"[WARN] Fallback docx→pdf failed: {e}")
|
| 1929 |
+
return b""
|
| 1930 |
+
|
| 1931 |
+
def _get_pdf_bytes_for_annotation() -> bytes:
    """
    Produce the PDF bytes that the annotation step should draw on.

    A PDF submission is passed through untouched. Image and DOCX submissions
    are run through their converter; if the converter yields nothing (or the
    submission kind is something else) an empty bytes object is returned.
    """
    if is_pdf_submission:
        return original_file_bytes

    # (applies?, converter) pairs, tried in the same order as before:
    # image first, then docx.
    conversion_table = (
        (is_image_submission, _image_bytes_to_pdf),
        (is_docx_submission, _docx_bytes_to_pdf),
    )
    for applies, to_pdf in conversion_table:
        if not applies:
            continue
        converted = to_pdf(original_file_bytes)
        if converted:
            return converted

    return b""
|
| 1946 |
+
|
| 1947 |
# Initialize annotated PDF filename
|
| 1948 |
annotated_pdf_filename = None
|
| 1949 |
annotated_pdf_url = None
|
| 1950 |
+
|
| 1951 |
# Function to save annotated PDF — returns (filename, public_url)
|
| 1952 |
def save_annotated_pdf(pdf_bytes, hw_id, stud_id, results, score, stat, lvl, qtype="mcq"):
|
| 1953 |
if not pdf_bytes or len(pdf_bytes) < 100:
|
|
|
|
| 1958 |
ts = int(time.time())
|
| 1959 |
filename = f"marked_{hw_id}_{stud_id}_{ts}.pdf"
|
| 1960 |
filepath = os.path.join(outputs_dir, filename)
|
| 1961 |
+
|
| 1962 |
+
# Convert image/docx → PDF if needed, then annotate
|
| 1963 |
+
annotation_input = _get_pdf_bytes_for_annotation()
|
| 1964 |
+
if not annotation_input:
|
| 1965 |
+
print(f"[WARN] Could not get PDF bytes for annotation (kind={submission_kind})")
|
| 1966 |
+
return None, None
|
| 1967 |
+
|
| 1968 |
annotated = create_annotated_pdf(
|
| 1969 |
+
original_pdf_bytes=annotation_input,
|
| 1970 |
mcq_results=results,
|
| 1971 |
match_percentage=score,
|
| 1972 |
status=stat,
|
| 1973 |
student_level=lvl,
|
| 1974 |
question_type=qtype
|
| 1975 |
)
|
| 1976 |
+
|
| 1977 |
with open(filepath, "wb") as f:
|
| 1978 |
f.write(annotated)
|
| 1979 |
return filename, build_pdf_url(filename)
|
|
|
|
| 1984 |
MIN_WORDS = 3 if final_question_type == "mcq" else 8
|
| 1985 |
if len(student_text.split()) < MIN_WORDS:
|
| 1986 |
# Save annotated PDF even for unreadable (with status shown)
|
| 1987 |
+
if can_annotate and original_file_bytes:
|
| 1988 |
# Show circle mark for unreadable
|
| 1989 |
unreadable_result = [{'qid': extract_qid_from_prompt(prompt, erp_row), 'correct': None, 'chosen': 'Unreadable', 'correct_answer': 'N/A'}]
|
| 1990 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1991 |
original_file_bytes, homework_id, student_id, unreadable_result, 0, "Unreadable", student_level
|
| 1992 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1993 |
return {
|
| 1994 |
"student_id": student_id,
|
| 1995 |
"homework_id": homework_id,
|
| 1996 |
"sub_institute_id": sub_institute_id,
|
| 1997 |
"syear": syear,
|
|
|
|
| 1998 |
"question_type": final_question_type,
|
| 1999 |
"student_level": student_level,
|
| 2000 |
"status": "Unreadable",
|
|
|
|
| 2005 |
"llm_used": False,
|
| 2006 |
"question_marks": make_question_marks([]),
|
| 2007 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2008 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2009 |
}
|
| 2010 |
|
| 2011 |
if student_info.get("needs_ocr") and not student_text:
|
|
|
|
| 2012 |
# Save annotated PDF even for unreadable (with status shown)
|
| 2013 |
+
if can_annotate and original_file_bytes:
|
| 2014 |
# Show circle mark for scanned PDF that needs OCR
|
| 2015 |
ocr_result = [{'qid': extract_qid_from_prompt(prompt, erp_row), 'correct': None, 'chosen': 'Needs OCR', 'correct_answer': 'N/A'}]
|
| 2016 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2017 |
original_file_bytes, homework_id, student_id, ocr_result, 0, "Unreadable", student_level
|
| 2018 |
)
|
|
|
|
|
|
|
| 2019 |
return {
|
| 2020 |
"student_id": student_id,
|
| 2021 |
"homework_id": homework_id,
|
| 2022 |
"sub_institute_id": sub_institute_id,
|
| 2023 |
"syear": syear,
|
|
|
|
| 2024 |
"question_type": final_question_type,
|
| 2025 |
"student_level": student_level,
|
| 2026 |
"status": "Unreadable",
|
|
|
|
| 2036 |
|
| 2037 |
|
| 2038 |
if final_question_type == "mixed":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2039 |
# Process each question type separately and combine results
|
| 2040 |
mcq_results = []
|
| 2041 |
narrative_results = []
|
| 2042 |
|
|
|
|
| 2043 |
# Extract ALL MCQ answers from student text with question numbers
|
| 2044 |
student_answers_by_qid = extract_mcq_answers_with_qid(student_text)
|
| 2045 |
|
|
|
|
| 2056 |
if not chosen:
|
| 2057 |
chosen = extract_mcq_choice(student_text)
|
| 2058 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2059 |
correct = q.get('correct_answer') or extract_correct_mcq_from_prompt(q.get('question', ''))
|
| 2060 |
|
| 2061 |
if correct and chosen:
|
| 2062 |
is_correct = (chosen.lower().strip() == correct.lower().strip())
|
| 2063 |
mcq_results.append({
|
|
|
|
| 2064 |
'qid': qid,
|
| 2065 |
'correct': is_correct,
|
| 2066 |
'chosen': chosen,
|
|
|
|
| 2075 |
'chosen': '',
|
| 2076 |
'correct_answer': correct,
|
| 2077 |
'unattempted': True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2078 |
})
|
| 2079 |
|
| 2080 |
# For narrative questions, use AI to generate reference
|
|
|
|
| 2131 |
except Exception as e:
|
| 2132 |
narrative_results = {'error': str(e)}
|
| 2133 |
|
|
|
|
| 2134 |
# Calculate combined score with level-based partial credit for MCQ
|
| 2135 |
total_mcq = len(mcq_results)
|
| 2136 |
correct_mcq = sum(1 for r in mcq_results if r.get('correct'))
|
|
|
|
| 2142 |
|
| 2143 |
# Calculate MCQ score based on level (not just binary correct/incorrect)
|
| 2144 |
mcq_score = (correct_mcq * credit_per_q) / max(1, total_mcq)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2145 |
|
| 2146 |
narrative_score = narrative_results.get('match_percentage', 0) if narrative_results else 0
|
| 2147 |
|
|
|
|
| 2163 |
else:
|
| 2164 |
status = "Needs Review"
|
| 2165 |
|
|
|
|
| 2166 |
# Save annotated PDF
|
| 2167 |
+
if can_annotate and original_file_bytes and mcq_results:
|
| 2168 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2169 |
original_file_bytes, homework_id, student_id, mcq_results, final_score, status, student_level
|
| 2170 |
)
|
| 2171 |
|
|
|
|
|
|
|
| 2172 |
return {
|
| 2173 |
"student_id": student_id,
|
| 2174 |
"homework_id": homework_id,
|
|
|
|
| 2178 |
"student_level": student_level,
|
| 2179 |
"status": status,
|
| 2180 |
"match_percentage": final_score,
|
|
|
|
| 2181 |
"submission_remarks": None,
|
| 2182 |
"rule_based_remark": f"MCQ: {correct_mcq}/{total_mcq} correct. Narrative score: {narrative_score}%. (Level: {student_level}, Credit per Q: {credit_per_q}%)",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2183 |
"llm_used": bool(narrative_results and 'error' not in narrative_results),
|
| 2184 |
"student_extracted_text": student_text,
|
| 2185 |
"mcq_results": mcq_results,
|
| 2186 |
"narrative_results": narrative_results,
|
|
|
|
| 2187 |
"question_marks": make_question_marks(mcq_results),
|
| 2188 |
"annotated_pdf": annotated_pdf_filename,
|
| 2189 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
|
|
|
| 2281 |
status = "Verified" if match_percentage >= passing_threshold else "Needs Review"
|
| 2282 |
|
| 2283 |
# Save annotated PDF
|
| 2284 |
+
if can_annotate and original_file_bytes:
|
| 2285 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2286 |
original_file_bytes, homework_id, student_id, mcq_results, match_percentage, status, student_level
|
| 2287 |
)
|
|
|
|
| 2307 |
else:
|
| 2308 |
# No correct answers in prompt - return needs review with extracted answers
|
| 2309 |
# Save annotated PDF with circle mark
|
| 2310 |
+
if can_annotate and original_file_bytes:
|
| 2311 |
no_answer_result = [{'qid': extract_qid_from_prompt(prompt, erp_row), 'correct': None, 'chosen': 'No Answer Key', 'correct_answer': 'N/A'}]
|
| 2312 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2313 |
original_file_bytes, homework_id, student_id, no_answer_result, 0, "Needs Review", student_level
|
|
|
|
| 2335 |
pass # Will continue to narrative handling
|
| 2336 |
elif not correct:
|
| 2337 |
# Save annotated PDF with circle mark
|
| 2338 |
+
if can_annotate and original_file_bytes:
|
| 2339 |
no_correct_result = [{'qid': extract_qid_from_prompt(prompt, erp_row), 'correct': None, 'chosen': 'Not Found', 'correct_answer': 'N/A'}]
|
| 2340 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2341 |
original_file_bytes, homework_id, student_id, no_correct_result, 0, "Needs Review", student_level
|
| 2342 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2343 |
return {
|
| 2344 |
"student_id": student_id,
|
| 2345 |
"homework_id": homework_id,
|
|
|
|
| 2349 |
"student_level": student_level,
|
| 2350 |
"status": "Needs Review",
|
| 2351 |
"match_percentage": 0,
|
|
|
|
| 2352 |
"submission_remarks": None,
|
| 2353 |
"rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
|
| 2354 |
"student_extracted_text": student_text,
|
|
|
|
| 2360 |
}
|
| 2361 |
elif not chosen:
|
| 2362 |
# Save annotated PDF with circle mark
|
| 2363 |
+
if can_annotate and original_file_bytes:
|
| 2364 |
no_chosen_result = [{'qid': extract_qid_from_prompt(prompt, erp_row), 'correct': None, 'chosen': 'Not Detected', 'correct_answer': correct or 'N/A'}]
|
| 2365 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2366 |
original_file_bytes, homework_id, student_id, no_chosen_result, 0, "Needs Review", student_level
|
| 2367 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2368 |
return {
|
| 2369 |
"student_id": student_id,
|
| 2370 |
"homework_id": homework_id,
|
|
|
|
| 2374 |
"student_level": student_level,
|
| 2375 |
"status": "Needs Review",
|
| 2376 |
"match_percentage": 0,
|
|
|
|
| 2377 |
"submission_remarks": None,
|
| 2378 |
"rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
|
| 2379 |
"student_extracted_text": student_text,
|
| 2380 |
"llm_used": False,
|
| 2381 |
"question_marks": make_question_marks([]),
|
| 2382 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2383 |
"debug": {"correct": correct, "chosen": chosen},
|
| 2384 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2385 |
}
|
| 2386 |
|
|
|
|
| 2387 |
# Only process MCQ validation if not redirecting to narrative
|
| 2388 |
if not redirect_to_narrative:
|
| 2389 |
is_correct = (chosen == correct)
|
|
|
|
| 2402 |
# Save annotated PDF
|
| 2403 |
_qid = extract_qid_from_prompt(prompt, erp_row)
|
| 2404 |
mcq_results_single = [{'qid': _qid, 'correct': is_correct, 'chosen': chosen, 'correct_answer': correct}]
|
| 2405 |
+
if can_annotate and original_file_bytes:
|
| 2406 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2407 |
original_file_bytes, homework_id, student_id, mcq_results_single, match_percentage, status, student_level
|
| 2408 |
)
|
|
|
|
| 2429 |
|
| 2430 |
if gemini_client is None:
|
| 2431 |
# Save annotated PDF
|
| 2432 |
+
if can_annotate and original_file_bytes:
|
| 2433 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2434 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2435 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2436 |
return {
|
| 2437 |
"student_id": student_id,
|
| 2438 |
"homework_id": homework_id,
|
|
|
|
| 2442 |
"student_level": student_level,
|
| 2443 |
"status": "Needs Review",
|
| 2444 |
"match_percentage": 0,
|
|
|
|
| 2445 |
"submission_remarks": None,
|
|
|
|
|
|
|
|
|
|
| 2446 |
"rule_based_remark": "Gemini not configured. Check /health/llm.",
|
| 2447 |
"llm_used": False,
|
| 2448 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 2449 |
"student_extracted_text": student_text,
|
|
|
|
| 2450 |
"question_marks": make_question_marks([]),
|
| 2451 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
| 2452 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2453 |
}
|
| 2454 |
|
|
|
|
| 2469 |
)
|
| 2470 |
|
| 2471 |
if not response_text:
|
|
|
|
| 2472 |
# Save annotated PDF
|
| 2473 |
+
if can_annotate and original_file_bytes:
|
| 2474 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2475 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2476 |
)
|
|
|
|
|
|
|
| 2477 |
return {
|
| 2478 |
"student_id": student_id,
|
| 2479 |
"homework_id": homework_id,
|
|
|
|
| 2483 |
"student_level": student_level,
|
| 2484 |
"status": "Needs Review",
|
| 2485 |
"match_percentage": 0,
|
|
|
|
| 2486 |
"submission_remarks": None,
|
|
|
|
|
|
|
|
|
|
| 2487 |
"rule_based_remark": "Gemini failed. Check /health/llm.",
|
| 2488 |
"llm_used": False,
|
| 2489 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 2490 |
"student_extracted_text": student_text,
|
|
|
|
| 2491 |
"question_marks": make_question_marks([]),
|
| 2492 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
| 2493 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2494 |
}
|
| 2495 |
|
|
|
|
| 2497 |
m = re.search(r"\{.*\}", response_text, flags=re.S)
|
| 2498 |
payload = json.loads(m.group(0) if m else response_text)
|
| 2499 |
except Exception as e:
|
|
|
|
| 2500 |
# Save annotated PDF
|
| 2501 |
+
if can_annotate and original_file_bytes:
|
| 2502 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2503 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2504 |
)
|
|
|
|
|
|
|
| 2505 |
return {
|
| 2506 |
"student_id": student_id,
|
| 2507 |
"homework_id": homework_id,
|
|
|
|
| 2511 |
"student_level": student_level,
|
| 2512 |
"status": "Needs Review",
|
| 2513 |
"match_percentage": 0,
|
|
|
|
| 2514 |
"submission_remarks": None,
|
|
|
|
|
|
|
|
|
|
| 2515 |
"rule_based_remark": "Gemini returned non-JSON output.",
|
| 2516 |
"llm_used": False,
|
| 2517 |
"llm_error": {"ok": False, "error_type": "GEMINI_BAD_JSON", "message": str(e), "raw": response_text[:800]},
|
| 2518 |
"student_extracted_text": student_text,
|
|
|
|
| 2519 |
"question_marks": make_question_marks([]),
|
| 2520 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
| 2521 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2522 |
}
|
| 2523 |
|
|
|
|
| 2528 |
key_points = [str(x).strip() for x in key_points if str(x).strip()]
|
| 2529 |
|
| 2530 |
if not ai_reference_answer:
|
|
|
|
| 2531 |
# Save annotated PDF
|
| 2532 |
+
if can_annotate and original_file_bytes:
|
| 2533 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2534 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2535 |
)
|
|
|
|
|
|
|
| 2536 |
return {
|
| 2537 |
"student_id": student_id,
|
| 2538 |
"homework_id": homework_id,
|
|
|
|
| 2542 |
"student_level": student_level,
|
| 2543 |
"status": "Needs Review",
|
| 2544 |
"match_percentage": 0,
|
|
|
|
| 2545 |
"submission_remarks": None,
|
| 2546 |
"rule_based_remark": "AI returned empty reference answer.",
|
| 2547 |
"llm_used": True,
|
| 2548 |
"student_extracted_text": student_text,
|
| 2549 |
"question_marks": make_question_marks([]),
|
| 2550 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2551 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2552 |
}
|
| 2553 |
|
|
|
|
| 2578 |
f"{remark_prompt}"
|
| 2579 |
)
|
| 2580 |
|
|
|
|
| 2581 |
submission_remark = generate_gemini_response(
|
|
|
|
|
|
|
|
|
|
| 2582 |
prompt=resp2_prompt,
|
| 2583 |
system_prompt="You are a strict, helpful teacher. Be concise and factual.",
|
| 2584 |
max_tokens=140,
|
|
|
|
| 2586 |
)
|
| 2587 |
|
| 2588 |
rule_based_remark = None
|
|
|
|
| 2589 |
remark_llm_used = bool(submission_remark)
|
| 2590 |
remark_llm_error = None if submission_remark else (GEMINI_LAST_ERROR or "Unknown LLM error")
|
| 2591 |
|
| 2592 |
if not submission_remark:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2593 |
if status == "Verified":
|
| 2594 |
rule_based_remark = "Homework matches the expected answer well. Good coverage of the key ideas."
|
| 2595 |
elif status == "Partial":
|
|
|
|
| 2597 |
else:
|
| 2598 |
rule_based_remark = "Homework does not match the expected answer enough. Please review the topic and resubmit with clearer, complete points."
|
| 2599 |
|
|
|
|
| 2600 |
# Save annotated PDF — evaluate EACH question individually against student text
|
| 2601 |
per_question_results = build_per_question_results(
|
| 2602 |
prompt, student_text, status, match_pct,
|
|
|
|
| 2605 |
policy=policy,
|
| 2606 |
student_level=student_level,
|
| 2607 |
)
|
| 2608 |
+
if can_annotate and original_file_bytes:
|
| 2609 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2610 |
original_file_bytes, homework_id, student_id, per_question_results, match_pct, status, student_level, "narrative"
|
| 2611 |
)
|
| 2612 |
|
|
|
|
|
|
|
| 2613 |
return {
|
| 2614 |
"student_id": student_id,
|
| 2615 |
"homework_id": homework_id,
|
|
|
|
| 2619 |
"student_level": student_level,
|
| 2620 |
"status": status,
|
| 2621 |
"match_percentage": match_pct,
|
|
|
|
| 2622 |
"submission_remarks": submission_remark if submission_remark else None,
|
|
|
|
|
|
|
|
|
|
| 2623 |
"rule_based_remark": rule_based_remark,
|
| 2624 |
"llm_used": True,
|
| 2625 |
"remark_llm_used": remark_llm_used,
|
|
|
|
| 2629 |
"key_points": key_points,
|
| 2630 |
"key_points_covered": covered,
|
| 2631 |
"key_points_missing": missing,
|
|
|
|
| 2632 |
"question_marks": make_question_marks(per_question_results),
|
| 2633 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
| 2634 |
"debug": {
|
| 2635 |
"similarity": sim,
|
| 2636 |
"coverage": coverage,
|
| 2637 |
"policy": policy,
|
|
|
|
| 2638 |
"per_question_results": per_question_results,
|
| 2639 |
"erp_row_fields": list(erp_row.keys()) if erp_row else [],
|
| 2640 |
"erp_student_level_raw": erp_row.get("student_level") or erp_row.get("level") or erp_row.get("difficulty") or erp_row.get("difficulty_level"),
|
|
|
|
|
|
|
| 2641 |
},
|
| 2642 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2643 |
}
|