marked_pdf
Browse files- app.py +460 -1
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -26,6 +26,16 @@ try:
|
|
| 26 |
except Exception:
|
| 27 |
PdfReader = None
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
try:
|
| 30 |
from pdf2image import convert_from_bytes # requires poppler
|
| 31 |
except Exception:
|
|
@@ -57,6 +67,22 @@ app = FastAPI()
|
|
| 57 |
|
| 58 |
import os
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
@app.get("/debug/env")
|
| 61 |
def debug_env():
|
| 62 |
return {
|
|
@@ -875,6 +901,142 @@ def extract_text_from_pdf(pdf_bytes: bytes, filename: str = "unknown.pdf") -> Di
|
|
| 875 |
return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": False}
|
| 876 |
|
| 877 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 878 |
async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
|
| 879 |
filename = getattr(file, "filename", "") or "upload"
|
| 880 |
content_type = (getattr(file, "content_type", "") or "").lower()
|
|
@@ -947,6 +1109,180 @@ def health_llm():
|
|
| 947 |
}
|
| 948 |
|
| 949 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 950 |
@app.post("/homework/validate")
|
| 951 |
async def homework_validate(
|
| 952 |
student_id: int = Form(...),
|
|
@@ -981,9 +1317,51 @@ async def homework_validate(
|
|
| 981 |
# 2) Extract student text
|
| 982 |
student_info = await extract_text_from_upload(student_file)
|
| 983 |
student_text = (student_info.get("text") or "").strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 984 |
|
| 985 |
MIN_WORDS = 3 if final_question_type == "mcq" else 8
|
| 986 |
if len(student_text.split()) < MIN_WORDS:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 987 |
return {
|
| 988 |
"student_id": student_id,
|
| 989 |
"homework_id": homework_id,
|
|
@@ -997,10 +1375,16 @@ async def homework_validate(
|
|
| 997 |
"rule_based_remark": "Answer text could not be read clearly. Please upload a clearer file.",
|
| 998 |
"student_extracted_text": student_text,
|
| 999 |
"llm_used": False,
|
|
|
|
| 1000 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1001 |
}
|
| 1002 |
|
| 1003 |
if student_info.get("needs_ocr") and not student_text:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1004 |
return {
|
| 1005 |
"student_id": student_id,
|
| 1006 |
"homework_id": homework_id,
|
|
@@ -1014,6 +1398,7 @@ async def homework_validate(
|
|
| 1014 |
"rule_based_remark": "This PDF looks scanned. OCR is required (install pdf2image + poppler) or upload a clearer file.",
|
| 1015 |
"student_extracted_text": student_text,
|
| 1016 |
"llm_used": False,
|
|
|
|
| 1017 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1018 |
}
|
| 1019 |
|
|
@@ -1136,6 +1521,12 @@ async def homework_validate(
|
|
| 1136 |
else:
|
| 1137 |
status = "Needs Review"
|
| 1138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1139 |
return {
|
| 1140 |
"student_id": student_id,
|
| 1141 |
"homework_id": homework_id,
|
|
@@ -1151,6 +1542,7 @@ async def homework_validate(
|
|
| 1151 |
"student_extracted_text": student_text,
|
| 1152 |
"mcq_results": mcq_results,
|
| 1153 |
"narrative_results": narrative_results,
|
|
|
|
| 1154 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1155 |
"debug": {
|
| 1156 |
"erp_row_fields": list(erp_row.keys()) if erp_row else [],
|
|
@@ -1169,10 +1561,13 @@ async def homework_validate(
|
|
| 1169 |
|
| 1170 |
# Smart fallback: if answer looks like narrative (not MCQ), treat as narrative instead
|
| 1171 |
# This handles cases where question type is MCQ but student answered in narrative format
|
|
|
|
|
|
|
|
|
|
| 1172 |
answer_looks_like_narrative = (
|
| 1173 |
len(student_text.split()) > 15 and # More than 15 words
|
| 1174 |
not has_multiple_mcq and # Not multiple numbered MCQ answers
|
| 1175 |
-
not
|
| 1176 |
)
|
| 1177 |
|
| 1178 |
# If answer looks like narrative, redirect to narrative processing
|
|
@@ -1227,6 +1622,12 @@ async def homework_validate(
|
|
| 1227 |
passing_threshold = mcq_credit["passing_threshold"]
|
| 1228 |
status = "Verified" if match_percentage >= passing_threshold else "Needs Review"
|
| 1229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1230 |
return {
|
| 1231 |
"student_id": student_id,
|
| 1232 |
"homework_id": homework_id,
|
|
@@ -1240,11 +1641,17 @@ async def homework_validate(
|
|
| 1240 |
"rule_based_remark": f"Multiple MCQ: {correct_count}/{total_count} correct. Score: {match_percentage}% (Level: {student_level})",
|
| 1241 |
"student_extracted_text": student_text,
|
| 1242 |
"llm_used": False,
|
|
|
|
| 1243 |
"debug": {"student_answers": student_answers_by_qid, "mcq_results": mcq_results},
|
| 1244 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1245 |
}
|
| 1246 |
else:
|
| 1247 |
# No correct answers in prompt - return needs review with extracted answers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1248 |
return {
|
| 1249 |
"student_id": student_id,
|
| 1250 |
"homework_id": homework_id,
|
|
@@ -1258,6 +1665,7 @@ async def homework_validate(
|
|
| 1258 |
"rule_based_remark": f"Found {len(student_answers_by_qid)} MCQ answers but no correct answers in prompt. Include 'Correct: B' for each question.",
|
| 1259 |
"student_extracted_text": student_text,
|
| 1260 |
"llm_used": False,
|
|
|
|
| 1261 |
"debug": {"student_answers": student_answers_by_qid, "correct_answers_in_prompt": False},
|
| 1262 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1263 |
}
|
|
@@ -1265,6 +1673,11 @@ async def homework_validate(
|
|
| 1265 |
if redirect_to_narrative:
|
| 1266 |
pass # Will continue to narrative handling
|
| 1267 |
elif not correct:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1268 |
return {
|
| 1269 |
"student_id": student_id,
|
| 1270 |
"homework_id": homework_id,
|
|
@@ -1278,10 +1691,16 @@ async def homework_validate(
|
|
| 1278 |
"rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
|
| 1279 |
"student_extracted_text": student_text,
|
| 1280 |
"llm_used": False,
|
|
|
|
| 1281 |
"debug": {"correct": correct, "chosen": chosen},
|
| 1282 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1283 |
}
|
| 1284 |
elif not chosen:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1285 |
return {
|
| 1286 |
"student_id": student_id,
|
| 1287 |
"homework_id": homework_id,
|
|
@@ -1295,6 +1714,7 @@ async def homework_validate(
|
|
| 1295 |
"rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
|
| 1296 |
"student_extracted_text": student_text,
|
| 1297 |
"llm_used": False,
|
|
|
|
| 1298 |
"debug": {"correct": correct, "chosen": chosen},
|
| 1299 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1300 |
}
|
|
@@ -1314,6 +1734,13 @@ async def homework_validate(
|
|
| 1314 |
passing_threshold = mcq_credit["passing_threshold"]
|
| 1315 |
status = "Verified" if match_percentage >= passing_threshold else "Needs Review"
|
| 1316 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1317 |
return {
|
| 1318 |
"student_id": student_id,
|
| 1319 |
"homework_id": homework_id,
|
|
@@ -1327,12 +1754,18 @@ async def homework_validate(
|
|
| 1327 |
"rule_based_remark": f"{'Correct' if is_correct else 'Incorrect'}. Score: {match_percentage}% (Level: {student_level}, Credit per Q: {credit_per_q}%)",
|
| 1328 |
"student_extracted_text": student_text,
|
| 1329 |
"llm_used": False,
|
|
|
|
| 1330 |
"debug": {"correct": correct, "chosen": chosen, "level": student_level, "credit_per_q": credit_per_q},
|
| 1331 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1332 |
}
|
| 1333 |
|
| 1334 |
|
| 1335 |
if gemini_client is None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1336 |
return {
|
| 1337 |
"student_id": student_id,
|
| 1338 |
"homework_id": homework_id,
|
|
@@ -1347,6 +1780,7 @@ async def homework_validate(
|
|
| 1347 |
"llm_used": False,
|
| 1348 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 1349 |
"student_extracted_text": student_text,
|
|
|
|
| 1350 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1351 |
}
|
| 1352 |
|
|
@@ -1367,6 +1801,11 @@ async def homework_validate(
|
|
| 1367 |
)
|
| 1368 |
|
| 1369 |
if not response_text:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1370 |
return {
|
| 1371 |
"student_id": student_id,
|
| 1372 |
"homework_id": homework_id,
|
|
@@ -1381,6 +1820,7 @@ async def homework_validate(
|
|
| 1381 |
"llm_used": False,
|
| 1382 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 1383 |
"student_extracted_text": student_text,
|
|
|
|
| 1384 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1385 |
}
|
| 1386 |
|
|
@@ -1388,6 +1828,11 @@ async def homework_validate(
|
|
| 1388 |
m = re.search(r"\{.*\}", response_text, flags=re.S)
|
| 1389 |
payload = json.loads(m.group(0) if m else response_text)
|
| 1390 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1391 |
return {
|
| 1392 |
"student_id": student_id,
|
| 1393 |
"homework_id": homework_id,
|
|
@@ -1402,6 +1847,7 @@ async def homework_validate(
|
|
| 1402 |
"llm_used": False,
|
| 1403 |
"llm_error": {"ok": False, "error_type": "GEMINI_BAD_JSON", "message": str(e), "raw": response_text[:800]},
|
| 1404 |
"student_extracted_text": student_text,
|
|
|
|
| 1405 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1406 |
}
|
| 1407 |
|
|
@@ -1412,6 +1858,11 @@ async def homework_validate(
|
|
| 1412 |
key_points = [str(x).strip() for x in key_points if str(x).strip()]
|
| 1413 |
|
| 1414 |
if not ai_reference_answer:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1415 |
return {
|
| 1416 |
"student_id": student_id,
|
| 1417 |
"homework_id": homework_id,
|
|
@@ -1425,6 +1876,7 @@ async def homework_validate(
|
|
| 1425 |
"rule_based_remark": "AI returned empty reference answer.",
|
| 1426 |
"llm_used": True,
|
| 1427 |
"student_extracted_text": student_text,
|
|
|
|
| 1428 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1429 |
}
|
| 1430 |
|
|
@@ -1474,6 +1926,12 @@ async def homework_validate(
|
|
| 1474 |
else:
|
| 1475 |
rule_based_remark = "Homework does not match the expected answer enough. Please review the topic and resubmit with clearer, complete points."
|
| 1476 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1477 |
return {
|
| 1478 |
"student_id": student_id,
|
| 1479 |
"homework_id": homework_id,
|
|
@@ -1493,6 +1951,7 @@ async def homework_validate(
|
|
| 1493 |
"key_points": key_points,
|
| 1494 |
"key_points_covered": covered,
|
| 1495 |
"key_points_missing": missing,
|
|
|
|
| 1496 |
"debug": {
|
| 1497 |
"similarity": sim,
|
| 1498 |
"coverage": coverage,
|
|
|
|
| 26 |
except Exception:
|
| 27 |
PdfReader = None
|
| 28 |
|
| 29 |
+
try:
|
| 30 |
+
from reportlab.pdfgen import canvas
|
| 31 |
+
from reportlab.lib.pagesizes import letter
|
| 32 |
+
from reportlab.lib import colors
|
| 33 |
+
from reportlab.lib.utils import ImageReader
|
| 34 |
+
import reportlab
|
| 35 |
+
except Exception as e:
|
| 36 |
+
reportlab = None
|
| 37 |
+
print(f"[WARN] reportlab import failed: {e}")
|
| 38 |
+
|
| 39 |
try:
|
| 40 |
from pdf2image import convert_from_bytes # requires poppler
|
| 41 |
except Exception:
|
|
|
|
| 67 |
|
| 68 |
import os
|
| 69 |
|
| 70 |
+
# Serve static files from outputs directory
|
| 71 |
+
from fastapi.staticfiles import StaticFiles
|
| 72 |
+
from fastapi.responses import FileResponse
|
| 73 |
+
|
| 74 |
+
# Annotated PDFs are written to, and served from, an "outputs" folder that
# lives next to this module. Create it at import time so later writes do not
# fail on a missing directory.
_module_dir = os.path.dirname(__file__)
outputs_dir = os.path.join(_module_dir, "outputs")
os.makedirs(outputs_dir, exist_ok=True)
|
| 77 |
+
|
| 78 |
+
@app.get("/outputs/{filename}")
async def get_output_file(filename: str):
    """Serve a single file from the outputs directory.

    Args:
        filename: Bare name of a file inside ``outputs_dir``.

    Returns:
        A ``FileResponse`` streaming the requested file.

    Raises:
        HTTPException: 404 when the name is not a plain file name or no
            regular file with that name exists in ``outputs_dir``.
    """
    # Only a bare file name is acceptable. "." / ".." resolve to directories,
    # and a name carrying a separator or drive prefix would make os.path.join
    # point outside outputs_dir.
    if filename in ("", ".", "..") or os.path.basename(filename) != filename:
        raise HTTPException(status_code=404, detail="File not found")

    filepath = os.path.join(outputs_dir, filename)
    # isfile (not exists): a directory cannot be streamed as a file response.
    if not os.path.isfile(filepath):
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(filepath)
|
| 85 |
+
|
| 86 |
@app.get("/debug/env")
|
| 87 |
def debug_env():
|
| 88 |
return {
|
|
|
|
| 901 |
return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": False}
|
| 902 |
|
| 903 |
|
| 904 |
+
def _draw_summary_header(c, page_width, page_height, mcq_results, match_percentage, status, student_level):
    """Paint the grey summary band (status, score, level, MCQ tally) across the top 60pt of a page."""
    c.setFillColor(colors.lightgrey)
    c.rect(0, page_height - 60, page_width, 60, fill=1, stroke=0)

    # Status text is colour-coded: green = Verified, orange = Partial, red = anything else.
    if status == "Verified":
        status_color = colors.green
    elif status == "Partial":
        status_color = colors.orange
    else:
        status_color = colors.red
    c.setFont("Helvetica-Bold", 20)
    c.setFillColor(status_color)
    c.drawString(30, page_height - 30, f"Status: {status}")

    c.setFillColor(colors.black)
    c.setFont("Helvetica-Bold", 18)
    c.drawString(250, page_height - 30, f"Score: {match_percentage}%")
    c.drawString(450, page_height - 30, f"Level: {student_level}")

    if mcq_results:
        correct_count = sum(1 for r in mcq_results if r.get('correct'))
        total_count = len(mcq_results)
        c.setFont("Helvetica-Bold", 14)
        c.drawString(30, page_height - 50, f"MCQ: {correct_count}/{total_count} correct")


def _draw_result_mark(c, x_pos, y_pos, qid, is_correct):
    """Draw one circled tick (green) or cross (red) with its question label to the left."""
    mark_color = colors.green if is_correct else colors.red
    c.setStrokeColor(mark_color)
    c.setFillColor(mark_color)
    c.setLineWidth(2)
    c.circle(x_pos, y_pos, 12, fill=0)
    c.setFont("Helvetica-Bold", 14)
    c.drawString(x_pos - 5, y_pos - 5, "✓" if is_correct else "✗")

    # Question label
    c.setStrokeColor(colors.black)
    c.setFillColor(colors.black)
    c.setFont("Helvetica", 8)
    c.drawString(x_pos - 35, y_pos - 3, str(qid))


def create_annotated_pdf(
    original_pdf_bytes: bytes,
    mcq_results: List[Dict[str, Any]] = None,
    match_percentage: int = 0,
    status: str = "Needs Review",
    student_level: str = "Medium"
) -> bytes:
    """
    Create an annotated PDF with tickmarks showing correct/incorrect answers.

    A summary header is drawn on the first page, and one mark per MCQ result
    is placed down the right margin, continuing on later pages when a page
    is full. Results that do not fit on any page are not drawn.

    Args:
        original_pdf_bytes: The original PDF file content
        mcq_results: List of MCQ results with 'correct' and 'qid' fields
            (None or empty draws only the header)
        match_percentage: Overall match percentage
        status: Validation status
        student_level: Student level (Easy/Medium/Hard)

    Returns:
        Annotated PDF as bytes. The original bytes are returned unchanged
        when reportlab is unavailable or annotation fails for any reason
        (best effort: annotation problems never break the caller).
    """
    if not reportlab:
        print("[WARN] reportlab not available, returning original PDF")
        return original_pdf_bytes

    try:
        from pypdf import PdfWriter, PdfReader
        from io import BytesIO

        original_reader = PdfReader(BytesIO(original_pdf_bytes))
        writer = PdfWriter()

        results = mcq_results or []
        y_spacing = 30
        # Running index into `results`, so the pages share the marks correctly
        # even when page heights (and so per-page capacity) differ.
        next_idx = 0

        for page_num, page in enumerate(original_reader.pages):
            # NOTE(review): the overlay assumes the mediabox origin is (0, 0)
            # and the page is not rotated - confirm for unusual PDFs.
            page_width = float(page.mediabox.width)
            page_height = float(page.mediabox.height)
            is_first_page = page_num == 0

            # On the first page the marks start below the 60pt header band
            # (plus the 12pt mark radius and a small gap) so the band does
            # not hide them.
            y_start = page_height - (80 if is_first_page else 50)
            marks_on_page = max(0, int((y_start - 50) / y_spacing))
            page_results = results[next_idx:next_idx + marks_on_page]

            # Only build an overlay when there is something to draw.
            if is_first_page or page_results:
                packet = BytesIO()
                c = canvas.Canvas(packet, pagesize=(page_width, page_height))

                # Header first: it is an opaque fill and would otherwise
                # paint over anything drawn before it.
                if is_first_page:
                    _draw_summary_header(
                        c, page_width, page_height, results,
                        match_percentage, status, student_level,
                    )

                x_pos = page_width - 60
                for offset, result in enumerate(page_results):
                    overall_idx = next_idx + offset
                    qid = result.get('qid', f'Q{overall_idx + 1}')
                    is_correct = result.get('correct', False)
                    y_pos = y_start - (offset * y_spacing)
                    _draw_result_mark(c, x_pos, y_pos, qid, is_correct)

                c.save()
                packet.seek(0)

                # Merge overlay with original page
                overlay_reader = PdfReader(packet)
                if overlay_reader.pages:
                    page.merge_page(overlay_reader.pages[0])

            next_idx += len(page_results)
            writer.add_page(page)

        # Write the final PDF
        output = BytesIO()
        writer.write(output)
        return output.getvalue()

    except Exception as e:
        print(f"[ERROR] Failed to create annotated PDF: {e}")
        return original_pdf_bytes
|
| 1038 |
+
|
| 1039 |
+
|
| 1040 |
async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
|
| 1041 |
filename = getattr(file, "filename", "") or "upload"
|
| 1042 |
content_type = (getattr(file, "content_type", "") or "").lower()
|
|
|
|
| 1109 |
}
|
| 1110 |
|
| 1111 |
|
| 1112 |
+
@app.get("/homework/annotated-url/{homework_id}/{student_id}")
async def get_annotated_pdf_url(
    homework_id: int,
    student_id: int,
):
    """
    Get the URL for the annotated PDF.

    The base URL comes from the APP_BASE_URL environment variable and falls
    back to the local development address when it is unset or empty.

    Returns JSON with the URL that can be used in your frontend.
    """
    # Strip trailing slashes so a value such as "https://host/" does not
    # produce "https://host//homework/...".
    base_url = (os.getenv("APP_BASE_URL") or "http://127.0.0.1:8000").rstrip("/")
    return {
        "homework_id": homework_id,
        "student_id": student_id,
        "annotated_pdf_url": f"{base_url}/homework/annotated/{homework_id}/{student_id}"
    }
|
| 1127 |
+
@app.get("/homework/annotated/{homework_id}/{student_id}")
async def get_annotated_pdf(
    homework_id: int,
    student_id: int,
):
    """
    Download the annotated PDF with tickmarks for a validated homework.

    The submission is looked up in the ERP, downloaded, re-scored (MCQ by
    comparing against the answers parsed from the prompt, narrative via the
    Gemini reference answer when a client is configured) and returned with
    the annotations drawn on top.

    This endpoint returns the PDF directly as a file download.

    Raises:
        HTTPException: 404 when no ERP record or submission exists, 400 when
            the submission is not a PDF or its text cannot be extracted, and
            500 for any other failure.
    """
    import re
    from fastapi.responses import Response

    try:
        # Fetch ERP record
        erp_row = fetch_student_record(homework_id, student_id)
        # Other code in this file treats a falsy ERP row as possible; without
        # this guard the .get() below would surface as a 500.
        if not erp_row:
            raise HTTPException(status_code=404, detail="No submission found")

        # Get submission image from ERP
        submission_image = erp_row.get("submission_image")
        if not submission_image:
            raise HTTPException(status_code=404, detail="No submission found")

        # Check the file type before downloading, so non-PDF submissions are
        # rejected without a network round trip.
        filename = submission_image.lower()
        if not filename.endswith('.pdf'):
            raise HTTPException(status_code=400, detail="Annotated PDF only available for PDF submissions")

        # Download the original file
        # NOTE(review): requests.get is a blocking call inside an async
        # handler - consider a thread pool or async client.
        submission_url = STORAGE_BASE + submission_image
        resp = requests.get(submission_url, timeout=30)
        resp.raise_for_status()
        original_content = resp.content

        # Get prompt and question type
        prompt = erp_row.get("prompt") or erp_row.get("question_prompt") or ""
        question_type = erp_row.get("question_type") or erp_row.get("type")
        student_level = fetch_student_level_from_erp(erp_row)

        final_question_type = (question_type or "").strip().lower()
        if final_question_type not in ("mcq", "narrative", "mixed"):
            final_question_type = infer_question_type_from_prompt(prompt)

        # Extract text from PDF
        pdf_info = extract_text_from_pdf(original_content, filename=submission_image)
        student_text = (pdf_info.get("text") or "").strip()

        if len(student_text) < 10:
            raise HTTPException(status_code=400, detail="Could not extract text from PDF")

        # Defaults, used as-is when no scoring branch below applies.
        mcq_results = []
        status = "Needs Review"
        match_percentage = 0

        # Process based on question type
        if final_question_type == "mcq":
            correct = extract_correct_mcq_from_prompt(prompt)
            chosen = extract_mcq_choice(student_text)

            student_answers_by_qid = extract_mcq_answers_with_qid(student_text)

            if student_answers_by_qid:
                # Multiple MCQ: build an answer key keyed by the question id
                # with its "Q" prefix removed. setdefault keeps the first
                # entry when the prompt repeats a question number.
                answer_key = {}
                for pq in parse_questions_from_prompt(prompt):
                    if pq.get('type') == 'mcq' and pq.get('correct_answer'):
                        pq_num = pq.get('qid', '').replace('Q', '').strip()
                        answer_key.setdefault(pq_num, pq.get('correct_answer'))

                for qid, student_ans in student_answers_by_qid.items():
                    expected = answer_key.get(qid.replace('Q', '').strip())
                    # Answers with no matching key entry count as incorrect.
                    mcq_results.append({
                        'qid': qid,
                        'chosen': student_ans,
                        'correct_answer': expected,
                        'correct': expected is not None and student_ans.lower() == expected.lower(),
                    })

                correct_count = sum(1 for r in mcq_results if r.get('correct'))
                mcq_credit = mcq_partial_credit(student_level)
                match_percentage = int((correct_count * mcq_credit["credit_per_question"]) / max(1, len(mcq_results)))
                status = "Verified" if match_percentage >= mcq_credit["passing_threshold"] else "Needs Review"
            elif correct and chosen:
                # Single MCQ: one chosen option compared with the prompt's answer.
                is_correct = (chosen == correct)
                mcq_credit = mcq_partial_credit(student_level)
                match_percentage = mcq_credit["credit_per_question"] if is_correct else 0
                status = "Verified" if match_percentage >= mcq_credit["passing_threshold"] else "Needs Review"
                mcq_results = [{'qid': 'Q1', 'correct': is_correct, 'chosen': chosen, 'correct_answer': correct}]

        # For narrative, calculate score using AI
        if final_question_type == "narrative" and gemini_client:
            # Generate AI reference answer
            ai_prompt = (
                f"STUDENT_LEVEL: {student_level}\n"
                f"QUESTION:\n{prompt.strip()}\n\n"
                'Return ONLY valid JSON with keys: {"ai_reference_answer": string, "key_points": [string, ...]}.'
            )

            response_text = generate_gemini_response(
                prompt=ai_prompt,
                system_prompt="Generate a correct reference answer for homework evaluation. Keep it aligned with the student level. Output strict JSON only.",
                max_tokens=650,
                temperature=0.3,
            )

            if response_text:
                try:
                    # The model may wrap the JSON in extra text; take the
                    # outermost {...} span when one is present.
                    m = re.search(r'\{.*\}', response_text, flags=re.S)
                    payload = json.loads(m.group(0) if m else response_text)

                    ai_reference_answer = (payload.get("ai_reference_answer") or "").strip()
                    # Same normalisation as homework_validate: stringify and
                    # drop blank key points.
                    key_points = payload.get("key_points") or []
                    key_points = [str(x).strip() for x in key_points if str(x).strip()]

                    if ai_reference_answer:
                        policy = level_policy(student_level)
                        sim = cosine_sim(student_text, ai_reference_answer)
                        covered, missing, coverage = keypoint_coverage(student_text, key_points, kp_threshold=policy["kp_thr"])

                        final = policy["w_sim"] * sim + policy["w_cov"] * coverage
                        match_percentage = int(round(final * 100))

                        if match_percentage >= policy["verified"]:
                            status = "Verified"
                        elif match_percentage >= policy["partial"]:
                            status = "Partial"
                        else:
                            status = "Needs Review"
                    else:
                        # Nothing to compare against; keep the defaults.
                        print("[WARN] AI returned empty reference answer; narrative score not calculated")
                except Exception as e:
                    # Best effort: a bad AI payload leaves the default score/status.
                    print(f"[WARN] Failed to calculate narrative score: {e}")

        # Create annotated PDF
        annotated_pdf = create_annotated_pdf(
            original_pdf_bytes=original_content,
            mcq_results=mcq_results,
            match_percentage=match_percentage,
            status=status,
            student_level=student_level
        )

        # Return as file download
        return Response(
            content=annotated_pdf,
            media_type="application/pdf",
            headers={"Content-Disposition": f"inline; filename=annotated_homework_{homework_id}_{student_id}.pdf"}
        )

    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        print(f"[ERROR] Failed to generate annotated PDF: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to generate PDF: {str(e)}")
|
| 1284 |
+
|
| 1285 |
+
|
| 1286 |
@app.post("/homework/validate")
|
| 1287 |
async def homework_validate(
|
| 1288 |
student_id: int = Form(...),
|
|
|
|
| 1317 |
# 2) Extract student text
|
| 1318 |
student_info = await extract_text_from_upload(student_file)
|
| 1319 |
student_text = (student_info.get("text") or "").strip()
|
| 1320 |
+
|
| 1321 |
+
# Keep a copy of the original file bytes for PDF annotation
|
| 1322 |
+
# Reset file cursor and read again
|
| 1323 |
+
await student_file.seek(0)
|
| 1324 |
+
original_file_bytes = await student_file.read()
|
| 1325 |
+
await student_file.seek(0) # Reset for any further processing
|
| 1326 |
+
|
| 1327 |
+
# Check if it's a PDF
|
| 1328 |
+
is_pdf_submission = student_info.get("kind") == "pdf"
|
| 1329 |
+
|
| 1330 |
+
# Initialize annotated PDF filename
|
| 1331 |
+
annotated_pdf_filename = None
|
| 1332 |
+
|
| 1333 |
+
# Function to save annotated PDF
|
| 1334 |
+
def save_annotated_pdf(pdf_bytes, hw_id, stud_id, results, score, stat, lvl):
    """Render the annotated PDF for a submission and store it under ``outputs/``.

    Best-effort helper: any failure while rendering or writing is logged
    with a ``[WARN]`` line and reported to the caller as ``None``.

    Args:
        pdf_bytes: Raw bytes of the uploaded PDF.
        hw_id: Homework identifier, used in the output file name.
        stud_id: Student identifier, used in the output file name.
        results: Passed to ``create_annotated_pdf`` as ``mcq_results``.
        score: Passed to ``create_annotated_pdf`` as ``match_percentage``.
        stat: Passed to ``create_annotated_pdf`` as ``status``.
        lvl: Passed to ``create_annotated_pdf`` as ``student_level``.

    Returns:
        The bare file name (``marked_<hw_id>_<stud_id>.pdf``) on success,
        otherwise ``None``.
    """
    # Empty or very short payloads (< 100 bytes) are skipped outright.
    if not pdf_bytes or len(pdf_bytes) < 100:
        return None

    try:
        # The output directory lives next to this module and is created on demand.
        target_dir = os.path.join(os.path.dirname(__file__), "outputs")
        os.makedirs(target_dir, exist_ok=True)
        pdf_name = f"marked_{hw_id}_{stud_id}.pdf"
        target_path = os.path.join(target_dir, pdf_name)

        rendered = create_annotated_pdf(
            original_pdf_bytes=pdf_bytes,
            mcq_results=results,
            match_percentage=score,
            status=stat,
            student_level=lvl,
        )

        with open(target_path, "wb") as out_file:
            out_file.write(rendered)
    except Exception as e:
        print(f"[WARN] Failed to save annotated PDF: {e}")
        return None

    return pdf_name
|
| 1357 |
|
| 1358 |
MIN_WORDS = 3 if final_question_type == "mcq" else 8
|
| 1359 |
if len(student_text.split()) < MIN_WORDS:
|
| 1360 |
+
# Save annotated PDF even for unreadable (with status shown)
|
| 1361 |
+
if is_pdf_submission and original_file_bytes:
|
| 1362 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1363 |
+
original_file_bytes, homework_id, student_id, [], 0, "Unreadable", student_level
|
| 1364 |
+
)
|
| 1365 |
return {
|
| 1366 |
"student_id": student_id,
|
| 1367 |
"homework_id": homework_id,
|
|
|
|
| 1375 |
"rule_based_remark": "Answer text could not be read clearly. Please upload a clearer file.",
|
| 1376 |
"student_extracted_text": student_text,
|
| 1377 |
"llm_used": False,
|
| 1378 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1379 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1380 |
}
|
| 1381 |
|
| 1382 |
if student_info.get("needs_ocr") and not student_text:
|
| 1383 |
+
# Save annotated PDF even for unreadable (with status shown)
|
| 1384 |
+
if is_pdf_submission and original_file_bytes:
|
| 1385 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1386 |
+
original_file_bytes, homework_id, student_id, [], 0, "Unreadable", student_level
|
| 1387 |
+
)
|
| 1388 |
return {
|
| 1389 |
"student_id": student_id,
|
| 1390 |
"homework_id": homework_id,
|
|
|
|
| 1398 |
"rule_based_remark": "This PDF looks scanned. OCR is required (install pdf2image + poppler) or upload a clearer file.",
|
| 1399 |
"student_extracted_text": student_text,
|
| 1400 |
"llm_used": False,
|
| 1401 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1402 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1403 |
}
|
| 1404 |
|
|
|
|
| 1521 |
else:
|
| 1522 |
status = "Needs Review"
|
| 1523 |
|
| 1524 |
+
# Save annotated PDF
|
| 1525 |
+
if is_pdf_submission and original_file_bytes and mcq_results:
|
| 1526 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1527 |
+
original_file_bytes, homework_id, student_id, mcq_results, final_score, status, student_level
|
| 1528 |
+
)
|
| 1529 |
+
|
| 1530 |
return {
|
| 1531 |
"student_id": student_id,
|
| 1532 |
"homework_id": homework_id,
|
|
|
|
| 1542 |
"student_extracted_text": student_text,
|
| 1543 |
"mcq_results": mcq_results,
|
| 1544 |
"narrative_results": narrative_results,
|
| 1545 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1546 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1547 |
"debug": {
|
| 1548 |
"erp_row_fields": list(erp_row.keys()) if erp_row else [],
|
|
|
|
| 1561 |
|
| 1562 |
# Smart fallback: if answer looks like narrative (not MCQ), treat as narrative instead
|
| 1563 |
# This handles cases where question type is MCQ but student answered in narrative format
|
| 1564 |
+
# BUT if the answer contains Option A/B/C/D, treat as MCQ
|
| 1565 |
+
answer_has_mcq_option = bool(re.search(r"\b(option|answer|ans)\s*[:\-]?\s*[a-d]\b", _norm(student_text)))
|
| 1566 |
+
|
| 1567 |
answer_looks_like_narrative = (
|
| 1568 |
len(student_text.split()) > 15 and # More than 15 words
|
| 1569 |
not has_multiple_mcq and # Not multiple numbered MCQ answers
|
| 1570 |
+
not answer_has_mcq_option # No explicit option markers
|
| 1571 |
)
|
| 1572 |
|
| 1573 |
# If answer looks like narrative, redirect to narrative processing
|
|
|
|
| 1622 |
passing_threshold = mcq_credit["passing_threshold"]
|
| 1623 |
status = "Verified" if match_percentage >= passing_threshold else "Needs Review"
|
| 1624 |
|
| 1625 |
+
# Save annotated PDF
|
| 1626 |
+
if is_pdf_submission and original_file_bytes:
|
| 1627 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1628 |
+
original_file_bytes, homework_id, student_id, mcq_results, match_percentage, status, student_level
|
| 1629 |
+
)
|
| 1630 |
+
|
| 1631 |
return {
|
| 1632 |
"student_id": student_id,
|
| 1633 |
"homework_id": homework_id,
|
|
|
|
| 1641 |
"rule_based_remark": f"Multiple MCQ: {correct_count}/{total_count} correct. Score: {match_percentage}% (Level: {student_level})",
|
| 1642 |
"student_extracted_text": student_text,
|
| 1643 |
"llm_used": False,
|
| 1644 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1645 |
"debug": {"student_answers": student_answers_by_qid, "mcq_results": mcq_results},
|
| 1646 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1647 |
}
|
| 1648 |
else:
|
| 1649 |
# No correct answers in prompt - return needs review with extracted answers
|
| 1650 |
+
# Save annotated PDF
|
| 1651 |
+
if is_pdf_submission and original_file_bytes:
|
| 1652 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1653 |
+
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1654 |
+
)
|
| 1655 |
return {
|
| 1656 |
"student_id": student_id,
|
| 1657 |
"homework_id": homework_id,
|
|
|
|
| 1665 |
"rule_based_remark": f"Found {len(student_answers_by_qid)} MCQ answers but no correct answers in prompt. Include 'Correct: B' for each question.",
|
| 1666 |
"student_extracted_text": student_text,
|
| 1667 |
"llm_used": False,
|
| 1668 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1669 |
"debug": {"student_answers": student_answers_by_qid, "correct_answers_in_prompt": False},
|
| 1670 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1671 |
}
|
|
|
|
| 1673 |
if redirect_to_narrative:
|
| 1674 |
pass # Will continue to narrative handling
|
| 1675 |
elif not correct:
|
| 1676 |
+
# Save annotated PDF
|
| 1677 |
+
if is_pdf_submission and original_file_bytes:
|
| 1678 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1679 |
+
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1680 |
+
)
|
| 1681 |
return {
|
| 1682 |
"student_id": student_id,
|
| 1683 |
"homework_id": homework_id,
|
|
|
|
| 1691 |
"rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
|
| 1692 |
"student_extracted_text": student_text,
|
| 1693 |
"llm_used": False,
|
| 1694 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1695 |
"debug": {"correct": correct, "chosen": chosen},
|
| 1696 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1697 |
}
|
| 1698 |
elif not chosen:
|
| 1699 |
+
# Save annotated PDF
|
| 1700 |
+
if is_pdf_submission and original_file_bytes:
|
| 1701 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1702 |
+
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1703 |
+
)
|
| 1704 |
return {
|
| 1705 |
"student_id": student_id,
|
| 1706 |
"homework_id": homework_id,
|
|
|
|
| 1714 |
"rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
|
| 1715 |
"student_extracted_text": student_text,
|
| 1716 |
"llm_used": False,
|
| 1717 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1718 |
"debug": {"correct": correct, "chosen": chosen},
|
| 1719 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1720 |
}
|
|
|
|
| 1734 |
passing_threshold = mcq_credit["passing_threshold"]
|
| 1735 |
status = "Verified" if match_percentage >= passing_threshold else "Needs Review"
|
| 1736 |
|
| 1737 |
+
# Save annotated PDF
|
| 1738 |
+
mcq_results_single = [{'qid': 'Q1', 'correct': is_correct, 'chosen': chosen, 'correct_answer': correct}]
|
| 1739 |
+
if is_pdf_submission and original_file_bytes:
|
| 1740 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1741 |
+
original_file_bytes, homework_id, student_id, mcq_results_single, match_percentage, status, student_level
|
| 1742 |
+
)
|
| 1743 |
+
|
| 1744 |
return {
|
| 1745 |
"student_id": student_id,
|
| 1746 |
"homework_id": homework_id,
|
|
|
|
| 1754 |
"rule_based_remark": f"{'Correct' if is_correct else 'Incorrect'}. Score: {match_percentage}% (Level: {student_level}, Credit per Q: {credit_per_q}%)",
|
| 1755 |
"student_extracted_text": student_text,
|
| 1756 |
"llm_used": False,
|
| 1757 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1758 |
"debug": {"correct": correct, "chosen": chosen, "level": student_level, "credit_per_q": credit_per_q},
|
| 1759 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1760 |
}
|
| 1761 |
|
| 1762 |
|
| 1763 |
if gemini_client is None:
|
| 1764 |
+
# Save annotated PDF
|
| 1765 |
+
if is_pdf_submission and original_file_bytes:
|
| 1766 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1767 |
+
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1768 |
+
)
|
| 1769 |
return {
|
| 1770 |
"student_id": student_id,
|
| 1771 |
"homework_id": homework_id,
|
|
|
|
| 1780 |
"llm_used": False,
|
| 1781 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 1782 |
"student_extracted_text": student_text,
|
| 1783 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1784 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1785 |
}
|
| 1786 |
|
|
|
|
| 1801 |
)
|
| 1802 |
|
| 1803 |
if not response_text:
|
| 1804 |
+
# Save annotated PDF
|
| 1805 |
+
if is_pdf_submission and original_file_bytes:
|
| 1806 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1807 |
+
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1808 |
+
)
|
| 1809 |
return {
|
| 1810 |
"student_id": student_id,
|
| 1811 |
"homework_id": homework_id,
|
|
|
|
| 1820 |
"llm_used": False,
|
| 1821 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 1822 |
"student_extracted_text": student_text,
|
| 1823 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1824 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1825 |
}
|
| 1826 |
|
|
|
|
| 1828 |
m = re.search(r"\{.*\}", response_text, flags=re.S)
|
| 1829 |
payload = json.loads(m.group(0) if m else response_text)
|
| 1830 |
except Exception as e:
|
| 1831 |
+
# Save annotated PDF
|
| 1832 |
+
if is_pdf_submission and original_file_bytes:
|
| 1833 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1834 |
+
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1835 |
+
)
|
| 1836 |
return {
|
| 1837 |
"student_id": student_id,
|
| 1838 |
"homework_id": homework_id,
|
|
|
|
| 1847 |
"llm_used": False,
|
| 1848 |
"llm_error": {"ok": False, "error_type": "GEMINI_BAD_JSON", "message": str(e), "raw": response_text[:800]},
|
| 1849 |
"student_extracted_text": student_text,
|
| 1850 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1851 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1852 |
}
|
| 1853 |
|
|
|
|
| 1858 |
key_points = [str(x).strip() for x in key_points if str(x).strip()]
|
| 1859 |
|
| 1860 |
if not ai_reference_answer:
|
| 1861 |
+
# Save annotated PDF
|
| 1862 |
+
if is_pdf_submission and original_file_bytes:
|
| 1863 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1864 |
+
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1865 |
+
)
|
| 1866 |
return {
|
| 1867 |
"student_id": student_id,
|
| 1868 |
"homework_id": homework_id,
|
|
|
|
| 1876 |
"rule_based_remark": "AI returned empty reference answer.",
|
| 1877 |
"llm_used": True,
|
| 1878 |
"student_extracted_text": student_text,
|
| 1879 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1880 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1881 |
}
|
| 1882 |
|
|
|
|
| 1926 |
else:
|
| 1927 |
rule_based_remark = "Homework does not match the expected answer enough. Please review the topic and resubmit with clearer, complete points."
|
| 1928 |
|
| 1929 |
+
# Save annotated PDF for narrative (with status but no MCQ marks)
|
| 1930 |
+
if is_pdf_submission and original_file_bytes:
|
| 1931 |
+
annotated_pdf_filename = save_annotated_pdf(
|
| 1932 |
+
original_file_bytes, homework_id, student_id, [], match_pct, status, student_level
|
| 1933 |
+
)
|
| 1934 |
+
|
| 1935 |
return {
|
| 1936 |
"student_id": student_id,
|
| 1937 |
"homework_id": homework_id,
|
|
|
|
| 1951 |
"key_points": key_points,
|
| 1952 |
"key_points_covered": covered,
|
| 1953 |
"key_points_missing": missing,
|
| 1954 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1955 |
"debug": {
|
| 1956 |
"similarity": sim,
|
| 1957 |
"coverage": coverage,
|
requirements.txt
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
|
| 2 |
uvicorn
|
| 3 |
pytesseract
|
| 4 |
pillow
|
|
@@ -19,5 +19,6 @@ easyocr
|
|
| 19 |
python-docx
|
| 20 |
pypdf
|
| 21 |
pdf2image
|
|
|
|
| 22 |
python-dotenv
|
| 23 |
google-genai
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
uvicorn
|
| 3 |
pytesseract
|
| 4 |
pillow
|
|
|
|
| 19 |
python-docx
|
| 20 |
pypdf
|
| 21 |
pdf2image
|
| 22 |
+
reportlab
|
| 23 |
python-dotenv
|
| 24 |
google-genai
|