trial base pdf
Browse files- Dockerfile +3 -0
- app.py +305 -74
- requirements.txt +1 -8
Dockerfile
CHANGED
|
@@ -14,6 +14,9 @@ RUN pip install --no-cache-dir -r /app/requirements.txt
|
|
| 14 |
|
| 15 |
COPY . /app
|
| 16 |
|
|
|
|
|
|
|
|
|
|
| 17 |
# Hugging Face Spaces expects 7860 by default
|
| 18 |
EXPOSE 7860
|
| 19 |
|
|
|
|
| 14 |
|
| 15 |
COPY . /app
|
| 16 |
|
| 17 |
+
# Set HF_SPACE environment variable for Hugging Face Spaces
|
| 18 |
+
ENV HF_SPACE=moncey10-homework-validation-system.hf.space
|
| 19 |
+
|
| 20 |
# Hugging Face Spaces expects 7860 by default
|
| 21 |
EXPOSE 7860
|
| 22 |
|
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
# app.py
|
| 3 |
import os
|
| 4 |
import io
|
|
@@ -11,7 +10,11 @@ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
|
| 11 |
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
from PIL import Image, ImageOps, ImageFilter
|
| 13 |
import pytesseract
|
|
|
|
| 14 |
|
|
|
|
|
|
|
|
|
|
| 15 |
from dotenv import load_dotenv
|
| 16 |
load_dotenv()
|
| 17 |
|
|
@@ -64,12 +67,13 @@ except Exception as e:
|
|
| 64 |
|
| 65 |
|
| 66 |
app = FastAPI()
|
|
|
|
| 67 |
|
| 68 |
-
import os
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
| 73 |
|
| 74 |
# Create outputs directory if it doesn't exist
|
| 75 |
outputs_dir = os.path.join(os.path.dirname(__file__), "outputs")
|
|
@@ -83,6 +87,13 @@ async def get_output_file(filename: str):
|
|
| 83 |
return FileResponse(filepath)
|
| 84 |
raise HTTPException(status_code=404, detail="File not found")
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
@app.get("/debug/env")
|
| 87 |
def debug_env():
|
| 88 |
return {
|
|
@@ -112,6 +123,66 @@ STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student
|
|
| 112 |
ERP_TOKEN = os.getenv("ERP_TOKEN", "")
|
| 113 |
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
# API Key Rotation - Support multiple API keys for higher limits
|
| 117 |
GOOGLE_API_KEYS = []
|
|
@@ -920,7 +991,8 @@ def create_annotated_pdf(
|
|
| 920 |
mcq_results: List[Dict[str, Any]] = None,
|
| 921 |
match_percentage: int = 0,
|
| 922 |
status: str = "Needs Review",
|
| 923 |
-
student_level: str = "Medium"
|
|
|
|
| 924 |
) -> bytes:
|
| 925 |
"""
|
| 926 |
Create an annotated PDF with tickmarks showing correct/incorrect answers.
|
|
@@ -931,6 +1003,7 @@ def create_annotated_pdf(
|
|
| 931 |
match_percentage: Overall match percentage
|
| 932 |
status: Validation status
|
| 933 |
student_level: Student level (Easy/Medium/Hard)
|
|
|
|
| 934 |
|
| 935 |
Returns:
|
| 936 |
Annotated PDF as bytes
|
|
@@ -978,23 +1051,67 @@ def create_annotated_pdf(
|
|
| 978 |
y_pos = y_start - ((i - start_idx) * y_spacing)
|
| 979 |
x_pos = page_width - 60
|
| 980 |
|
| 981 |
-
# Draw
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
| 985 |
-
|
| 986 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 987 |
c.circle(x_pos, y_pos, 12, fill=0)
|
| 988 |
-
|
| 989 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 990 |
else:
|
| 991 |
-
#
|
| 992 |
-
|
| 993 |
-
|
| 994 |
-
|
| 995 |
-
|
| 996 |
-
|
| 997 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 998 |
|
| 999 |
# Draw question label
|
| 1000 |
c.setStrokeColor(colors.black)
|
|
@@ -1008,15 +1125,27 @@ def create_annotated_pdf(
|
|
| 1008 |
c.setFillColor(colors.lightgrey)
|
| 1009 |
c.rect(0, page_height - 60, page_width, 60, fill=1, stroke=0)
|
| 1010 |
|
| 1011 |
-
# Draw status text - LARGER FONT
|
| 1012 |
c.setFillColor(colors.black)
|
| 1013 |
c.setFont("Helvetica-Bold", 20)
|
| 1014 |
|
| 1015 |
-
|
| 1016 |
-
|
| 1017 |
-
|
| 1018 |
-
|
| 1019 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1020 |
|
| 1021 |
c.setFillColor(colors.black)
|
| 1022 |
c.setFont("Helvetica-Bold", 18)
|
|
@@ -1026,9 +1155,23 @@ def create_annotated_pdf(
|
|
| 1026 |
# Draw MCQ summary
|
| 1027 |
if mcq_results:
|
| 1028 |
correct_count = sum(1 for r in mcq_results if r.get('correct'))
|
|
|
|
| 1029 |
total_count = len(mcq_results)
|
| 1030 |
c.setFont("Helvetica-Bold", 14)
|
| 1031 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1032 |
|
| 1033 |
c.save()
|
| 1034 |
packet.seek(0)
|
|
@@ -1132,7 +1275,7 @@ async def get_annotated_pdf_url(
|
|
| 1132 |
Get the URL for the annotated PDF.
|
| 1133 |
Returns JSON with the URL that can be used in your frontend.
|
| 1134 |
"""
|
| 1135 |
-
base_url =
|
| 1136 |
return {
|
| 1137 |
"homework_id": homework_id,
|
| 1138 |
"student_id": student_id,
|
|
@@ -1214,12 +1357,26 @@ async def get_annotated_pdf(
|
|
| 1214 |
'qid': qid,
|
| 1215 |
'chosen': student_ans,
|
| 1216 |
'correct_answer': pq.get('correct_answer'),
|
| 1217 |
-
'correct': is_correct
|
|
|
|
| 1218 |
})
|
| 1219 |
matched = True
|
| 1220 |
break
|
| 1221 |
if not matched:
|
| 1222 |
-
mcq_results.append({'qid': qid, 'chosen': student_ans, 'correct_answer': None, 'correct': False})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1223 |
|
| 1224 |
if mcq_results:
|
| 1225 |
correct_count = sum(1 for r in mcq_results if r.get('correct'))
|
|
@@ -1271,6 +1428,18 @@ async def get_annotated_pdf(
|
|
| 1271 |
status = "Partial"
|
| 1272 |
else:
|
| 1273 |
status = "Needs Review"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1274 |
except Exception as e:
|
| 1275 |
print(f"[WARN] Failed to calculate narrative score: {e}")
|
| 1276 |
|
|
@@ -1280,7 +1449,8 @@ async def get_annotated_pdf(
|
|
| 1280 |
mcq_results=mcq_results,
|
| 1281 |
match_percentage=match_percentage,
|
| 1282 |
status=status,
|
| 1283 |
-
student_level=student_level
|
|
|
|
| 1284 |
)
|
| 1285 |
|
| 1286 |
# Return as file download
|
|
@@ -1341,15 +1511,17 @@ async def homework_validate(
|
|
| 1341 |
|
| 1342 |
# Initialize annotated PDF filename
|
| 1343 |
annotated_pdf_filename = None
|
|
|
|
| 1344 |
|
| 1345 |
-
# Function to save annotated PDF
|
| 1346 |
-
def save_annotated_pdf(pdf_bytes, hw_id, stud_id, results, score, stat, lvl):
|
| 1347 |
if not pdf_bytes or len(pdf_bytes) < 100:
|
| 1348 |
-
return None
|
| 1349 |
try:
|
| 1350 |
outputs_dir = os.path.join(os.path.dirname(__file__), "outputs")
|
| 1351 |
os.makedirs(outputs_dir, exist_ok=True)
|
| 1352 |
-
|
|
|
|
| 1353 |
filepath = os.path.join(outputs_dir, filename)
|
| 1354 |
|
| 1355 |
annotated = create_annotated_pdf(
|
|
@@ -1357,22 +1529,25 @@ async def homework_validate(
|
|
| 1357 |
mcq_results=results,
|
| 1358 |
match_percentage=score,
|
| 1359 |
status=stat,
|
| 1360 |
-
student_level=lvl
|
|
|
|
| 1361 |
)
|
| 1362 |
|
| 1363 |
with open(filepath, "wb") as f:
|
| 1364 |
f.write(annotated)
|
| 1365 |
-
return filename
|
| 1366 |
except Exception as e:
|
| 1367 |
print(f"[WARN] Failed to save annotated PDF: {e}")
|
| 1368 |
-
return None
|
| 1369 |
|
| 1370 |
MIN_WORDS = 3 if final_question_type == "mcq" else 8
|
| 1371 |
if len(student_text.split()) < MIN_WORDS:
|
| 1372 |
# Save annotated PDF even for unreadable (with status shown)
|
| 1373 |
if is_pdf_submission and original_file_bytes:
|
| 1374 |
-
|
| 1375 |
-
|
|
|
|
|
|
|
| 1376 |
)
|
| 1377 |
return {
|
| 1378 |
"student_id": student_id,
|
|
@@ -1387,6 +1562,7 @@ async def homework_validate(
|
|
| 1387 |
"rule_based_remark": "Answer text could not be read clearly. Please upload a clearer file.",
|
| 1388 |
"student_extracted_text": student_text,
|
| 1389 |
"llm_used": False,
|
|
|
|
| 1390 |
"annotated_pdf": annotated_pdf_filename,
|
| 1391 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1392 |
}
|
|
@@ -1394,8 +1570,10 @@ async def homework_validate(
|
|
| 1394 |
if student_info.get("needs_ocr") and not student_text:
|
| 1395 |
# Save annotated PDF even for unreadable (with status shown)
|
| 1396 |
if is_pdf_submission and original_file_bytes:
|
| 1397 |
-
|
| 1398 |
-
|
|
|
|
|
|
|
| 1399 |
)
|
| 1400 |
return {
|
| 1401 |
"student_id": student_id,
|
|
@@ -1410,6 +1588,7 @@ async def homework_validate(
|
|
| 1410 |
"rule_based_remark": "This PDF looks scanned. OCR is required (install pdf2image + poppler) or upload a clearer file.",
|
| 1411 |
"student_extracted_text": student_text,
|
| 1412 |
"llm_used": False,
|
|
|
|
| 1413 |
"annotated_pdf": annotated_pdf_filename,
|
| 1414 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1415 |
}
|
|
@@ -1444,7 +1623,17 @@ async def homework_validate(
|
|
| 1444 |
'qid': qid,
|
| 1445 |
'correct': is_correct,
|
| 1446 |
'chosen': chosen,
|
| 1447 |
-
'correct_answer': correct
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1448 |
})
|
| 1449 |
|
| 1450 |
# For narrative questions, use AI to generate reference
|
|
@@ -1535,7 +1724,7 @@ async def homework_validate(
|
|
| 1535 |
|
| 1536 |
# Save annotated PDF
|
| 1537 |
if is_pdf_submission and original_file_bytes and mcq_results:
|
| 1538 |
-
annotated_pdf_filename = save_annotated_pdf(
|
| 1539 |
original_file_bytes, homework_id, student_id, mcq_results, final_score, status, student_level
|
| 1540 |
)
|
| 1541 |
|
|
@@ -1554,6 +1743,7 @@ async def homework_validate(
|
|
| 1554 |
"student_extracted_text": student_text,
|
| 1555 |
"mcq_results": mcq_results,
|
| 1556 |
"narrative_results": narrative_results,
|
|
|
|
| 1557 |
"annotated_pdf": annotated_pdf_filename,
|
| 1558 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1559 |
"debug": {
|
|
@@ -1615,7 +1805,8 @@ async def homework_validate(
|
|
| 1615 |
'qid': qid,
|
| 1616 |
'chosen': student_ans,
|
| 1617 |
'correct_answer': pq.get('correct_answer'),
|
| 1618 |
-
'correct': is_correct
|
|
|
|
| 1619 |
})
|
| 1620 |
matched = True
|
| 1621 |
break
|
|
@@ -1624,7 +1815,21 @@ async def homework_validate(
|
|
| 1624 |
'qid': qid,
|
| 1625 |
'chosen': student_ans,
|
| 1626 |
'correct_answer': None,
|
| 1627 |
-
'correct': False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1628 |
})
|
| 1629 |
|
| 1630 |
# Calculate score based on level
|
|
@@ -1636,7 +1841,7 @@ async def homework_validate(
|
|
| 1636 |
|
| 1637 |
# Save annotated PDF
|
| 1638 |
if is_pdf_submission and original_file_bytes:
|
| 1639 |
-
annotated_pdf_filename = save_annotated_pdf(
|
| 1640 |
original_file_bytes, homework_id, student_id, mcq_results, match_percentage, status, student_level
|
| 1641 |
)
|
| 1642 |
|
|
@@ -1653,16 +1858,18 @@ async def homework_validate(
|
|
| 1653 |
"rule_based_remark": f"Multiple MCQ: {correct_count}/{total_count} correct. Score: {match_percentage}% (Level: {student_level})",
|
| 1654 |
"student_extracted_text": student_text,
|
| 1655 |
"llm_used": False,
|
| 1656 |
-
"
|
|
|
|
| 1657 |
"debug": {"student_answers": student_answers_by_qid, "mcq_results": mcq_results},
|
| 1658 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1659 |
}
|
| 1660 |
else:
|
| 1661 |
# No correct answers in prompt - return needs review with extracted answers
|
| 1662 |
-
# Save annotated PDF
|
| 1663 |
if is_pdf_submission and original_file_bytes:
|
| 1664 |
-
|
| 1665 |
-
|
|
|
|
| 1666 |
)
|
| 1667 |
return {
|
| 1668 |
"student_id": student_id,
|
|
@@ -1677,7 +1884,8 @@ async def homework_validate(
|
|
| 1677 |
"rule_based_remark": f"Found {len(student_answers_by_qid)} MCQ answers but no correct answers in prompt. Include 'Correct: B' for each question.",
|
| 1678 |
"student_extracted_text": student_text,
|
| 1679 |
"llm_used": False,
|
| 1680 |
-
"
|
|
|
|
| 1681 |
"debug": {"student_answers": student_answers_by_qid, "correct_answers_in_prompt": False},
|
| 1682 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1683 |
}
|
|
@@ -1685,10 +1893,11 @@ async def homework_validate(
|
|
| 1685 |
if redirect_to_narrative:
|
| 1686 |
pass # Will continue to narrative handling
|
| 1687 |
elif not correct:
|
| 1688 |
-
# Save annotated PDF
|
| 1689 |
if is_pdf_submission and original_file_bytes:
|
| 1690 |
-
|
| 1691 |
-
|
|
|
|
| 1692 |
)
|
| 1693 |
return {
|
| 1694 |
"student_id": student_id,
|
|
@@ -1703,15 +1912,17 @@ async def homework_validate(
|
|
| 1703 |
"rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
|
| 1704 |
"student_extracted_text": student_text,
|
| 1705 |
"llm_used": False,
|
| 1706 |
-
"
|
|
|
|
| 1707 |
"debug": {"correct": correct, "chosen": chosen},
|
| 1708 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1709 |
}
|
| 1710 |
elif not chosen:
|
| 1711 |
-
# Save annotated PDF
|
| 1712 |
if is_pdf_submission and original_file_bytes:
|
| 1713 |
-
|
| 1714 |
-
|
|
|
|
| 1715 |
)
|
| 1716 |
return {
|
| 1717 |
"student_id": student_id,
|
|
@@ -1726,7 +1937,8 @@ async def homework_validate(
|
|
| 1726 |
"rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
|
| 1727 |
"student_extracted_text": student_text,
|
| 1728 |
"llm_used": False,
|
| 1729 |
-
"
|
|
|
|
| 1730 |
"debug": {"correct": correct, "chosen": chosen},
|
| 1731 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1732 |
}
|
|
@@ -1749,7 +1961,7 @@ async def homework_validate(
|
|
| 1749 |
# Save annotated PDF
|
| 1750 |
mcq_results_single = [{'qid': 'Q1', 'correct': is_correct, 'chosen': chosen, 'correct_answer': correct}]
|
| 1751 |
if is_pdf_submission and original_file_bytes:
|
| 1752 |
-
annotated_pdf_filename = save_annotated_pdf(
|
| 1753 |
original_file_bytes, homework_id, student_id, mcq_results_single, match_percentage, status, student_level
|
| 1754 |
)
|
| 1755 |
|
|
@@ -1766,7 +1978,8 @@ async def homework_validate(
|
|
| 1766 |
"rule_based_remark": f"{'Correct' if is_correct else 'Incorrect'}. Score: {match_percentage}% (Level: {student_level}, Credit per Q: {credit_per_q}%)",
|
| 1767 |
"student_extracted_text": student_text,
|
| 1768 |
"llm_used": False,
|
| 1769 |
-
"
|
|
|
|
| 1770 |
"debug": {"correct": correct, "chosen": chosen, "level": student_level, "credit_per_q": credit_per_q},
|
| 1771 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1772 |
}
|
|
@@ -1775,7 +1988,7 @@ async def homework_validate(
|
|
| 1775 |
if gemini_client is None:
|
| 1776 |
# Save annotated PDF
|
| 1777 |
if is_pdf_submission and original_file_bytes:
|
| 1778 |
-
annotated_pdf_filename = save_annotated_pdf(
|
| 1779 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1780 |
)
|
| 1781 |
return {
|
|
@@ -1792,6 +2005,7 @@ async def homework_validate(
|
|
| 1792 |
"llm_used": False,
|
| 1793 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 1794 |
"student_extracted_text": student_text,
|
|
|
|
| 1795 |
"annotated_pdf": annotated_pdf_filename,
|
| 1796 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1797 |
}
|
|
@@ -1815,7 +2029,7 @@ async def homework_validate(
|
|
| 1815 |
if not response_text:
|
| 1816 |
# Save annotated PDF
|
| 1817 |
if is_pdf_submission and original_file_bytes:
|
| 1818 |
-
annotated_pdf_filename = save_annotated_pdf(
|
| 1819 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1820 |
)
|
| 1821 |
return {
|
|
@@ -1832,6 +2046,7 @@ async def homework_validate(
|
|
| 1832 |
"llm_used": False,
|
| 1833 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 1834 |
"student_extracted_text": student_text,
|
|
|
|
| 1835 |
"annotated_pdf": annotated_pdf_filename,
|
| 1836 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1837 |
}
|
|
@@ -1842,7 +2057,7 @@ async def homework_validate(
|
|
| 1842 |
except Exception as e:
|
| 1843 |
# Save annotated PDF
|
| 1844 |
if is_pdf_submission and original_file_bytes:
|
| 1845 |
-
annotated_pdf_filename = save_annotated_pdf(
|
| 1846 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1847 |
)
|
| 1848 |
return {
|
|
@@ -1859,6 +2074,7 @@ async def homework_validate(
|
|
| 1859 |
"llm_used": False,
|
| 1860 |
"llm_error": {"ok": False, "error_type": "GEMINI_BAD_JSON", "message": str(e), "raw": response_text[:800]},
|
| 1861 |
"student_extracted_text": student_text,
|
|
|
|
| 1862 |
"annotated_pdf": annotated_pdf_filename,
|
| 1863 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1864 |
}
|
|
@@ -1872,7 +2088,7 @@ async def homework_validate(
|
|
| 1872 |
if not ai_reference_answer:
|
| 1873 |
# Save annotated PDF
|
| 1874 |
if is_pdf_submission and original_file_bytes:
|
| 1875 |
-
annotated_pdf_filename = save_annotated_pdf(
|
| 1876 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1877 |
)
|
| 1878 |
return {
|
|
@@ -1888,6 +2104,7 @@ async def homework_validate(
|
|
| 1888 |
"rule_based_remark": "AI returned empty reference answer.",
|
| 1889 |
"llm_used": True,
|
| 1890 |
"student_extracted_text": student_text,
|
|
|
|
| 1891 |
"annotated_pdf": annotated_pdf_filename,
|
| 1892 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1893 |
}
|
|
@@ -1938,10 +2155,25 @@ async def homework_validate(
|
|
| 1938 |
else:
|
| 1939 |
rule_based_remark = "Homework does not match the expected answer enough. Please review the topic and resubmit with clearer, complete points."
|
| 1940 |
|
| 1941 |
-
# Save annotated PDF for narrative (with status
|
|
|
|
| 1942 |
if is_pdf_submission and original_file_bytes:
|
| 1943 |
-
|
| 1944 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1945 |
)
|
| 1946 |
|
| 1947 |
return {
|
|
@@ -1963,7 +2195,8 @@ async def homework_validate(
|
|
| 1963 |
"key_points": key_points,
|
| 1964 |
"key_points_covered": covered,
|
| 1965 |
"key_points_missing": missing,
|
| 1966 |
-
"
|
|
|
|
| 1967 |
"debug": {
|
| 1968 |
"similarity": sim,
|
| 1969 |
"coverage": coverage,
|
|
@@ -1972,6 +2205,4 @@ async def homework_validate(
|
|
| 1972 |
"erp_student_level_raw": erp_row.get("student_level") or erp_row.get("level") or erp_row.get("difficulty") or erp_row.get("difficulty_level"),
|
| 1973 |
},
|
| 1974 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1975 |
-
}
|
| 1976 |
-
|
| 1977 |
-
|
|
|
|
|
|
|
| 1 |
# app.py
|
| 2 |
import os
|
| 3 |
import io
|
|
|
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
from PIL import Image, ImageOps, ImageFilter
|
| 12 |
import pytesseract
|
| 13 |
+
import os
|
| 14 |
|
| 15 |
+
# Serve static files from outputs directory
|
| 16 |
+
from fastapi.staticfiles import StaticFiles
|
| 17 |
+
from fastapi.responses import FileResponse
|
| 18 |
from dotenv import load_dotenv
|
| 19 |
load_dotenv()
|
| 20 |
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
app = FastAPI()

# Resolve the outputs directory next to this file (the same location the
# annotated-PDF writer uses) instead of relying on the process working
# directory, and create it BEFORE mounting: StaticFiles validates that the
# directory exists when it is constructed, so mounting first fails at import
# time on a fresh deployment where "outputs" has not been created yet.
outputs_dir = os.path.join(os.path.dirname(__file__), "outputs")
os.makedirs(outputs_dir, exist_ok=True)

# Both URL prefixes serve the same directory; "/files" is kept so existing
# links keep working, "/outputs" is the prefix used when building PDF URLs.
app.mount("/files", StaticFiles(directory=outputs_dir), name="files")
app.mount("/outputs", StaticFiles(directory=outputs_dir), name="outputs")
|
| 77 |
|
| 78 |
# Create outputs directory if it doesn't exist
|
| 79 |
outputs_dir = os.path.join(os.path.dirname(__file__), "outputs")
|
|
|
|
| 87 |
return FileResponse(filepath)
|
| 88 |
raise HTTPException(status_code=404, detail="File not found")
|
| 89 |
|
| 90 |
+
@app.get("/storage/{filename}")
async def get_storsge_file(filename: str):
    """Serve a single file from the outputs directory under ``/storage``.

    Args:
        filename: Plain file name (no directory components) of a file that
            lives directly inside ``outputs_dir``.

    Returns:
        A ``FileResponse`` for the requested file.

    Raises:
        HTTPException: 404 when the name is not a plain file name or no
            regular file with that name exists in ``outputs_dir``.
    """
    # Only a bare file name is acceptable. Backslashes are normalised first so
    # a Windows-style "..\\x" is treated as a path and rejected as well.
    safe_name = os.path.basename(filename.replace("\\", "/"))
    if not safe_name or safe_name != filename or safe_name in (".", ".."):
        raise HTTPException(status_code=404, detail="File not found")

    filepath = os.path.join(outputs_dir, safe_name)
    # isfile rather than exists: a directory also "exists" but cannot be
    # returned as a file response.
    if os.path.isfile(filepath):
        return FileResponse(filepath)
    raise HTTPException(status_code=404, detail="File not found")
|
| 97 |
@app.get("/debug/env")
|
| 98 |
def debug_env():
|
| 99 |
return {
|
|
|
|
| 123 |
ERP_TOKEN = os.getenv("ERP_TOKEN", "")
|
| 124 |
|
| 125 |
|
| 126 |
+
def get_public_base_url() -> str:
    """
    Return the public base URL of this server, without a trailing slash.

    Priority:
    1. SPACE_HOST   — set automatically by Hugging Face Spaces (most reliable)
    2. HF_SPACE     — manual fallback env var for HF
    3. APP_BASE_URL — custom deployment domain
    4. localhost    — local dev only

    SPACE_HOST and HF_SPACE normally hold a bare host name and get an
    ``https://`` prefix. A value that already includes a scheme is returned
    as-is (minus any trailing slash) instead of being double-prefixed.
    """
    for env_name in ("SPACE_HOST", "HF_SPACE"):
        host = os.getenv(env_name, "").strip().rstrip("/")
        if not host:
            continue
        # Avoid producing "https://https://..." when the variable was set
        # with a full URL rather than a host name.
        if host.startswith(("http://", "https://")):
            return host
        return f"https://{host}"

    custom = os.getenv("APP_BASE_URL", "").strip()
    if custom:
        return custom.rstrip("/")

    return "http://127.0.0.1:7860"
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def build_pdf_url(filename: str) -> str:
    """Given a saved PDF filename, return its full public URL.

    Args:
        filename: Name of a file saved in the outputs directory. An empty or
            missing name yields an empty string.

    Returns:
        ``<public base URL>/outputs/<filename>`` with the file name
        percent-encoded, or ``""`` when no filename is given.
    """
    from urllib.parse import quote

    if not filename:
        return ""
    # The file name embeds caller-supplied ids; encode it so spaces, '#', '?'
    # and similar characters cannot break or truncate the URL. Names made of
    # letters, digits, '_', '.', '-' and '~' are left unchanged.
    return f"{get_public_base_url()}/outputs/{quote(filename, safe='')}"
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def make_question_marks(mcq_results: list) -> list:
    """
    Translate internal MCQ result dicts into the per-question summary the
    frontend uses to render ✓ ✗ ○ beside each question number.

    A falsy ``mcq_results`` (``None`` or empty) produces an empty list.

    Each returned item has the form:
    {
        "qid": "Q1",
        "mark": "correct" | "wrong" | "unattempted",
        "student_answer": "A",   # the 'chosen' value ('' if the key is absent)
        "correct_answer": "B"    # the 'correct_answer' value (None if absent)
    }
    """
    def _classify(entry):
        # 'unattempted' wins over everything; otherwise only a literal True
        # in 'correct' counts as correct, anything else is reported as wrong.
        if entry.get('unattempted'):
            return "unattempted"
        return "correct" if entry.get('correct') is True else "wrong"

    return [
        {
            "qid": entry.get('qid', ''),
            "mark": _classify(entry),
            "student_answer": entry.get('chosen', ''),
            "correct_answer": entry.get('correct_answer'),
        }
        for entry in (mcq_results or [])
    ]
|
| 185 |
+
|
| 186 |
|
| 187 |
# API Key Rotation - Support multiple API keys for higher limits
|
| 188 |
GOOGLE_API_KEYS = []
|
|
|
|
| 991 |
mcq_results: List[Dict[str, Any]] = None,
|
| 992 |
match_percentage: int = 0,
|
| 993 |
status: str = "Needs Review",
|
| 994 |
+
student_level: str = "Medium",
|
| 995 |
+
question_type: str = "mcq"
|
| 996 |
) -> bytes:
|
| 997 |
"""
|
| 998 |
Create an annotated PDF with tickmarks showing correct/incorrect answers.
|
|
|
|
| 1003 |
match_percentage: Overall match percentage
|
| 1004 |
status: Validation status
|
| 1005 |
student_level: Student level (Easy/Medium/Hard)
|
| 1006 |
+
question_type: Type of question ('mcq' or 'narrative')
|
| 1007 |
|
| 1008 |
Returns:
|
| 1009 |
Annotated PDF as bytes
|
|
|
|
| 1051 |
y_pos = y_start - ((i - start_idx) * y_spacing)
|
| 1052 |
x_pos = page_width - 60
|
| 1053 |
|
| 1054 |
+
# Draw marks based on question type and correctness
|
| 1055 |
+
# Three states:
|
| 1056 |
+
# unattempted=True → plain CIRCLE (orange) — question was skipped
|
| 1057 |
+
# correct=True → TICK ✓ (green) — answered correctly
|
| 1058 |
+
# correct=False → CROSS ✗ (red) — answered incorrectly
|
| 1059 |
+
is_unattempted = result.get('unattempted', False)
|
| 1060 |
+
|
| 1061 |
+
if is_unattempted:
|
| 1062 |
+
# Plain circle — unattempted question
|
| 1063 |
+
c.setStrokeColor(colors.Color(1.0, 0.55, 0.0)) # orange
|
| 1064 |
+
c.setFillColor(colors.Color(1.0, 0.55, 0.0))
|
| 1065 |
+
c.setLineWidth(2.5)
|
| 1066 |
c.circle(x_pos, y_pos, 12, fill=0)
|
| 1067 |
+
# No symbol inside — the empty circle IS the mark
|
| 1068 |
+
|
| 1069 |
+
elif question_type == "narrative":
|
| 1070 |
+
# Narrative: green tick when correct (Verified), orange circle for Partial, red circle otherwise
|
| 1071 |
+
if is_correct:
|
| 1072 |
+
c.setStrokeColor(colors.green)
|
| 1073 |
+
c.setFillColor(colors.green)
|
| 1074 |
+
c.setLineWidth(2)
|
| 1075 |
+
c.circle(x_pos, y_pos, 12, fill=0)
|
| 1076 |
+
c.setFont("Helvetica-Bold", 14)
|
| 1077 |
+
c.drawString(x_pos - 5, y_pos - 5, "✓")
|
| 1078 |
+
elif status == "Partial":
|
| 1079 |
+
c.setStrokeColor(colors.orange)
|
| 1080 |
+
c.setFillColor(colors.orange)
|
| 1081 |
+
c.setLineWidth(2)
|
| 1082 |
+
c.circle(x_pos, y_pos, 12, fill=0)
|
| 1083 |
+
else:
|
| 1084 |
+
c.setStrokeColor(colors.red)
|
| 1085 |
+
c.setFillColor(colors.red)
|
| 1086 |
+
c.setLineWidth(2)
|
| 1087 |
+
c.circle(x_pos, y_pos, 12, fill=0)
|
| 1088 |
+
|
| 1089 |
else:
|
| 1090 |
+
# MCQ: tick for correct, cross for wrong, ? for unreadable
|
| 1091 |
+
if is_correct:
|
| 1092 |
+
# Green tick
|
| 1093 |
+
c.setStrokeColor(colors.green)
|
| 1094 |
+
c.setFillColor(colors.green)
|
| 1095 |
+
c.setLineWidth(2)
|
| 1096 |
+
c.circle(x_pos, y_pos, 12, fill=0)
|
| 1097 |
+
c.setFont("Helvetica-Bold", 14)
|
| 1098 |
+
c.drawString(x_pos - 5, y_pos - 5, "✓")
|
| 1099 |
+
elif is_correct is None:
|
| 1100 |
+
# Orange circle with ? — unreadable / no answer key
|
| 1101 |
+
c.setStrokeColor(colors.orange)
|
| 1102 |
+
c.setFillColor(colors.orange)
|
| 1103 |
+
c.setLineWidth(2)
|
| 1104 |
+
c.circle(x_pos, y_pos, 12, fill=0)
|
| 1105 |
+
c.setFont("Helvetica-Bold", 12)
|
| 1106 |
+
c.drawString(x_pos - 4, y_pos - 5, "?")
|
| 1107 |
+
else:
|
| 1108 |
+
# Red cross — wrong answer
|
| 1109 |
+
c.setStrokeColor(colors.red)
|
| 1110 |
+
c.setFillColor(colors.red)
|
| 1111 |
+
c.setLineWidth(2)
|
| 1112 |
+
c.circle(x_pos, y_pos, 12, fill=0)
|
| 1113 |
+
c.setFont("Helvetica-Bold", 14)
|
| 1114 |
+
c.drawString(x_pos - 5, y_pos - 5, "✗")
|
| 1115 |
|
| 1116 |
# Draw question label
|
| 1117 |
c.setStrokeColor(colors.black)
|
|
|
|
| 1125 |
c.setFillColor(colors.lightgrey)
|
| 1126 |
c.rect(0, page_height - 60, page_width, 60, fill=1, stroke=0)
|
| 1127 |
|
| 1128 |
+
# Draw status circle and text - LARGER FONT
|
| 1129 |
c.setFillColor(colors.black)
|
| 1130 |
c.setFont("Helvetica-Bold", 20)
|
| 1131 |
|
| 1132 |
+
# Determine circle color based on status
|
| 1133 |
+
if status == "Verified":
|
| 1134 |
+
status_circle_color = colors.green
|
| 1135 |
+
elif status == "Partial":
|
| 1136 |
+
status_circle_color = colors.orange
|
| 1137 |
+
else:
|
| 1138 |
+
status_circle_color = colors.red
|
| 1139 |
+
|
| 1140 |
+
# Draw status circle
|
| 1141 |
+
c.setStrokeColor(status_circle_color)
|
| 1142 |
+
c.setFillColor(status_circle_color)
|
| 1143 |
+
c.setLineWidth(3)
|
| 1144 |
+
c.circle(25, page_height - 25, 10, fill=1)
|
| 1145 |
+
|
| 1146 |
+
# Draw status text
|
| 1147 |
+
c.setFillColor(status_circle_color)
|
| 1148 |
+
c.drawString(45, page_height - 30, f"Status: {status}")
|
| 1149 |
|
| 1150 |
c.setFillColor(colors.black)
|
| 1151 |
c.setFont("Helvetica-Bold", 18)
|
|
|
|
| 1155 |
# Draw MCQ summary
|
| 1156 |
if mcq_results:
|
| 1157 |
correct_count = sum(1 for r in mcq_results if r.get('correct'))
|
| 1158 |
+
unattempted_count = sum(1 for r in mcq_results if r.get('unattempted'))
|
| 1159 |
total_count = len(mcq_results)
|
| 1160 |
c.setFont("Helvetica-Bold", 14)
|
| 1161 |
+
if question_type == "narrative":
|
| 1162 |
+
c.drawString(30, page_height - 50, f"Narrative Evaluation: Score {match_percentage}%")
|
| 1163 |
+
else:
|
| 1164 |
+
c.drawString(30, page_height - 50, f"MCQ: {correct_count}/{total_count} correct")
|
| 1165 |
+
|
| 1166 |
+
# Legend: ✓ Correct ✗ Wrong ○ Unattempted
|
| 1167 |
+
legend_x = page_width - 220
|
| 1168 |
+
c.setFont("Helvetica", 9)
|
| 1169 |
+
c.setFillColor(colors.green)
|
| 1170 |
+
c.drawString(legend_x, page_height - 50, "✓ Correct")
|
| 1171 |
+
c.setFillColor(colors.red)
|
| 1172 |
+
c.drawString(legend_x + 70, page_height - 50, "✗ Wrong")
|
| 1173 |
+
c.setFillColor(colors.Color(1.0, 0.55, 0.0))
|
| 1174 |
+
c.drawString(legend_x + 135, page_height - 50, "○ Unattempted")
|
| 1175 |
|
| 1176 |
c.save()
|
| 1177 |
packet.seek(0)
|
|
|
|
| 1275 |
Get the URL for the annotated PDF.
|
| 1276 |
Returns JSON with the URL that can be used in your frontend.
|
| 1277 |
"""
|
| 1278 |
+
base_url = get_public_base_url()
|
| 1279 |
return {
|
| 1280 |
"homework_id": homework_id,
|
| 1281 |
"student_id": student_id,
|
|
|
|
| 1357 |
'qid': qid,
|
| 1358 |
'chosen': student_ans,
|
| 1359 |
'correct_answer': pq.get('correct_answer'),
|
| 1360 |
+
'correct': is_correct,
|
| 1361 |
+
'unattempted': False
|
| 1362 |
})
|
| 1363 |
matched = True
|
| 1364 |
break
|
| 1365 |
if not matched:
|
| 1366 |
+
mcq_results.append({'qid': qid, 'chosen': student_ans, 'correct_answer': None, 'correct': False, 'unattempted': False})
|
| 1367 |
+
|
| 1368 |
+
# Mark questions from the prompt that the student never answered
|
| 1369 |
+
answered_nums = {r['qid'].replace('Q', '').strip() for r in mcq_results}
|
| 1370 |
+
for pq in mcq_questions_with_answers:
|
| 1371 |
+
pq_num = pq.get('qid', '').replace('Q', '').strip()
|
| 1372 |
+
if pq_num not in answered_nums:
|
| 1373 |
+
mcq_results.append({
|
| 1374 |
+
'qid': pq.get('qid'),
|
| 1375 |
+
'chosen': '',
|
| 1376 |
+
'correct_answer': pq.get('correct_answer'),
|
| 1377 |
+
'correct': False,
|
| 1378 |
+
'unattempted': True
|
| 1379 |
+
})
|
| 1380 |
|
| 1381 |
if mcq_results:
|
| 1382 |
correct_count = sum(1 for r in mcq_results if r.get('correct'))
|
|
|
|
| 1428 |
status = "Partial"
|
| 1429 |
else:
|
| 1430 |
status = "Needs Review"
|
| 1431 |
+
|
| 1432 |
+
# Create result for narrative to show in PDF
|
| 1433 |
+
if status == "Verified":
|
| 1434 |
+
narrative_correct = True
|
| 1435 |
+
elif status == "Partial":
|
| 1436 |
+
narrative_correct = False
|
| 1437 |
+
else:
|
| 1438 |
+
narrative_correct = False
|
| 1439 |
+
|
| 1440 |
+
mcq_results = [
|
| 1441 |
+
{'qid': 'Q1', 'correct': narrative_correct, 'chosen': f'Score: {match_percentage}%', 'correct_answer': status}
|
| 1442 |
+
]
|
| 1443 |
except Exception as e:
|
| 1444 |
print(f"[WARN] Failed to calculate narrative score: {e}")
|
| 1445 |
|
|
|
|
| 1449 |
mcq_results=mcq_results,
|
| 1450 |
match_percentage=match_percentage,
|
| 1451 |
status=status,
|
| 1452 |
+
student_level=student_level,
|
| 1453 |
+
question_type=final_question_type
|
| 1454 |
)
|
| 1455 |
|
| 1456 |
# Return as file download
|
|
|
|
| 1511 |
|
| 1512 |
# Initialize annotated PDF filename
|
| 1513 |
annotated_pdf_filename = None
|
| 1514 |
+
annotated_pdf_url = None
|
| 1515 |
|
| 1516 |
+
# Function to save annotated PDF — returns (filename, public_url)
|
| 1517 |
+
def save_annotated_pdf(pdf_bytes, hw_id, stud_id, results, score, stat, lvl, qtype="mcq"):
|
| 1518 |
if not pdf_bytes or len(pdf_bytes) < 100:
|
| 1519 |
+
return None, None
|
| 1520 |
try:
|
| 1521 |
outputs_dir = os.path.join(os.path.dirname(__file__), "outputs")
|
| 1522 |
os.makedirs(outputs_dir, exist_ok=True)
|
| 1523 |
+
ts = int(time.time())
|
| 1524 |
+
filename = f"marked_{hw_id}_{stud_id}_{ts}.pdf"
|
| 1525 |
filepath = os.path.join(outputs_dir, filename)
|
| 1526 |
|
| 1527 |
annotated = create_annotated_pdf(
|
|
|
|
| 1529 |
mcq_results=results,
|
| 1530 |
match_percentage=score,
|
| 1531 |
status=stat,
|
| 1532 |
+
student_level=lvl,
|
| 1533 |
+
question_type=qtype
|
| 1534 |
)
|
| 1535 |
|
| 1536 |
with open(filepath, "wb") as f:
|
| 1537 |
f.write(annotated)
|
| 1538 |
+
return filename, build_pdf_url(filename)
|
| 1539 |
except Exception as e:
|
| 1540 |
print(f"[WARN] Failed to save annotated PDF: {e}")
|
| 1541 |
+
return None, None
|
| 1542 |
|
| 1543 |
MIN_WORDS = 3 if final_question_type == "mcq" else 8
|
| 1544 |
if len(student_text.split()) < MIN_WORDS:
|
| 1545 |
# Save annotated PDF even for unreadable (with status shown)
|
| 1546 |
if is_pdf_submission and original_file_bytes:
|
| 1547 |
+
# Show circle mark for unreadable
|
| 1548 |
+
unreadable_result = [{'qid': 'Q1', 'correct': None, 'chosen': 'Unreadable', 'correct_answer': 'N/A'}]
|
| 1549 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1550 |
+
original_file_bytes, homework_id, student_id, unreadable_result, 0, "Unreadable", student_level
|
| 1551 |
)
|
| 1552 |
return {
|
| 1553 |
"student_id": student_id,
|
|
|
|
| 1562 |
"rule_based_remark": "Answer text could not be read clearly. Please upload a clearer file.",
|
| 1563 |
"student_extracted_text": student_text,
|
| 1564 |
"llm_used": False,
|
| 1565 |
+
"question_marks": make_question_marks([]),
|
| 1566 |
"annotated_pdf": annotated_pdf_filename,
|
| 1567 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1568 |
}
|
|
|
|
| 1570 |
if student_info.get("needs_ocr") and not student_text:
|
| 1571 |
# Save annotated PDF even for unreadable (with status shown)
|
| 1572 |
if is_pdf_submission and original_file_bytes:
|
| 1573 |
+
# Show circle mark for scanned PDF that needs OCR
|
| 1574 |
+
ocr_result = [{'qid': 'Q1', 'correct': None, 'chosen': 'Needs OCR', 'correct_answer': 'N/A'}]
|
| 1575 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1576 |
+
original_file_bytes, homework_id, student_id, ocr_result, 0, "Unreadable", student_level
|
| 1577 |
)
|
| 1578 |
return {
|
| 1579 |
"student_id": student_id,
|
|
|
|
| 1588 |
"rule_based_remark": "This PDF looks scanned. OCR is required (install pdf2image + poppler) or upload a clearer file.",
|
| 1589 |
"student_extracted_text": student_text,
|
| 1590 |
"llm_used": False,
|
| 1591 |
+
"question_marks": make_question_marks([]),
|
| 1592 |
"annotated_pdf": annotated_pdf_filename,
|
| 1593 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1594 |
}
|
|
|
|
| 1623 |
'qid': qid,
|
| 1624 |
'correct': is_correct,
|
| 1625 |
'chosen': chosen,
|
| 1626 |
+
'correct_answer': correct,
|
| 1627 |
+
'unattempted': False
|
| 1628 |
+
})
|
| 1629 |
+
elif correct and not chosen:
|
| 1630 |
+
# Student didn't answer this question at all
|
| 1631 |
+
mcq_results.append({
|
| 1632 |
+
'qid': qid,
|
| 1633 |
+
'correct': False,
|
| 1634 |
+
'chosen': '',
|
| 1635 |
+
'correct_answer': correct,
|
| 1636 |
+
'unattempted': True
|
| 1637 |
})
|
| 1638 |
|
| 1639 |
# For narrative questions, use AI to generate reference
|
|
|
|
| 1724 |
|
| 1725 |
# Save annotated PDF
|
| 1726 |
if is_pdf_submission and original_file_bytes and mcq_results:
|
| 1727 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1728 |
original_file_bytes, homework_id, student_id, mcq_results, final_score, status, student_level
|
| 1729 |
)
|
| 1730 |
|
|
|
|
| 1743 |
"student_extracted_text": student_text,
|
| 1744 |
"mcq_results": mcq_results,
|
| 1745 |
"narrative_results": narrative_results,
|
| 1746 |
+
"question_marks": make_question_marks(mcq_results),
|
| 1747 |
"annotated_pdf": annotated_pdf_filename,
|
| 1748 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1749 |
"debug": {
|
|
|
|
| 1805 |
'qid': qid,
|
| 1806 |
'chosen': student_ans,
|
| 1807 |
'correct_answer': pq.get('correct_answer'),
|
| 1808 |
+
'correct': is_correct,
|
| 1809 |
+
'unattempted': False
|
| 1810 |
})
|
| 1811 |
matched = True
|
| 1812 |
break
|
|
|
|
| 1815 |
'qid': qid,
|
| 1816 |
'chosen': student_ans,
|
| 1817 |
'correct_answer': None,
|
| 1818 |
+
'correct': False,
|
| 1819 |
+
'unattempted': False
|
| 1820 |
+
})
|
| 1821 |
+
|
| 1822 |
+
# Add any questions from the prompt that the student never answered
|
| 1823 |
+
answered_nums = {r['qid'].replace('Q', '').strip() for r in mcq_results}
|
| 1824 |
+
for pq in mcq_questions_with_answers:
|
| 1825 |
+
pq_num = pq.get('qid', '').replace('Q', '').strip()
|
| 1826 |
+
if pq_num not in answered_nums:
|
| 1827 |
+
mcq_results.append({
|
| 1828 |
+
'qid': pq.get('qid'),
|
| 1829 |
+
'chosen': '',
|
| 1830 |
+
'correct_answer': pq.get('correct_answer'),
|
| 1831 |
+
'correct': False,
|
| 1832 |
+
'unattempted': True
|
| 1833 |
})
|
| 1834 |
|
| 1835 |
# Calculate score based on level
|
|
|
|
| 1841 |
|
| 1842 |
# Save annotated PDF
|
| 1843 |
if is_pdf_submission and original_file_bytes:
|
| 1844 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1845 |
original_file_bytes, homework_id, student_id, mcq_results, match_percentage, status, student_level
|
| 1846 |
)
|
| 1847 |
|
|
|
|
| 1858 |
"rule_based_remark": f"Multiple MCQ: {correct_count}/{total_count} correct. Score: {match_percentage}% (Level: {student_level})",
|
| 1859 |
"student_extracted_text": student_text,
|
| 1860 |
"llm_used": False,
|
| 1861 |
+
"question_marks": make_question_marks(mcq_results),
|
| 1862 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1863 |
"debug": {"student_answers": student_answers_by_qid, "mcq_results": mcq_results},
|
| 1864 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1865 |
}
|
| 1866 |
else:
|
| 1867 |
# No correct answers in prompt - return needs review with extracted answers
|
| 1868 |
+
# Save annotated PDF with circle mark
|
| 1869 |
if is_pdf_submission and original_file_bytes:
|
| 1870 |
+
no_answer_result = [{'qid': 'Q1', 'correct': None, 'chosen': 'No Answer Key', 'correct_answer': 'N/A'}]
|
| 1871 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1872 |
+
original_file_bytes, homework_id, student_id, no_answer_result, 0, "Needs Review", student_level
|
| 1873 |
)
|
| 1874 |
return {
|
| 1875 |
"student_id": student_id,
|
|
|
|
| 1884 |
"rule_based_remark": f"Found {len(student_answers_by_qid)} MCQ answers but no correct answers in prompt. Include 'Correct: B' for each question.",
|
| 1885 |
"student_extracted_text": student_text,
|
| 1886 |
"llm_used": False,
|
| 1887 |
+
"question_marks": make_question_marks([]),
|
| 1888 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1889 |
"debug": {"student_answers": student_answers_by_qid, "correct_answers_in_prompt": False},
|
| 1890 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1891 |
}
|
|
|
|
| 1893 |
if redirect_to_narrative:
|
| 1894 |
pass # Will continue to narrative handling
|
| 1895 |
elif not correct:
|
| 1896 |
+
# Save annotated PDF with circle mark
|
| 1897 |
if is_pdf_submission and original_file_bytes:
|
| 1898 |
+
no_correct_result = [{'qid': 'Q1', 'correct': None, 'chosen': 'Not Found', 'correct_answer': 'N/A'}]
|
| 1899 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1900 |
+
original_file_bytes, homework_id, student_id, no_correct_result, 0, "Needs Review", student_level
|
| 1901 |
)
|
| 1902 |
return {
|
| 1903 |
"student_id": student_id,
|
|
|
|
| 1912 |
"rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
|
| 1913 |
"student_extracted_text": student_text,
|
| 1914 |
"llm_used": False,
|
| 1915 |
+
"question_marks": make_question_marks([]),
|
| 1916 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1917 |
"debug": {"correct": correct, "chosen": chosen},
|
| 1918 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1919 |
}
|
| 1920 |
elif not chosen:
|
| 1921 |
+
# Save annotated PDF with circle mark
|
| 1922 |
if is_pdf_submission and original_file_bytes:
|
| 1923 |
+
no_chosen_result = [{'qid': 'Q1', 'correct': None, 'chosen': 'Not Detected', 'correct_answer': correct or 'N/A'}]
|
| 1924 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1925 |
+
original_file_bytes, homework_id, student_id, no_chosen_result, 0, "Needs Review", student_level
|
| 1926 |
)
|
| 1927 |
return {
|
| 1928 |
"student_id": student_id,
|
|
|
|
| 1937 |
"rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
|
| 1938 |
"student_extracted_text": student_text,
|
| 1939 |
"llm_used": False,
|
| 1940 |
+
"question_marks": make_question_marks([]),
|
| 1941 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1942 |
"debug": {"correct": correct, "chosen": chosen},
|
| 1943 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1944 |
}
|
|
|
|
| 1961 |
# Save annotated PDF
|
| 1962 |
mcq_results_single = [{'qid': 'Q1', 'correct': is_correct, 'chosen': chosen, 'correct_answer': correct}]
|
| 1963 |
if is_pdf_submission and original_file_bytes:
|
| 1964 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1965 |
original_file_bytes, homework_id, student_id, mcq_results_single, match_percentage, status, student_level
|
| 1966 |
)
|
| 1967 |
|
|
|
|
| 1978 |
"rule_based_remark": f"{'Correct' if is_correct else 'Incorrect'}. Score: {match_percentage}% (Level: {student_level}, Credit per Q: {credit_per_q}%)",
|
| 1979 |
"student_extracted_text": student_text,
|
| 1980 |
"llm_used": False,
|
| 1981 |
+
"question_marks": make_question_marks(mcq_results_single),
|
| 1982 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 1983 |
"debug": {"correct": correct, "chosen": chosen, "level": student_level, "credit_per_q": credit_per_q},
|
| 1984 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1985 |
}
|
|
|
|
| 1988 |
if gemini_client is None:
|
| 1989 |
# Save annotated PDF
|
| 1990 |
if is_pdf_submission and original_file_bytes:
|
| 1991 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1992 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 1993 |
)
|
| 1994 |
return {
|
|
|
|
| 2005 |
"llm_used": False,
|
| 2006 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 2007 |
"student_extracted_text": student_text,
|
| 2008 |
+
"question_marks": make_question_marks([]),
|
| 2009 |
"annotated_pdf": annotated_pdf_filename,
|
| 2010 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2011 |
}
|
|
|
|
| 2029 |
if not response_text:
|
| 2030 |
# Save annotated PDF
|
| 2031 |
if is_pdf_submission and original_file_bytes:
|
| 2032 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2033 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2034 |
)
|
| 2035 |
return {
|
|
|
|
| 2046 |
"llm_used": False,
|
| 2047 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 2048 |
"student_extracted_text": student_text,
|
| 2049 |
+
"question_marks": make_question_marks([]),
|
| 2050 |
"annotated_pdf": annotated_pdf_filename,
|
| 2051 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2052 |
}
|
|
|
|
| 2057 |
except Exception as e:
|
| 2058 |
# Save annotated PDF
|
| 2059 |
if is_pdf_submission and original_file_bytes:
|
| 2060 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2061 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2062 |
)
|
| 2063 |
return {
|
|
|
|
| 2074 |
"llm_used": False,
|
| 2075 |
"llm_error": {"ok": False, "error_type": "GEMINI_BAD_JSON", "message": str(e), "raw": response_text[:800]},
|
| 2076 |
"student_extracted_text": student_text,
|
| 2077 |
+
"question_marks": make_question_marks([]),
|
| 2078 |
"annotated_pdf": annotated_pdf_filename,
|
| 2079 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2080 |
}
|
|
|
|
| 2088 |
if not ai_reference_answer:
|
| 2089 |
# Save annotated PDF
|
| 2090 |
if is_pdf_submission and original_file_bytes:
|
| 2091 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2092 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2093 |
)
|
| 2094 |
return {
|
|
|
|
| 2104 |
"rule_based_remark": "AI returned empty reference answer.",
|
| 2105 |
"llm_used": True,
|
| 2106 |
"student_extracted_text": student_text,
|
| 2107 |
+
"question_marks": make_question_marks([]),
|
| 2108 |
"annotated_pdf": annotated_pdf_filename,
|
| 2109 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2110 |
}
|
|
|
|
| 2155 |
else:
|
| 2156 |
rule_based_remark = "Homework does not match the expected answer enough. Please review the topic and resubmit with clearer, complete points."
|
| 2157 |
|
| 2158 |
+
# Save annotated PDF for narrative (with status-based circle marking)
|
| 2159 |
+
# Add a placeholder to show it's a narrative question that was evaluated
|
| 2160 |
if is_pdf_submission and original_file_bytes:
|
| 2161 |
+
# Determine correctness based on status:
|
| 2162 |
+
# - Verified: correct (green checkmark)
|
| 2163 |
+
# - Partial: partially correct (yellow circle)
|
| 2164 |
+
# - Needs Review: incorrect (red circle)
|
| 2165 |
+
if status == "Verified":
|
| 2166 |
+
narrative_correct = True
|
| 2167 |
+
elif status == "Partial":
|
| 2168 |
+
narrative_correct = False # Will show as yellow circle for partial
|
| 2169 |
+
else:
|
| 2170 |
+
narrative_correct = False # Will show as red circle for needs review
|
| 2171 |
+
|
| 2172 |
+
narrative_result = [
|
| 2173 |
+
{'qid': 'Q1', 'correct': narrative_correct, 'chosen': f'Score: {match_pct}%', 'correct_answer': status}
|
| 2174 |
+
]
|
| 2175 |
+
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2176 |
+
original_file_bytes, homework_id, student_id, narrative_result, match_pct, status, student_level, "narrative"
|
| 2177 |
)
|
| 2178 |
|
| 2179 |
return {
|
|
|
|
| 2195 |
"key_points": key_points,
|
| 2196 |
"key_points_covered": covered,
|
| 2197 |
"key_points_missing": missing,
|
| 2198 |
+
"question_marks": make_question_marks([]),
|
| 2199 |
+
"annotated_pdf": annotated_pdf_filename,
|
| 2200 |
"debug": {
|
| 2201 |
"similarity": sim,
|
| 2202 |
"coverage": coverage,
|
|
|
|
| 2205 |
"erp_student_level_raw": erp_row.get("student_level") or erp_row.get("level") or erp_row.get("difficulty") or erp_row.get("difficulty_level"),
|
| 2206 |
},
|
| 2207 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2208 |
+
}
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,24 +1,17 @@
|
|
| 1 |
-
|
| 2 |
uvicorn
|
| 3 |
pytesseract
|
| 4 |
pillow
|
| 5 |
sqlalchemy
|
| 6 |
scikit-learn
|
| 7 |
requests
|
| 8 |
-
|
| 9 |
-
openai
|
| 10 |
python-docx
|
| 11 |
google-genai
|
| 12 |
python-dotenv
|
| 13 |
pypdf
|
| 14 |
python-multipart
|
| 15 |
-
openai
|
| 16 |
google-generativeai
|
| 17 |
google-cloud-vision
|
| 18 |
easyocr
|
| 19 |
-
python-docx
|
| 20 |
-
pypdf
|
| 21 |
pdf2image
|
| 22 |
reportlab
|
| 23 |
-
python-dotenv
|
| 24 |
-
google-genai
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
uvicorn
|
| 3 |
pytesseract
|
| 4 |
pillow
|
| 5 |
sqlalchemy
|
| 6 |
scikit-learn
|
| 7 |
requests
|
|
|
|
|
|
|
| 8 |
python-docx
|
| 9 |
google-genai
|
| 10 |
python-dotenv
|
| 11 |
pypdf
|
| 12 |
python-multipart
|
|
|
|
| 13 |
google-generativeai
|
| 14 |
google-cloud-vision
|
| 15 |
easyocr
|
|
|
|
|
|
|
| 16 |
pdf2image
|
| 17 |
reportlab
|
|
|
|
|
|