Merge GitHub main into local
Browse files
- .gitignore +4 -1
- Dockerfile +23 -0
- README.md +92 -1
- answer_key.json +60 -0
- app.py +403 -0
- db.py +3 -0
- requirements.txt +10 -0
- scholar clone.lnk +0 -0
.gitignore
CHANGED
|
@@ -18,6 +18,7 @@ answer_key.json
|
|
| 18 |
__pycache__/
|
| 19 |
*.pyc
|
| 20 |
.env
|
|
|
|
| 21 |
myenv/
|
| 22 |
venv/
|
| 23 |
.venv/
|
|
@@ -27,4 +28,6 @@ __pycache__/
|
|
| 27 |
*.pyd
|
| 28 |
*.db
|
| 29 |
homework.db
|
| 30 |
-
_local_backup/
|
|
|
|
|
|
|
|
|
| 18 |
__pycache__/
|
| 19 |
*.pyc
|
| 20 |
.env
|
| 21 |
+
<<<<<<< HEAD
|
| 22 |
myenv/
|
| 23 |
venv/
|
| 24 |
.venv/
|
|
|
|
| 28 |
*.pyd
|
| 29 |
*.db
|
| 30 |
homework.db
|
| 31 |
+
_local_backup/
|
| 32 |
+
=======
|
| 33 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
Dockerfile
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
FROM python:3.10-slim
|
| 2 |
|
| 3 |
# Install Tesseract OCR + Poppler + dependencies
|
|
@@ -21,3 +22,25 @@ ENV HF_SPACE=moncey10-homework-validation-system.hf.space
|
|
| 21 |
EXPOSE 7860
|
| 22 |
|
| 23 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<<<<<<< HEAD
|
| 2 |
FROM python:3.10-slim
|
| 3 |
|
| 4 |
# Install Tesseract OCR + Poppler + dependencies
|
|
|
|
| 22 |
EXPOSE 7860
|
| 23 |
|
| 24 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
| 25 |
+
=======
|
| 26 |
+
FROM python:3.10-slim
|
| 27 |
+
|
| 28 |
+
# System deps (Tesseract + basic libs for PIL)
|
| 29 |
+
RUN apt-get update && apt-get install -y \
|
| 30 |
+
tesseract-ocr \
|
| 31 |
+
libtesseract-dev \
|
| 32 |
+
libleptonica-dev \
|
| 33 |
+
poppler-utils \
|
| 34 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 35 |
+
|
| 36 |
+
WORKDIR /app
|
| 37 |
+
COPY requirements.txt /app/requirements.txt
|
| 38 |
+
RUN pip install --no-cache-dir -r /app/requirements.txt
|
| 39 |
+
|
| 40 |
+
COPY . /app
|
| 41 |
+
|
| 42 |
+
# HF Spaces uses 7860
|
| 43 |
+
EXPOSE 7860
|
| 44 |
+
|
| 45 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
| 46 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
README.md
CHANGED
|
@@ -1,6 +1,97 @@
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Homework Validation System
|
| 3 |
sdk: docker
|
| 4 |
app_port: 7860
|
| 5 |
---
|
| 6 |
-
hello
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<<<<<<< HEAD
|
| 2 |
---
|
| 3 |
title: Homework Validation System
|
| 4 |
sdk: docker
|
| 5 |
app_port: 7860
|
| 6 |
---
|
| 7 |
+
hello
|
| 8 |
+
=======
|
| 9 |
+
---
|
| 10 |
+
title: Homework Validation System
|
| 11 |
+
sdk: docker
|
| 12 |
+
app_port: 7860
|
| 13 |
+
---
|
| 14 |
+
# Homework Validation System (FastAPI)
|
| 15 |
+
|
| 16 |
+
A backend API that validates student homework by extracting text from teacher and student files, comparing answers, and generating remarks using rule-based logic and optional AI.
|
| 17 |
+
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
## Features
|
| 21 |
+
|
| 22 |
+
- Upload teacher and student homework files
|
| 23 |
+
- OCR support for images and scanned PDFs
|
| 24 |
+
- Text extraction from PDF and DOCX
|
| 25 |
+
- Similarity matching using TF-IDF + cosine similarity
|
| 26 |
+
- Optional AI-generated remarks (OpenAI / Gemini)
|
| 27 |
+
- FastAPI Swagger documentation
|
| 28 |
+
|
| 29 |
+
---
|
| 30 |
+
|
| 31 |
+
## Tech Stack
|
| 32 |
+
|
| 33 |
+
- FastAPI
|
| 34 |
+
- Python
|
| 35 |
+
- pytesseract
|
| 36 |
+
- Pillow
|
| 37 |
+
- pypdf / pdf2image
|
| 38 |
+
- python-docx
|
| 39 |
+
- scikit-learn
|
| 40 |
+
- OpenAI / Gemini (optional)
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
## Project Structure
|
| 45 |
+
|
| 46 |
+
---
|
| 47 |
+
homework_validation_system/
|
| 48 |
+
│
|
| 49 |
+
├── app.py
|
| 50 |
+
├── requirements.txt
|
| 51 |
+
├── artifacts/
|
| 52 |
+
├── uploads/
|
| 53 |
+
├── src/
|
| 54 |
+
│ ├── extractors.py
|
| 55 |
+
│ ├── similarity.py
|
| 56 |
+
│ ├── llm_client.py
|
| 57 |
+
│ └── utils.py
|
| 58 |
+
└── README.md
|
| 59 |
+
## Installation
|
| 60 |
+
|
| 61 |
+
### 1. Create Virtual Environment
|
| 62 |
+
python -m venv myenv
|
| 63 |
+
|
| 64 |
+
### 2. Install Requirements
|
| 65 |
+
pip install -r requirements.txt
|
| 66 |
+
## OCR Setup (Required)
|
| 67 |
+
|
| 68 |
+
### Install Tesseract OCR
|
| 69 |
+
|
| 70 |
+
This project uses **Tesseract OCR** for extracting text from images and scanned PDFs.
|
| 71 |
+
|
| 72 |
+
#### Windows
|
| 73 |
+
1. Download and install Tesseract OCR.
|
| 74 |
+
2. Default installation path: `C:\Program Files\Tesseract-OCR\tesseract.exe`
|
| 75 |
+
3. Add this path in your code:
|
| 76 |
+
|
| 77 |
+
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
|
| 78 |
+
|
| 79 |
+
### Run API
|
| 80 |
+
uvicorn app:app --reload --host 0.0.0.0 --port 8000
|
| 81 |
+
|
| 82 |
+
### Swagger UI:
|
| 83 |
+
|
| 84 |
+
http://localhost:8000/docs
|
| 85 |
+
|
| 86 |
+
### Example API Response
|
| 87 |
+
{
|
| 88 |
+
"student_id": 1,
|
| 89 |
+
"homework_id": 10,
|
| 90 |
+
"status": "Needs Review",
|
| 91 |
+
"match_percentage": 72,
|
| 92 |
+
"teacher_extracted_text": "...",
|
| 93 |
+
"student_extracted_text": "...",
|
| 94 |
+
"ai_generated_remark": "Good attempt but missing key points.",
|
| 95 |
+
"llm_used": true
|
| 96 |
+
}
|
| 97 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
answer_key.json
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
{
|
| 2 |
"hw01": {
|
| 3 |
"questions": [
|
|
@@ -55,4 +56,63 @@
|
|
| 55 |
}
|
| 56 |
]
|
| 57 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
}
|
|
|
|
| 1 |
+
<<<<<<< HEAD
|
| 2 |
{
|
| 3 |
"hw01": {
|
| 4 |
"questions": [
|
|
|
|
| 56 |
}
|
| 57 |
]
|
| 58 |
}
|
| 59 |
+
=======
|
| 60 |
+
{
|
| 61 |
+
"hw01": {
|
| 62 |
+
"questions": [
|
| 63 |
+
{
|
| 64 |
+
"qid": "Q1",
|
| 65 |
+
"type": "text",
|
| 66 |
+
"answer": "Artificial Intelligence is the simulation of human intelligence."
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"qid": "Q2",
|
| 70 |
+
"type": "text",
|
| 71 |
+
"answer": "Machine Learning is a subset of AI that learns from data."
|
| 72 |
+
}
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
"hw99": {
|
| 76 |
+
"questions": [
|
| 77 |
+
{
|
| 78 |
+
"qid": "Q1",
|
| 79 |
+
"type": "text",
|
| 80 |
+
"answer": "Artificial Intelligence is the simulation of human intelligence."
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"qid": "Q2",
|
| 84 |
+
"type": "text",
|
| 85 |
+
"answer": "Machine Learning is a subset of AI that learns from data."
|
| 86 |
+
}
|
| 87 |
+
]
|
| 88 |
+
},
|
| 89 |
+
"hw90": {
|
| 90 |
+
"questions": [
|
| 91 |
+
{
|
| 92 |
+
"qid": "Q1",
|
| 93 |
+
"type": "text",
|
| 94 |
+
"answer": "Artificial Intelligence is the simulation of human intelligence."
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"qid": "Q2",
|
| 98 |
+
"type": "text",
|
| 99 |
+
"answer": "Machine Learning is a subset of AI that learns from data."
|
| 100 |
+
}
|
| 101 |
+
]
|
| 102 |
+
},
|
| 103 |
+
"hw15": {
|
| 104 |
+
"questions": [
|
| 105 |
+
{
|
| 106 |
+
"qid": "Q1",
|
| 107 |
+
"type": "text",
|
| 108 |
+
"answer": "Artificial Intelligence is the simulation of human intelligence."
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"qid": "Q2",
|
| 112 |
+
"type": "text",
|
| 113 |
+
"answer": "Machine Learning is a subset of AI that learns from data."
|
| 114 |
+
}
|
| 115 |
+
]
|
| 116 |
+
}
|
| 117 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 118 |
}
|
app.py
CHANGED
|
@@ -10,11 +10,15 @@ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
from PIL import Image, ImageOps, ImageFilter
|
| 12 |
import pytesseract
|
|
|
|
| 13 |
import os
|
| 14 |
|
| 15 |
# Serve static files from outputs directory
|
| 16 |
from fastapi.staticfiles import StaticFiles
|
| 17 |
from fastapi.responses import FileResponse
|
|
|
|
|
|
|
|
|
|
| 18 |
from dotenv import load_dotenv
|
| 19 |
load_dotenv()
|
| 20 |
|
|
@@ -30,6 +34,7 @@ except Exception:
|
|
| 30 |
PdfReader = None
|
| 31 |
|
| 32 |
try:
|
|
|
|
| 33 |
from reportlab.pdfgen import canvas
|
| 34 |
from reportlab.lib.pagesizes import letter
|
| 35 |
from reportlab.lib import colors
|
|
@@ -40,6 +45,8 @@ except Exception as e:
|
|
| 40 |
print(f"[WARN] reportlab import failed: {e}")
|
| 41 |
|
| 42 |
try:
|
|
|
|
|
|
|
| 43 |
from pdf2image import convert_from_bytes # requires poppler
|
| 44 |
except Exception:
|
| 45 |
convert_from_bytes = None
|
|
@@ -55,6 +62,7 @@ except Exception as e:
|
|
| 55 |
genai = None
|
| 56 |
print(f"[WARN] google-genai import failed: {e}")
|
| 57 |
|
|
|
|
| 58 |
# ✅ Google Cloud Vision SDK (for better handwritten OCR)
|
| 59 |
try:
|
| 60 |
from google.cloud import vision
|
|
@@ -111,6 +119,13 @@ def debug_env():
|
|
| 111 |
"num_keys": len(GOOGLE_API_KEYS),
|
| 112 |
"has_openai_key": bool(os.getenv("OPENAI_API_KEY")),
|
| 113 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
app.add_middleware(
|
| 115 |
CORSMiddleware,
|
| 116 |
allow_origins=["*"],
|
|
@@ -119,20 +134,33 @@ app.add_middleware(
|
|
| 119 |
allow_headers=["*"],
|
| 120 |
)
|
| 121 |
|
|
|
|
| 122 |
|
| 123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
if os.name == "nt":
|
| 125 |
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
|
| 126 |
else:
|
| 127 |
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
|
| 128 |
|
| 129 |
|
|
|
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
ERP_BASE = os.getenv("ERP_BASE", "https://erp.triz.co.in/lms_data")
|
| 132 |
STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student/")
|
| 133 |
ERP_TOKEN = os.getenv("ERP_TOKEN", "")
|
| 134 |
|
| 135 |
|
|
|
|
| 136 |
def get_public_base_url() -> str:
|
| 137 |
"""
|
| 138 |
Returns the public base URL of this server.
|
|
@@ -251,27 +279,58 @@ def _init_gemini_client(key_index: int = 0) -> None:
|
|
| 251 |
return
|
| 252 |
|
| 253 |
api_key = GOOGLE_API_KEYS[key_index]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
|
| 255 |
if not genai:
|
| 256 |
GEMINI_LAST_ERROR = "google-genai not installed / import failed"
|
| 257 |
gemini_client = None
|
| 258 |
return
|
| 259 |
|
|
|
|
| 260 |
if not api_key:
|
| 261 |
GEMINI_LAST_ERROR = f"GOOGLE_API_KEY_{key_index + 1} not set"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
gemini_client = None
|
| 263 |
return
|
| 264 |
|
| 265 |
try:
|
|
|
|
| 266 |
gemini_client = genai.Client(api_key=api_key)
|
| 267 |
GEMINI_LAST_ERROR = ""
|
| 268 |
print(f"[INFO] Gemini client initialized with key #{key_index + 1}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
except Exception as e:
|
| 270 |
gemini_client = None
|
| 271 |
GEMINI_LAST_ERROR = str(e)
|
| 272 |
print(f"[WARN] Gemini init failed: {GEMINI_LAST_ERROR}")
|
| 273 |
|
| 274 |
|
|
|
|
| 275 |
def _is_rate_limit_error(error_msg: str) -> bool:
|
| 276 |
"""Check if the error is a rate limit error (429) or service unavailable (503)."""
|
| 277 |
if not error_msg:
|
|
@@ -314,6 +373,9 @@ def _rotate_to_next_key() -> bool:
|
|
| 314 |
|
| 315 |
|
| 316 |
_init_gemini_client(0)
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
|
| 319 |
def parse_gemini_error(error_msg: str) -> dict:
|
|
@@ -329,6 +391,7 @@ def parse_gemini_error(error_msg: str) -> dict:
|
|
| 329 |
return {"ok": False, "error_type": "GEMINI_ERROR", "message": msg}
|
| 330 |
|
| 331 |
|
|
|
|
| 332 |
|
| 333 |
def extract_qid_from_prompt(prompt: str, erp_row: dict = None) -> str:
|
| 334 |
"""
|
|
@@ -375,22 +438,32 @@ def extract_qid_from_prompt(prompt: str, erp_row: dict = None) -> str:
|
|
| 375 |
return "Q1"
|
| 376 |
|
| 377 |
|
|
|
|
|
|
|
| 378 |
def generate_gemini_response(
|
| 379 |
prompt: str,
|
| 380 |
system_prompt: str = "",
|
| 381 |
max_tokens: int = 650,
|
| 382 |
temperature: float = 0.3,
|
| 383 |
) -> str:
|
|
|
|
| 384 |
global GEMINI_LAST_ERROR, gemini_client, rate_limited_keys
|
|
|
|
|
|
|
|
|
|
| 385 |
|
| 386 |
if gemini_client is None:
|
| 387 |
if not GEMINI_LAST_ERROR:
|
| 388 |
GEMINI_LAST_ERROR = "Gemini client not initialized"
|
|
|
|
| 389 |
# Try to reinitialize if we have keys available
|
| 390 |
if GOOGLE_API_KEYS and current_key_index not in rate_limited_keys:
|
| 391 |
_init_gemini_client(current_key_index)
|
| 392 |
if gemini_client is None:
|
| 393 |
return ""
|
|
|
|
|
|
|
|
|
|
| 394 |
|
| 395 |
try:
|
| 396 |
contents = []
|
|
@@ -408,6 +481,7 @@ def generate_gemini_response(
|
|
| 408 |
GEMINI_LAST_ERROR = ""
|
| 409 |
return text
|
| 410 |
except Exception as e:
|
|
|
|
| 411 |
error_msg = str(e)
|
| 412 |
print(f"[ERROR] Gemini call failed: {error_msg}")
|
| 413 |
|
|
@@ -419,6 +493,10 @@ def generate_gemini_response(
|
|
| 419 |
return generate_gemini_response(prompt, system_prompt, max_tokens, temperature)
|
| 420 |
|
| 421 |
GEMINI_LAST_ERROR = error_msg
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
return ""
|
| 423 |
|
| 424 |
import time
|
|
@@ -452,7 +530,13 @@ def cheap_overlap_score(student_text: str, prompt: str) -> int:
|
|
| 452 |
return int(round(min(0.6, overlap) * 100)) # cap at 60
|
| 453 |
|
| 454 |
|
|
|
|
| 455 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 456 |
def _norm(s: str) -> str:
|
| 457 |
return re.sub(r"\s+", " ", (s or "").strip().lower())
|
| 458 |
|
|
@@ -487,6 +571,7 @@ def level_policy(student_level: str) -> dict:
|
|
| 487 |
return {"w_sim": 0.6, "w_cov": 0.4, "verified": 75, "partial": 55, "kp_thr": 0.20}
|
| 488 |
|
| 489 |
|
|
|
|
| 490 |
def mcq_partial_credit(student_level: str) -> dict:
|
| 491 |
"""
|
| 492 |
Returns partial credit percentage for MCQ questions based on student level.
|
|
@@ -507,6 +592,8 @@ def mcq_partial_credit(student_level: str) -> dict:
|
|
| 507 |
return {"credit_per_question": 75, "passing_threshold": 75}
|
| 508 |
|
| 509 |
|
|
|
|
|
|
|
| 510 |
def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: float) -> Tuple[List[str], List[str], float]:
|
| 511 |
covered, missing = [], []
|
| 512 |
for kp in key_points:
|
|
@@ -524,8 +611,15 @@ def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: fl
|
|
| 524 |
return covered, missing, coverage
|
| 525 |
|
| 526 |
|
|
|
|
| 527 |
|
| 528 |
def infer_question_type_from_prompt(prompt: str, student_text: str = "") -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
p = _norm(prompt)
|
| 530 |
|
| 531 |
# Explicit markers - check for (mcq) first since it's common in parentheses
|
|
@@ -534,6 +628,7 @@ def infer_question_type_from_prompt(prompt: str, student_text: str = "") -> str:
|
|
| 534 |
if re.search(r"\btype\s*:\s*narrative\b", p) or re.search(r"\bquestion_type\s*:\s*narrative\b", p):
|
| 535 |
return "narrative"
|
| 536 |
|
|
|
|
| 537 |
# Heuristic: options A/B/C/D exist in prompt -> likely MCQ
|
| 538 |
if re.search(r"\b(a|b|c|d)\s*[\)\.]\s+", p) or "option a" in p or "option b" in p:
|
| 539 |
return "mcq"
|
|
@@ -551,6 +646,11 @@ def infer_question_type_from_prompt(prompt: str, student_text: str = "") -> str:
|
|
| 551 |
# If answer starts with A. or B. etc.
|
| 552 |
if re.search(r"^[a-d]\.\s+", s.strip()):
|
| 553 |
return "mcq"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
|
| 555 |
return "narrative"
|
| 556 |
|
|
@@ -615,6 +715,7 @@ def parse_questions_from_prompt(prompt: str) -> List[Dict[str, Any]]:
|
|
| 615 |
|
| 616 |
# Check for correct answer (for MCQ)
|
| 617 |
if current_type == 'mcq':
|
|
|
|
| 618 |
# First check: is this line "Correct Answer(s):" with nothing after it?
|
| 619 |
# If so, we need to look for the answer on the next line
|
| 620 |
if re.search(r'^correct\s*answer\s*\(?s\)?\s*[:\.]?\s*$', line, re.IGNORECASE):
|
|
@@ -647,6 +748,12 @@ def parse_questions_from_prompt(prompt: str) -> List[Dict[str, Any]]:
|
|
| 647 |
else:
|
| 648 |
# Try to extract first letter
|
| 649 |
current_correct = correct_text[0].upper() if correct_text else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 650 |
|
| 651 |
# Don't forget the last question
|
| 652 |
if current_q is not None:
|
|
@@ -660,7 +767,11 @@ def parse_questions_from_prompt(prompt: str) -> List[Dict[str, Any]]:
|
|
| 660 |
# If no questions parsed, fall back to old behavior
|
| 661 |
if not questions:
|
| 662 |
qtype = infer_question_type_from_prompt(prompt)
|
|
|
|
| 663 |
return [{'qid': extract_qid_from_prompt(prompt), 'type': qtype, 'question': prompt, 'correct_answer': None}]
|
|
|
|
|
|
|
|
|
|
| 664 |
|
| 665 |
return questions
|
| 666 |
|
|
@@ -692,6 +803,7 @@ def extract_mcq_choice(text: str) -> str:
|
|
| 692 |
return ""
|
| 693 |
|
| 694 |
|
|
|
|
| 695 |
def extract_mcq_answers_with_qid(text: str) -> Dict[str, str]:
|
| 696 |
"""
|
| 697 |
Extract MCQ answers WITH question numbers from student text.
|
|
@@ -747,6 +859,8 @@ def extract_mcq_answers_with_qid(text: str) -> Dict[str, str]:
|
|
| 747 |
return results
|
| 748 |
|
| 749 |
|
|
|
|
|
|
|
| 750 |
def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
| 751 |
"""
|
| 752 |
This is IMPORTANT:
|
|
@@ -754,6 +868,7 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 754 |
- Correct: B
|
| 755 |
- Answer: C
|
| 756 |
- correct_option: D
|
|
|
|
| 757 |
- Correct Answer(s): A. Devdatta
|
| 758 |
or JSON: {"correct_option":"B"}
|
| 759 |
|
|
@@ -762,6 +877,9 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 762 |
- "Correct Answer(s): A. Devdatta"
|
| 763 |
- "Correct: B"
|
| 764 |
- "Answer: C"
|
|
|
|
|
|
|
|
|
|
| 765 |
"""
|
| 766 |
p = (prompt or "").strip()
|
| 767 |
if not p:
|
|
@@ -778,6 +896,7 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 778 |
except Exception:
|
| 779 |
pass
|
| 780 |
|
|
|
|
| 781 |
# Text prompt support - new format: "Correct Answer(s): A. Devdatta" or "Correct Answer: B"
|
| 782 |
t = _norm(p)
|
| 783 |
|
|
@@ -800,6 +919,10 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 800 |
return m1c.group(1)
|
| 801 |
|
| 802 |
# Pattern 2: "Correct: A" or "Answer: B" (original pattern)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 803 |
m = re.search(r"\b(correct|answer|ans)\s*[:\-]?\s*\(?\s*([a-d])\s*\)?\b", t)
|
| 804 |
if m:
|
| 805 |
return m.group(2)
|
|
@@ -807,7 +930,13 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 807 |
return ""
|
| 808 |
|
| 809 |
|
|
|
|
| 810 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 811 |
def _erp_get(params: dict) -> list:
|
| 812 |
headers = {}
|
| 813 |
if ERP_TOKEN:
|
|
@@ -839,6 +968,7 @@ def fetch_student_level_from_erp(row: Dict[str, Any]) -> str:
|
|
| 839 |
return "Medium"
|
| 840 |
|
| 841 |
|
|
|
|
| 842 |
|
| 843 |
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 844 |
"""
|
|
@@ -907,6 +1037,25 @@ def _extract_text_google_vision(image_bytes: bytes) -> str:
|
|
| 907 |
return ""
|
| 908 |
|
| 909 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 910 |
def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> str:
|
| 911 |
if not image_bytes or len(image_bytes) < 50:
|
| 912 |
raise HTTPException(status_code=400, detail=f"Invalid file: '{filename}' - empty/too small")
|
|
@@ -923,6 +1072,7 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
|
|
| 923 |
head = image_bytes[:12]
|
| 924 |
raise HTTPException(status_code=400, detail=f"Invalid image format: '{filename}' (header={head})")
|
| 925 |
|
|
|
|
| 926 |
# First try Google Cloud Vision (better for handwriting)
|
| 927 |
if vision_client:
|
| 928 |
gv_text = _extract_text_google_vision(image_bytes)
|
|
@@ -930,6 +1080,8 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
|
|
| 930 |
return _clean_extracted_text(gv_text)
|
| 931 |
|
| 932 |
# Fallback to Tesseract with improved preprocessing
|
|
|
|
|
|
|
| 933 |
try:
|
| 934 |
img = Image.open(io.BytesIO(image_bytes))
|
| 935 |
except Exception as e:
|
|
@@ -937,6 +1089,7 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
|
|
| 937 |
|
| 938 |
img = _preprocess_for_ocr(img)
|
| 939 |
|
|
|
|
| 940 |
# Try multiple OCR configurations for better handwritten recognition
|
| 941 |
ocr_configs = [
|
| 942 |
"--oem 3 --psm 6", # Default
|
|
@@ -965,6 +1118,16 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
|
|
| 965 |
raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
|
| 966 |
|
| 967 |
text = (best_text or "").strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 968 |
text = re.sub(r"[ \t]+", " ", text)
|
| 969 |
return text
|
| 970 |
|
|
@@ -1016,6 +1179,7 @@ def extract_text_from_pdf(pdf_bytes: bytes, filename: str = "unknown.pdf") -> Di
|
|
| 1016 |
return {"text": extracted, "used_ocr": False, "needs_ocr": True}
|
| 1017 |
try:
|
| 1018 |
used_ocr = True
|
|
|
|
| 1019 |
# Higher DPI for better handwritten OCR
|
| 1020 |
pages = convert_from_bytes(pdf_bytes, dpi=300)
|
| 1021 |
page_texts = []
|
|
@@ -1041,12 +1205,23 @@ def extract_text_from_pdf(pdf_bytes: bytes, filename: str = "unknown.pdf") -> Di
|
|
| 1041 |
if img:
|
| 1042 |
img = _preprocess_for_ocr(img)
|
| 1043 |
extracted = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6") or ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1044 |
except Exception as e:
|
| 1045 |
return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": True, "ocr_error": str(e)}
|
| 1046 |
|
| 1047 |
return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": False}
|
| 1048 |
|
| 1049 |
|
|
|
|
| 1050 |
def get_question_positions_from_pdf(pdf_bytes: bytes) -> Dict[int, List[Dict]]:
|
| 1051 |
"""
|
| 1052 |
Detect question number positions in a PDF.
|
|
@@ -1319,6 +1494,8 @@ def create_annotated_pdf(
|
|
| 1319 |
print(f"[ERROR] Failed to create annotated PDF: {e}")
|
| 1320 |
return original_pdf_bytes
|
| 1321 |
|
|
|
|
|
|
|
| 1322 |
async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
|
| 1323 |
filename = getattr(file, "filename", "") or "upload"
|
| 1324 |
content_type = (getattr(file, "content_type", "") or "").lower()
|
|
@@ -1368,7 +1545,13 @@ async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
|
|
| 1368 |
|
| 1369 |
|
| 1370 |
|
|
|
|
| 1371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1372 |
@app.get("/health")
|
| 1373 |
def health():
|
| 1374 |
return {"status": "ok"}
|
|
@@ -1377,6 +1560,7 @@ def health():
|
|
| 1377 |
@app.get("/health/llm")
|
| 1378 |
def health_llm():
|
| 1379 |
return {
|
|
|
|
| 1380 |
"ok": bool(gemini_client) and bool(GOOGLE_API_KEYS),
|
| 1381 |
"gemini": {
|
| 1382 |
"sdk_import_ok": genai is not None,
|
|
@@ -1384,6 +1568,12 @@ def health_llm():
|
|
| 1384 |
"num_keys_configured": len(GOOGLE_API_KEYS),
|
| 1385 |
"current_key_index": current_key_index + 1 if GOOGLE_API_KEYS else 0,
|
| 1386 |
"rate_limited_keys": list(rate_limited_keys),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1387 |
"client_ready": gemini_client is not None,
|
| 1388 |
"model": GEMINI_MODEL,
|
| 1389 |
"last_error": GEMINI_LAST_ERROR if GEMINI_LAST_ERROR else None,
|
|
@@ -1391,6 +1581,7 @@ def health_llm():
|
|
| 1391 |
}
|
| 1392 |
|
| 1393 |
|
|
|
|
| 1394 |
@app.get("/homework/annotated-url/{homework_id}/{student_id}")
|
| 1395 |
async def get_annotated_pdf_url(
|
| 1396 |
homework_id: int,
|
|
@@ -1803,10 +1994,13 @@ def build_per_question_results(
|
|
| 1803 |
return ai_evaluate_per_question(prompt, student_text, student_level)
|
| 1804 |
|
| 1805 |
|
|
|
|
|
|
|
| 1806 |
@app.post("/homework/validate")
|
| 1807 |
async def homework_validate(
|
| 1808 |
student_id: int = Form(...),
|
| 1809 |
homework_id: int = Form(...),
|
|
|
|
| 1810 |
student_file: UploadFile = File(...),
|
| 1811 |
):
|
| 1812 |
# 0) Fetch ERP record -> get all fields automatically
|
|
@@ -1836,12 +2030,25 @@ async def homework_validate(
|
|
| 1836 |
if final_question_type not in ("mcq", "narrative", "mixed"):
|
| 1837 |
final_question_type = infer_question_type_from_prompt(prompt, student_text)
|
| 1838 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1839 |
# 1) Infer question_type from prompt automatically (NO EXTRA FIELD)
|
| 1840 |
# Try to parse mixed questions first
|
| 1841 |
parsed_questions = parse_questions_from_prompt(prompt)
|
| 1842 |
has_mcq = any(q.get('type') == 'mcq' for q in parsed_questions)
|
| 1843 |
has_narrative = any(q.get('type') == 'narrative' for q in parsed_questions)
|
| 1844 |
|
|
|
|
| 1845 |
# Check if it's a PDF
|
| 1846 |
is_pdf_submission = student_info.get("kind") == "pdf"
|
| 1847 |
|
|
@@ -1885,11 +2092,30 @@ async def homework_validate(
|
|
| 1885 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1886 |
original_file_bytes, homework_id, student_id, unreadable_result, 0, "Unreadable", student_level
|
| 1887 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1888 |
return {
|
| 1889 |
"student_id": student_id,
|
| 1890 |
"homework_id": homework_id,
|
| 1891 |
"sub_institute_id": sub_institute_id,
|
| 1892 |
"syear": syear,
|
|
|
|
| 1893 |
"question_type": final_question_type,
|
| 1894 |
"student_level": student_level,
|
| 1895 |
"status": "Unreadable",
|
|
@@ -1900,10 +2126,21 @@ async def homework_validate(
|
|
| 1900 |
"llm_used": False,
|
| 1901 |
"question_marks": make_question_marks([]),
|
| 1902 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1903 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1904 |
}
|
| 1905 |
|
| 1906 |
if student_info.get("needs_ocr") and not student_text:
|
|
|
|
| 1907 |
# Save annotated PDF even for unreadable (with status shown)
|
| 1908 |
if is_pdf_submission and original_file_bytes:
|
| 1909 |
# Show circle mark for scanned PDF that needs OCR
|
|
@@ -1911,11 +2148,14 @@ async def homework_validate(
|
|
| 1911 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 1912 |
original_file_bytes, homework_id, student_id, ocr_result, 0, "Unreadable", student_level
|
| 1913 |
)
|
|
|
|
|
|
|
| 1914 |
return {
|
| 1915 |
"student_id": student_id,
|
| 1916 |
"homework_id": homework_id,
|
| 1917 |
"sub_institute_id": sub_institute_id,
|
| 1918 |
"syear": syear,
|
|
|
|
| 1919 |
"question_type": final_question_type,
|
| 1920 |
"student_level": student_level,
|
| 1921 |
"status": "Unreadable",
|
|
@@ -1931,10 +2171,28 @@ async def homework_validate(
|
|
| 1931 |
|
| 1932 |
|
| 1933 |
if final_question_type == "mixed":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1934 |
# Process each question type separately and combine results
|
| 1935 |
mcq_results = []
|
| 1936 |
narrative_results = []
|
| 1937 |
|
|
|
|
| 1938 |
# Extract ALL MCQ answers from student text with question numbers
|
| 1939 |
student_answers_by_qid = extract_mcq_answers_with_qid(student_text)
|
| 1940 |
|
|
@@ -1951,11 +2209,21 @@ async def homework_validate(
|
|
| 1951 |
if not chosen:
|
| 1952 |
chosen = extract_mcq_choice(student_text)
|
| 1953 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1954 |
correct = q.get('correct_answer') or extract_correct_mcq_from_prompt(q.get('question', ''))
|
| 1955 |
|
| 1956 |
if correct and chosen:
|
| 1957 |
is_correct = (chosen.lower().strip() == correct.lower().strip())
|
| 1958 |
mcq_results.append({
|
|
|
|
| 1959 |
'qid': qid,
|
| 1960 |
'correct': is_correct,
|
| 1961 |
'chosen': chosen,
|
|
@@ -1970,6 +2238,12 @@ async def homework_validate(
|
|
| 1970 |
'chosen': '',
|
| 1971 |
'correct_answer': correct,
|
| 1972 |
'unattempted': True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1973 |
})
|
| 1974 |
|
| 1975 |
# For narrative questions, use AI to generate reference
|
|
@@ -2026,6 +2300,7 @@ async def homework_validate(
|
|
| 2026 |
except Exception as e:
|
| 2027 |
narrative_results = {'error': str(e)}
|
| 2028 |
|
|
|
|
| 2029 |
# Calculate combined score with level-based partial credit for MCQ
|
| 2030 |
total_mcq = len(mcq_results)
|
| 2031 |
correct_mcq = sum(1 for r in mcq_results if r.get('correct'))
|
|
@@ -2037,6 +2312,12 @@ async def homework_validate(
|
|
| 2037 |
|
| 2038 |
# Calculate MCQ score based on level (not just binary correct/incorrect)
|
| 2039 |
mcq_score = (correct_mcq * credit_per_q) / max(1, total_mcq)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2040 |
|
| 2041 |
narrative_score = narrative_results.get('match_percentage', 0) if narrative_results else 0
|
| 2042 |
|
|
@@ -2058,12 +2339,15 @@ async def homework_validate(
|
|
| 2058 |
else:
|
| 2059 |
status = "Needs Review"
|
| 2060 |
|
|
|
|
| 2061 |
# Save annotated PDF
|
| 2062 |
if is_pdf_submission and original_file_bytes and mcq_results:
|
| 2063 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2064 |
original_file_bytes, homework_id, student_id, mcq_results, final_score, status, student_level
|
| 2065 |
)
|
| 2066 |
|
|
|
|
|
|
|
| 2067 |
return {
|
| 2068 |
"student_id": student_id,
|
| 2069 |
"homework_id": homework_id,
|
|
@@ -2073,12 +2357,18 @@ async def homework_validate(
|
|
| 2073 |
"student_level": student_level,
|
| 2074 |
"status": status,
|
| 2075 |
"match_percentage": final_score,
|
|
|
|
| 2076 |
"submission_remarks": None,
|
| 2077 |
"rule_based_remark": f"MCQ: {correct_mcq}/{total_mcq} correct. Narrative score: {narrative_score}%. (Level: {student_level}, Credit per Q: {credit_per_q}%)",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2078 |
"llm_used": bool(narrative_results and 'error' not in narrative_results),
|
| 2079 |
"student_extracted_text": student_text,
|
| 2080 |
"mcq_results": mcq_results,
|
| 2081 |
"narrative_results": narrative_results,
|
|
|
|
| 2082 |
"question_marks": make_question_marks(mcq_results),
|
| 2083 |
"annotated_pdf": annotated_pdf_filename,
|
| 2084 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
|
@@ -2235,6 +2525,14 @@ async def homework_validate(
|
|
| 2235 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2236 |
original_file_bytes, homework_id, student_id, no_correct_result, 0, "Needs Review", student_level
|
| 2237 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2238 |
return {
|
| 2239 |
"student_id": student_id,
|
| 2240 |
"homework_id": homework_id,
|
|
@@ -2244,6 +2542,7 @@ async def homework_validate(
|
|
| 2244 |
"student_level": student_level,
|
| 2245 |
"status": "Needs Review",
|
| 2246 |
"match_percentage": 0,
|
|
|
|
| 2247 |
"submission_remarks": None,
|
| 2248 |
"rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
|
| 2249 |
"student_extracted_text": student_text,
|
|
@@ -2260,6 +2559,17 @@ async def homework_validate(
|
|
| 2260 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2261 |
original_file_bytes, homework_id, student_id, no_chosen_result, 0, "Needs Review", student_level
|
| 2262 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2263 |
return {
|
| 2264 |
"student_id": student_id,
|
| 2265 |
"homework_id": homework_id,
|
|
@@ -2269,16 +2579,24 @@ async def homework_validate(
|
|
| 2269 |
"student_level": student_level,
|
| 2270 |
"status": "Needs Review",
|
| 2271 |
"match_percentage": 0,
|
|
|
|
| 2272 |
"submission_remarks": None,
|
| 2273 |
"rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
|
| 2274 |
"student_extracted_text": student_text,
|
| 2275 |
"llm_used": False,
|
| 2276 |
"question_marks": make_question_marks([]),
|
| 2277 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2278 |
"debug": {"correct": correct, "chosen": chosen},
|
| 2279 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2280 |
}
|
| 2281 |
|
|
|
|
| 2282 |
# Only process MCQ validation if not redirecting to narrative
|
| 2283 |
if not redirect_to_narrative:
|
| 2284 |
is_correct = (chosen == correct)
|
|
@@ -2328,6 +2646,30 @@ async def homework_validate(
|
|
| 2328 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2329 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2330 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2331 |
return {
|
| 2332 |
"student_id": student_id,
|
| 2333 |
"homework_id": homework_id,
|
|
@@ -2337,13 +2679,20 @@ async def homework_validate(
|
|
| 2337 |
"student_level": student_level,
|
| 2338 |
"status": "Needs Review",
|
| 2339 |
"match_percentage": 0,
|
|
|
|
| 2340 |
"submission_remarks": None,
|
|
|
|
|
|
|
|
|
|
| 2341 |
"rule_based_remark": "Gemini not configured. Check /health/llm.",
|
| 2342 |
"llm_used": False,
|
| 2343 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 2344 |
"student_extracted_text": student_text,
|
|
|
|
| 2345 |
"question_marks": make_question_marks([]),
|
| 2346 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
| 2347 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2348 |
}
|
| 2349 |
|
|
@@ -2364,11 +2713,14 @@ async def homework_validate(
|
|
| 2364 |
)
|
| 2365 |
|
| 2366 |
if not response_text:
|
|
|
|
| 2367 |
# Save annotated PDF
|
| 2368 |
if is_pdf_submission and original_file_bytes:
|
| 2369 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2370 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2371 |
)
|
|
|
|
|
|
|
| 2372 |
return {
|
| 2373 |
"student_id": student_id,
|
| 2374 |
"homework_id": homework_id,
|
|
@@ -2378,13 +2730,20 @@ async def homework_validate(
|
|
| 2378 |
"student_level": student_level,
|
| 2379 |
"status": "Needs Review",
|
| 2380 |
"match_percentage": 0,
|
|
|
|
| 2381 |
"submission_remarks": None,
|
|
|
|
|
|
|
|
|
|
| 2382 |
"rule_based_remark": "Gemini failed. Check /health/llm.",
|
| 2383 |
"llm_used": False,
|
| 2384 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 2385 |
"student_extracted_text": student_text,
|
|
|
|
| 2386 |
"question_marks": make_question_marks([]),
|
| 2387 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
| 2388 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2389 |
}
|
| 2390 |
|
|
@@ -2392,11 +2751,14 @@ async def homework_validate(
|
|
| 2392 |
m = re.search(r"\{.*\}", response_text, flags=re.S)
|
| 2393 |
payload = json.loads(m.group(0) if m else response_text)
|
| 2394 |
except Exception as e:
|
|
|
|
| 2395 |
# Save annotated PDF
|
| 2396 |
if is_pdf_submission and original_file_bytes:
|
| 2397 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2398 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2399 |
)
|
|
|
|
|
|
|
| 2400 |
return {
|
| 2401 |
"student_id": student_id,
|
| 2402 |
"homework_id": homework_id,
|
|
@@ -2406,13 +2768,20 @@ async def homework_validate(
|
|
| 2406 |
"student_level": student_level,
|
| 2407 |
"status": "Needs Review",
|
| 2408 |
"match_percentage": 0,
|
|
|
|
| 2409 |
"submission_remarks": None,
|
|
|
|
|
|
|
|
|
|
| 2410 |
"rule_based_remark": "Gemini returned non-JSON output.",
|
| 2411 |
"llm_used": False,
|
| 2412 |
"llm_error": {"ok": False, "error_type": "GEMINI_BAD_JSON", "message": str(e), "raw": response_text[:800]},
|
| 2413 |
"student_extracted_text": student_text,
|
|
|
|
| 2414 |
"question_marks": make_question_marks([]),
|
| 2415 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
| 2416 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2417 |
}
|
| 2418 |
|
|
@@ -2423,11 +2792,14 @@ async def homework_validate(
|
|
| 2423 |
key_points = [str(x).strip() for x in key_points if str(x).strip()]
|
| 2424 |
|
| 2425 |
if not ai_reference_answer:
|
|
|
|
| 2426 |
# Save annotated PDF
|
| 2427 |
if is_pdf_submission and original_file_bytes:
|
| 2428 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2429 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2430 |
)
|
|
|
|
|
|
|
| 2431 |
return {
|
| 2432 |
"student_id": student_id,
|
| 2433 |
"homework_id": homework_id,
|
|
@@ -2437,12 +2809,19 @@ async def homework_validate(
|
|
| 2437 |
"student_level": student_level,
|
| 2438 |
"status": "Needs Review",
|
| 2439 |
"match_percentage": 0,
|
|
|
|
| 2440 |
"submission_remarks": None,
|
| 2441 |
"rule_based_remark": "AI returned empty reference answer.",
|
| 2442 |
"llm_used": True,
|
| 2443 |
"student_extracted_text": student_text,
|
| 2444 |
"question_marks": make_question_marks([]),
|
| 2445 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2446 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2447 |
}
|
| 2448 |
|
|
@@ -2473,7 +2852,11 @@ async def homework_validate(
|
|
| 2473 |
f"{remark_prompt}"
|
| 2474 |
)
|
| 2475 |
|
|
|
|
| 2476 |
submission_remark = generate_gemini_response(
|
|
|
|
|
|
|
|
|
|
| 2477 |
prompt=resp2_prompt,
|
| 2478 |
system_prompt="You are a strict, helpful teacher. Be concise and factual.",
|
| 2479 |
max_tokens=140,
|
|
@@ -2481,10 +2864,17 @@ async def homework_validate(
|
|
| 2481 |
)
|
| 2482 |
|
| 2483 |
rule_based_remark = None
|
|
|
|
| 2484 |
remark_llm_used = bool(submission_remark)
|
| 2485 |
remark_llm_error = None if submission_remark else (GEMINI_LAST_ERROR or "Unknown LLM error")
|
| 2486 |
|
| 2487 |
if not submission_remark:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2488 |
if status == "Verified":
|
| 2489 |
rule_based_remark = "Homework matches the expected answer well. Good coverage of the key ideas."
|
| 2490 |
elif status == "Partial":
|
|
@@ -2492,6 +2882,7 @@ async def homework_validate(
|
|
| 2492 |
else:
|
| 2493 |
rule_based_remark = "Homework does not match the expected answer enough. Please review the topic and resubmit with clearer, complete points."
|
| 2494 |
|
|
|
|
| 2495 |
# Save annotated PDF — evaluate EACH question individually against student text
|
| 2496 |
per_question_results = build_per_question_results(
|
| 2497 |
prompt, student_text, status, match_pct,
|
|
@@ -2505,6 +2896,8 @@ async def homework_validate(
|
|
| 2505 |
original_file_bytes, homework_id, student_id, per_question_results, match_pct, status, student_level, "narrative"
|
| 2506 |
)
|
| 2507 |
|
|
|
|
|
|
|
| 2508 |
return {
|
| 2509 |
"student_id": student_id,
|
| 2510 |
"homework_id": homework_id,
|
|
@@ -2514,7 +2907,11 @@ async def homework_validate(
|
|
| 2514 |
"student_level": student_level,
|
| 2515 |
"status": status,
|
| 2516 |
"match_percentage": match_pct,
|
|
|
|
| 2517 |
"submission_remarks": submission_remark if submission_remark else None,
|
|
|
|
|
|
|
|
|
|
| 2518 |
"rule_based_remark": rule_based_remark,
|
| 2519 |
"llm_used": True,
|
| 2520 |
"remark_llm_used": remark_llm_used,
|
|
@@ -2524,15 +2921,21 @@ async def homework_validate(
|
|
| 2524 |
"key_points": key_points,
|
| 2525 |
"key_points_covered": covered,
|
| 2526 |
"key_points_missing": missing,
|
|
|
|
| 2527 |
"question_marks": make_question_marks(per_question_results),
|
| 2528 |
"annotated_pdf": annotated_pdf_filename,
|
|
|
|
|
|
|
| 2529 |
"debug": {
|
| 2530 |
"similarity": sim,
|
| 2531 |
"coverage": coverage,
|
| 2532 |
"policy": policy,
|
|
|
|
| 2533 |
"per_question_results": per_question_results,
|
| 2534 |
"erp_row_fields": list(erp_row.keys()) if erp_row else [],
|
| 2535 |
"erp_student_level_raw": erp_row.get("student_level") or erp_row.get("level") or erp_row.get("difficulty") or erp_row.get("difficulty_level"),
|
|
|
|
|
|
|
| 2536 |
},
|
| 2537 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2538 |
}
|
|
|
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
from PIL import Image, ImageOps, ImageFilter
|
| 12 |
import pytesseract
|
| 13 |
+
<<<<<<< HEAD
|
| 14 |
import os
|
| 15 |
|
| 16 |
# Serve static files from outputs directory
|
| 17 |
from fastapi.staticfiles import StaticFiles
|
| 18 |
from fastapi.responses import FileResponse
|
| 19 |
+
=======
|
| 20 |
+
|
| 21 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 22 |
from dotenv import load_dotenv
|
| 23 |
load_dotenv()
|
| 24 |
|
|
|
|
| 34 |
PdfReader = None
|
| 35 |
|
| 36 |
try:
|
| 37 |
+
<<<<<<< HEAD
|
| 38 |
from reportlab.pdfgen import canvas
|
| 39 |
from reportlab.lib.pagesizes import letter
|
| 40 |
from reportlab.lib import colors
|
|
|
|
| 45 |
print(f"[WARN] reportlab import failed: {e}")
|
| 46 |
|
| 47 |
try:
|
| 48 |
+
=======
|
| 49 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 50 |
from pdf2image import convert_from_bytes # requires poppler
|
| 51 |
except Exception:
|
| 52 |
convert_from_bytes = None
|
|
|
|
| 62 |
genai = None
|
| 63 |
print(f"[WARN] google-genai import failed: {e}")
|
| 64 |
|
| 65 |
+
<<<<<<< HEAD
|
| 66 |
# ✅ Google Cloud Vision SDK (for better handwritten OCR)
|
| 67 |
try:
|
| 68 |
from google.cloud import vision
|
|
|
|
| 119 |
"num_keys": len(GOOGLE_API_KEYS),
|
| 120 |
"has_openai_key": bool(os.getenv("OPENAI_API_KEY")),
|
| 121 |
}
|
| 122 |
+
=======
|
| 123 |
+
|
| 124 |
+
# =========================================================
|
| 125 |
+
# ✅ FASTAPI APP INSTANCE
|
| 126 |
+
# =========================================================
|
| 127 |
+
app = FastAPI()
|
| 128 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 129 |
app.add_middleware(
|
| 130 |
CORSMiddleware,
|
| 131 |
allow_origins=["*"],
|
|
|
|
| 134 |
allow_headers=["*"],
|
| 135 |
)
|
| 136 |
|
| 137 |
+
<<<<<<< HEAD
|
| 138 |
|
| 139 |
|
| 140 |
+
=======
|
| 141 |
+
# =========================================================
|
| 142 |
+
# ✅ TESSERACT PATH
|
| 143 |
+
# =========================================================
|
| 144 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 145 |
if os.name == "nt":
|
| 146 |
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
|
| 147 |
else:
|
| 148 |
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
|
| 149 |
|
| 150 |
|
| 151 |
+
<<<<<<< HEAD
|
| 152 |
|
| 153 |
+
=======
|
| 154 |
+
# =========================================================
|
| 155 |
+
# ✅ ERP CONFIG
|
| 156 |
+
# =========================================================
|
| 157 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 158 |
ERP_BASE = os.getenv("ERP_BASE", "https://erp.triz.co.in/lms_data")
|
| 159 |
STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student/")
|
| 160 |
ERP_TOKEN = os.getenv("ERP_TOKEN", "")
|
| 161 |
|
| 162 |
|
| 163 |
+
<<<<<<< HEAD
|
| 164 |
def get_public_base_url() -> str:
|
| 165 |
"""
|
| 166 |
Returns the public base URL of this server.
|
|
|
|
| 279 |
return
|
| 280 |
|
| 281 |
api_key = GOOGLE_API_KEYS[key_index]
|
| 282 |
+
=======
|
| 283 |
+
# =========================================================
|
| 284 |
+
# ✅ GEMINI CONFIG
|
| 285 |
+
# =========================================================
|
| 286 |
+
GOOGLE_API_KEY = (os.getenv("GOOGLE_API_KEY") or "").strip()
|
| 287 |
+
GEMINI_MODEL = (os.getenv("GEMINI_MODEL", "models/gemini-2.0-flash") or "").strip()
|
| 288 |
+
if GEMINI_MODEL and not GEMINI_MODEL.startswith("models/"):
|
| 289 |
+
GEMINI_MODEL = "models/" + GEMINI_MODEL
|
| 290 |
+
|
| 291 |
+
gemini_client = None
|
| 292 |
+
GEMINI_LAST_ERROR = ""
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def _init_gemini_client() -> None:
|
| 296 |
+
global gemini_client, GEMINI_LAST_ERROR
|
| 297 |
+
|
| 298 |
+
if gemini_client is not None:
|
| 299 |
+
return
|
| 300 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 301 |
|
| 302 |
if not genai:
|
| 303 |
GEMINI_LAST_ERROR = "google-genai not installed / import failed"
|
| 304 |
gemini_client = None
|
| 305 |
return
|
| 306 |
|
| 307 |
+
<<<<<<< HEAD
|
| 308 |
if not api_key:
|
| 309 |
GEMINI_LAST_ERROR = f"GOOGLE_API_KEY_{key_index + 1} not set"
|
| 310 |
+
=======
|
| 311 |
+
if not GOOGLE_API_KEY:
|
| 312 |
+
GEMINI_LAST_ERROR = "GOOGLE_API_KEY not set"
|
| 313 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 314 |
gemini_client = None
|
| 315 |
return
|
| 316 |
|
| 317 |
try:
|
| 318 |
+
<<<<<<< HEAD
|
| 319 |
gemini_client = genai.Client(api_key=api_key)
|
| 320 |
GEMINI_LAST_ERROR = ""
|
| 321 |
print(f"[INFO] Gemini client initialized with key #{key_index + 1}")
|
| 322 |
+
=======
|
| 323 |
+
gemini_client = genai.Client(api_key=GOOGLE_API_KEY)
|
| 324 |
+
GEMINI_LAST_ERROR = ""
|
| 325 |
+
print("[INFO] Gemini client initialized")
|
| 326 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 327 |
except Exception as e:
|
| 328 |
gemini_client = None
|
| 329 |
GEMINI_LAST_ERROR = str(e)
|
| 330 |
print(f"[WARN] Gemini init failed: {GEMINI_LAST_ERROR}")
|
| 331 |
|
| 332 |
|
| 333 |
+
<<<<<<< HEAD
|
| 334 |
def _is_rate_limit_error(error_msg: str) -> bool:
|
| 335 |
"""Check if the error is a rate limit error (429) or service unavailable (503)."""
|
| 336 |
if not error_msg:
|
|
|
|
| 373 |
|
| 374 |
|
| 375 |
_init_gemini_client(0)
|
| 376 |
+
=======
|
| 377 |
+
_init_gemini_client()
|
| 378 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 379 |
|
| 380 |
|
| 381 |
def parse_gemini_error(error_msg: str) -> dict:
|
|
|
|
| 391 |
return {"ok": False, "error_type": "GEMINI_ERROR", "message": msg}
|
| 392 |
|
| 393 |
|
| 394 |
+
<<<<<<< HEAD
|
| 395 |
|
| 396 |
def extract_qid_from_prompt(prompt: str, erp_row: dict = None) -> str:
|
| 397 |
"""
|
|
|
|
| 438 |
return "Q1"
|
| 439 |
|
| 440 |
|
| 441 |
+
=======
|
| 442 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 443 |
def generate_gemini_response(
|
| 444 |
prompt: str,
|
| 445 |
system_prompt: str = "",
|
| 446 |
max_tokens: int = 650,
|
| 447 |
temperature: float = 0.3,
|
| 448 |
) -> str:
|
| 449 |
+
<<<<<<< HEAD
|
| 450 |
global GEMINI_LAST_ERROR, gemini_client, rate_limited_keys
|
| 451 |
+
=======
|
| 452 |
+
global GEMINI_LAST_ERROR
|
| 453 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 454 |
|
| 455 |
if gemini_client is None:
|
| 456 |
if not GEMINI_LAST_ERROR:
|
| 457 |
GEMINI_LAST_ERROR = "Gemini client not initialized"
|
| 458 |
+
<<<<<<< HEAD
|
| 459 |
# Try to reinitialize if we have keys available
|
| 460 |
if GOOGLE_API_KEYS and current_key_index not in rate_limited_keys:
|
| 461 |
_init_gemini_client(current_key_index)
|
| 462 |
if gemini_client is None:
|
| 463 |
return ""
|
| 464 |
+
=======
|
| 465 |
+
return ""
|
| 466 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 467 |
|
| 468 |
try:
|
| 469 |
contents = []
|
|
|
|
| 481 |
GEMINI_LAST_ERROR = ""
|
| 482 |
return text
|
| 483 |
except Exception as e:
|
| 484 |
+
<<<<<<< HEAD
|
| 485 |
error_msg = str(e)
|
| 486 |
print(f"[ERROR] Gemini call failed: {error_msg}")
|
| 487 |
|
|
|
|
| 493 |
return generate_gemini_response(prompt, system_prompt, max_tokens, temperature)
|
| 494 |
|
| 495 |
GEMINI_LAST_ERROR = error_msg
|
| 496 |
+
=======
|
| 497 |
+
GEMINI_LAST_ERROR = str(e)
|
| 498 |
+
print(f"[ERROR] Gemini call failed: {GEMINI_LAST_ERROR}")
|
| 499 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 500 |
return ""
|
| 501 |
|
| 502 |
import time
|
|
|
|
| 530 |
return int(round(min(0.6, overlap) * 100)) # cap at 60
|
| 531 |
|
| 532 |
|
| 533 |
+
<<<<<<< HEAD
|
| 534 |
|
| 535 |
+
=======
|
| 536 |
+
# =========================================================
|
| 537 |
+
# ✅ SMALL UTILS
|
| 538 |
+
# =========================================================
|
| 539 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 540 |
def _norm(s: str) -> str:
|
| 541 |
return re.sub(r"\s+", " ", (s or "").strip().lower())
|
| 542 |
|
|
|
|
| 571 |
return {"w_sim": 0.6, "w_cov": 0.4, "verified": 75, "partial": 55, "kp_thr": 0.20}
|
| 572 |
|
| 573 |
|
| 574 |
+
<<<<<<< HEAD
|
| 575 |
def mcq_partial_credit(student_level: str) -> dict:
|
| 576 |
"""
|
| 577 |
Returns partial credit percentage for MCQ questions based on student level.
|
|
|
|
| 592 |
return {"credit_per_question": 75, "passing_threshold": 75}
|
| 593 |
|
| 594 |
|
| 595 |
+
=======
|
| 596 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 597 |
def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: float) -> Tuple[List[str], List[str], float]:
|
| 598 |
covered, missing = [], []
|
| 599 |
for kp in key_points:
|
|
|
|
| 611 |
return covered, missing, coverage
|
| 612 |
|
| 613 |
|
| 614 |
+
<<<<<<< HEAD
|
| 615 |
|
| 616 |
def infer_question_type_from_prompt(prompt: str, student_text: str = "") -> str:
|
| 617 |
+
=======
|
| 618 |
+
# =========================================================
|
| 619 |
+
# ✅ QUESTION TYPE INFERENCE + MCQ PARSING
|
| 620 |
+
# =========================================================
|
| 621 |
+
def infer_question_type_from_prompt(prompt: str) -> str:
|
| 622 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 623 |
p = _norm(prompt)
|
| 624 |
|
| 625 |
# Explicit markers - check for (mcq) first since it's common in parentheses
|
|
|
|
| 628 |
if re.search(r"\btype\s*:\s*narrative\b", p) or re.search(r"\bquestion_type\s*:\s*narrative\b", p):
|
| 629 |
return "narrative"
|
| 630 |
|
| 631 |
+
<<<<<<< HEAD
|
| 632 |
# Heuristic: options A/B/C/D exist in prompt -> likely MCQ
|
| 633 |
if re.search(r"\b(a|b|c|d)\s*[\)\.]\s+", p) or "option a" in p or "option b" in p:
|
| 634 |
return "mcq"
|
|
|
|
| 646 |
# If answer starts with A. or B. etc.
|
| 647 |
if re.search(r"^[a-d]\.\s+", s.strip()):
|
| 648 |
return "mcq"
|
| 649 |
+
=======
|
| 650 |
+
# Heuristic: options A/B/C/D exist -> likely MCQ
|
| 651 |
+
if re.search(r"\b(a|b|c|d)\s*[\)\.]\s+", p) or "option a" in p or "option b" in p:
|
| 652 |
+
return "mcq"
|
| 653 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 654 |
|
| 655 |
return "narrative"
|
| 656 |
|
|
|
|
| 715 |
|
| 716 |
# Check for correct answer (for MCQ)
|
| 717 |
if current_type == 'mcq':
|
| 718 |
+
<<<<<<< HEAD
|
| 719 |
# First check: is this line "Correct Answer(s):" with nothing after it?
|
| 720 |
# If so, we need to look for the answer on the next line
|
| 721 |
if re.search(r'^correct\s*answer\s*\(?s\)?\s*[:\.]?\s*$', line, re.IGNORECASE):
|
|
|
|
| 748 |
else:
|
| 749 |
# Try to extract first letter
|
| 750 |
current_correct = correct_text[0].upper() if correct_text else None
|
| 751 |
+
=======
|
| 752 |
+
# Look for "Correct Answer(s):" or "Correct:" or "Answer:"
|
| 753 |
+
correct_match = re.search(r'(?:Correct\s*(?:Answer)?|Answer)[:.]\s*(?:[A-D]\.?\s*)?(.+)', line, re.IGNORECASE)
|
| 754 |
+
if correct_match and not current_correct:
|
| 755 |
+
current_correct = correct_match.group(1).strip()
|
| 756 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 757 |
|
| 758 |
# Don't forget the last question
|
| 759 |
if current_q is not None:
|
|
|
|
| 767 |
# If no questions parsed, fall back to old behavior
|
| 768 |
if not questions:
|
| 769 |
qtype = infer_question_type_from_prompt(prompt)
|
| 770 |
+
<<<<<<< HEAD
|
| 771 |
return [{'qid': extract_qid_from_prompt(prompt), 'type': qtype, 'question': prompt, 'correct_answer': None}]
|
| 772 |
+
=======
|
| 773 |
+
return [{'qid': 'Q1', 'type': qtype, 'question': prompt, 'correct_answer': None}]
|
| 774 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 775 |
|
| 776 |
return questions
|
| 777 |
|
|
|
|
| 803 |
return ""
|
| 804 |
|
| 805 |
|
| 806 |
+
<<<<<<< HEAD
|
| 807 |
def extract_mcq_answers_with_qid(text: str) -> Dict[str, str]:
|
| 808 |
"""
|
| 809 |
Extract MCQ answers WITH question numbers from student text.
|
|
|
|
| 859 |
return results
|
| 860 |
|
| 861 |
|
| 862 |
+
=======
|
| 863 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 864 |
def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
| 865 |
"""
|
| 866 |
This is IMPORTANT:
|
|
|
|
| 868 |
- Correct: B
|
| 869 |
- Answer: C
|
| 870 |
- correct_option: D
|
| 871 |
+
<<<<<<< HEAD
|
| 872 |
- Correct Answer(s): A. Devdatta
|
| 873 |
or JSON: {"correct_option":"B"}
|
| 874 |
|
|
|
|
| 877 |
- "Correct Answer(s): A. Devdatta"
|
| 878 |
- "Correct: B"
|
| 879 |
- "Answer: C"
|
| 880 |
+
=======
|
| 881 |
+
or JSON: {"correct_option":"B"}
|
| 882 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 883 |
"""
|
| 884 |
p = (prompt or "").strip()
|
| 885 |
if not p:
|
|
|
|
| 896 |
except Exception:
|
| 897 |
pass
|
| 898 |
|
| 899 |
+
<<<<<<< HEAD
|
| 900 |
# Text prompt support - new format: "Correct Answer(s): A. Devdatta" or "Correct Answer: B"
|
| 901 |
t = _norm(p)
|
| 902 |
|
|
|
|
| 919 |
return m1c.group(1)
|
| 920 |
|
| 921 |
# Pattern 2: "Correct: A" or "Answer: B" (original pattern)
|
| 922 |
+
=======
|
| 923 |
+
# Text prompt support
|
| 924 |
+
t = _norm(p)
|
| 925 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 926 |
m = re.search(r"\b(correct|answer|ans)\s*[:\-]?\s*\(?\s*([a-d])\s*\)?\b", t)
|
| 927 |
if m:
|
| 928 |
return m.group(2)
|
|
|
|
| 930 |
return ""
|
| 931 |
|
| 932 |
|
| 933 |
+
<<<<<<< HEAD
|
| 934 |
|
| 935 |
+
=======
|
| 936 |
+
# =========================================================
|
| 937 |
+
# ✅ ERP HELPERS
|
| 938 |
+
# =========================================================
|
| 939 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 940 |
def _erp_get(params: dict) -> list:
|
| 941 |
headers = {}
|
| 942 |
if ERP_TOKEN:
|
|
|
|
| 968 |
return "Medium"
|
| 969 |
|
| 970 |
|
| 971 |
+
<<<<<<< HEAD
|
| 972 |
|
| 973 |
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 974 |
"""
|
|
|
|
| 1037 |
return ""
|
| 1038 |
|
| 1039 |
|
| 1040 |
+
=======
|
| 1041 |
+
# =========================================================
|
| 1042 |
+
# ✅ OCR + TEXT EXTRACTION
|
| 1043 |
+
# =========================================================
|
| 1044 |
+
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 1045 |
+
img = img.convert("L")
|
| 1046 |
+
img = ImageOps.autocontrast(img)
|
| 1047 |
+
|
| 1048 |
+
w, h = img.size
|
| 1049 |
+
if max(w, h) < 1600:
|
| 1050 |
+
scale = 1600 / max(w, h)
|
| 1051 |
+
img = img.resize((int(w * scale), int(h * scale)))
|
| 1052 |
+
|
| 1053 |
+
img = img.filter(ImageFilter.SHARPEN)
|
| 1054 |
+
img = img.point(lambda p: 255 if p > 170 else 0)
|
| 1055 |
+
return img
|
| 1056 |
+
|
| 1057 |
+
|
| 1058 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1059 |
def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> str:
|
| 1060 |
if not image_bytes or len(image_bytes) < 50:
|
| 1061 |
raise HTTPException(status_code=400, detail=f"Invalid file: '{filename}' - empty/too small")
|
|
|
|
| 1072 |
head = image_bytes[:12]
|
| 1073 |
raise HTTPException(status_code=400, detail=f"Invalid image format: '{filename}' (header={head})")
|
| 1074 |
|
| 1075 |
+
<<<<<<< HEAD
|
| 1076 |
# First try Google Cloud Vision (better for handwriting)
|
| 1077 |
if vision_client:
|
| 1078 |
gv_text = _extract_text_google_vision(image_bytes)
|
|
|
|
| 1080 |
return _clean_extracted_text(gv_text)
|
| 1081 |
|
| 1082 |
# Fallback to Tesseract with improved preprocessing
|
| 1083 |
+
=======
|
| 1084 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1085 |
try:
|
| 1086 |
img = Image.open(io.BytesIO(image_bytes))
|
| 1087 |
except Exception as e:
|
|
|
|
| 1089 |
|
| 1090 |
img = _preprocess_for_ocr(img)
|
| 1091 |
|
| 1092 |
+
<<<<<<< HEAD
|
| 1093 |
# Try multiple OCR configurations for better handwritten recognition
|
| 1094 |
ocr_configs = [
|
| 1095 |
"--oem 3 --psm 6", # Default
|
|
|
|
| 1118 |
raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
|
| 1119 |
|
| 1120 |
text = (best_text or "").strip()
|
| 1121 |
+
=======
|
| 1122 |
+
try:
|
| 1123 |
+
text = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6")
|
| 1124 |
+
except pytesseract.TesseractNotFoundError:
|
| 1125 |
+
raise HTTPException(status_code=500, detail="Tesseract OCR not found. Install it / fix path.")
|
| 1126 |
+
except Exception as e:
|
| 1127 |
+
raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
|
| 1128 |
+
|
| 1129 |
+
text = (text or "").strip()
|
| 1130 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1131 |
text = re.sub(r"[ \t]+", " ", text)
|
| 1132 |
return text
|
| 1133 |
|
|
|
|
| 1179 |
return {"text": extracted, "used_ocr": False, "needs_ocr": True}
|
| 1180 |
try:
|
| 1181 |
used_ocr = True
|
| 1182 |
+
<<<<<<< HEAD
|
| 1183 |
# Higher DPI for better handwritten OCR
|
| 1184 |
pages = convert_from_bytes(pdf_bytes, dpi=300)
|
| 1185 |
page_texts = []
|
|
|
|
| 1205 |
if img:
|
| 1206 |
img = _preprocess_for_ocr(img)
|
| 1207 |
extracted = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6") or ""
|
| 1208 |
+
=======
|
| 1209 |
+
pages = convert_from_bytes(pdf_bytes, dpi=250)
|
| 1210 |
+
page_texts = []
|
| 1211 |
+
for img in pages:
|
| 1212 |
+
img = _preprocess_for_ocr(img)
|
| 1213 |
+
t = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6") or ""
|
| 1214 |
+
if t.strip():
|
| 1215 |
+
page_texts.append(t)
|
| 1216 |
+
extracted = _clean_extracted_text("\n\n".join(page_texts))
|
| 1217 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1218 |
except Exception as e:
|
| 1219 |
return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": True, "ocr_error": str(e)}
|
| 1220 |
|
| 1221 |
return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": False}
|
| 1222 |
|
| 1223 |
|
| 1224 |
+
<<<<<<< HEAD
|
| 1225 |
def get_question_positions_from_pdf(pdf_bytes: bytes) -> Dict[int, List[Dict]]:
|
| 1226 |
"""
|
| 1227 |
Detect question number positions in a PDF.
|
|
|
|
| 1494 |
print(f"[ERROR] Failed to create annotated PDF: {e}")
|
| 1495 |
return original_pdf_bytes
|
| 1496 |
|
| 1497 |
+
=======
|
| 1498 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1499 |
async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
|
| 1500 |
filename = getattr(file, "filename", "") or "upload"
|
| 1501 |
content_type = (getattr(file, "content_type", "") or "").lower()
|
|
|
|
| 1545 |
|
| 1546 |
|
| 1547 |
|
| 1548 |
+
<<<<<<< HEAD
|
| 1549 |
|
| 1550 |
+
=======
|
| 1551 |
+
# =========================================================
|
| 1552 |
+
# ✅ ROUTES
|
| 1553 |
+
# =========================================================
|
| 1554 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1555 |
@app.get("/health")
|
| 1556 |
def health():
|
| 1557 |
return {"status": "ok"}
|
|
|
|
| 1560 |
@app.get("/health/llm")
|
| 1561 |
def health_llm():
|
| 1562 |
return {
|
| 1563 |
+
<<<<<<< HEAD
|
| 1564 |
"ok": bool(gemini_client) and bool(GOOGLE_API_KEYS),
|
| 1565 |
"gemini": {
|
| 1566 |
"sdk_import_ok": genai is not None,
|
|
|
|
| 1568 |
"num_keys_configured": len(GOOGLE_API_KEYS),
|
| 1569 |
"current_key_index": current_key_index + 1 if GOOGLE_API_KEYS else 0,
|
| 1570 |
"rate_limited_keys": list(rate_limited_keys),
|
| 1571 |
+
=======
|
| 1572 |
+
"ok": bool(gemini_client) and bool(GOOGLE_API_KEY),
|
| 1573 |
+
"gemini": {
|
| 1574 |
+
"sdk_import_ok": genai is not None,
|
| 1575 |
+
"configured": bool(GOOGLE_API_KEY),
|
| 1576 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1577 |
"client_ready": gemini_client is not None,
|
| 1578 |
"model": GEMINI_MODEL,
|
| 1579 |
"last_error": GEMINI_LAST_ERROR if GEMINI_LAST_ERROR else None,
|
|
|
|
| 1581 |
}
|
| 1582 |
|
| 1583 |
|
| 1584 |
+
<<<<<<< HEAD
|
| 1585 |
@app.get("/homework/annotated-url/{homework_id}/{student_id}")
|
| 1586 |
async def get_annotated_pdf_url(
|
| 1587 |
homework_id: int,
|
|
|
|
| 1994 |
return ai_evaluate_per_question(prompt, student_text, student_level)
|
| 1995 |
|
| 1996 |
|
| 1997 |
+
=======
|
| 1998 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 1999 |
@app.post("/homework/validate")
|
| 2000 |
async def homework_validate(
|
| 2001 |
student_id: int = Form(...),
|
| 2002 |
homework_id: int = Form(...),
|
| 2003 |
+
<<<<<<< HEAD
|
| 2004 |
student_file: UploadFile = File(...),
|
| 2005 |
):
|
| 2006 |
# 0) Fetch ERP record -> get all fields automatically
|
|
|
|
| 2030 |
if final_question_type not in ("mcq", "narrative", "mixed"):
|
| 2031 |
final_question_type = infer_question_type_from_prompt(prompt, student_text)
|
| 2032 |
|
| 2033 |
+
=======
|
| 2034 |
+
sub_institute_id: int = Form(...),
|
| 2035 |
+
syear: str = Form(...),
|
| 2036 |
+
prompt: str = Form(...),
|
| 2037 |
+
student_file: UploadFile = File(...),
|
| 2038 |
+
):
|
| 2039 |
+
# 0) Fetch ERP record -> get student_level automatically
|
| 2040 |
+
erp_row = fetch_student_record(homework_id, student_id)
|
| 2041 |
+
student_level = fetch_student_level_from_erp(erp_row)
|
| 2042 |
+
policy = level_policy(student_level)
|
| 2043 |
+
|
| 2044 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2045 |
# 1) Infer question_type from prompt automatically (NO EXTRA FIELD)
|
| 2046 |
# Try to parse mixed questions first
|
| 2047 |
parsed_questions = parse_questions_from_prompt(prompt)
|
| 2048 |
has_mcq = any(q.get('type') == 'mcq' for q in parsed_questions)
|
| 2049 |
has_narrative = any(q.get('type') == 'narrative' for q in parsed_questions)
|
| 2050 |
|
| 2051 |
+
<<<<<<< HEAD
|
| 2052 |
# Check if it's a PDF
|
| 2053 |
is_pdf_submission = student_info.get("kind") == "pdf"
|
| 2054 |
|
|
|
|
| 2092 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2093 |
original_file_bytes, homework_id, student_id, unreadable_result, 0, "Unreadable", student_level
|
| 2094 |
)
|
| 2095 |
+
=======
|
| 2096 |
+
# Determine overall question type for backwards compatibility
|
| 2097 |
+
if has_mcq and has_narrative:
|
| 2098 |
+
question_type = "mixed"
|
| 2099 |
+
elif has_mcq:
|
| 2100 |
+
question_type = "mcq"
|
| 2101 |
+
elif has_narrative:
|
| 2102 |
+
question_type = "narrative"
|
| 2103 |
+
else:
|
| 2104 |
+
question_type = infer_question_type_from_prompt(prompt)
|
| 2105 |
+
|
| 2106 |
+
# 2) Extract student text
|
| 2107 |
+
student_info = await extract_text_from_upload(student_file)
|
| 2108 |
+
student_text = (student_info.get("text") or "").strip()
|
| 2109 |
+
|
| 2110 |
+
MIN_WORDS = 3 if question_type == "mcq" else 8
|
| 2111 |
+
if len(student_text.split()) < MIN_WORDS:
|
| 2112 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2113 |
return {
|
| 2114 |
"student_id": student_id,
|
| 2115 |
"homework_id": homework_id,
|
| 2116 |
"sub_institute_id": sub_institute_id,
|
| 2117 |
"syear": syear,
|
| 2118 |
+
<<<<<<< HEAD
|
| 2119 |
"question_type": final_question_type,
|
| 2120 |
"student_level": student_level,
|
| 2121 |
"status": "Unreadable",
|
|
|
|
| 2126 |
"llm_used": False,
|
| 2127 |
"question_marks": make_question_marks([]),
|
| 2128 |
"annotated_pdf": annotated_pdf_filename,
|
| 2129 |
+
=======
|
| 2130 |
+
"question_type": question_type,
|
| 2131 |
+
"student_level": student_level,
|
| 2132 |
+
"status": "Unreadable",
|
| 2133 |
+
"match_percentage": 0,
|
| 2134 |
+
"ai_generated_remark": None,
|
| 2135 |
+
"rule_based_remark": "Answer text could not be read clearly. Please upload a clearer file.",
|
| 2136 |
+
"student_extracted_text": student_text,
|
| 2137 |
+
"llm_used": False,
|
| 2138 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2139 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2140 |
}
|
| 2141 |
|
| 2142 |
if student_info.get("needs_ocr") and not student_text:
|
| 2143 |
+
<<<<<<< HEAD
|
| 2144 |
# Save annotated PDF even for unreadable (with status shown)
|
| 2145 |
if is_pdf_submission and original_file_bytes:
|
| 2146 |
# Show circle mark for scanned PDF that needs OCR
|
|
|
|
| 2148 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2149 |
original_file_bytes, homework_id, student_id, ocr_result, 0, "Unreadable", student_level
|
| 2150 |
)
|
| 2151 |
+
=======
|
| 2152 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2153 |
return {
|
| 2154 |
"student_id": student_id,
|
| 2155 |
"homework_id": homework_id,
|
| 2156 |
"sub_institute_id": sub_institute_id,
|
| 2157 |
"syear": syear,
|
| 2158 |
+
<<<<<<< HEAD
|
| 2159 |
"question_type": final_question_type,
|
| 2160 |
"student_level": student_level,
|
| 2161 |
"status": "Unreadable",
|
|
|
|
| 2171 |
|
| 2172 |
|
| 2173 |
if final_question_type == "mixed":
|
| 2174 |
+
=======
|
| 2175 |
+
"question_type": question_type,
|
| 2176 |
+
"student_level": student_level,
|
| 2177 |
+
"status": "Unreadable",
|
| 2178 |
+
"match_percentage": 0,
|
| 2179 |
+
"ai_generated_remark": None,
|
| 2180 |
+
"rule_based_remark": "This PDF looks scanned. OCR is required (install pdf2image + poppler) or upload a clearer file.",
|
| 2181 |
+
"student_extracted_text": student_text,
|
| 2182 |
+
"llm_used": False,
|
| 2183 |
+
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2184 |
+
}
|
| 2185 |
+
|
| 2186 |
+
# =========================================================
|
| 2187 |
+
# ✅ MIXED QUESTION TYPES CHECK (MCQ + Narrative)
|
| 2188 |
+
# =========================================================
|
| 2189 |
+
if question_type == "mixed":
|
| 2190 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2191 |
# Process each question type separately and combine results
|
| 2192 |
mcq_results = []
|
| 2193 |
narrative_results = []
|
| 2194 |
|
| 2195 |
+
<<<<<<< HEAD
|
| 2196 |
# Extract ALL MCQ answers from student text with question numbers
|
| 2197 |
student_answers_by_qid = extract_mcq_answers_with_qid(student_text)
|
| 2198 |
|
|
|
|
| 2209 |
if not chosen:
|
| 2210 |
chosen = extract_mcq_choice(student_text)
|
| 2211 |
|
| 2212 |
+
=======
|
| 2213 |
+
# Extract MCQ answers from student text for each MCQ question
|
| 2214 |
+
for q in parsed_questions:
|
| 2215 |
+
if q.get('type') == 'mcq':
|
| 2216 |
+
# Try to find answer for this specific question in student's text
|
| 2217 |
+
# Use the question text to help locate the answer
|
| 2218 |
+
q_text = q.get('question', '')
|
| 2219 |
+
chosen = extract_mcq_choice(student_text)
|
| 2220 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2221 |
correct = q.get('correct_answer') or extract_correct_mcq_from_prompt(q.get('question', ''))
|
| 2222 |
|
| 2223 |
if correct and chosen:
|
| 2224 |
is_correct = (chosen.lower().strip() == correct.lower().strip())
|
| 2225 |
mcq_results.append({
|
| 2226 |
+
<<<<<<< HEAD
|
| 2227 |
'qid': qid,
|
| 2228 |
'correct': is_correct,
|
| 2229 |
'chosen': chosen,
|
|
|
|
| 2238 |
'chosen': '',
|
| 2239 |
'correct_answer': correct,
|
| 2240 |
'unattempted': True
|
| 2241 |
+
=======
|
| 2242 |
+
'qid': q.get('qid'),
|
| 2243 |
+
'correct': is_correct,
|
| 2244 |
+
'chosen': chosen,
|
| 2245 |
+
'correct_answer': correct
|
| 2246 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2247 |
})
|
| 2248 |
|
| 2249 |
# For narrative questions, use AI to generate reference
|
|
|
|
| 2300 |
except Exception as e:
|
| 2301 |
narrative_results = {'error': str(e)}
|
| 2302 |
|
| 2303 |
+
<<<<<<< HEAD
|
| 2304 |
# Calculate combined score with level-based partial credit for MCQ
|
| 2305 |
total_mcq = len(mcq_results)
|
| 2306 |
correct_mcq = sum(1 for r in mcq_results if r.get('correct'))
|
|
|
|
| 2312 |
|
| 2313 |
# Calculate MCQ score based on level (not just binary correct/incorrect)
|
| 2314 |
mcq_score = (correct_mcq * credit_per_q) / max(1, total_mcq)
|
| 2315 |
+
=======
|
| 2316 |
+
# Calculate combined score
|
| 2317 |
+
total_mcq = len(mcq_results)
|
| 2318 |
+
correct_mcq = sum(1 for r in mcq_results if r.get('correct'))
|
| 2319 |
+
mcq_score = (correct_mcq / total_mcq * 100) if total_mcq > 0 else 0
|
| 2320 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2321 |
|
| 2322 |
narrative_score = narrative_results.get('match_percentage', 0) if narrative_results else 0
|
| 2323 |
|
|
|
|
| 2339 |
else:
|
| 2340 |
status = "Needs Review"
|
| 2341 |
|
| 2342 |
+
<<<<<<< HEAD
|
| 2343 |
# Save annotated PDF
|
| 2344 |
if is_pdf_submission and original_file_bytes and mcq_results:
|
| 2345 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2346 |
original_file_bytes, homework_id, student_id, mcq_results, final_score, status, student_level
|
| 2347 |
)
|
| 2348 |
|
| 2349 |
+
=======
|
| 2350 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2351 |
return {
|
| 2352 |
"student_id": student_id,
|
| 2353 |
"homework_id": homework_id,
|
|
|
|
| 2357 |
"student_level": student_level,
|
| 2358 |
"status": status,
|
| 2359 |
"match_percentage": final_score,
|
| 2360 |
+
<<<<<<< HEAD
|
| 2361 |
"submission_remarks": None,
|
| 2362 |
"rule_based_remark": f"MCQ: {correct_mcq}/{total_mcq} correct. Narrative score: {narrative_score}%. (Level: {student_level}, Credit per Q: {credit_per_q}%)",
|
| 2363 |
+
=======
|
| 2364 |
+
"ai_generated_remark": None,
|
| 2365 |
+
"rule_based_remark": f"MCQ: {correct_mcq}/{total_mcq} correct. Narrative score: {narrative_score}%.",
|
| 2366 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2367 |
"llm_used": bool(narrative_results and 'error' not in narrative_results),
|
| 2368 |
"student_extracted_text": student_text,
|
| 2369 |
"mcq_results": mcq_results,
|
| 2370 |
"narrative_results": narrative_results,
|
| 2371 |
+
<<<<<<< HEAD
|
| 2372 |
"question_marks": make_question_marks(mcq_results),
|
| 2373 |
"annotated_pdf": annotated_pdf_filename,
|
| 2374 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
|
|
|
| 2525 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2526 |
original_file_bytes, homework_id, student_id, no_correct_result, 0, "Needs Review", student_level
|
| 2527 |
)
|
| 2528 |
+
=======
|
| 2529 |
+
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2530 |
+
}
|
| 2531 |
+
correct = extract_correct_mcq_from_prompt(prompt)
|
| 2532 |
+
chosen = extract_mcq_choice(student_text)
|
| 2533 |
+
|
| 2534 |
+
if not correct:
|
| 2535 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2536 |
return {
|
| 2537 |
"student_id": student_id,
|
| 2538 |
"homework_id": homework_id,
|
|
|
|
| 2542 |
"student_level": student_level,
|
| 2543 |
"status": "Needs Review",
|
| 2544 |
"match_percentage": 0,
|
| 2545 |
+
<<<<<<< HEAD
|
| 2546 |
"submission_remarks": None,
|
| 2547 |
"rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
|
| 2548 |
"student_extracted_text": student_text,
|
|
|
|
| 2559 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2560 |
original_file_bytes, homework_id, student_id, no_chosen_result, 0, "Needs Review", student_level
|
| 2561 |
)
|
| 2562 |
+
=======
|
| 2563 |
+
"ai_generated_remark": None,
|
| 2564 |
+
"rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
|
| 2565 |
+
"student_extracted_text": student_text,
|
| 2566 |
+
"llm_used": False,
|
| 2567 |
+
"debug": {"correct": correct, "chosen": chosen},
|
| 2568 |
+
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2569 |
+
}
|
| 2570 |
+
|
| 2571 |
+
if not chosen:
|
| 2572 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2573 |
return {
|
| 2574 |
"student_id": student_id,
|
| 2575 |
"homework_id": homework_id,
|
|
|
|
| 2579 |
"student_level": student_level,
|
| 2580 |
"status": "Needs Review",
|
| 2581 |
"match_percentage": 0,
|
| 2582 |
+
<<<<<<< HEAD
|
| 2583 |
"submission_remarks": None,
|
| 2584 |
"rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
|
| 2585 |
"student_extracted_text": student_text,
|
| 2586 |
"llm_used": False,
|
| 2587 |
"question_marks": make_question_marks([]),
|
| 2588 |
"annotated_pdf": annotated_pdf_filename,
|
| 2589 |
+
=======
|
| 2590 |
+
"ai_generated_remark": None,
|
| 2591 |
+
"rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
|
| 2592 |
+
"student_extracted_text": student_text,
|
| 2593 |
+
"llm_used": False,
|
| 2594 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2595 |
"debug": {"correct": correct, "chosen": chosen},
|
| 2596 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2597 |
}
|
| 2598 |
|
| 2599 |
+
<<<<<<< HEAD
|
| 2600 |
# Only process MCQ validation if not redirecting to narrative
|
| 2601 |
if not redirect_to_narrative:
|
| 2602 |
is_correct = (chosen == correct)
|
|
|
|
| 2646 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2647 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2648 |
)
|
| 2649 |
+
=======
|
| 2650 |
+
is_correct = (chosen == correct)
|
| 2651 |
+
return {
|
| 2652 |
+
"student_id": student_id,
|
| 2653 |
+
"homework_id": homework_id,
|
| 2654 |
+
"sub_institute_id": sub_institute_id,
|
| 2655 |
+
"syear": syear,
|
| 2656 |
+
"question_type": "mcq",
|
| 2657 |
+
"student_level": student_level,
|
| 2658 |
+
"status": "Verified" if is_correct else "Needs Review",
|
| 2659 |
+
"match_percentage": 100 if is_correct else 0,
|
| 2660 |
+
"ai_generated_remark": None,
|
| 2661 |
+
"rule_based_remark": "Correct." if is_correct else f"Incorrect. Expected {correct.upper()}, got {chosen.upper()}.",
|
| 2662 |
+
"student_extracted_text": student_text,
|
| 2663 |
+
"llm_used": False,
|
| 2664 |
+
"debug": {"correct": correct, "chosen": chosen},
|
| 2665 |
+
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2666 |
+
}
|
| 2667 |
+
|
| 2668 |
+
# =========================================================
|
| 2669 |
+
# ✅ NARRATIVE CHECK (Gemini generates reference)
|
| 2670 |
+
# =========================================================
|
| 2671 |
+
if gemini_client is None:
|
| 2672 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2673 |
return {
|
| 2674 |
"student_id": student_id,
|
| 2675 |
"homework_id": homework_id,
|
|
|
|
| 2679 |
"student_level": student_level,
|
| 2680 |
"status": "Needs Review",
|
| 2681 |
"match_percentage": 0,
|
| 2682 |
+
<<<<<<< HEAD
|
| 2683 |
"submission_remarks": None,
|
| 2684 |
+
=======
|
| 2685 |
+
"ai_generated_remark": None,
|
| 2686 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2687 |
"rule_based_remark": "Gemini not configured. Check /health/llm.",
|
| 2688 |
"llm_used": False,
|
| 2689 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 2690 |
"student_extracted_text": student_text,
|
| 2691 |
+
<<<<<<< HEAD
|
| 2692 |
"question_marks": make_question_marks([]),
|
| 2693 |
"annotated_pdf": annotated_pdf_filename,
|
| 2694 |
+
=======
|
| 2695 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2696 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2697 |
}
|
| 2698 |
|
|
|
|
| 2713 |
)
|
| 2714 |
|
| 2715 |
if not response_text:
|
| 2716 |
+
<<<<<<< HEAD
|
| 2717 |
# Save annotated PDF
|
| 2718 |
if is_pdf_submission and original_file_bytes:
|
| 2719 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2720 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2721 |
)
|
| 2722 |
+
=======
|
| 2723 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2724 |
return {
|
| 2725 |
"student_id": student_id,
|
| 2726 |
"homework_id": homework_id,
|
|
|
|
| 2730 |
"student_level": student_level,
|
| 2731 |
"status": "Needs Review",
|
| 2732 |
"match_percentage": 0,
|
| 2733 |
+
<<<<<<< HEAD
|
| 2734 |
"submission_remarks": None,
|
| 2735 |
+
=======
|
| 2736 |
+
"ai_generated_remark": None,
|
| 2737 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2738 |
"rule_based_remark": "Gemini failed. Check /health/llm.",
|
| 2739 |
"llm_used": False,
|
| 2740 |
"llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
|
| 2741 |
"student_extracted_text": student_text,
|
| 2742 |
+
<<<<<<< HEAD
|
| 2743 |
"question_marks": make_question_marks([]),
|
| 2744 |
"annotated_pdf": annotated_pdf_filename,
|
| 2745 |
+
=======
|
| 2746 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2747 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2748 |
}
|
| 2749 |
|
|
|
|
| 2751 |
m = re.search(r"\{.*\}", response_text, flags=re.S)
|
| 2752 |
payload = json.loads(m.group(0) if m else response_text)
|
| 2753 |
except Exception as e:
|
| 2754 |
+
<<<<<<< HEAD
|
| 2755 |
# Save annotated PDF
|
| 2756 |
if is_pdf_submission and original_file_bytes:
|
| 2757 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2758 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2759 |
)
|
| 2760 |
+
=======
|
| 2761 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2762 |
return {
|
| 2763 |
"student_id": student_id,
|
| 2764 |
"homework_id": homework_id,
|
|
|
|
| 2768 |
"student_level": student_level,
|
| 2769 |
"status": "Needs Review",
|
| 2770 |
"match_percentage": 0,
|
| 2771 |
+
<<<<<<< HEAD
|
| 2772 |
"submission_remarks": None,
|
| 2773 |
+
=======
|
| 2774 |
+
"ai_generated_remark": None,
|
| 2775 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2776 |
"rule_based_remark": "Gemini returned non-JSON output.",
|
| 2777 |
"llm_used": False,
|
| 2778 |
"llm_error": {"ok": False, "error_type": "GEMINI_BAD_JSON", "message": str(e), "raw": response_text[:800]},
|
| 2779 |
"student_extracted_text": student_text,
|
| 2780 |
+
<<<<<<< HEAD
|
| 2781 |
"question_marks": make_question_marks([]),
|
| 2782 |
"annotated_pdf": annotated_pdf_filename,
|
| 2783 |
+
=======
|
| 2784 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2785 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2786 |
}
|
| 2787 |
|
|
|
|
| 2792 |
key_points = [str(x).strip() for x in key_points if str(x).strip()]
|
| 2793 |
|
| 2794 |
if not ai_reference_answer:
|
| 2795 |
+
<<<<<<< HEAD
|
| 2796 |
# Save annotated PDF
|
| 2797 |
if is_pdf_submission and original_file_bytes:
|
| 2798 |
annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
|
| 2799 |
original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
|
| 2800 |
)
|
| 2801 |
+
=======
|
| 2802 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2803 |
return {
|
| 2804 |
"student_id": student_id,
|
| 2805 |
"homework_id": homework_id,
|
|
|
|
| 2809 |
"student_level": student_level,
|
| 2810 |
"status": "Needs Review",
|
| 2811 |
"match_percentage": 0,
|
| 2812 |
+
<<<<<<< HEAD
|
| 2813 |
"submission_remarks": None,
|
| 2814 |
"rule_based_remark": "AI returned empty reference answer.",
|
| 2815 |
"llm_used": True,
|
| 2816 |
"student_extracted_text": student_text,
|
| 2817 |
"question_marks": make_question_marks([]),
|
| 2818 |
"annotated_pdf": annotated_pdf_filename,
|
| 2819 |
+
=======
|
| 2820 |
+
"ai_generated_remark": None,
|
| 2821 |
+
"rule_based_remark": "AI returned empty reference answer.",
|
| 2822 |
+
"llm_used": True,
|
| 2823 |
+
"student_extracted_text": student_text,
|
| 2824 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2825 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2826 |
}
|
| 2827 |
|
|
|
|
| 2852 |
f"{remark_prompt}"
|
| 2853 |
)
|
| 2854 |
|
| 2855 |
+
<<<<<<< HEAD
|
| 2856 |
submission_remark = generate_gemini_response(
|
| 2857 |
+
=======
|
| 2858 |
+
ai_generated_remark = generate_gemini_response(
|
| 2859 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2860 |
prompt=resp2_prompt,
|
| 2861 |
system_prompt="You are a strict, helpful teacher. Be concise and factual.",
|
| 2862 |
max_tokens=140,
|
|
|
|
| 2864 |
)
|
| 2865 |
|
| 2866 |
rule_based_remark = None
|
| 2867 |
+
<<<<<<< HEAD
|
| 2868 |
remark_llm_used = bool(submission_remark)
|
| 2869 |
remark_llm_error = None if submission_remark else (GEMINI_LAST_ERROR or "Unknown LLM error")
|
| 2870 |
|
| 2871 |
if not submission_remark:
|
| 2872 |
+
=======
|
| 2873 |
+
remark_llm_used = bool(ai_generated_remark)
|
| 2874 |
+
remark_llm_error = None if ai_generated_remark else (GEMINI_LAST_ERROR or "Unknown LLM error")
|
| 2875 |
+
|
| 2876 |
+
if not ai_generated_remark:
|
| 2877 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2878 |
if status == "Verified":
|
| 2879 |
rule_based_remark = "Homework matches the expected answer well. Good coverage of the key ideas."
|
| 2880 |
elif status == "Partial":
|
|
|
|
| 2882 |
else:
|
| 2883 |
rule_based_remark = "Homework does not match the expected answer enough. Please review the topic and resubmit with clearer, complete points."
|
| 2884 |
|
| 2885 |
+
<<<<<<< HEAD
|
| 2886 |
# Save annotated PDF — evaluate EACH question individually against student text
|
| 2887 |
per_question_results = build_per_question_results(
|
| 2888 |
prompt, student_text, status, match_pct,
|
|
|
|
| 2896 |
original_file_bytes, homework_id, student_id, per_question_results, match_pct, status, student_level, "narrative"
|
| 2897 |
)
|
| 2898 |
|
| 2899 |
+
=======
|
| 2900 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2901 |
return {
|
| 2902 |
"student_id": student_id,
|
| 2903 |
"homework_id": homework_id,
|
|
|
|
| 2907 |
"student_level": student_level,
|
| 2908 |
"status": status,
|
| 2909 |
"match_percentage": match_pct,
|
| 2910 |
+
<<<<<<< HEAD
|
| 2911 |
"submission_remarks": submission_remark if submission_remark else None,
|
| 2912 |
+
=======
|
| 2913 |
+
"ai_generated_remark": ai_generated_remark if ai_generated_remark else None,
|
| 2914 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2915 |
"rule_based_remark": rule_based_remark,
|
| 2916 |
"llm_used": True,
|
| 2917 |
"remark_llm_used": remark_llm_used,
|
|
|
|
| 2921 |
"key_points": key_points,
|
| 2922 |
"key_points_covered": covered,
|
| 2923 |
"key_points_missing": missing,
|
| 2924 |
+
<<<<<<< HEAD
|
| 2925 |
"question_marks": make_question_marks(per_question_results),
|
| 2926 |
"annotated_pdf": annotated_pdf_filename,
|
| 2927 |
+
=======
|
| 2928 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2929 |
"debug": {
|
| 2930 |
"similarity": sim,
|
| 2931 |
"coverage": coverage,
|
| 2932 |
"policy": policy,
|
| 2933 |
+
<<<<<<< HEAD
|
| 2934 |
"per_question_results": per_question_results,
|
| 2935 |
"erp_row_fields": list(erp_row.keys()) if erp_row else [],
|
| 2936 |
"erp_student_level_raw": erp_row.get("student_level") or erp_row.get("level") or erp_row.get("difficulty") or erp_row.get("difficulty_level"),
|
| 2937 |
+
=======
|
| 2938 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
| 2939 |
},
|
| 2940 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2941 |
}
|
db.py
CHANGED
|
@@ -10,4 +10,7 @@ engine = create_engine(
|
|
| 10 |
|
| 11 |
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
| 12 |
Base = declarative_base()
|
|
|
|
| 13 |
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
| 12 |
Base = declarative_base()
|
| 13 |
+
<<<<<<< HEAD
|
| 14 |
|
| 15 |
+
=======
|
| 16 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
requirements.txt
CHANGED
|
@@ -5,6 +5,7 @@ pillow
|
|
| 5 |
sqlalchemy
|
| 6 |
scikit-learn
|
| 7 |
requests
|
|
|
|
| 8 |
python-docx
|
| 9 |
google-genai
|
| 10 |
python-dotenv
|
|
@@ -15,3 +16,12 @@ google-cloud-vision
|
|
| 15 |
easyocr
|
| 16 |
pdf2image
|
| 17 |
reportlab
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
sqlalchemy
|
| 6 |
scikit-learn
|
| 7 |
requests
|
| 8 |
+
<<<<<<< HEAD
|
| 9 |
python-docx
|
| 10 |
google-genai
|
| 11 |
python-dotenv
|
|
|
|
| 16 |
easyocr
|
| 17 |
pdf2image
|
| 18 |
reportlab
|
| 19 |
+
=======
|
| 20 |
+
python-multipart
|
| 21 |
+
openai
|
| 22 |
+
google-generativeai
|
| 23 |
+
python-docx
|
| 24 |
+
pypdf
|
| 25 |
+
pdf2image
|
| 26 |
+
python-dotenv
|
| 27 |
+
>>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
|
scholar clone.lnk
ADDED
|
Binary file (760 Bytes). View file
|
|
|