Fix app merge marker
Browse files
app.py
CHANGED
|
@@ -52,9 +52,7 @@ except Exception as e:
|
|
| 52 |
print(f"[WARN] google-cloud-vision import failed: {e}")
|
| 53 |
|
| 54 |
|
| 55 |
-
|
| 56 |
-
# ✅ FASTAPI APP INSTANCE
|
| 57 |
-
# =========================================================
|
| 58 |
app = FastAPI()
|
| 59 |
app.add_middleware(
|
| 60 |
CORSMiddleware,
|
|
@@ -64,34 +62,26 @@ app.add_middleware(
|
|
| 64 |
allow_headers=["*"],
|
| 65 |
)
|
| 66 |
|
| 67 |
-
|
| 68 |
-
# ✅ TESSERACT PATH
|
| 69 |
-
# =========================================================
|
| 70 |
if os.name == "nt":
|
| 71 |
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
|
| 72 |
else:
|
| 73 |
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
|
| 74 |
|
| 75 |
|
| 76 |
-
|
| 77 |
-
# ✅ ERP CONFIG
|
| 78 |
-
# =========================================================
|
| 79 |
ERP_BASE = os.getenv("ERP_BASE", "https://erp.triz.co.in/lms_data")
|
| 80 |
STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student/")
|
| 81 |
ERP_TOKEN = os.getenv("ERP_TOKEN", "")
|
| 82 |
|
| 83 |
|
| 84 |
-
|
| 85 |
-
# ✅ GEMINI CONFIG
|
| 86 |
-
# =========================================================
|
| 87 |
GOOGLE_API_KEY = (os.getenv("GOOGLE_API_KEY") or "").strip()
|
| 88 |
GEMINI_MODEL = (os.getenv("GEMINI_MODEL", "models/gemini-flash-lite-latest") or "").strip()
|
| 89 |
if GEMINI_MODEL and not GEMINI_MODEL.startswith("models/"):
|
| 90 |
GEMINI_MODEL = "models/" + GEMINI_MODEL
|
| 91 |
|
| 92 |
-
|
| 93 |
-
# ✅ GOOGLE CLOUD VISION CONFIG (for handwritten OCR)
|
| 94 |
-
# =========================================================
|
| 95 |
GOOGLE_CLOUD_VISION_API_KEY = (os.getenv("GCV_API_KEY") or "").strip()
|
| 96 |
# Fall back to Gemini API key if no separate Vision key provided
|
| 97 |
if not GOOGLE_CLOUD_VISION_API_KEY and GOOGLE_API_KEY:
|
|
@@ -218,9 +208,7 @@ def cheap_overlap_score(student_text: str, prompt: str) -> int:
|
|
| 218 |
return int(round(min(0.6, overlap) * 100)) # cap at 60
|
| 219 |
|
| 220 |
|
| 221 |
-
|
| 222 |
-
# ✅ SMALL UTILS
|
| 223 |
-
# =========================================================
|
| 224 |
def _norm(s: str) -> str:
|
| 225 |
return re.sub(r"\s+", " ", (s or "").strip().lower())
|
| 226 |
|
|
@@ -292,9 +280,7 @@ def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: fl
|
|
| 292 |
return covered, missing, coverage
|
| 293 |
|
| 294 |
|
| 295 |
-
|
| 296 |
-
# ✅ QUESTION TYPE INFERENCE + MCQ PARSING
|
| 297 |
-
# =========================================================
|
| 298 |
def infer_question_type_from_prompt(prompt: str) -> str:
|
| 299 |
p = _norm(prompt)
|
| 300 |
|
|
@@ -563,9 +549,7 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 563 |
return ""
|
| 564 |
|
| 565 |
|
| 566 |
-
|
| 567 |
-
# ✅ ERP HELPERS
|
| 568 |
-
# =========================================================
|
| 569 |
def _erp_get(params: dict) -> list:
|
| 570 |
headers = {}
|
| 571 |
if ERP_TOKEN:
|
|
@@ -597,9 +581,7 @@ def fetch_student_level_from_erp(row: Dict[str, Any]) -> str:
|
|
| 597 |
return "Medium"
|
| 598 |
|
| 599 |
|
| 600 |
-
|
| 601 |
-
# ✅ OCR + TEXT EXTRACTION - IMPROVED FOR HANDWRITTEN
|
| 602 |
-
# =========================================================
|
| 603 |
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 604 |
"""
|
| 605 |
Enhanced preprocessing for better OCR on handwritten images.
|
|
@@ -856,9 +838,7 @@ async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
|
|
| 856 |
|
| 857 |
|
| 858 |
|
| 859 |
-
|
| 860 |
-
# ✅ ROUTES
|
| 861 |
-
# =========================================================
|
| 862 |
@app.get("/health")
|
| 863 |
def health():
|
| 864 |
return {"status": "ok"}
|
|
@@ -947,9 +927,7 @@ async def homework_validate(
|
|
| 947 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 948 |
}
|
| 949 |
|
| 950 |
-
|
| 951 |
-
# ✅ MIXED QUESTION TYPES CHECK (MCQ + Narrative)
|
| 952 |
-
# =========================================================
|
| 953 |
if question_type == "mixed":
|
| 954 |
# Process each question type separately and combine results
|
| 955 |
mcq_results = []
|
|
@@ -1263,9 +1241,7 @@ async def homework_validate(
|
|
| 1263 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1264 |
}
|
| 1265 |
|
| 1266 |
-
|
| 1267 |
-
# ✅ NARRATIVE CHECK (Gemini generates reference) - Also handles MCQ->Narrative redirect
|
| 1268 |
-
# =========================================================
|
| 1269 |
if gemini_client is None:
|
| 1270 |
return {
|
| 1271 |
"student_id": student_id,
|
|
@@ -1580,9 +1556,7 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 1580 |
return ""
|
| 1581 |
|
| 1582 |
|
| 1583 |
-
|
| 1584 |
-
# ✅ ERP HELPERS
|
| 1585 |
-
# =========================================================
|
| 1586 |
def _erp_get(params: dict) -> list:
|
| 1587 |
headers = {}
|
| 1588 |
if ERP_TOKEN:
|
|
@@ -1614,9 +1588,7 @@ def fetch_student_level_from_erp(row: Dict[str, Any]) -> str:
|
|
| 1614 |
return "Medium"
|
| 1615 |
|
| 1616 |
|
| 1617 |
-
|
| 1618 |
-
# ✅ OCR + TEXT EXTRACTION - IMPROVED FOR HANDWRITTEN
|
| 1619 |
-
# =========================================================
|
| 1620 |
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 1621 |
"""
|
| 1622 |
Enhanced preprocessing for better OCR on handwritten images.
|
|
@@ -1873,9 +1845,7 @@ async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
|
|
| 1873 |
|
| 1874 |
|
| 1875 |
|
| 1876 |
-
|
| 1877 |
-
# ✅ ROUTES
|
| 1878 |
-
# =========================================================
|
| 1879 |
@app.get("/health")
|
| 1880 |
def health():
|
| 1881 |
return {"status": "ok"}
|
|
@@ -1964,10 +1934,7 @@ async def homework_validate(
|
|
| 1964 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1965 |
}
|
| 1966 |
|
| 1967 |
-
|
| 1968 |
-
# ✅ MIXED QUESTION TYPES CHECK (MCQ + Narrative)
|
| 1969 |
-
# =========================================================
|
| 1970 |
-
if question_type == "mixed":
|
| 1971 |
# Process each question type separately and combine results
|
| 1972 |
mcq_results = []
|
| 1973 |
narrative_results = []
|
|
@@ -2280,9 +2247,7 @@ async def homework_validate(
|
|
| 2280 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2281 |
}
|
| 2282 |
|
| 2283 |
-
|
| 2284 |
-
# ✅ NARRATIVE CHECK (Gemini generates reference) - Also handles MCQ->Narrative redirect
|
| 2285 |
-
# =========================================================
|
| 2286 |
if gemini_client is None:
|
| 2287 |
return {
|
| 2288 |
"student_id": student_id,
|
|
@@ -2453,7 +2418,7 @@ async def homework_validate(
|
|
| 2453 |
},
|
| 2454 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2455 |
}
|
| 2456 |
-
|
| 2457 |
# app.py
|
| 2458 |
import os
|
| 2459 |
import io
|
|
@@ -2536,17 +2501,14 @@ STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student
|
|
| 2536 |
ERP_TOKEN = os.getenv("ERP_TOKEN", "")
|
| 2537 |
|
| 2538 |
|
| 2539 |
-
|
| 2540 |
-
# ✅ GEMINI CONFIG
|
| 2541 |
-
# =========================================================
|
| 2542 |
GOOGLE_API_KEY = (os.getenv("GOOGLE_API_KEY") or "").strip()
|
| 2543 |
GEMINI_MODEL = (os.getenv("GEMINI_MODEL", "models/gemini-flash-lite-latest") or "").strip()
|
| 2544 |
if GEMINI_MODEL and not GEMINI_MODEL.startswith("models/"):
|
| 2545 |
GEMINI_MODEL = "models/" + GEMINI_MODEL
|
| 2546 |
|
| 2547 |
-
|
| 2548 |
-
|
| 2549 |
-
# =========================================================
|
| 2550 |
GOOGLE_CLOUD_VISION_API_KEY = (os.getenv("GCV_API_KEY") or "").strip()
|
| 2551 |
# Fall back to Gemini API key if no separate Vision key provided
|
| 2552 |
if not GOOGLE_CLOUD_VISION_API_KEY and GOOGLE_API_KEY:
|
|
@@ -2673,9 +2635,7 @@ def cheap_overlap_score(student_text: str, prompt: str) -> int:
|
|
| 2673 |
return int(round(min(0.6, overlap) * 100)) # cap at 60
|
| 2674 |
|
| 2675 |
|
| 2676 |
-
|
| 2677 |
-
# ✅ SMALL UTILS
|
| 2678 |
-
# =========================================================
|
| 2679 |
def _norm(s: str) -> str:
|
| 2680 |
return re.sub(r"\s+", " ", (s or "").strip().lower())
|
| 2681 |
|
|
@@ -2979,9 +2939,6 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
|
|
| 2979 |
return ""
|
| 2980 |
|
| 2981 |
|
| 2982 |
-
# =========================================================
|
| 2983 |
-
# ✅ ERP HELPERS
|
| 2984 |
-
# =========================================================
|
| 2985 |
def _erp_get(params: dict) -> list:
|
| 2986 |
headers = {}
|
| 2987 |
if ERP_TOKEN:
|
|
@@ -3013,9 +2970,7 @@ def fetch_student_level_from_erp(row: Dict[str, Any]) -> str:
|
|
| 3013 |
return "Medium"
|
| 3014 |
|
| 3015 |
|
| 3016 |
-
|
| 3017 |
-
# ✅ OCR + TEXT EXTRACTION - IMPROVED FOR HANDWRITTEN
|
| 3018 |
-
# =========================================================
|
| 3019 |
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 3020 |
"""
|
| 3021 |
Enhanced preprocessing for better OCR on handwritten images.
|
|
@@ -3272,9 +3227,7 @@ async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
|
|
| 3272 |
|
| 3273 |
|
| 3274 |
|
| 3275 |
-
|
| 3276 |
-
# ✅ ROUTES
|
| 3277 |
-
# =========================================================
|
| 3278 |
@app.get("/health")
|
| 3279 |
def health():
|
| 3280 |
return {"status": "ok"}
|
|
@@ -3363,9 +3316,7 @@ async def homework_validate(
|
|
| 3363 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 3364 |
}
|
| 3365 |
|
| 3366 |
-
|
| 3367 |
-
# ✅ MIXED QUESTION TYPES CHECK (MCQ + Narrative)
|
| 3368 |
-
# =========================================================
|
| 3369 |
if question_type == "mixed":
|
| 3370 |
# Process each question type separately and combine results
|
| 3371 |
mcq_results = []
|
|
@@ -3679,9 +3630,7 @@ async def homework_validate(
|
|
| 3679 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 3680 |
}
|
| 3681 |
|
| 3682 |
-
|
| 3683 |
-
# ✅ NARRATIVE CHECK (Gemini generates reference) - Also handles MCQ->Narrative redirect
|
| 3684 |
-
# =========================================================
|
| 3685 |
if gemini_client is None:
|
| 3686 |
return {
|
| 3687 |
"student_id": student_id,
|
|
|
|
| 52 |
print(f"[WARN] google-cloud-vision import failed: {e}")
|
| 53 |
|
| 54 |
|
| 55 |
+
|
|
|
|
|
|
|
| 56 |
app = FastAPI()
|
| 57 |
app.add_middleware(
|
| 58 |
CORSMiddleware,
|
|
|
|
| 62 |
allow_headers=["*"],
|
| 63 |
)
|
| 64 |
|
| 65 |
+
|
|
|
|
|
|
|
| 66 |
if os.name == "nt":
|
| 67 |
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
|
| 68 |
else:
|
| 69 |
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
|
| 70 |
|
| 71 |
|
| 72 |
+
|
|
|
|
|
|
|
| 73 |
ERP_BASE = os.getenv("ERP_BASE", "https://erp.triz.co.in/lms_data")
|
| 74 |
STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student/")
|
| 75 |
ERP_TOKEN = os.getenv("ERP_TOKEN", "")
|
| 76 |
|
| 77 |
|
| 78 |
+
|
|
|
|
|
|
|
| 79 |
GOOGLE_API_KEY = (os.getenv("GOOGLE_API_KEY") or "").strip()
|
| 80 |
GEMINI_MODEL = (os.getenv("GEMINI_MODEL", "models/gemini-flash-lite-latest") or "").strip()
|
| 81 |
if GEMINI_MODEL and not GEMINI_MODEL.startswith("models/"):
|
| 82 |
GEMINI_MODEL = "models/" + GEMINI_MODEL
|
| 83 |
|
| 84 |
+
|
|
|
|
|
|
|
| 85 |
GOOGLE_CLOUD_VISION_API_KEY = (os.getenv("GCV_API_KEY") or "").strip()
|
| 86 |
# Fall back to Gemini API key if no separate Vision key provided
|
| 87 |
if not GOOGLE_CLOUD_VISION_API_KEY and GOOGLE_API_KEY:
|
|
|
|
| 208 |
return int(round(min(0.6, overlap) * 100)) # cap at 60
|
| 209 |
|
| 210 |
|
| 211 |
+
|
|
|
|
|
|
|
| 212 |
def _norm(s: str) -> str:
|
| 213 |
return re.sub(r"\s+", " ", (s or "").strip().lower())
|
| 214 |
|
|
|
|
| 280 |
return covered, missing, coverage
|
| 281 |
|
| 282 |
|
| 283 |
+
|
|
|
|
|
|
|
| 284 |
def infer_question_type_from_prompt(prompt: str) -> str:
|
| 285 |
p = _norm(prompt)
|
| 286 |
|
|
|
|
| 549 |
return ""
|
| 550 |
|
| 551 |
|
| 552 |
+
|
|
|
|
|
|
|
| 553 |
def _erp_get(params: dict) -> list:
|
| 554 |
headers = {}
|
| 555 |
if ERP_TOKEN:
|
|
|
|
| 581 |
return "Medium"
|
| 582 |
|
| 583 |
|
| 584 |
+
|
|
|
|
|
|
|
| 585 |
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 586 |
"""
|
| 587 |
Enhanced preprocessing for better OCR on handwritten images.
|
|
|
|
| 838 |
|
| 839 |
|
| 840 |
|
| 841 |
+
|
|
|
|
|
|
|
| 842 |
@app.get("/health")
|
| 843 |
def health():
|
| 844 |
return {"status": "ok"}
|
|
|
|
| 927 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 928 |
}
|
| 929 |
|
| 930 |
+
|
|
|
|
|
|
|
| 931 |
if question_type == "mixed":
|
| 932 |
# Process each question type separately and combine results
|
| 933 |
mcq_results = []
|
|
|
|
| 1241 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1242 |
}
|
| 1243 |
|
| 1244 |
+
|
|
|
|
|
|
|
| 1245 |
if gemini_client is None:
|
| 1246 |
return {
|
| 1247 |
"student_id": student_id,
|
|
|
|
| 1556 |
return ""
|
| 1557 |
|
| 1558 |
|
| 1559 |
+
|
|
|
|
|
|
|
| 1560 |
def _erp_get(params: dict) -> list:
|
| 1561 |
headers = {}
|
| 1562 |
if ERP_TOKEN:
|
|
|
|
| 1588 |
return "Medium"
|
| 1589 |
|
| 1590 |
|
| 1591 |
+
|
|
|
|
|
|
|
| 1592 |
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 1593 |
"""
|
| 1594 |
Enhanced preprocessing for better OCR on handwritten images.
|
|
|
|
| 1845 |
|
| 1846 |
|
| 1847 |
|
| 1848 |
+
|
|
|
|
|
|
|
| 1849 |
@app.get("/health")
|
| 1850 |
def health():
|
| 1851 |
return {"status": "ok"}
|
|
|
|
| 1934 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 1935 |
}
|
| 1936 |
|
| 1937 |
+
if question_type == "mixed":
|
|
|
|
|
|
|
|
|
|
| 1938 |
# Process each question type separately and combine results
|
| 1939 |
mcq_results = []
|
| 1940 |
narrative_results = []
|
|
|
|
| 2247 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2248 |
}
|
| 2249 |
|
| 2250 |
+
|
|
|
|
|
|
|
| 2251 |
if gemini_client is None:
|
| 2252 |
return {
|
| 2253 |
"student_id": student_id,
|
|
|
|
| 2418 |
},
|
| 2419 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 2420 |
}
|
| 2421 |
+
|
| 2422 |
# app.py
|
| 2423 |
import os
|
| 2424 |
import io
|
|
|
|
| 2501 |
ERP_TOKEN = os.getenv("ERP_TOKEN", "")
|
| 2502 |
|
| 2503 |
|
| 2504 |
+
|
|
|
|
|
|
|
| 2505 |
GOOGLE_API_KEY = (os.getenv("GOOGLE_API_KEY") or "").strip()
|
| 2506 |
GEMINI_MODEL = (os.getenv("GEMINI_MODEL", "models/gemini-flash-lite-latest") or "").strip()
|
| 2507 |
if GEMINI_MODEL and not GEMINI_MODEL.startswith("models/"):
|
| 2508 |
GEMINI_MODEL = "models/" + GEMINI_MODEL
|
| 2509 |
|
| 2510 |
+
|
| 2511 |
+
|
|
|
|
| 2512 |
GOOGLE_CLOUD_VISION_API_KEY = (os.getenv("GCV_API_KEY") or "").strip()
|
| 2513 |
# Fall back to Gemini API key if no separate Vision key provided
|
| 2514 |
if not GOOGLE_CLOUD_VISION_API_KEY and GOOGLE_API_KEY:
|
|
|
|
| 2635 |
return int(round(min(0.6, overlap) * 100)) # cap at 60
|
| 2636 |
|
| 2637 |
|
| 2638 |
+
|
|
|
|
|
|
|
| 2639 |
def _norm(s: str) -> str:
|
| 2640 |
return re.sub(r"\s+", " ", (s or "").strip().lower())
|
| 2641 |
|
|
|
|
| 2939 |
return ""
|
| 2940 |
|
| 2941 |
|
|
|
|
|
|
|
|
|
|
| 2942 |
def _erp_get(params: dict) -> list:
|
| 2943 |
headers = {}
|
| 2944 |
if ERP_TOKEN:
|
|
|
|
| 2970 |
return "Medium"
|
| 2971 |
|
| 2972 |
|
| 2973 |
+
|
|
|
|
|
|
|
| 2974 |
def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
|
| 2975 |
"""
|
| 2976 |
Enhanced preprocessing for better OCR on handwritten images.
|
|
|
|
| 3227 |
|
| 3228 |
|
| 3229 |
|
| 3230 |
+
|
|
|
|
|
|
|
| 3231 |
@app.get("/health")
|
| 3232 |
def health():
|
| 3233 |
return {"status": "ok"}
|
|
|
|
| 3316 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 3317 |
}
|
| 3318 |
|
| 3319 |
+
|
|
|
|
|
|
|
| 3320 |
if question_type == "mixed":
|
| 3321 |
# Process each question type separately and combine results
|
| 3322 |
mcq_results = []
|
|
|
|
| 3630 |
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 3631 |
}
|
| 3632 |
|
| 3633 |
+
|
|
|
|
|
|
|
| 3634 |
if gemini_client is None:
|
| 3635 |
return {
|
| 3636 |
"student_id": student_id,
|