Moncey10 committed on
Commit
143f8b8
·
1 Parent(s): 9e8d1d7

Fix app merge marker

Browse files
Files changed (1) hide show
  1. app.py +26 -77
app.py CHANGED
@@ -52,9 +52,7 @@ except Exception as e:
52
  print(f"[WARN] google-cloud-vision import failed: {e}")
53
 
54
 
55
- # =========================================================
56
- # ✅ FASTAPI APP INSTANCE
57
- # =========================================================
58
  app = FastAPI()
59
  app.add_middleware(
60
  CORSMiddleware,
@@ -64,34 +62,26 @@ app.add_middleware(
64
  allow_headers=["*"],
65
  )
66
 
67
- # =========================================================
68
- # ✅ TESSERACT PATH
69
- # =========================================================
70
  if os.name == "nt":
71
  pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
72
  else:
73
  pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
74
 
75
 
76
- # =========================================================
77
- # ✅ ERP CONFIG
78
- # =========================================================
79
  ERP_BASE = os.getenv("ERP_BASE", "https://erp.triz.co.in/lms_data")
80
  STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student/")
81
  ERP_TOKEN = os.getenv("ERP_TOKEN", "")
82
 
83
 
84
- # =========================================================
85
- # ✅ GEMINI CONFIG
86
- # =========================================================
87
  GOOGLE_API_KEY = (os.getenv("GOOGLE_API_KEY") or "").strip()
88
  GEMINI_MODEL = (os.getenv("GEMINI_MODEL", "models/gemini-flash-lite-latest") or "").strip()
89
  if GEMINI_MODEL and not GEMINI_MODEL.startswith("models/"):
90
  GEMINI_MODEL = "models/" + GEMINI_MODEL
91
 
92
- # =========================================================
93
- # ✅ GOOGLE CLOUD VISION CONFIG (for handwritten OCR)
94
- # =========================================================
95
  GOOGLE_CLOUD_VISION_API_KEY = (os.getenv("GCV_API_KEY") or "").strip()
96
  # Fall back to Gemini API key if no separate Vision key provided
97
  if not GOOGLE_CLOUD_VISION_API_KEY and GOOGLE_API_KEY:
@@ -218,9 +208,7 @@ def cheap_overlap_score(student_text: str, prompt: str) -> int:
218
  return int(round(min(0.6, overlap) * 100)) # cap at 60
219
 
220
 
221
- # =========================================================
222
- # ✅ SMALL UTILS
223
- # =========================================================
224
  def _norm(s: str) -> str:
225
  return re.sub(r"\s+", " ", (s or "").strip().lower())
226
 
@@ -292,9 +280,7 @@ def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: fl
292
  return covered, missing, coverage
293
 
294
 
295
- # =========================================================
296
- # ✅ QUESTION TYPE INFERENCE + MCQ PARSING
297
- # =========================================================
298
  def infer_question_type_from_prompt(prompt: str) -> str:
299
  p = _norm(prompt)
300
 
@@ -563,9 +549,7 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
563
  return ""
564
 
565
 
566
- # =========================================================
567
- # ✅ ERP HELPERS
568
- # =========================================================
569
  def _erp_get(params: dict) -> list:
570
  headers = {}
571
  if ERP_TOKEN:
@@ -597,9 +581,7 @@ def fetch_student_level_from_erp(row: Dict[str, Any]) -> str:
597
  return "Medium"
598
 
599
 
600
- # =========================================================
601
- # ✅ OCR + TEXT EXTRACTION - IMPROVED FOR HANDWRITTEN
602
- # =========================================================
603
  def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
604
  """
605
  Enhanced preprocessing for better OCR on handwritten images.
@@ -856,9 +838,7 @@ async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
856
 
857
 
858
 
859
- # =========================================================
860
- # ✅ ROUTES
861
- # =========================================================
862
  @app.get("/health")
863
  def health():
864
  return {"status": "ok"}
@@ -947,9 +927,7 @@ async def homework_validate(
947
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
948
  }
949
 
950
- # =========================================================
951
- # ✅ MIXED QUESTION TYPES CHECK (MCQ + Narrative)
952
- # =========================================================
953
  if question_type == "mixed":
954
  # Process each question type separately and combine results
955
  mcq_results = []
@@ -1263,9 +1241,7 @@ async def homework_validate(
1263
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
1264
  }
1265
 
1266
- # =========================================================
1267
- # ✅ NARRATIVE CHECK (Gemini generates reference) - Also handles MCQ->Narrative redirect
1268
- # =========================================================
1269
  if gemini_client is None:
1270
  return {
1271
  "student_id": student_id,
@@ -1580,9 +1556,7 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
1580
  return ""
1581
 
1582
 
1583
- # =========================================================
1584
- # ✅ ERP HELPERS
1585
- # =========================================================
1586
  def _erp_get(params: dict) -> list:
1587
  headers = {}
1588
  if ERP_TOKEN:
@@ -1614,9 +1588,7 @@ def fetch_student_level_from_erp(row: Dict[str, Any]) -> str:
1614
  return "Medium"
1615
 
1616
 
1617
- # =========================================================
1618
- # ✅ OCR + TEXT EXTRACTION - IMPROVED FOR HANDWRITTEN
1619
- # =========================================================
1620
  def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
1621
  """
1622
  Enhanced preprocessing for better OCR on handwritten images.
@@ -1873,9 +1845,7 @@ async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
1873
 
1874
 
1875
 
1876
- # =========================================================
1877
- # ✅ ROUTES
1878
- # =========================================================
1879
  @app.get("/health")
1880
  def health():
1881
  return {"status": "ok"}
@@ -1964,10 +1934,7 @@ async def homework_validate(
1964
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
1965
  }
1966
 
1967
- # =========================================================
1968
- # ✅ MIXED QUESTION TYPES CHECK (MCQ + Narrative)
1969
- # =========================================================
1970
- if question_type == "mixed":
1971
  # Process each question type separately and combine results
1972
  mcq_results = []
1973
  narrative_results = []
@@ -2280,9 +2247,7 @@ async def homework_validate(
2280
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2281
  }
2282
 
2283
- # =========================================================
2284
- # ✅ NARRATIVE CHECK (Gemini generates reference) - Also handles MCQ->Narrative redirect
2285
- # =========================================================
2286
  if gemini_client is None:
2287
  return {
2288
  "student_id": student_id,
@@ -2453,7 +2418,7 @@ async def homework_validate(
2453
  },
2454
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2455
  }
2456
- =======
2457
  # app.py
2458
  import os
2459
  import io
@@ -2536,17 +2501,14 @@ STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student
2536
  ERP_TOKEN = os.getenv("ERP_TOKEN", "")
2537
 
2538
 
2539
- # =========================================================
2540
- # ✅ GEMINI CONFIG
2541
- # =========================================================
2542
  GOOGLE_API_KEY = (os.getenv("GOOGLE_API_KEY") or "").strip()
2543
  GEMINI_MODEL = (os.getenv("GEMINI_MODEL", "models/gemini-flash-lite-latest") or "").strip()
2544
  if GEMINI_MODEL and not GEMINI_MODEL.startswith("models/"):
2545
  GEMINI_MODEL = "models/" + GEMINI_MODEL
2546
 
2547
- # =========================================================
2548
- # ✅ GOOGLE CLOUD VISION CONFIG (for handwritten OCR)
2549
- # =========================================================
2550
  GOOGLE_CLOUD_VISION_API_KEY = (os.getenv("GCV_API_KEY") or "").strip()
2551
  # Fall back to Gemini API key if no separate Vision key provided
2552
  if not GOOGLE_CLOUD_VISION_API_KEY and GOOGLE_API_KEY:
@@ -2673,9 +2635,7 @@ def cheap_overlap_score(student_text: str, prompt: str) -> int:
2673
  return int(round(min(0.6, overlap) * 100)) # cap at 60
2674
 
2675
 
2676
- # =========================================================
2677
- # ✅ SMALL UTILS
2678
- # =========================================================
2679
  def _norm(s: str) -> str:
2680
  return re.sub(r"\s+", " ", (s or "").strip().lower())
2681
 
@@ -2979,9 +2939,6 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
2979
  return ""
2980
 
2981
 
2982
- # =========================================================
2983
- # ✅ ERP HELPERS
2984
- # =========================================================
2985
  def _erp_get(params: dict) -> list:
2986
  headers = {}
2987
  if ERP_TOKEN:
@@ -3013,9 +2970,7 @@ def fetch_student_level_from_erp(row: Dict[str, Any]) -> str:
3013
  return "Medium"
3014
 
3015
 
3016
- # =========================================================
3017
- # ✅ OCR + TEXT EXTRACTION - IMPROVED FOR HANDWRITTEN
3018
- # =========================================================
3019
  def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
3020
  """
3021
  Enhanced preprocessing for better OCR on handwritten images.
@@ -3272,9 +3227,7 @@ async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
3272
 
3273
 
3274
 
3275
- # =========================================================
3276
- # ✅ ROUTES
3277
- # =========================================================
3278
  @app.get("/health")
3279
  def health():
3280
  return {"status": "ok"}
@@ -3363,9 +3316,7 @@ async def homework_validate(
3363
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
3364
  }
3365
 
3366
- # =========================================================
3367
- # ✅ MIXED QUESTION TYPES CHECK (MCQ + Narrative)
3368
- # =========================================================
3369
  if question_type == "mixed":
3370
  # Process each question type separately and combine results
3371
  mcq_results = []
@@ -3679,9 +3630,7 @@ async def homework_validate(
3679
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
3680
  }
3681
 
3682
- # =========================================================
3683
- # ✅ NARRATIVE CHECK (Gemini generates reference) - Also handles MCQ->Narrative redirect
3684
- # =========================================================
3685
  if gemini_client is None:
3686
  return {
3687
  "student_id": student_id,
 
52
  print(f"[WARN] google-cloud-vision import failed: {e}")
53
 
54
 
55
+
 
 
56
  app = FastAPI()
57
  app.add_middleware(
58
  CORSMiddleware,
 
62
  allow_headers=["*"],
63
  )
64
 
65
+
 
 
66
  if os.name == "nt":
67
  pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
68
  else:
69
  pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
70
 
71
 
72
+
 
 
73
  ERP_BASE = os.getenv("ERP_BASE", "https://erp.triz.co.in/lms_data")
74
  STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student/")
75
  ERP_TOKEN = os.getenv("ERP_TOKEN", "")
76
 
77
 
78
+
 
 
79
  GOOGLE_API_KEY = (os.getenv("GOOGLE_API_KEY") or "").strip()
80
  GEMINI_MODEL = (os.getenv("GEMINI_MODEL", "models/gemini-flash-lite-latest") or "").strip()
81
  if GEMINI_MODEL and not GEMINI_MODEL.startswith("models/"):
82
  GEMINI_MODEL = "models/" + GEMINI_MODEL
83
 
84
+
 
 
85
  GOOGLE_CLOUD_VISION_API_KEY = (os.getenv("GCV_API_KEY") or "").strip()
86
  # Fall back to Gemini API key if no separate Vision key provided
87
  if not GOOGLE_CLOUD_VISION_API_KEY and GOOGLE_API_KEY:
 
208
  return int(round(min(0.6, overlap) * 100)) # cap at 60
209
 
210
 
211
+
 
 
212
  def _norm(s: str) -> str:
213
  return re.sub(r"\s+", " ", (s or "").strip().lower())
214
 
 
280
  return covered, missing, coverage
281
 
282
 
283
+
 
 
284
  def infer_question_type_from_prompt(prompt: str) -> str:
285
  p = _norm(prompt)
286
 
 
549
  return ""
550
 
551
 
552
+
 
 
553
  def _erp_get(params: dict) -> list:
554
  headers = {}
555
  if ERP_TOKEN:
 
581
  return "Medium"
582
 
583
 
584
+
 
 
585
  def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
586
  """
587
  Enhanced preprocessing for better OCR on handwritten images.
 
838
 
839
 
840
 
841
+
 
 
842
  @app.get("/health")
843
  def health():
844
  return {"status": "ok"}
 
927
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
928
  }
929
 
930
+
 
 
931
  if question_type == "mixed":
932
  # Process each question type separately and combine results
933
  mcq_results = []
 
1241
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
1242
  }
1243
 
1244
+
 
 
1245
  if gemini_client is None:
1246
  return {
1247
  "student_id": student_id,
 
1556
  return ""
1557
 
1558
 
1559
+
 
 
1560
  def _erp_get(params: dict) -> list:
1561
  headers = {}
1562
  if ERP_TOKEN:
 
1588
  return "Medium"
1589
 
1590
 
1591
+
 
 
1592
  def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
1593
  """
1594
  Enhanced preprocessing for better OCR on handwritten images.
 
1845
 
1846
 
1847
 
1848
+
 
 
1849
  @app.get("/health")
1850
  def health():
1851
  return {"status": "ok"}
 
1934
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
1935
  }
1936
 
1937
+ if question_type == "mixed":
 
 
 
1938
  # Process each question type separately and combine results
1939
  mcq_results = []
1940
  narrative_results = []
 
2247
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2248
  }
2249
 
2250
+
 
 
2251
  if gemini_client is None:
2252
  return {
2253
  "student_id": student_id,
 
2418
  },
2419
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2420
  }
2421
+
2422
  # app.py
2423
  import os
2424
  import io
 
2501
  ERP_TOKEN = os.getenv("ERP_TOKEN", "")
2502
 
2503
 
2504
+
 
 
2505
  GOOGLE_API_KEY = (os.getenv("GOOGLE_API_KEY") or "").strip()
2506
  GEMINI_MODEL = (os.getenv("GEMINI_MODEL", "models/gemini-flash-lite-latest") or "").strip()
2507
  if GEMINI_MODEL and not GEMINI_MODEL.startswith("models/"):
2508
  GEMINI_MODEL = "models/" + GEMINI_MODEL
2509
 
2510
+
2511
+
 
2512
  GOOGLE_CLOUD_VISION_API_KEY = (os.getenv("GCV_API_KEY") or "").strip()
2513
  # Fall back to Gemini API key if no separate Vision key provided
2514
  if not GOOGLE_CLOUD_VISION_API_KEY and GOOGLE_API_KEY:
 
2635
  return int(round(min(0.6, overlap) * 100)) # cap at 60
2636
 
2637
 
2638
+
 
 
2639
  def _norm(s: str) -> str:
2640
  return re.sub(r"\s+", " ", (s or "").strip().lower())
2641
 
 
2939
  return ""
2940
 
2941
 
 
 
 
2942
  def _erp_get(params: dict) -> list:
2943
  headers = {}
2944
  if ERP_TOKEN:
 
2970
  return "Medium"
2971
 
2972
 
2973
+
 
 
2974
  def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
2975
  """
2976
  Enhanced preprocessing for better OCR on handwritten images.
 
3227
 
3228
 
3229
 
3230
+
 
 
3231
  @app.get("/health")
3232
  def health():
3233
  return {"status": "ok"}
 
3316
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
3317
  }
3318
 
3319
+
 
 
3320
  if question_type == "mixed":
3321
  # Process each question type separately and combine results
3322
  mcq_results = []
 
3630
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
3631
  }
3632
 
3633
+
 
 
3634
  if gemini_client is None:
3635
  return {
3636
  "student_id": student_id,