Moncey10 committed on
Commit
5fe9776
·
2 Parent(s): 90877145ec9328

Merge GitHub main into local

Browse files
Files changed (8) hide show
  1. .gitignore +4 -1
  2. Dockerfile +23 -0
  3. README.md +92 -1
  4. answer_key.json +60 -0
  5. app.py +403 -0
  6. db.py +3 -0
  7. requirements.txt +10 -0
  8. scholar clone.lnk +0 -0
.gitignore CHANGED
@@ -18,6 +18,7 @@ answer_key.json
18
  __pycache__/
19
  *.pyc
20
  .env
 
21
  myenv/
22
  venv/
23
  .venv/
@@ -27,4 +28,6 @@ __pycache__/
27
  *.pyd
28
  *.db
29
  homework.db
30
- _local_backup/
 
 
 
18
  __pycache__/
19
  *.pyc
20
  .env
21
+ <<<<<<< HEAD
22
  myenv/
23
  venv/
24
  .venv/
 
28
  *.pyd
29
  *.db
30
  homework.db
31
+ _local_backup/
32
+ =======
33
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
Dockerfile CHANGED
@@ -1,3 +1,4 @@
 
1
  FROM python:3.10-slim
2
 
3
  # Install Tesseract OCR + Poppler + dependencies
@@ -21,3 +22,25 @@ ENV HF_SPACE=moncey10-homework-validation-system.hf.space
21
  EXPOSE 7860
22
 
23
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  FROM python:3.10-slim
3
 
4
  # Install Tesseract OCR + Poppler + dependencies
 
22
  EXPOSE 7860
23
 
24
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
25
+ =======
26
+ FROM python:3.10-slim
27
+
28
+ # System deps (Tesseract OCR + Leptonica, and Poppler for pdf2image)
29
+ RUN apt-get update && apt-get install -y \
30
+ tesseract-ocr \
31
+ libtesseract-dev \
32
+ libleptonica-dev \
33
+ poppler-utils \
34
+ && rm -rf /var/lib/apt/lists/*
35
+
36
+ WORKDIR /app
37
+ COPY requirements.txt /app/requirements.txt
38
+ RUN pip install --no-cache-dir -r /app/requirements.txt
39
+
40
+ COPY . /app
41
+
42
+ # HF Spaces uses 7860
43
+ EXPOSE 7860
44
+
45
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
46
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
README.md CHANGED
@@ -1,6 +1,97 @@
 
1
  ---
2
  title: Homework Validation System
3
  sdk: docker
4
  app_port: 7860
5
  ---
6
- hello
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  ---
3
  title: Homework Validation System
4
  sdk: docker
5
  app_port: 7860
6
  ---
7
+ hello
8
+ =======
9
+ ---
10
+ title: Homework Validation System
11
+ sdk: docker
12
+ app_port: 7860
13
+ ---
14
+ # Homework Validation System (FastAPI)
15
+
16
+ A backend API that validates student homework by extracting text from teacher and student files, comparing answers, and generating remarks using rule-based logic and optional AI.
17
+
18
+ ---
19
+
20
+ ## Features
21
+
22
+ - Upload teacher and student homework files
23
+ - OCR support for images and scanned PDFs
24
+ - Text extraction from PDF and DOCX
25
+ - Similarity matching using TF-IDF + cosine similarity
26
+ - Optional AI-generated remarks (OpenAI / Gemini)
27
+ - FastAPI Swagger documentation
28
+
29
+ ---
30
+
31
+ ## Tech Stack
32
+
33
+ - FastAPI
34
+ - Python
35
+ - pytesseract
36
+ - Pillow
37
+ - pypdf / pdf2image
38
+ - python-docx
39
+ - scikit-learn
40
+ - OpenAI / Gemini (optional)
41
+
42
+ ---
43
+
44
+ ## Project Structure
45
+
46
+ ---
47
+ homework_validation_system/
48
+
49
+ ├── app.py
50
+ ├── requirements.txt
51
+ ├── artifacts/
52
+ ├── uploads/
53
+ ├── src/
54
+ │ ├── extractors.py
55
+ │ ├── similarity.py
56
+ │ ├── llm_client.py
57
+ │ └── utils.py
58
+ └── README.md
59
+ ## Installation
60
+
61
+ ### 1. Create Virtual Environment
62
+ python -m venv myenv
63
+
64
+ ### 2. Install Requirements
65
+ pip install -r requirements.txt
66
+ ## OCR Setup (Required)
67
+
68
+ ### Install Tesseract OCR
69
+
70
+ This project uses **Tesseract OCR** for extracting text from images and scanned PDFs.
71
+
72
+ #### Windows
73
+ 1. Download and install Tesseract OCR.
74
+ 2. Default installation path: `C:\Program Files\Tesseract-OCR\tesseract.exe`
75
+ 3. Add this path in your code:
76
+
77
+ pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
78
+
79
+ ### Run API
80
+ uvicorn app:app --reload --host 0.0.0.0 --port 8000
81
+
82
+ ### Swagger UI:
83
+
84
+ http://localhost:8000/docs
85
+
86
+ ### Example API Response
87
+ {
88
+ "student_id": 1,
89
+ "homework_id": 10,
90
+ "status": "Needs Review",
91
+ "match_percentage": 72,
92
+ "teacher_extracted_text": "...",
93
+ "student_extracted_text": "...",
94
+ "ai_generated_remark": "Good attempt but missing key points.",
95
+ "llm_used": true
96
+ }
97
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
answer_key.json CHANGED
@@ -1,3 +1,4 @@
 
1
  {
2
  "hw01": {
3
  "questions": [
@@ -55,4 +56,63 @@
55
  }
56
  ]
57
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  }
 
1
+ <<<<<<< HEAD
2
  {
3
  "hw01": {
4
  "questions": [
 
56
  }
57
  ]
58
  }
59
+ =======
60
+ {
61
+ "hw01": {
62
+ "questions": [
63
+ {
64
+ "qid": "Q1",
65
+ "type": "text",
66
+ "answer": "Artificial Intelligence is the simulation of human intelligence."
67
+ },
68
+ {
69
+ "qid": "Q2",
70
+ "type": "text",
71
+ "answer": "Machine Learning is a subset of AI that learns from data."
72
+ }
73
+ ]
74
+ },
75
+ "hw99": {
76
+ "questions": [
77
+ {
78
+ "qid": "Q1",
79
+ "type": "text",
80
+ "answer": "Artificial Intelligence is the simulation of human intelligence."
81
+ },
82
+ {
83
+ "qid": "Q2",
84
+ "type": "text",
85
+ "answer": "Machine Learning is a subset of AI that learns from data."
86
+ }
87
+ ]
88
+ },
89
+ "hw90": {
90
+ "questions": [
91
+ {
92
+ "qid": "Q1",
93
+ "type": "text",
94
+ "answer": "Artificial Intelligence is the simulation of human intelligence."
95
+ },
96
+ {
97
+ "qid": "Q2",
98
+ "type": "text",
99
+ "answer": "Machine Learning is a subset of AI that learns from data."
100
+ }
101
+ ]
102
+ },
103
+ "hw15": {
104
+ "questions": [
105
+ {
106
+ "qid": "Q1",
107
+ "type": "text",
108
+ "answer": "Artificial Intelligence is the simulation of human intelligence."
109
+ },
110
+ {
111
+ "qid": "Q2",
112
+ "type": "text",
113
+ "answer": "Machine Learning is a subset of AI that learns from data."
114
+ }
115
+ ]
116
+ }
117
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
118
  }
app.py CHANGED
@@ -10,11 +10,15 @@ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from PIL import Image, ImageOps, ImageFilter
12
  import pytesseract
 
13
  import os
14
 
15
  # Serve static files from outputs directory
16
  from fastapi.staticfiles import StaticFiles
17
  from fastapi.responses import FileResponse
 
 
 
18
  from dotenv import load_dotenv
19
  load_dotenv()
20
 
@@ -30,6 +34,7 @@ except Exception:
30
  PdfReader = None
31
 
32
  try:
 
33
  from reportlab.pdfgen import canvas
34
  from reportlab.lib.pagesizes import letter
35
  from reportlab.lib import colors
@@ -40,6 +45,8 @@ except Exception as e:
40
  print(f"[WARN] reportlab import failed: {e}")
41
 
42
  try:
 
 
43
  from pdf2image import convert_from_bytes # requires poppler
44
  except Exception:
45
  convert_from_bytes = None
@@ -55,6 +62,7 @@ except Exception as e:
55
  genai = None
56
  print(f"[WARN] google-genai import failed: {e}")
57
 
 
58
  # ✅ Google Cloud Vision SDK (for better handwritten OCR)
59
  try:
60
  from google.cloud import vision
@@ -111,6 +119,13 @@ def debug_env():
111
  "num_keys": len(GOOGLE_API_KEYS),
112
  "has_openai_key": bool(os.getenv("OPENAI_API_KEY")),
113
  }
 
 
 
 
 
 
 
114
  app.add_middleware(
115
  CORSMiddleware,
116
  allow_origins=["*"],
@@ -119,20 +134,33 @@ app.add_middleware(
119
  allow_headers=["*"],
120
  )
121
 
 
122
 
123
 
 
 
 
 
 
124
  if os.name == "nt":
125
  pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
126
  else:
127
  pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
128
 
129
 
 
130
 
 
 
 
 
 
131
  ERP_BASE = os.getenv("ERP_BASE", "https://erp.triz.co.in/lms_data")
132
  STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student/")
133
  ERP_TOKEN = os.getenv("ERP_TOKEN", "")
134
 
135
 
 
136
  def get_public_base_url() -> str:
137
  """
138
  Returns the public base URL of this server.
@@ -251,27 +279,58 @@ def _init_gemini_client(key_index: int = 0) -> None:
251
  return
252
 
253
  api_key = GOOGLE_API_KEYS[key_index]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
  if not genai:
256
  GEMINI_LAST_ERROR = "google-genai not installed / import failed"
257
  gemini_client = None
258
  return
259
 
 
260
  if not api_key:
261
  GEMINI_LAST_ERROR = f"GOOGLE_API_KEY_{key_index + 1} not set"
 
 
 
 
262
  gemini_client = None
263
  return
264
 
265
  try:
 
266
  gemini_client = genai.Client(api_key=api_key)
267
  GEMINI_LAST_ERROR = ""
268
  print(f"[INFO] Gemini client initialized with key #{key_index + 1}")
 
 
 
 
 
269
  except Exception as e:
270
  gemini_client = None
271
  GEMINI_LAST_ERROR = str(e)
272
  print(f"[WARN] Gemini init failed: {GEMINI_LAST_ERROR}")
273
 
274
 
 
275
  def _is_rate_limit_error(error_msg: str) -> bool:
276
  """Check if the error is a rate limit error (429) or service unavailable (503)."""
277
  if not error_msg:
@@ -314,6 +373,9 @@ def _rotate_to_next_key() -> bool:
314
 
315
 
316
  _init_gemini_client(0)
 
 
 
317
 
318
 
319
  def parse_gemini_error(error_msg: str) -> dict:
@@ -329,6 +391,7 @@ def parse_gemini_error(error_msg: str) -> dict:
329
  return {"ok": False, "error_type": "GEMINI_ERROR", "message": msg}
330
 
331
 
 
332
 
333
  def extract_qid_from_prompt(prompt: str, erp_row: dict = None) -> str:
334
  """
@@ -375,22 +438,32 @@ def extract_qid_from_prompt(prompt: str, erp_row: dict = None) -> str:
375
  return "Q1"
376
 
377
 
 
 
378
  def generate_gemini_response(
379
  prompt: str,
380
  system_prompt: str = "",
381
  max_tokens: int = 650,
382
  temperature: float = 0.3,
383
  ) -> str:
 
384
  global GEMINI_LAST_ERROR, gemini_client, rate_limited_keys
 
 
 
385
 
386
  if gemini_client is None:
387
  if not GEMINI_LAST_ERROR:
388
  GEMINI_LAST_ERROR = "Gemini client not initialized"
 
389
  # Try to reinitialize if we have keys available
390
  if GOOGLE_API_KEYS and current_key_index not in rate_limited_keys:
391
  _init_gemini_client(current_key_index)
392
  if gemini_client is None:
393
  return ""
 
 
 
394
 
395
  try:
396
  contents = []
@@ -408,6 +481,7 @@ def generate_gemini_response(
408
  GEMINI_LAST_ERROR = ""
409
  return text
410
  except Exception as e:
 
411
  error_msg = str(e)
412
  print(f"[ERROR] Gemini call failed: {error_msg}")
413
 
@@ -419,6 +493,10 @@ def generate_gemini_response(
419
  return generate_gemini_response(prompt, system_prompt, max_tokens, temperature)
420
 
421
  GEMINI_LAST_ERROR = error_msg
 
 
 
 
422
  return ""
423
 
424
  import time
@@ -452,7 +530,13 @@ def cheap_overlap_score(student_text: str, prompt: str) -> int:
452
  return int(round(min(0.6, overlap) * 100)) # cap at 60
453
 
454
 
 
455
 
 
 
 
 
 
456
  def _norm(s: str) -> str:
457
  return re.sub(r"\s+", " ", (s or "").strip().lower())
458
 
@@ -487,6 +571,7 @@ def level_policy(student_level: str) -> dict:
487
  return {"w_sim": 0.6, "w_cov": 0.4, "verified": 75, "partial": 55, "kp_thr": 0.20}
488
 
489
 
 
490
  def mcq_partial_credit(student_level: str) -> dict:
491
  """
492
  Returns partial credit percentage for MCQ questions based on student level.
@@ -507,6 +592,8 @@ def mcq_partial_credit(student_level: str) -> dict:
507
  return {"credit_per_question": 75, "passing_threshold": 75}
508
 
509
 
 
 
510
  def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: float) -> Tuple[List[str], List[str], float]:
511
  covered, missing = [], []
512
  for kp in key_points:
@@ -524,8 +611,15 @@ def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: fl
524
  return covered, missing, coverage
525
 
526
 
 
527
 
528
  def infer_question_type_from_prompt(prompt: str, student_text: str = "") -> str:
 
 
 
 
 
 
529
  p = _norm(prompt)
530
 
531
  # Explicit markers - check for (mcq) first since it's common in parentheses
@@ -534,6 +628,7 @@ def infer_question_type_from_prompt(prompt: str, student_text: str = "") -> str:
534
  if re.search(r"\btype\s*:\s*narrative\b", p) or re.search(r"\bquestion_type\s*:\s*narrative\b", p):
535
  return "narrative"
536
 
 
537
  # Heuristic: options A/B/C/D exist in prompt -> likely MCQ
538
  if re.search(r"\b(a|b|c|d)\s*[\)\.]\s+", p) or "option a" in p or "option b" in p:
539
  return "mcq"
@@ -551,6 +646,11 @@ def infer_question_type_from_prompt(prompt: str, student_text: str = "") -> str:
551
  # If answer starts with A. or B. etc.
552
  if re.search(r"^[a-d]\.\s+", s.strip()):
553
  return "mcq"
 
 
 
 
 
554
 
555
  return "narrative"
556
 
@@ -615,6 +715,7 @@ def parse_questions_from_prompt(prompt: str) -> List[Dict[str, Any]]:
615
 
616
  # Check for correct answer (for MCQ)
617
  if current_type == 'mcq':
 
618
  # First check: is this line "Correct Answer(s):" with nothing after it?
619
  # If so, we need to look for the answer on the next line
620
  if re.search(r'^correct\s*answer\s*\(?s\)?\s*[:\.]?\s*$', line, re.IGNORECASE):
@@ -647,6 +748,12 @@ def parse_questions_from_prompt(prompt: str) -> List[Dict[str, Any]]:
647
  else:
648
  # Try to extract first letter
649
  current_correct = correct_text[0].upper() if correct_text else None
 
 
 
 
 
 
650
 
651
  # Don't forget the last question
652
  if current_q is not None:
@@ -660,7 +767,11 @@ def parse_questions_from_prompt(prompt: str) -> List[Dict[str, Any]]:
660
  # If no questions parsed, fall back to old behavior
661
  if not questions:
662
  qtype = infer_question_type_from_prompt(prompt)
 
663
  return [{'qid': extract_qid_from_prompt(prompt), 'type': qtype, 'question': prompt, 'correct_answer': None}]
 
 
 
664
 
665
  return questions
666
 
@@ -692,6 +803,7 @@ def extract_mcq_choice(text: str) -> str:
692
  return ""
693
 
694
 
 
695
  def extract_mcq_answers_with_qid(text: str) -> Dict[str, str]:
696
  """
697
  Extract MCQ answers WITH question numbers from student text.
@@ -747,6 +859,8 @@ def extract_mcq_answers_with_qid(text: str) -> Dict[str, str]:
747
  return results
748
 
749
 
 
 
750
  def extract_correct_mcq_from_prompt(prompt: str) -> str:
751
  """
752
  This is IMPORTANT:
@@ -754,6 +868,7 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
754
  - Correct: B
755
  - Answer: C
756
  - correct_option: D
 
757
  - Correct Answer(s): A. Devdatta
758
  or JSON: {"correct_option":"B"}
759
 
@@ -762,6 +877,9 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
762
  - "Correct Answer(s): A. Devdatta"
763
  - "Correct: B"
764
  - "Answer: C"
 
 
 
765
  """
766
  p = (prompt or "").strip()
767
  if not p:
@@ -778,6 +896,7 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
778
  except Exception:
779
  pass
780
 
 
781
  # Text prompt support - new format: "Correct Answer(s): A. Devdatta" or "Correct Answer: B"
782
  t = _norm(p)
783
 
@@ -800,6 +919,10 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
800
  return m1c.group(1)
801
 
802
  # Pattern 2: "Correct: A" or "Answer: B" (original pattern)
 
 
 
 
803
  m = re.search(r"\b(correct|answer|ans)\s*[:\-]?\s*\(?\s*([a-d])\s*\)?\b", t)
804
  if m:
805
  return m.group(2)
@@ -807,7 +930,13 @@ def extract_correct_mcq_from_prompt(prompt: str) -> str:
807
  return ""
808
 
809
 
 
810
 
 
 
 
 
 
811
  def _erp_get(params: dict) -> list:
812
  headers = {}
813
  if ERP_TOKEN:
@@ -839,6 +968,7 @@ def fetch_student_level_from_erp(row: Dict[str, Any]) -> str:
839
  return "Medium"
840
 
841
 
 
842
 
843
  def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
844
  """
@@ -907,6 +1037,25 @@ def _extract_text_google_vision(image_bytes: bytes) -> str:
907
  return ""
908
 
909
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
910
  def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> str:
911
  if not image_bytes or len(image_bytes) < 50:
912
  raise HTTPException(status_code=400, detail=f"Invalid file: '{filename}' - empty/too small")
@@ -923,6 +1072,7 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
923
  head = image_bytes[:12]
924
  raise HTTPException(status_code=400, detail=f"Invalid image format: '{filename}' (header={head})")
925
 
 
926
  # First try Google Cloud Vision (better for handwriting)
927
  if vision_client:
928
  gv_text = _extract_text_google_vision(image_bytes)
@@ -930,6 +1080,8 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
930
  return _clean_extracted_text(gv_text)
931
 
932
  # Fallback to Tesseract with improved preprocessing
 
 
933
  try:
934
  img = Image.open(io.BytesIO(image_bytes))
935
  except Exception as e:
@@ -937,6 +1089,7 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
937
 
938
  img = _preprocess_for_ocr(img)
939
 
 
940
  # Try multiple OCR configurations for better handwritten recognition
941
  ocr_configs = [
942
  "--oem 3 --psm 6", # Default
@@ -965,6 +1118,16 @@ def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> st
965
  raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
966
 
967
  text = (best_text or "").strip()
 
 
 
 
 
 
 
 
 
 
968
  text = re.sub(r"[ \t]+", " ", text)
969
  return text
970
 
@@ -1016,6 +1179,7 @@ def extract_text_from_pdf(pdf_bytes: bytes, filename: str = "unknown.pdf") -> Di
1016
  return {"text": extracted, "used_ocr": False, "needs_ocr": True}
1017
  try:
1018
  used_ocr = True
 
1019
  # Higher DPI for better handwritten OCR
1020
  pages = convert_from_bytes(pdf_bytes, dpi=300)
1021
  page_texts = []
@@ -1041,12 +1205,23 @@ def extract_text_from_pdf(pdf_bytes: bytes, filename: str = "unknown.pdf") -> Di
1041
  if img:
1042
  img = _preprocess_for_ocr(img)
1043
  extracted = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6") or ""
 
 
 
 
 
 
 
 
 
 
1044
  except Exception as e:
1045
  return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": True, "ocr_error": str(e)}
1046
 
1047
  return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": False}
1048
 
1049
 
 
1050
  def get_question_positions_from_pdf(pdf_bytes: bytes) -> Dict[int, List[Dict]]:
1051
  """
1052
  Detect question number positions in a PDF.
@@ -1319,6 +1494,8 @@ def create_annotated_pdf(
1319
  print(f"[ERROR] Failed to create annotated PDF: {e}")
1320
  return original_pdf_bytes
1321
 
 
 
1322
  async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
1323
  filename = getattr(file, "filename", "") or "upload"
1324
  content_type = (getattr(file, "content_type", "") or "").lower()
@@ -1368,7 +1545,13 @@ async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
1368
 
1369
 
1370
 
 
1371
 
 
 
 
 
 
1372
  @app.get("/health")
1373
  def health():
1374
  return {"status": "ok"}
@@ -1377,6 +1560,7 @@ def health():
1377
  @app.get("/health/llm")
1378
  def health_llm():
1379
  return {
 
1380
  "ok": bool(gemini_client) and bool(GOOGLE_API_KEYS),
1381
  "gemini": {
1382
  "sdk_import_ok": genai is not None,
@@ -1384,6 +1568,12 @@ def health_llm():
1384
  "num_keys_configured": len(GOOGLE_API_KEYS),
1385
  "current_key_index": current_key_index + 1 if GOOGLE_API_KEYS else 0,
1386
  "rate_limited_keys": list(rate_limited_keys),
 
 
 
 
 
 
1387
  "client_ready": gemini_client is not None,
1388
  "model": GEMINI_MODEL,
1389
  "last_error": GEMINI_LAST_ERROR if GEMINI_LAST_ERROR else None,
@@ -1391,6 +1581,7 @@ def health_llm():
1391
  }
1392
 
1393
 
 
1394
  @app.get("/homework/annotated-url/{homework_id}/{student_id}")
1395
  async def get_annotated_pdf_url(
1396
  homework_id: int,
@@ -1803,10 +1994,13 @@ def build_per_question_results(
1803
  return ai_evaluate_per_question(prompt, student_text, student_level)
1804
 
1805
 
 
 
1806
  @app.post("/homework/validate")
1807
  async def homework_validate(
1808
  student_id: int = Form(...),
1809
  homework_id: int = Form(...),
 
1810
  student_file: UploadFile = File(...),
1811
  ):
1812
  # 0) Fetch ERP record -> get all fields automatically
@@ -1836,12 +2030,25 @@ async def homework_validate(
1836
  if final_question_type not in ("mcq", "narrative", "mixed"):
1837
  final_question_type = infer_question_type_from_prompt(prompt, student_text)
1838
 
 
 
 
 
 
 
 
 
 
 
 
 
1839
  # 1) Infer question_type from prompt automatically (NO EXTRA FIELD)
1840
  # Try to parse mixed questions first
1841
  parsed_questions = parse_questions_from_prompt(prompt)
1842
  has_mcq = any(q.get('type') == 'mcq' for q in parsed_questions)
1843
  has_narrative = any(q.get('type') == 'narrative' for q in parsed_questions)
1844
 
 
1845
  # Check if it's a PDF
1846
  is_pdf_submission = student_info.get("kind") == "pdf"
1847
 
@@ -1885,11 +2092,30 @@ async def homework_validate(
1885
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
1886
  original_file_bytes, homework_id, student_id, unreadable_result, 0, "Unreadable", student_level
1887
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1888
  return {
1889
  "student_id": student_id,
1890
  "homework_id": homework_id,
1891
  "sub_institute_id": sub_institute_id,
1892
  "syear": syear,
 
1893
  "question_type": final_question_type,
1894
  "student_level": student_level,
1895
  "status": "Unreadable",
@@ -1900,10 +2126,21 @@ async def homework_validate(
1900
  "llm_used": False,
1901
  "question_marks": make_question_marks([]),
1902
  "annotated_pdf": annotated_pdf_filename,
 
 
 
 
 
 
 
 
 
 
1903
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
1904
  }
1905
 
1906
  if student_info.get("needs_ocr") and not student_text:
 
1907
  # Save annotated PDF even for unreadable (with status shown)
1908
  if is_pdf_submission and original_file_bytes:
1909
  # Show circle mark for scanned PDF that needs OCR
@@ -1911,11 +2148,14 @@ async def homework_validate(
1911
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
1912
  original_file_bytes, homework_id, student_id, ocr_result, 0, "Unreadable", student_level
1913
  )
 
 
1914
  return {
1915
  "student_id": student_id,
1916
  "homework_id": homework_id,
1917
  "sub_institute_id": sub_institute_id,
1918
  "syear": syear,
 
1919
  "question_type": final_question_type,
1920
  "student_level": student_level,
1921
  "status": "Unreadable",
@@ -1931,10 +2171,28 @@ async def homework_validate(
1931
 
1932
 
1933
  if final_question_type == "mixed":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1934
  # Process each question type separately and combine results
1935
  mcq_results = []
1936
  narrative_results = []
1937
 
 
1938
  # Extract ALL MCQ answers from student text with question numbers
1939
  student_answers_by_qid = extract_mcq_answers_with_qid(student_text)
1940
 
@@ -1951,11 +2209,21 @@ async def homework_validate(
1951
  if not chosen:
1952
  chosen = extract_mcq_choice(student_text)
1953
 
 
 
 
 
 
 
 
 
 
1954
  correct = q.get('correct_answer') or extract_correct_mcq_from_prompt(q.get('question', ''))
1955
 
1956
  if correct and chosen:
1957
  is_correct = (chosen.lower().strip() == correct.lower().strip())
1958
  mcq_results.append({
 
1959
  'qid': qid,
1960
  'correct': is_correct,
1961
  'chosen': chosen,
@@ -1970,6 +2238,12 @@ async def homework_validate(
1970
  'chosen': '',
1971
  'correct_answer': correct,
1972
  'unattempted': True
 
 
 
 
 
 
1973
  })
1974
 
1975
  # For narrative questions, use AI to generate reference
@@ -2026,6 +2300,7 @@ async def homework_validate(
2026
  except Exception as e:
2027
  narrative_results = {'error': str(e)}
2028
 
 
2029
  # Calculate combined score with level-based partial credit for MCQ
2030
  total_mcq = len(mcq_results)
2031
  correct_mcq = sum(1 for r in mcq_results if r.get('correct'))
@@ -2037,6 +2312,12 @@ async def homework_validate(
2037
 
2038
  # Calculate MCQ score based on level (not just binary correct/incorrect)
2039
  mcq_score = (correct_mcq * credit_per_q) / max(1, total_mcq)
 
 
 
 
 
 
2040
 
2041
  narrative_score = narrative_results.get('match_percentage', 0) if narrative_results else 0
2042
 
@@ -2058,12 +2339,15 @@ async def homework_validate(
2058
  else:
2059
  status = "Needs Review"
2060
 
 
2061
  # Save annotated PDF
2062
  if is_pdf_submission and original_file_bytes and mcq_results:
2063
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2064
  original_file_bytes, homework_id, student_id, mcq_results, final_score, status, student_level
2065
  )
2066
 
 
 
2067
  return {
2068
  "student_id": student_id,
2069
  "homework_id": homework_id,
@@ -2073,12 +2357,18 @@ async def homework_validate(
2073
  "student_level": student_level,
2074
  "status": status,
2075
  "match_percentage": final_score,
 
2076
  "submission_remarks": None,
2077
  "rule_based_remark": f"MCQ: {correct_mcq}/{total_mcq} correct. Narrative score: {narrative_score}%. (Level: {student_level}, Credit per Q: {credit_per_q}%)",
 
 
 
 
2078
  "llm_used": bool(narrative_results and 'error' not in narrative_results),
2079
  "student_extracted_text": student_text,
2080
  "mcq_results": mcq_results,
2081
  "narrative_results": narrative_results,
 
2082
  "question_marks": make_question_marks(mcq_results),
2083
  "annotated_pdf": annotated_pdf_filename,
2084
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
@@ -2235,6 +2525,14 @@ async def homework_validate(
2235
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2236
  original_file_bytes, homework_id, student_id, no_correct_result, 0, "Needs Review", student_level
2237
  )
 
 
 
 
 
 
 
 
2238
  return {
2239
  "student_id": student_id,
2240
  "homework_id": homework_id,
@@ -2244,6 +2542,7 @@ async def homework_validate(
2244
  "student_level": student_level,
2245
  "status": "Needs Review",
2246
  "match_percentage": 0,
 
2247
  "submission_remarks": None,
2248
  "rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
2249
  "student_extracted_text": student_text,
@@ -2260,6 +2559,17 @@ async def homework_validate(
2260
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2261
  original_file_bytes, homework_id, student_id, no_chosen_result, 0, "Needs Review", student_level
2262
  )
 
 
 
 
 
 
 
 
 
 
 
2263
  return {
2264
  "student_id": student_id,
2265
  "homework_id": homework_id,
@@ -2269,16 +2579,24 @@ async def homework_validate(
2269
  "student_level": student_level,
2270
  "status": "Needs Review",
2271
  "match_percentage": 0,
 
2272
  "submission_remarks": None,
2273
  "rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
2274
  "student_extracted_text": student_text,
2275
  "llm_used": False,
2276
  "question_marks": make_question_marks([]),
2277
  "annotated_pdf": annotated_pdf_filename,
 
 
 
 
 
 
2278
  "debug": {"correct": correct, "chosen": chosen},
2279
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2280
  }
2281
 
 
2282
  # Only process MCQ validation if not redirecting to narrative
2283
  if not redirect_to_narrative:
2284
  is_correct = (chosen == correct)
@@ -2328,6 +2646,30 @@ async def homework_validate(
2328
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2329
  original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
2330
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2331
  return {
2332
  "student_id": student_id,
2333
  "homework_id": homework_id,
@@ -2337,13 +2679,20 @@ async def homework_validate(
2337
  "student_level": student_level,
2338
  "status": "Needs Review",
2339
  "match_percentage": 0,
 
2340
  "submission_remarks": None,
 
 
 
2341
  "rule_based_remark": "Gemini not configured. Check /health/llm.",
2342
  "llm_used": False,
2343
  "llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
2344
  "student_extracted_text": student_text,
 
2345
  "question_marks": make_question_marks([]),
2346
  "annotated_pdf": annotated_pdf_filename,
 
 
2347
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2348
  }
2349
 
@@ -2364,11 +2713,14 @@ async def homework_validate(
2364
  )
2365
 
2366
  if not response_text:
 
2367
  # Save annotated PDF
2368
  if is_pdf_submission and original_file_bytes:
2369
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2370
  original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
2371
  )
 
 
2372
  return {
2373
  "student_id": student_id,
2374
  "homework_id": homework_id,
@@ -2378,13 +2730,20 @@ async def homework_validate(
2378
  "student_level": student_level,
2379
  "status": "Needs Review",
2380
  "match_percentage": 0,
 
2381
  "submission_remarks": None,
 
 
 
2382
  "rule_based_remark": "Gemini failed. Check /health/llm.",
2383
  "llm_used": False,
2384
  "llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
2385
  "student_extracted_text": student_text,
 
2386
  "question_marks": make_question_marks([]),
2387
  "annotated_pdf": annotated_pdf_filename,
 
 
2388
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2389
  }
2390
 
@@ -2392,11 +2751,14 @@ async def homework_validate(
2392
  m = re.search(r"\{.*\}", response_text, flags=re.S)
2393
  payload = json.loads(m.group(0) if m else response_text)
2394
  except Exception as e:
 
2395
  # Save annotated PDF
2396
  if is_pdf_submission and original_file_bytes:
2397
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2398
  original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
2399
  )
 
 
2400
  return {
2401
  "student_id": student_id,
2402
  "homework_id": homework_id,
@@ -2406,13 +2768,20 @@ async def homework_validate(
2406
  "student_level": student_level,
2407
  "status": "Needs Review",
2408
  "match_percentage": 0,
 
2409
  "submission_remarks": None,
 
 
 
2410
  "rule_based_remark": "Gemini returned non-JSON output.",
2411
  "llm_used": False,
2412
  "llm_error": {"ok": False, "error_type": "GEMINI_BAD_JSON", "message": str(e), "raw": response_text[:800]},
2413
  "student_extracted_text": student_text,
 
2414
  "question_marks": make_question_marks([]),
2415
  "annotated_pdf": annotated_pdf_filename,
 
 
2416
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2417
  }
2418
 
@@ -2423,11 +2792,14 @@ async def homework_validate(
2423
  key_points = [str(x).strip() for x in key_points if str(x).strip()]
2424
 
2425
  if not ai_reference_answer:
 
2426
  # Save annotated PDF
2427
  if is_pdf_submission and original_file_bytes:
2428
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2429
  original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
2430
  )
 
 
2431
  return {
2432
  "student_id": student_id,
2433
  "homework_id": homework_id,
@@ -2437,12 +2809,19 @@ async def homework_validate(
2437
  "student_level": student_level,
2438
  "status": "Needs Review",
2439
  "match_percentage": 0,
 
2440
  "submission_remarks": None,
2441
  "rule_based_remark": "AI returned empty reference answer.",
2442
  "llm_used": True,
2443
  "student_extracted_text": student_text,
2444
  "question_marks": make_question_marks([]),
2445
  "annotated_pdf": annotated_pdf_filename,
 
 
 
 
 
 
2446
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2447
  }
2448
 
@@ -2473,7 +2852,11 @@ async def homework_validate(
2473
  f"{remark_prompt}"
2474
  )
2475
 
 
2476
  submission_remark = generate_gemini_response(
 
 
 
2477
  prompt=resp2_prompt,
2478
  system_prompt="You are a strict, helpful teacher. Be concise and factual.",
2479
  max_tokens=140,
@@ -2481,10 +2864,17 @@ async def homework_validate(
2481
  )
2482
 
2483
  rule_based_remark = None
 
2484
  remark_llm_used = bool(submission_remark)
2485
  remark_llm_error = None if submission_remark else (GEMINI_LAST_ERROR or "Unknown LLM error")
2486
 
2487
  if not submission_remark:
 
 
 
 
 
 
2488
  if status == "Verified":
2489
  rule_based_remark = "Homework matches the expected answer well. Good coverage of the key ideas."
2490
  elif status == "Partial":
@@ -2492,6 +2882,7 @@ async def homework_validate(
2492
  else:
2493
  rule_based_remark = "Homework does not match the expected answer enough. Please review the topic and resubmit with clearer, complete points."
2494
 
 
2495
  # Save annotated PDF — evaluate EACH question individually against student text
2496
  per_question_results = build_per_question_results(
2497
  prompt, student_text, status, match_pct,
@@ -2505,6 +2896,8 @@ async def homework_validate(
2505
  original_file_bytes, homework_id, student_id, per_question_results, match_pct, status, student_level, "narrative"
2506
  )
2507
 
 
 
2508
  return {
2509
  "student_id": student_id,
2510
  "homework_id": homework_id,
@@ -2514,7 +2907,11 @@ async def homework_validate(
2514
  "student_level": student_level,
2515
  "status": status,
2516
  "match_percentage": match_pct,
 
2517
  "submission_remarks": submission_remark if submission_remark else None,
 
 
 
2518
  "rule_based_remark": rule_based_remark,
2519
  "llm_used": True,
2520
  "remark_llm_used": remark_llm_used,
@@ -2524,15 +2921,21 @@ async def homework_validate(
2524
  "key_points": key_points,
2525
  "key_points_covered": covered,
2526
  "key_points_missing": missing,
 
2527
  "question_marks": make_question_marks(per_question_results),
2528
  "annotated_pdf": annotated_pdf_filename,
 
 
2529
  "debug": {
2530
  "similarity": sim,
2531
  "coverage": coverage,
2532
  "policy": policy,
 
2533
  "per_question_results": per_question_results,
2534
  "erp_row_fields": list(erp_row.keys()) if erp_row else [],
2535
  "erp_student_level_raw": erp_row.get("student_level") or erp_row.get("level") or erp_row.get("difficulty") or erp_row.get("difficulty_level"),
 
 
2536
  },
2537
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2538
  }
 
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from PIL import Image, ImageOps, ImageFilter
12
  import pytesseract
13
+ <<<<<<< HEAD
14
  import os
15
 
16
  # Serve static files from outputs directory
17
  from fastapi.staticfiles import StaticFiles
18
  from fastapi.responses import FileResponse
19
+ =======
20
+
21
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
22
  from dotenv import load_dotenv
23
  load_dotenv()
24
 
 
34
  PdfReader = None
35
 
36
  try:
37
+ <<<<<<< HEAD
38
  from reportlab.pdfgen import canvas
39
  from reportlab.lib.pagesizes import letter
40
  from reportlab.lib import colors
 
45
  print(f"[WARN] reportlab import failed: {e}")
46
 
47
  try:
48
+ =======
49
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
50
  from pdf2image import convert_from_bytes # requires poppler
51
  except Exception:
52
  convert_from_bytes = None
 
62
  genai = None
63
  print(f"[WARN] google-genai import failed: {e}")
64
 
65
+ <<<<<<< HEAD
66
  # ✅ Google Cloud Vision SDK (for better handwritten OCR)
67
  try:
68
  from google.cloud import vision
 
119
  "num_keys": len(GOOGLE_API_KEYS),
120
  "has_openai_key": bool(os.getenv("OPENAI_API_KEY")),
121
  }
122
+ =======
123
+
124
+ # =========================================================
125
+ # ✅ FASTAPI APP INSTANCE
126
+ # =========================================================
127
+ app = FastAPI()
128
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
129
  app.add_middleware(
130
  CORSMiddleware,
131
  allow_origins=["*"],
 
134
  allow_headers=["*"],
135
  )
136
 
137
+ <<<<<<< HEAD
138
 
139
 
140
+ =======
141
+ # =========================================================
142
+ # ✅ TESSERACT PATH
143
+ # =========================================================
144
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
145
  if os.name == "nt":
146
  pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
147
  else:
148
  pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
149
 
150
 
151
+ <<<<<<< HEAD
152
 
153
+ =======
154
+ # =========================================================
155
+ # ✅ ERP CONFIG
156
+ # =========================================================
157
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
158
  ERP_BASE = os.getenv("ERP_BASE", "https://erp.triz.co.in/lms_data")
159
  STORAGE_BASE = os.getenv("STORAGE_BASE", "https://erp.triz.co.in/storage/student/")
160
  ERP_TOKEN = os.getenv("ERP_TOKEN", "")
161
 
162
 
163
+ <<<<<<< HEAD
164
  def get_public_base_url() -> str:
165
  """
166
  Returns the public base URL of this server.
 
279
  return
280
 
281
  api_key = GOOGLE_API_KEYS[key_index]
282
+ =======
283
+ # =========================================================
284
+ # ✅ GEMINI CONFIG
285
+ # =========================================================
286
+ GOOGLE_API_KEY = (os.getenv("GOOGLE_API_KEY") or "").strip()
287
+ GEMINI_MODEL = (os.getenv("GEMINI_MODEL", "models/gemini-2.0-flash") or "").strip()
288
+ if GEMINI_MODEL and not GEMINI_MODEL.startswith("models/"):
289
+ GEMINI_MODEL = "models/" + GEMINI_MODEL
290
+
291
+ gemini_client = None
292
+ GEMINI_LAST_ERROR = ""
293
+
294
+
295
+ def _init_gemini_client() -> None:
296
+ global gemini_client, GEMINI_LAST_ERROR
297
+
298
+ if gemini_client is not None:
299
+ return
300
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
301
 
302
  if not genai:
303
  GEMINI_LAST_ERROR = "google-genai not installed / import failed"
304
  gemini_client = None
305
  return
306
 
307
+ <<<<<<< HEAD
308
  if not api_key:
309
  GEMINI_LAST_ERROR = f"GOOGLE_API_KEY_{key_index + 1} not set"
310
+ =======
311
+ if not GOOGLE_API_KEY:
312
+ GEMINI_LAST_ERROR = "GOOGLE_API_KEY not set"
313
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
314
  gemini_client = None
315
  return
316
 
317
  try:
318
+ <<<<<<< HEAD
319
  gemini_client = genai.Client(api_key=api_key)
320
  GEMINI_LAST_ERROR = ""
321
  print(f"[INFO] Gemini client initialized with key #{key_index + 1}")
322
+ =======
323
+ gemini_client = genai.Client(api_key=GOOGLE_API_KEY)
324
+ GEMINI_LAST_ERROR = ""
325
+ print("[INFO] Gemini client initialized")
326
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
327
  except Exception as e:
328
  gemini_client = None
329
  GEMINI_LAST_ERROR = str(e)
330
  print(f"[WARN] Gemini init failed: {GEMINI_LAST_ERROR}")
331
 
332
 
333
+ <<<<<<< HEAD
334
  def _is_rate_limit_error(error_msg: str) -> bool:
335
  """Check if the error is a rate limit error (429) or service unavailable (503)."""
336
  if not error_msg:
 
373
 
374
 
375
  _init_gemini_client(0)
376
+ =======
377
+ _init_gemini_client()
378
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
379
 
380
 
381
  def parse_gemini_error(error_msg: str) -> dict:
 
391
  return {"ok": False, "error_type": "GEMINI_ERROR", "message": msg}
392
 
393
 
394
+ <<<<<<< HEAD
395
 
396
  def extract_qid_from_prompt(prompt: str, erp_row: dict = None) -> str:
397
  """
 
438
  return "Q1"
439
 
440
 
441
+ =======
442
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
443
  def generate_gemini_response(
444
  prompt: str,
445
  system_prompt: str = "",
446
  max_tokens: int = 650,
447
  temperature: float = 0.3,
448
  ) -> str:
449
+ <<<<<<< HEAD
450
  global GEMINI_LAST_ERROR, gemini_client, rate_limited_keys
451
+ =======
452
+ global GEMINI_LAST_ERROR
453
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
454
 
455
  if gemini_client is None:
456
  if not GEMINI_LAST_ERROR:
457
  GEMINI_LAST_ERROR = "Gemini client not initialized"
458
+ <<<<<<< HEAD
459
  # Try to reinitialize if we have keys available
460
  if GOOGLE_API_KEYS and current_key_index not in rate_limited_keys:
461
  _init_gemini_client(current_key_index)
462
  if gemini_client is None:
463
  return ""
464
+ =======
465
+ return ""
466
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
467
 
468
  try:
469
  contents = []
 
481
  GEMINI_LAST_ERROR = ""
482
  return text
483
  except Exception as e:
484
+ <<<<<<< HEAD
485
  error_msg = str(e)
486
  print(f"[ERROR] Gemini call failed: {error_msg}")
487
 
 
493
  return generate_gemini_response(prompt, system_prompt, max_tokens, temperature)
494
 
495
  GEMINI_LAST_ERROR = error_msg
496
+ =======
497
+ GEMINI_LAST_ERROR = str(e)
498
+ print(f"[ERROR] Gemini call failed: {GEMINI_LAST_ERROR}")
499
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
500
  return ""
501
 
502
  import time
 
530
  return int(round(min(0.6, overlap) * 100)) # cap at 60
531
 
532
 
533
+ <<<<<<< HEAD
534
 
535
+ =======
536
+ # =========================================================
537
+ # ✅ SMALL UTILS
538
+ # =========================================================
539
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
540
  def _norm(s: str) -> str:
541
  return re.sub(r"\s+", " ", (s or "").strip().lower())
542
 
 
571
  return {"w_sim": 0.6, "w_cov": 0.4, "verified": 75, "partial": 55, "kp_thr": 0.20}
572
 
573
 
574
+ <<<<<<< HEAD
575
  def mcq_partial_credit(student_level: str) -> dict:
576
  """
577
  Returns partial credit percentage for MCQ questions based on student level.
 
592
  return {"credit_per_question": 75, "passing_threshold": 75}
593
 
594
 
595
+ =======
596
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
597
  def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: float) -> Tuple[List[str], List[str], float]:
598
  covered, missing = [], []
599
  for kp in key_points:
 
611
  return covered, missing, coverage
612
 
613
 
614
+ <<<<<<< HEAD
615
 
616
  def infer_question_type_from_prompt(prompt: str, student_text: str = "") -> str:
617
+ =======
618
+ # =========================================================
619
+ # ✅ QUESTION TYPE INFERENCE + MCQ PARSING
620
+ # =========================================================
621
+ def infer_question_type_from_prompt(prompt: str) -> str:
622
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
623
  p = _norm(prompt)
624
 
625
  # Explicit markers - check for (mcq) first since it's common in parentheses
 
628
  if re.search(r"\btype\s*:\s*narrative\b", p) or re.search(r"\bquestion_type\s*:\s*narrative\b", p):
629
  return "narrative"
630
 
631
+ <<<<<<< HEAD
632
  # Heuristic: options A/B/C/D exist in prompt -> likely MCQ
633
  if re.search(r"\b(a|b|c|d)\s*[\)\.]\s+", p) or "option a" in p or "option b" in p:
634
  return "mcq"
 
646
  # If answer starts with A. or B. etc.
647
  if re.search(r"^[a-d]\.\s+", s.strip()):
648
  return "mcq"
649
+ =======
650
+ # Heuristic: options A/B/C/D exist -> likely MCQ
651
+ if re.search(r"\b(a|b|c|d)\s*[\)\.]\s+", p) or "option a" in p or "option b" in p:
652
+ return "mcq"
653
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
654
 
655
  return "narrative"
656
 
 
715
 
716
  # Check for correct answer (for MCQ)
717
  if current_type == 'mcq':
718
+ <<<<<<< HEAD
719
  # First check: is this line "Correct Answer(s):" with nothing after it?
720
  # If so, we need to look for the answer on the next line
721
  if re.search(r'^correct\s*answer\s*\(?s\)?\s*[:\.]?\s*$', line, re.IGNORECASE):
 
748
  else:
749
  # Try to extract first letter
750
  current_correct = correct_text[0].upper() if correct_text else None
751
+ =======
752
+ # Look for "Correct Answer(s):" or "Correct:" or "Answer:"
753
+ correct_match = re.search(r'(?:Correct\s*(?:Answer)?|Answer)[:.]\s*(?:[A-D]\.?\s*)?(.+)', line, re.IGNORECASE)
754
+ if correct_match and not current_correct:
755
+ current_correct = correct_match.group(1).strip()
756
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
757
 
758
  # Don't forget the last question
759
  if current_q is not None:
 
767
  # If no questions parsed, fall back to old behavior
768
  if not questions:
769
  qtype = infer_question_type_from_prompt(prompt)
770
+ <<<<<<< HEAD
771
  return [{'qid': extract_qid_from_prompt(prompt), 'type': qtype, 'question': prompt, 'correct_answer': None}]
772
+ =======
773
+ return [{'qid': 'Q1', 'type': qtype, 'question': prompt, 'correct_answer': None}]
774
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
775
 
776
  return questions
777
 
 
803
  return ""
804
 
805
 
806
+ <<<<<<< HEAD
807
  def extract_mcq_answers_with_qid(text: str) -> Dict[str, str]:
808
  """
809
  Extract MCQ answers WITH question numbers from student text.
 
859
  return results
860
 
861
 
862
+ =======
863
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
864
  def extract_correct_mcq_from_prompt(prompt: str) -> str:
865
  """
866
  This is IMPORTANT:
 
868
  - Correct: B
869
  - Answer: C
870
  - correct_option: D
871
+ <<<<<<< HEAD
872
  - Correct Answer(s): A. Devdatta
873
  or JSON: {"correct_option":"B"}
874
 
 
877
  - "Correct Answer(s): A. Devdatta"
878
  - "Correct: B"
879
  - "Answer: C"
880
+ =======
881
+ or JSON: {"correct_option":"B"}
882
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
883
  """
884
  p = (prompt or "").strip()
885
  if not p:
 
896
  except Exception:
897
  pass
898
 
899
+ <<<<<<< HEAD
900
  # Text prompt support - new format: "Correct Answer(s): A. Devdatta" or "Correct Answer: B"
901
  t = _norm(p)
902
 
 
919
  return m1c.group(1)
920
 
921
  # Pattern 2: "Correct: A" or "Answer: B" (original pattern)
922
+ =======
923
+ # Text prompt support
924
+ t = _norm(p)
925
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
926
  m = re.search(r"\b(correct|answer|ans)\s*[:\-]?\s*\(?\s*([a-d])\s*\)?\b", t)
927
  if m:
928
  return m.group(2)
 
930
  return ""
931
 
932
 
933
+ <<<<<<< HEAD
934
 
935
+ =======
936
+ # =========================================================
937
+ # ✅ ERP HELPERS
938
+ # =========================================================
939
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
940
  def _erp_get(params: dict) -> list:
941
  headers = {}
942
  if ERP_TOKEN:
 
968
  return "Medium"
969
 
970
 
971
+ <<<<<<< HEAD
972
 
973
  def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
974
  """
 
1037
  return ""
1038
 
1039
 
1040
+ =======
1041
+ # =========================================================
1042
+ # ✅ OCR + TEXT EXTRACTION
1043
+ # =========================================================
1044
+ def _preprocess_for_ocr(img: Image.Image) -> Image.Image:
1045
+ img = img.convert("L")
1046
+ img = ImageOps.autocontrast(img)
1047
+
1048
+ w, h = img.size
1049
+ if max(w, h) < 1600:
1050
+ scale = 1600 / max(w, h)
1051
+ img = img.resize((int(w * scale), int(h * scale)))
1052
+
1053
+ img = img.filter(ImageFilter.SHARPEN)
1054
+ img = img.point(lambda p: 255 if p > 170 else 0)
1055
+ return img
1056
+
1057
+
1058
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
1059
  def extract_text_from_image(image_bytes: bytes, filename: str = "unknown") -> str:
1060
  if not image_bytes or len(image_bytes) < 50:
1061
  raise HTTPException(status_code=400, detail=f"Invalid file: '{filename}' - empty/too small")
 
1072
  head = image_bytes[:12]
1073
  raise HTTPException(status_code=400, detail=f"Invalid image format: '{filename}' (header={head})")
1074
 
1075
+ <<<<<<< HEAD
1076
  # First try Google Cloud Vision (better for handwriting)
1077
  if vision_client:
1078
  gv_text = _extract_text_google_vision(image_bytes)
 
1080
  return _clean_extracted_text(gv_text)
1081
 
1082
  # Fallback to Tesseract with improved preprocessing
1083
+ =======
1084
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
1085
  try:
1086
  img = Image.open(io.BytesIO(image_bytes))
1087
  except Exception as e:
 
1089
 
1090
  img = _preprocess_for_ocr(img)
1091
 
1092
+ <<<<<<< HEAD
1093
  # Try multiple OCR configurations for better handwritten recognition
1094
  ocr_configs = [
1095
  "--oem 3 --psm 6", # Default
 
1118
  raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
1119
 
1120
  text = (best_text or "").strip()
1121
+ =======
1122
+ try:
1123
+ text = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6")
1124
+ except pytesseract.TesseractNotFoundError:
1125
+ raise HTTPException(status_code=500, detail="Tesseract OCR not found. Install it / fix path.")
1126
+ except Exception as e:
1127
+ raise HTTPException(status_code=500, detail=f"OCR failed: {e}")
1128
+
1129
+ text = (text or "").strip()
1130
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
1131
  text = re.sub(r"[ \t]+", " ", text)
1132
  return text
1133
 
 
1179
  return {"text": extracted, "used_ocr": False, "needs_ocr": True}
1180
  try:
1181
  used_ocr = True
1182
+ <<<<<<< HEAD
1183
  # Higher DPI for better handwritten OCR
1184
  pages = convert_from_bytes(pdf_bytes, dpi=300)
1185
  page_texts = []
 
1205
  if img:
1206
  img = _preprocess_for_ocr(img)
1207
  extracted = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6") or ""
1208
+ =======
1209
+ pages = convert_from_bytes(pdf_bytes, dpi=250)
1210
+ page_texts = []
1211
+ for img in pages:
1212
+ img = _preprocess_for_ocr(img)
1213
+ t = pytesseract.image_to_string(img, lang="eng", config="--oem 3 --psm 6") or ""
1214
+ if t.strip():
1215
+ page_texts.append(t)
1216
+ extracted = _clean_extracted_text("\n\n".join(page_texts))
1217
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
1218
  except Exception as e:
1219
  return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": True, "ocr_error": str(e)}
1220
 
1221
  return {"text": extracted, "used_ocr": used_ocr, "needs_ocr": False}
1222
 
1223
 
1224
+ <<<<<<< HEAD
1225
  def get_question_positions_from_pdf(pdf_bytes: bytes) -> Dict[int, List[Dict]]:
1226
  """
1227
  Detect question number positions in a PDF.
 
1494
  print(f"[ERROR] Failed to create annotated PDF: {e}")
1495
  return original_pdf_bytes
1496
 
1497
+ =======
1498
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
1499
  async def extract_text_from_upload(file: UploadFile) -> Dict[str, Any]:
1500
  filename = getattr(file, "filename", "") or "upload"
1501
  content_type = (getattr(file, "content_type", "") or "").lower()
 
1545
 
1546
 
1547
 
1548
+ <<<<<<< HEAD
1549
 
1550
+ =======
1551
+ # =========================================================
1552
+ # ✅ ROUTES
1553
+ # =========================================================
1554
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
1555
  @app.get("/health")
1556
  def health():
1557
  return {"status": "ok"}
 
1560
  @app.get("/health/llm")
1561
  def health_llm():
1562
  return {
1563
+ <<<<<<< HEAD
1564
  "ok": bool(gemini_client) and bool(GOOGLE_API_KEYS),
1565
  "gemini": {
1566
  "sdk_import_ok": genai is not None,
 
1568
  "num_keys_configured": len(GOOGLE_API_KEYS),
1569
  "current_key_index": current_key_index + 1 if GOOGLE_API_KEYS else 0,
1570
  "rate_limited_keys": list(rate_limited_keys),
1571
+ =======
1572
+ "ok": bool(gemini_client) and bool(GOOGLE_API_KEY),
1573
+ "gemini": {
1574
+ "sdk_import_ok": genai is not None,
1575
+ "configured": bool(GOOGLE_API_KEY),
1576
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
1577
  "client_ready": gemini_client is not None,
1578
  "model": GEMINI_MODEL,
1579
  "last_error": GEMINI_LAST_ERROR if GEMINI_LAST_ERROR else None,
 
1581
  }
1582
 
1583
 
1584
+ <<<<<<< HEAD
1585
  @app.get("/homework/annotated-url/{homework_id}/{student_id}")
1586
  async def get_annotated_pdf_url(
1587
  homework_id: int,
 
1994
  return ai_evaluate_per_question(prompt, student_text, student_level)
1995
 
1996
 
1997
+ =======
1998
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
1999
  @app.post("/homework/validate")
2000
  async def homework_validate(
2001
  student_id: int = Form(...),
2002
  homework_id: int = Form(...),
2003
+ <<<<<<< HEAD
2004
  student_file: UploadFile = File(...),
2005
  ):
2006
  # 0) Fetch ERP record -> get all fields automatically
 
2030
  if final_question_type not in ("mcq", "narrative", "mixed"):
2031
  final_question_type = infer_question_type_from_prompt(prompt, student_text)
2032
 
2033
+ =======
2034
+ sub_institute_id: int = Form(...),
2035
+ syear: str = Form(...),
2036
+ prompt: str = Form(...),
2037
+ student_file: UploadFile = File(...),
2038
+ ):
2039
+ # 0) Fetch ERP record -> get student_level automatically
2040
+ erp_row = fetch_student_record(homework_id, student_id)
2041
+ student_level = fetch_student_level_from_erp(erp_row)
2042
+ policy = level_policy(student_level)
2043
+
2044
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2045
  # 1) Infer question_type from prompt automatically (NO EXTRA FIELD)
2046
  # Try to parse mixed questions first
2047
  parsed_questions = parse_questions_from_prompt(prompt)
2048
  has_mcq = any(q.get('type') == 'mcq' for q in parsed_questions)
2049
  has_narrative = any(q.get('type') == 'narrative' for q in parsed_questions)
2050
 
2051
+ <<<<<<< HEAD
2052
  # Check if it's a PDF
2053
  is_pdf_submission = student_info.get("kind") == "pdf"
2054
 
 
2092
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2093
  original_file_bytes, homework_id, student_id, unreadable_result, 0, "Unreadable", student_level
2094
  )
2095
+ =======
2096
+ # Determine overall question type for backwards compatibility
2097
+ if has_mcq and has_narrative:
2098
+ question_type = "mixed"
2099
+ elif has_mcq:
2100
+ question_type = "mcq"
2101
+ elif has_narrative:
2102
+ question_type = "narrative"
2103
+ else:
2104
+ question_type = infer_question_type_from_prompt(prompt)
2105
+
2106
+ # 2) Extract student text
2107
+ student_info = await extract_text_from_upload(student_file)
2108
+ student_text = (student_info.get("text") or "").strip()
2109
+
2110
+ MIN_WORDS = 3 if question_type == "mcq" else 8
2111
+ if len(student_text.split()) < MIN_WORDS:
2112
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2113
  return {
2114
  "student_id": student_id,
2115
  "homework_id": homework_id,
2116
  "sub_institute_id": sub_institute_id,
2117
  "syear": syear,
2118
+ <<<<<<< HEAD
2119
  "question_type": final_question_type,
2120
  "student_level": student_level,
2121
  "status": "Unreadable",
 
2126
  "llm_used": False,
2127
  "question_marks": make_question_marks([]),
2128
  "annotated_pdf": annotated_pdf_filename,
2129
+ =======
2130
+ "question_type": question_type,
2131
+ "student_level": student_level,
2132
+ "status": "Unreadable",
2133
+ "match_percentage": 0,
2134
+ "ai_generated_remark": None,
2135
+ "rule_based_remark": "Answer text could not be read clearly. Please upload a clearer file.",
2136
+ "student_extracted_text": student_text,
2137
+ "llm_used": False,
2138
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2139
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2140
  }
2141
 
2142
  if student_info.get("needs_ocr") and not student_text:
2143
+ <<<<<<< HEAD
2144
  # Save annotated PDF even for unreadable (with status shown)
2145
  if is_pdf_submission and original_file_bytes:
2146
  # Show circle mark for scanned PDF that needs OCR
 
2148
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2149
  original_file_bytes, homework_id, student_id, ocr_result, 0, "Unreadable", student_level
2150
  )
2151
+ =======
2152
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2153
  return {
2154
  "student_id": student_id,
2155
  "homework_id": homework_id,
2156
  "sub_institute_id": sub_institute_id,
2157
  "syear": syear,
2158
+ <<<<<<< HEAD
2159
  "question_type": final_question_type,
2160
  "student_level": student_level,
2161
  "status": "Unreadable",
 
2171
 
2172
 
2173
  if final_question_type == "mixed":
2174
+ =======
2175
+ "question_type": question_type,
2176
+ "student_level": student_level,
2177
+ "status": "Unreadable",
2178
+ "match_percentage": 0,
2179
+ "ai_generated_remark": None,
2180
+ "rule_based_remark": "This PDF looks scanned. OCR is required (install pdf2image + poppler) or upload a clearer file.",
2181
+ "student_extracted_text": student_text,
2182
+ "llm_used": False,
2183
+ "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2184
+ }
2185
+
2186
+ # =========================================================
2187
+ # ✅ MIXED QUESTION TYPES CHECK (MCQ + Narrative)
2188
+ # =========================================================
2189
+ if question_type == "mixed":
2190
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2191
  # Process each question type separately and combine results
2192
  mcq_results = []
2193
  narrative_results = []
2194
 
2195
+ <<<<<<< HEAD
2196
  # Extract ALL MCQ answers from student text with question numbers
2197
  student_answers_by_qid = extract_mcq_answers_with_qid(student_text)
2198
 
 
2209
  if not chosen:
2210
  chosen = extract_mcq_choice(student_text)
2211
 
2212
+ =======
2213
+ # Extract MCQ answers from student text for each MCQ question
2214
+ for q in parsed_questions:
2215
+ if q.get('type') == 'mcq':
2216
+ # Try to find answer for this specific question in student's text
2217
+ # Use the question text to help locate the answer
2218
+ q_text = q.get('question', '')
2219
+ chosen = extract_mcq_choice(student_text)
2220
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2221
  correct = q.get('correct_answer') or extract_correct_mcq_from_prompt(q.get('question', ''))
2222
 
2223
  if correct and chosen:
2224
  is_correct = (chosen.lower().strip() == correct.lower().strip())
2225
  mcq_results.append({
2226
+ <<<<<<< HEAD
2227
  'qid': qid,
2228
  'correct': is_correct,
2229
  'chosen': chosen,
 
2238
  'chosen': '',
2239
  'correct_answer': correct,
2240
  'unattempted': True
2241
+ =======
2242
+ 'qid': q.get('qid'),
2243
+ 'correct': is_correct,
2244
+ 'chosen': chosen,
2245
+ 'correct_answer': correct
2246
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2247
  })
2248
 
2249
  # For narrative questions, use AI to generate reference
 
2300
  except Exception as e:
2301
  narrative_results = {'error': str(e)}
2302
 
2303
+ <<<<<<< HEAD
2304
  # Calculate combined score with level-based partial credit for MCQ
2305
  total_mcq = len(mcq_results)
2306
  correct_mcq = sum(1 for r in mcq_results if r.get('correct'))
 
2312
 
2313
  # Calculate MCQ score based on level (not just binary correct/incorrect)
2314
  mcq_score = (correct_mcq * credit_per_q) / max(1, total_mcq)
2315
+ =======
2316
+ # Calculate combined score
2317
+ total_mcq = len(mcq_results)
2318
+ correct_mcq = sum(1 for r in mcq_results if r.get('correct'))
2319
+ mcq_score = (correct_mcq / total_mcq * 100) if total_mcq > 0 else 0
2320
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2321
 
2322
  narrative_score = narrative_results.get('match_percentage', 0) if narrative_results else 0
2323
 
 
2339
  else:
2340
  status = "Needs Review"
2341
 
2342
+ <<<<<<< HEAD
2343
  # Save annotated PDF
2344
  if is_pdf_submission and original_file_bytes and mcq_results:
2345
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2346
  original_file_bytes, homework_id, student_id, mcq_results, final_score, status, student_level
2347
  )
2348
 
2349
+ =======
2350
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2351
  return {
2352
  "student_id": student_id,
2353
  "homework_id": homework_id,
 
2357
  "student_level": student_level,
2358
  "status": status,
2359
  "match_percentage": final_score,
2360
+ <<<<<<< HEAD
2361
  "submission_remarks": None,
2362
  "rule_based_remark": f"MCQ: {correct_mcq}/{total_mcq} correct. Narrative score: {narrative_score}%. (Level: {student_level}, Credit per Q: {credit_per_q}%)",
2363
+ =======
2364
+ "ai_generated_remark": None,
2365
+ "rule_based_remark": f"MCQ: {correct_mcq}/{total_mcq} correct. Narrative score: {narrative_score}%.",
2366
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2367
  "llm_used": bool(narrative_results and 'error' not in narrative_results),
2368
  "student_extracted_text": student_text,
2369
  "mcq_results": mcq_results,
2370
  "narrative_results": narrative_results,
2371
+ <<<<<<< HEAD
2372
  "question_marks": make_question_marks(mcq_results),
2373
  "annotated_pdf": annotated_pdf_filename,
2374
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
 
2525
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2526
  original_file_bytes, homework_id, student_id, no_correct_result, 0, "Needs Review", student_level
2527
  )
2528
+ =======
2529
+ "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2530
+ }
2531
+ correct = extract_correct_mcq_from_prompt(prompt)
2532
+ chosen = extract_mcq_choice(student_text)
2533
+
2534
+ if not correct:
2535
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2536
  return {
2537
  "student_id": student_id,
2538
  "homework_id": homework_id,
 
2542
  "student_level": student_level,
2543
  "status": "Needs Review",
2544
  "match_percentage": 0,
2545
+ <<<<<<< HEAD
2546
  "submission_remarks": None,
2547
  "rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
2548
  "student_extracted_text": student_text,
 
2559
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2560
  original_file_bytes, homework_id, student_id, no_chosen_result, 0, "Needs Review", student_level
2561
  )
2562
+ =======
2563
+ "ai_generated_remark": None,
2564
+ "rule_based_remark": "MCQ correct option not found in prompt. Include 'Correct: B' or similar in prompt.",
2565
+ "student_extracted_text": student_text,
2566
+ "llm_used": False,
2567
+ "debug": {"correct": correct, "chosen": chosen},
2568
+ "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2569
+ }
2570
+
2571
+ if not chosen:
2572
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2573
  return {
2574
  "student_id": student_id,
2575
  "homework_id": homework_id,
 
2579
  "student_level": student_level,
2580
  "status": "Needs Review",
2581
  "match_percentage": 0,
2582
+ <<<<<<< HEAD
2583
  "submission_remarks": None,
2584
  "rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
2585
  "student_extracted_text": student_text,
2586
  "llm_used": False,
2587
  "question_marks": make_question_marks([]),
2588
  "annotated_pdf": annotated_pdf_filename,
2589
+ =======
2590
+ "ai_generated_remark": None,
2591
+ "rule_based_remark": "Student option (A/B/C/D) not detected clearly.",
2592
+ "student_extracted_text": student_text,
2593
+ "llm_used": False,
2594
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2595
  "debug": {"correct": correct, "chosen": chosen},
2596
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2597
  }
2598
 
2599
+ <<<<<<< HEAD
2600
  # Only process MCQ validation if not redirecting to narrative
2601
  if not redirect_to_narrative:
2602
  is_correct = (chosen == correct)
 
2646
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2647
  original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
2648
  )
2649
+ =======
2650
+ is_correct = (chosen == correct)
2651
+ return {
2652
+ "student_id": student_id,
2653
+ "homework_id": homework_id,
2654
+ "sub_institute_id": sub_institute_id,
2655
+ "syear": syear,
2656
+ "question_type": "mcq",
2657
+ "student_level": student_level,
2658
+ "status": "Verified" if is_correct else "Needs Review",
2659
+ "match_percentage": 100 if is_correct else 0,
2660
+ "ai_generated_remark": None,
2661
+ "rule_based_remark": "Correct." if is_correct else f"Incorrect. Expected {correct.upper()}, got {chosen.upper()}.",
2662
+ "student_extracted_text": student_text,
2663
+ "llm_used": False,
2664
+ "debug": {"correct": correct, "chosen": chosen},
2665
+ "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2666
+ }
2667
+
2668
+ # =========================================================
2669
+ # ✅ NARRATIVE CHECK (Gemini generates reference)
2670
+ # =========================================================
2671
+ if gemini_client is None:
2672
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2673
  return {
2674
  "student_id": student_id,
2675
  "homework_id": homework_id,
 
2679
  "student_level": student_level,
2680
  "status": "Needs Review",
2681
  "match_percentage": 0,
2682
+ <<<<<<< HEAD
2683
  "submission_remarks": None,
2684
+ =======
2685
+ "ai_generated_remark": None,
2686
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2687
  "rule_based_remark": "Gemini not configured. Check /health/llm.",
2688
  "llm_used": False,
2689
  "llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
2690
  "student_extracted_text": student_text,
2691
+ <<<<<<< HEAD
2692
  "question_marks": make_question_marks([]),
2693
  "annotated_pdf": annotated_pdf_filename,
2694
+ =======
2695
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2696
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2697
  }
2698
 
 
2713
  )
2714
 
2715
  if not response_text:
2716
+ <<<<<<< HEAD
2717
  # Save annotated PDF
2718
  if is_pdf_submission and original_file_bytes:
2719
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2720
  original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
2721
  )
2722
+ =======
2723
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2724
  return {
2725
  "student_id": student_id,
2726
  "homework_id": homework_id,
 
2730
  "student_level": student_level,
2731
  "status": "Needs Review",
2732
  "match_percentage": 0,
2733
+ <<<<<<< HEAD
2734
  "submission_remarks": None,
2735
+ =======
2736
+ "ai_generated_remark": None,
2737
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2738
  "rule_based_remark": "Gemini failed. Check /health/llm.",
2739
  "llm_used": False,
2740
  "llm_error": parse_gemini_error(GEMINI_LAST_ERROR),
2741
  "student_extracted_text": student_text,
2742
+ <<<<<<< HEAD
2743
  "question_marks": make_question_marks([]),
2744
  "annotated_pdf": annotated_pdf_filename,
2745
+ =======
2746
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2747
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2748
  }
2749
 
 
2751
  m = re.search(r"\{.*\}", response_text, flags=re.S)
2752
  payload = json.loads(m.group(0) if m else response_text)
2753
  except Exception as e:
2754
+ <<<<<<< HEAD
2755
  # Save annotated PDF
2756
  if is_pdf_submission and original_file_bytes:
2757
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2758
  original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
2759
  )
2760
+ =======
2761
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2762
  return {
2763
  "student_id": student_id,
2764
  "homework_id": homework_id,
 
2768
  "student_level": student_level,
2769
  "status": "Needs Review",
2770
  "match_percentage": 0,
2771
+ <<<<<<< HEAD
2772
  "submission_remarks": None,
2773
+ =======
2774
+ "ai_generated_remark": None,
2775
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2776
  "rule_based_remark": "Gemini returned non-JSON output.",
2777
  "llm_used": False,
2778
  "llm_error": {"ok": False, "error_type": "GEMINI_BAD_JSON", "message": str(e), "raw": response_text[:800]},
2779
  "student_extracted_text": student_text,
2780
+ <<<<<<< HEAD
2781
  "question_marks": make_question_marks([]),
2782
  "annotated_pdf": annotated_pdf_filename,
2783
+ =======
2784
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2785
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2786
  }
2787
 
 
2792
  key_points = [str(x).strip() for x in key_points if str(x).strip()]
2793
 
2794
  if not ai_reference_answer:
2795
+ <<<<<<< HEAD
2796
  # Save annotated PDF
2797
  if is_pdf_submission and original_file_bytes:
2798
  annotated_pdf_filename, annotated_pdf_url = save_annotated_pdf(
2799
  original_file_bytes, homework_id, student_id, [], 0, "Needs Review", student_level
2800
  )
2801
+ =======
2802
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2803
  return {
2804
  "student_id": student_id,
2805
  "homework_id": homework_id,
 
2809
  "student_level": student_level,
2810
  "status": "Needs Review",
2811
  "match_percentage": 0,
2812
+ <<<<<<< HEAD
2813
  "submission_remarks": None,
2814
  "rule_based_remark": "AI returned empty reference answer.",
2815
  "llm_used": True,
2816
  "student_extracted_text": student_text,
2817
  "question_marks": make_question_marks([]),
2818
  "annotated_pdf": annotated_pdf_filename,
2819
+ =======
2820
+ "ai_generated_remark": None,
2821
+ "rule_based_remark": "AI returned empty reference answer.",
2822
+ "llm_used": True,
2823
+ "student_extracted_text": student_text,
2824
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2825
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2826
  }
2827
 
 
2852
  f"{remark_prompt}"
2853
  )
2854
 
2855
+ <<<<<<< HEAD
2856
  submission_remark = generate_gemini_response(
2857
+ =======
2858
+ ai_generated_remark = generate_gemini_response(
2859
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2860
  prompt=resp2_prompt,
2861
  system_prompt="You are a strict, helpful teacher. Be concise and factual.",
2862
  max_tokens=140,
 
2864
  )
2865
 
2866
  rule_based_remark = None
2867
+ <<<<<<< HEAD
2868
  remark_llm_used = bool(submission_remark)
2869
  remark_llm_error = None if submission_remark else (GEMINI_LAST_ERROR or "Unknown LLM error")
2870
 
2871
  if not submission_remark:
2872
+ =======
2873
+ remark_llm_used = bool(ai_generated_remark)
2874
+ remark_llm_error = None if ai_generated_remark else (GEMINI_LAST_ERROR or "Unknown LLM error")
2875
+
2876
+ if not ai_generated_remark:
2877
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2878
  if status == "Verified":
2879
  rule_based_remark = "Homework matches the expected answer well. Good coverage of the key ideas."
2880
  elif status == "Partial":
 
2882
  else:
2883
  rule_based_remark = "Homework does not match the expected answer enough. Please review the topic and resubmit with clearer, complete points."
2884
 
2885
+ <<<<<<< HEAD
2886
  # Save annotated PDF — evaluate EACH question individually against student text
2887
  per_question_results = build_per_question_results(
2888
  prompt, student_text, status, match_pct,
 
2896
  original_file_bytes, homework_id, student_id, per_question_results, match_pct, status, student_level, "narrative"
2897
  )
2898
 
2899
+ =======
2900
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2901
  return {
2902
  "student_id": student_id,
2903
  "homework_id": homework_id,
 
2907
  "student_level": student_level,
2908
  "status": status,
2909
  "match_percentage": match_pct,
2910
+ <<<<<<< HEAD
2911
  "submission_remarks": submission_remark if submission_remark else None,
2912
+ =======
2913
+ "ai_generated_remark": ai_generated_remark if ai_generated_remark else None,
2914
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2915
  "rule_based_remark": rule_based_remark,
2916
  "llm_used": True,
2917
  "remark_llm_used": remark_llm_used,
 
2921
  "key_points": key_points,
2922
  "key_points_covered": covered,
2923
  "key_points_missing": missing,
2924
+ <<<<<<< HEAD
2925
  "question_marks": make_question_marks(per_question_results),
2926
  "annotated_pdf": annotated_pdf_filename,
2927
+ =======
2928
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2929
  "debug": {
2930
  "similarity": sim,
2931
  "coverage": coverage,
2932
  "policy": policy,
2933
+ <<<<<<< HEAD
2934
  "per_question_results": per_question_results,
2935
  "erp_row_fields": list(erp_row.keys()) if erp_row else [],
2936
  "erp_student_level_raw": erp_row.get("student_level") or erp_row.get("level") or erp_row.get("difficulty") or erp_row.get("difficulty_level"),
2937
+ =======
2938
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
2939
  },
2940
  "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
2941
  }
db.py CHANGED
@@ -10,4 +10,7 @@ engine = create_engine(
10
 
11
  SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
12
  Base = declarative_base()
 
13
 
 
 
 
10
 
11
  SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
12
  Base = declarative_base()
13
+ <<<<<<< HEAD
14
 
15
+ =======
16
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
requirements.txt CHANGED
@@ -5,6 +5,7 @@ pillow
5
  sqlalchemy
6
  scikit-learn
7
  requests
 
8
  python-docx
9
  google-genai
10
  python-dotenv
@@ -15,3 +16,12 @@ google-cloud-vision
15
  easyocr
16
  pdf2image
17
  reportlab
 
 
 
 
 
 
 
 
 
 
5
  sqlalchemy
6
  scikit-learn
7
  requests
8
+ <<<<<<< HEAD
9
  python-docx
10
  google-genai
11
  python-dotenv
 
16
  easyocr
17
  pdf2image
18
  reportlab
19
+ =======
20
+ python-multipart
21
+ openai
22
+ google-generativeai
23
+ python-docx
24
+ pypdf
25
+ pdf2image
26
+ python-dotenv
27
+ >>>>>>> cdb5b148e5facdea1aec264a5b4d0b6293132b6e
scholar clone.lnk ADDED
Binary file (760 Bytes). View file