Moncey10 committed on
Commit
daa1cbd
·
1 Parent(s): 7049868

Initial deployment

Browse files
Files changed (6) hide show
  1. Dockerfile +19 -0
  2. README.md +5 -10
  3. answer_key.json +58 -0
  4. app.py +488 -0
  5. db.py +13 -0
  6. models.py +102 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Install Tesseract OCR + dependencies
RUN apt-get update && apt-get install -y \
    tesseract-ocr \
    libtesseract-dev \
    && rm -rf /var/lib/apt/lists/*

# Hugging Face Docker Spaces run the container as UID 1000, not root.
# Create that user and give it /app so the application can create its
# SQLite database file and rewrite answer_key.json at runtime.
RUN useradd --create-home --uid 1000 user

WORKDIR /app
RUN chown user:user /app

COPY --chown=user:user requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY --chown=user:user . /app

USER user

# Hugging Face Spaces expects 7860 by default
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,5 @@
1
- ---
2
- title: Homework Validation System
3
- emoji: 🐠
4
- colorFrom: gray
5
- colorTo: blue
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Homework Validation System
3
+ sdk: docker
4
+ app_port: 7860
5
+ ---
 
 
 
 
 
answer_key.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hw01": {
3
+ "questions": [
4
+ {
5
+ "qid": "Q1",
6
+ "type": "text",
7
+ "answer": "Artificial Intelligence is the simulation of human intelligence."
8
+ },
9
+ {
10
+ "qid": "Q2",
11
+ "type": "text",
12
+ "answer": "Machine Learning is a subset of AI that learns from data."
13
+ }
14
+ ]
15
+ },
16
+ "hw99": {
17
+ "questions": [
18
+ {
19
+ "qid": "Q1",
20
+ "type": "text",
21
+ "answer": "Artificial Intelligence is the simulation of human intelligence."
22
+ },
23
+ {
24
+ "qid": "Q2",
25
+ "type": "text",
26
+ "answer": "Machine Learning is a subset of AI that learns from data."
27
+ }
28
+ ]
29
+ },
30
+ "hw90": {
31
+ "questions": [
32
+ {
33
+ "qid": "Q1",
34
+ "type": "text",
35
+ "answer": "Artificial Intelligence is the simulation of human intelligence."
36
+ },
37
+ {
38
+ "qid": "Q2",
39
+ "type": "text",
40
+ "answer": "Machine Learning is a subset of AI that learns from data."
41
+ }
42
+ ]
43
+ },
44
+ "hw15": {
45
+ "questions": [
46
+ {
47
+ "qid": "Q1",
48
+ "type": "text",
49
+ "answer": "Artificial Intelligence is the simulation of human intelligence."
50
+ },
51
+ {
52
+ "qid": "Q2",
53
+ "type": "text",
54
+ "answer": "Machine Learning is a subset of AI that learns from data."
55
+ }
56
+ ]
57
+ }
58
+ }
app.py ADDED
@@ -0,0 +1,488 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import re
4
+ import json
5
+ from typing import List, Dict, Any
6
+
7
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
8
+ from PIL import Image
9
+ import pytesseract
10
+
11
+ from sklearn.feature_extraction.text import TfidfVectorizer
12
+ from sklearn.metrics.pairwise import cosine_similarity
13
+
14
+ from db import SessionLocal, engine, Base
15
+ from models import Student, HomeworkAssignment, HomeworkImage, Submission, Result, AuditLog
16
+
17
+
18
+
19
+ # =========================
20
+ # TESSERACT CONFIG (WINDOWS)
21
+ # =========================
22
+ # pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
23
+ # os.environ["TESSDATA_PREFIX"] = r"C:\Program Files\Tesseract-OCR\tessdata"
24
+
25
+
26
+ # =========================
27
+ # ANSWER KEY CONFIG
28
+ # =========================
29
+ ANSWER_KEY_PATH = "answer_key.json"
30
+
31
+
32
def load_answer_key(homework_id: str):
    """
    Return the answer key stored for ``homework_id`` in answer_key.json.

    answer_key.json format:
    {
      "hw01": {"questions":[...]},
      "hw02": {"questions":[...]}
    }

    Returns:
        The entry stored under ``homework_id``, or None when the file has no
        entry for that id.

    Raises:
        HTTPException: status 500 when the file is missing, is not valid JSON,
            or its top-level value is not a JSON object.
    """
    import logging  # local import keeps this block self-contained

    try:
        with open(ANSWER_KEY_PATH, "r", encoding="utf-8") as f:
            all_keys = json.load(f)
    except FileNotFoundError:
        raise HTTPException(status_code=500, detail="answer_key.json file missing")
    except json.JSONDecodeError:
        raise HTTPException(status_code=500, detail="answer_key.json is invalid JSON")

    # A list/number/string at the top level would otherwise fail on .get()
    # with an AttributeError instead of a clear error response.
    if not isinstance(all_keys, dict):
        raise HTTPException(status_code=500, detail="answer_key.json must contain a JSON object")

    # Diagnostics are kept, but at DEBUG level rather than printed on every call.
    log = logging.getLogger(__name__)
    log.debug("Available homework_ids in key: %s", list(all_keys.keys()))
    log.debug("Requested homework_id: %s", homework_id)

    return all_keys.get(homework_id)  # None if not found
54
+
55
+
56
+ # =========================
57
+ # OCR
58
+ # =========================
59
def extract_text_from_image(image_bytes: bytes) -> str:
    """
    Run Tesseract OCR over an in-memory image and return the recognised text.

    Args:
        image_bytes: Raw bytes of the uploaded image file.

    Raises:
        HTTPException: status 400 when the bytes cannot be opened as an image
            or OCR fails for any reason.
    """
    try:
        # The context manager releases the decoded image as soon as OCR is done.
        with Image.open(io.BytesIO(image_bytes)) as image:
            return pytesseract.image_to_string(image)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid image / OCR failed: {e}") from e
65
+
66
+
67
+ # =========================
68
+ # SEGMENTATION (Q1/Q2...)
69
+ # =========================
70
def segment_answers_by_question(text: str) -> Dict[str, str]:
    """
    Split OCR text into per-question answer blocks.

    A question marker is the letter Q (any case) followed by digits and an
    optional '.', ':' or '-'. Everything between one marker and the next
    (or the end of the text) is that question's answer, stripped of
    surrounding whitespace. Keys are always written as "Q<digits>"; a marker
    that appears twice keeps the later block.
    """
    normalized = text.replace("\r", "\n")
    marker = re.compile(r"\bQ\s*([0-9]+)\s*[\.\:\-]?", re.IGNORECASE)
    hits = list(marker.finditer(normalized))

    # Each answer stops where the following marker begins; the last one runs
    # to the end of the text.
    stops = [hit.start() for hit in hits[1:]] + [len(normalized)]

    answers: Dict[str, str] = {}
    for hit, stop in zip(hits, stops):
        answers[f"Q{hit.group(1)}"] = normalized[hit.end():stop].strip()
    return answers
84
+
85
+
86
+ # =========================
87
+ # CLEANING (remove UI junk)
88
+ # =========================
89
def clean_student_answer(raw_block: str) -> str:
    """
    Strip question text and editor/UI noise from one segmented answer block.

    Blank lines are dropped. The first line is removed when it looks like the
    question itself (ends with '?' or starts with what/why/how/define). The
    remaining lines are filtered for common screenshot noise and joined with
    single spaces.

    Lines that parse as a number are kept even though they contain no
    letters; otherwise numeric answers such as "3.14159" would be thrown away
    by the "mostly symbols/numbers" filter and graded as missing.
    """
    lines = [ln.strip() for ln in raw_block.splitlines() if ln.strip()]
    if not lines:
        return ""

    # remove question line if it looks like a question
    first = lines[0]
    if first.endswith("?") or first.lower().startswith(("what ", "why ", "how ", "define ")):
        lines = lines[1:]

    def is_number(token: str) -> bool:
        try:
            float(token)
        except ValueError:
            return False
        return True

    def is_noise(ln: str) -> bool:
        low = ln.lower()

        # skip common editor/UI noise
        if low.startswith("ln") and "col" in low:
            return True
        if "plain text" in low:
            return True
        if "c:\\users" in low or "cusers" in low:
            return True
        if low.endswith("%"):  # like 100%
            return True

        # a bare number is a legitimate (numeric) answer, not noise
        if is_number(ln):
            return False

        # skip lines mostly symbols/numbers
        letters = sum(ch.isalpha() for ch in ln)
        return letters < 3 and len(ln) > 3

    return " ".join(ln for ln in lines if not is_noise(ln)).strip()
120
+
121
+
122
+ # =========================
123
+ # SIMILARITY (TF-IDF cosine)
124
+ # =========================
125
def semantic_similarity(a: str, b: str) -> float:
    """
    Return the TF-IDF cosine similarity of two texts.

    Both texts are stripped and lower-cased first. The result is 0.0 when
    either text is empty, or when no usable token can be extracted from the
    pair.
    """
    a = a.strip().lower()
    b = b.strip().lower()
    if not a or not b:
        return 0.0
    try:
        X = TfidfVectorizer().fit_transform([a, b])
    except ValueError:
        # The vectorizer raises on an empty vocabulary, e.g. when both texts
        # contain only single-character tokens; treat that as "no overlap"
        # instead of failing the whole grading run.
        return 0.0
    return float(cosine_similarity(X[0], X[1])[0][0])
133
+
134
+
135
def best_sentence_similarity(student_text: str, expected_text: str) -> float:
    """
    Score the student text by its best-matching fragment.

    The student text is split on full stops and newlines; each non-empty
    fragment is compared with the expected text and the highest similarity
    is returned (0.0 when there is no fragment at all).
    """
    fragments = (chunk.strip() for chunk in re.split(r"[.\n]+", student_text))
    return max(
        (semantic_similarity(frag, expected_text) for frag in fragments if frag),
        default=0.0,
    )
142
+
143
+
144
+ # =========================
145
+ # VALIDATION
146
+ # =========================
147
def validate_against_key(
    homework_id: str,
    segmented_answers: Dict[str, str],
    similarity_threshold: float = 0.80,
) -> Dict[str, Any]:
    """
    Grade segmented student answers against the stored answer key.

    Args:
        homework_id: Id used to look up the answer key.
        segmented_answers: Mapping of question id (e.g. "Q1") to the raw
            answer block produced by segmentation.
        similarity_threshold: Minimum best-sentence similarity for a text
            answer that is not an exact match to count as correct.

    Returns:
        A report dict. When no answer key exists for ``homework_id`` the
        status is "NO_ANSWER_KEY" and nothing is graded; otherwise the status
        is "GRADED" with totals, ``overall_score`` (correct / total, 0.0 for
        an empty key) and one ``per_question`` entry per key question.
    """
    key = load_answer_key(homework_id)

    # Allow ANY homework_id: if key missing, do OCR+segmentation only
    if key is None:
        return {
            "status": "NO_ANSWER_KEY",
            "total": 0,
            "correct": 0,
            "overall_score": None,
            "per_question": [],
            "message": f"No answer key found for homework_id={homework_id}. Stored OCR + segmentation only."
        }

    questions = key.get("questions", [])
    results = []
    correct = 0

    for q in questions:
        qid = q.get("qid", "").strip()
        qtype = q.get("type", "text")
        expected_raw = q.get("answer", "")

        raw_student = segmented_answers.get(qid, "").strip()
        student_clean = clean_student_answer(raw_student)

        # Fields shared by every per-question entry, in the reported order.
        entry = {
            "qid": qid,
            "expected": expected_raw,
            "student_answer": raw_student,
            "cleaned_answer_used_for_check": student_clean,
        }

        # Missing
        if not student_clean:
            results.append({**entry, "is_correct": False, "confidence": 0.0, "reason": "missing"})
            continue

        # Numeric tolerance
        if qtype == "numeric":
            try:
                student_num = float(student_clean)
                expected_num = float(expected_raw)
                tol = float(q.get("tolerance", 0.0))
            except (TypeError, ValueError):
                # Only conversion failures are a parse failure; anything else
                # (e.g. KeyboardInterrupt) must not be swallowed here.
                results.append({
                    **entry,
                    "is_correct": False,
                    "confidence": 0.0,
                    "reason": "numeric_parse_failed"
                })
                continue

            is_correct = abs(student_num - expected_num) <= tol
            if is_correct:
                correct += 1

            results.append({
                **entry,
                "is_correct": is_correct,
                "confidence": 1.0 if is_correct else 0.0,
                "reason": "tolerance_check",
                "tolerance": tol
            })
            continue

        # Text: exact OR best-sentence semantic match
        expected_text = str(expected_raw).strip()
        student_text = student_clean.strip()

        if student_text.lower() == expected_text.lower():
            is_correct = True
            confidence = 1.0
            reason = "exact_match"
        else:
            sim = best_sentence_similarity(student_text, expected_text)
            confidence = sim
            is_correct = sim >= similarity_threshold
            reason = "semantic_match" if is_correct else "semantic_mismatch"

        if is_correct:
            correct += 1

        results.append({
            **entry,
            "is_correct": is_correct,
            "confidence": confidence,
            "reason": reason
        })

    total = len(questions)
    return {
        "status": "GRADED",
        "total": total,
        "correct": correct,
        "overall_score": (correct / total) if total else 0.0,
        "per_question": results
    }
256
+
257
+
258
+ # =========================
259
+ # FASTAPI APP + DB TABLES
260
+ # =========================
261
+ app = FastAPI(title="Homework Validation System")
262
+ Base.metadata.create_all(bind=engine)
263
+
264
+
265
+ @app.get("/health")
266
+ def health():
267
+ return {"status": "ok"}
268
+
269
@app.post("/submit")
async def submit_homework(
    student_id: str = Form(...),
    homework_id: str = Form(...),
    images: List[UploadFile] = File(...)
):
    """
    Accept a homework submission, OCR every image and grade it.

    The student and homework rows are created on first use, a Submission row
    is stored, and for each image a HomeworkImage row and a Result row
    (extracted text, segmented answers, validation report) are saved.

    Raises:
        HTTPException: 400 for blank ids, no images, unreadable images or an
            image with no detectable question numbers; 500 for any other
            failure. On failure the submission (if already created) is marked
            "failed" and an ERROR audit entry is written.
    """
    import logging  # local import keeps this block self-contained

    if not student_id.strip() or not homework_id.strip():
        raise HTTPException(status_code=400, detail="student_id and homework_id are required")
    if not images:
        raise HTTPException(status_code=400, detail="At least one image is required")

    db = SessionLocal()
    submission = None  # set once the Submission row exists

    def mark_failed(message: str) -> None:
        """Best-effort: flag the submission as failed and record why."""
        try:
            # A failed flush/commit leaves the session unusable until it is
            # rolled back, so do that before writing the failure record.
            db.rollback()
            if submission is not None:
                submission.status = "failed"
                db.add(submission)
                db.add(AuditLog(submission_id=submission.id, level="ERROR", message=message))
                db.commit()
        except Exception:
            # Still best-effort, but no longer silent.
            logging.getLogger(__name__).exception("Could not mark submission as failed")
            db.rollback()

    try:
        # -----------------------------
        # 1) UPSERT Student
        # -----------------------------
        student = db.query(Student).filter(Student.student_id == student_id).first()
        if not student:
            student = Student(student_id=student_id)
            db.add(student)
            db.commit()
            db.refresh(student)

        # -----------------------------
        # 2) UPSERT HomeworkAssignment
        # -----------------------------
        hw = db.query(HomeworkAssignment).filter(HomeworkAssignment.homework_id == homework_id).first()
        if not hw:
            hw = HomeworkAssignment(homework_id=homework_id)
            db.add(hw)
            db.commit()
            db.refresh(hw)

        # -----------------------------
        # 3) Create Submission
        # -----------------------------
        submission = Submission(
            student_id=student_id,
            homework_id=homework_id,
            student_ref_id=student.id,
            homework_ref_id=hw.id,
            status="processed"
        )
        db.add(submission)
        db.commit()
        db.refresh(submission)

        extracted_data = []

        # -----------------------------
        # 4) For each image:
        #    save image row + OCR + segment + validate + result row
        # -----------------------------
        for img in images:
            # store image metadata row (required by plan)
            img_row = HomeworkImage(
                submission_id=submission.id,
                filename=img.filename,
                content_type=img.content_type
            )
            db.add(img_row)
            db.commit()

            content = await img.read()

            # OCR
            text = extract_text_from_image(content)

            # Segment
            segmented = segment_answers_by_question(text)

            # Reject invalid submissions (no Q1/Q2...)
            if not segmented:
                raise HTTPException(
                    status_code=400,
                    detail=f"No question numbers detected in {img.filename}. Expected Q1/Q2 format."
                )

            # Validate
            validation_report = validate_against_key(homework_id, segmented)

            # Save result row
            result_row = Result(
                submission_id=submission.id,
                filename=img.filename,
                extracted_text=text,
                segmented_answers_json=json.dumps(segmented, ensure_ascii=False),
                validation_json=json.dumps(validation_report, ensure_ascii=False)
            )
            db.add(result_row)
            db.commit()

            extracted_data.append({
                "filename": img.filename,
                "extracted_text": text,
                "segmented_answers": segmented,
                "validation": validation_report
            })

        db.add(AuditLog(submission_id=submission.id, level="INFO", message="Submission processed successfully"))
        db.commit()

        return {
            "student_id": student_id,
            "homework_id": homework_id,
            "submission_id": submission.id,
            "extracted_data": extracted_data,
            "message": "OCR completed. Next step: answer extraction + validation."
        }

    except HTTPException as he:
        # mark submission failed if created, then let the original error through
        mark_failed(str(he.detail))
        raise

    except Exception as e:
        mark_failed(str(e))
        raise HTTPException(status_code=500, detail=f"Internal error: {e}") from e

    finally:
        db.close()
407
+
408
+
409
+
410
@app.get("/submissions")
def list_submissions():
    """
    Return the 20 most recent submissions, newest first.

    Each item carries the submission id, student id, homework id, status and
    creation time (as a string).
    """
    db = SessionLocal()
    try:
        items = db.query(Submission).order_by(Submission.id.desc()).limit(20).all()
        # Build the response while the session is still open; the finally
        # clause closes it even if the query raises.
        return [
            {
                "id": s.id,
                "student_id": s.student_id,
                "homework_id": s.homework_id,
                "status": s.status,
                "created_at": str(s.created_at)
            }
            for s in items
        ]
    finally:
        db.close()
425
+
426
+
427
@app.post("/admin/answer-key")
def upsert_answer_key(homework_id: str = Form(...), answer_key_json: str = Form(...)):
    """
    Upload or update answer key for a given homework_id.
    answer_key_json should be JSON string like:
    {"questions":[{"qid":"Q1","type":"text","answer":"..."}]}

    Raises:
        HTTPException: 400 when homework_id is blank, answer_key_json is not
            valid JSON, or it is not an object with a 'questions' list;
            500 when the existing answer_key.json cannot be used.
    """
    # NOTE(review): no authentication is visible on this admin endpoint - confirm
    # that access is restricted elsewhere before exposing it publicly.
    if not homework_id.strip():
        raise HTTPException(status_code=400, detail="homework_id is required")

    try:
        new_key = json.loads(answer_key_json)
    except json.JSONDecodeError:
        raise HTTPException(status_code=400, detail="answer_key_json must be valid JSON")

    # Check the container type first: `"questions" in 5` raises TypeError and
    # a bare JSON string would pass the membership test by substring match.
    if not isinstance(new_key, dict) or not isinstance(new_key.get("questions"), list):
        raise HTTPException(status_code=400, detail="Invalid key format: must contain 'questions' list")

    # Load existing file (or create new)
    try:
        with open(ANSWER_KEY_PATH, "r", encoding="utf-8") as f:
            all_keys = json.load(f)
    except FileNotFoundError:
        all_keys = {}
    except json.JSONDecodeError:
        raise HTTPException(status_code=500, detail="answer_key.json is invalid JSON")

    if not isinstance(all_keys, dict):
        raise HTTPException(status_code=500, detail="answer_key.json must contain a JSON object")

    all_keys[homework_id] = new_key

    with open(ANSWER_KEY_PATH, "w", encoding="utf-8") as f:
        json.dump(all_keys, f, ensure_ascii=False, indent=2)

    return {"status": "ok", "message": f"Answer key saved for {homework_id}", "questions": len(new_key["questions"])}
456
+
457
+
458
@app.post("/admin/regrade/{submission_id}")
def regrade_submission(submission_id: int):
    """
    Re-run validation for every stored result of one submission.

    The saved segmented answers are graded again against the current answer
    key and each result's validation JSON is overwritten. A row that fails to
    regrade gets an ERROR audit entry and is skipped; an INFO audit entry
    records how many rows were updated.

    Raises:
        HTTPException: 404 when the submission does not exist or has no
            result rows.
    """
    db = SessionLocal()
    try:
        sub = db.query(Submission).filter(Submission.id == submission_id).first()
        if not sub:
            raise HTTPException(status_code=404, detail="Submission not found")

        results = db.query(Result).filter(Result.submission_id == submission_id).all()
        if not results:
            raise HTTPException(status_code=404, detail="No results found for this submission")

        updated = 0
        for r in results:
            try:
                segmented = json.loads(r.segmented_answers_json or "{}")
                validation_report = validate_against_key(sub.homework_id, segmented)

                r.validation_json = json.dumps(validation_report, ensure_ascii=False)
                db.add(r)
                updated += 1
            except Exception as e:
                db.add(AuditLog(submission_id=submission_id, level="ERROR", message=f"Regrade failed: {e}"))

        db.add(AuditLog(submission_id=submission_id, level="INFO", message=f"Regraded {updated} result rows"))
        db.commit()

        return {"status": "ok", "submission_id": submission_id, "updated_results": updated}
    finally:
        # Runs on every exit path, including the 404s and a failed commit.
        db.close()
db.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base

# Keep SQLite for now (works locally + for demo).
# The DATABASE_URL environment variable, when set, overrides the default so a
# deployment can point at another path or database without a code change.
DATABASE_URL = os.environ.get("DATABASE_URL", "sqlite:///./homework.db")

# check_same_thread is needed for SQLite + FastAPI; it is a SQLite-only
# connect argument, so it is passed only for SQLite URLs.
_connect_args = {"check_same_thread": False} if DATABASE_URL.startswith("sqlite") else {}

engine = create_engine(
    DATABASE_URL,
    connect_args=_connect_args
)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
models.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey, UniqueConstraint
2
+ from sqlalchemy.orm import relationship
3
+ from sqlalchemy.sql import func
4
+ from db import Base
5
+
6
+
7
class Student(Base):
    """A student, identified by a unique ``student_id`` string."""
    __tablename__ = "students"

    # Integer surrogate key; Submission.student_ref_id points at it.
    id = Column(Integer, primary_key=True, index=True)
    student_id = Column(String(100), unique=True, index=True, nullable=False)  # like "st01"
    name = Column(String(200), nullable=True)
    email = Column(String(200), nullable=True)

    # Filled in by the database at insert time.
    created_at = Column(DateTime(timezone=True), server_default=func.now())
16
+
17
+
18
class HomeworkAssignment(Base):
    """A homework assignment, identified by a unique ``homework_id`` string."""
    __tablename__ = "homework_assignments"

    # Integer surrogate key; Submission.homework_ref_id points at it.
    id = Column(Integer, primary_key=True, index=True)
    homework_id = Column(String(100), unique=True, index=True, nullable=False)  # like "hw01"
    title = Column(String(255), nullable=True)
    description = Column(Text, nullable=True)

    # Filled in by the database at insert time.
    created_at = Column(DateTime(timezone=True), server_default=func.now())
27
+
28
+
29
class Submission(Base):
    """
    One upload of homework images by a student for a homework.

    Keeps the existing string columns (student_id/homework_id) so the current
    API code works, and also carries optional FK links for a more proper
    schema.
    """
    __tablename__ = "submissions"

    id = Column(Integer, primary_key=True, index=True)

    # Existing fields used by your current API:
    student_id = Column(String(100), index=True, nullable=False)
    homework_id = Column(String(100), index=True, nullable=False)

    # Optional normalized references (can be filled later; not required now)
    student_ref_id = Column(Integer, ForeignKey("students.id"), nullable=True)
    homework_ref_id = Column(Integer, ForeignKey("homework_assignments.id"), nullable=True)

    status = Column(String(50), default="processed")  # processed/failed/etc.
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    # Relationships (optional usage); lazy="joined" loads them together with
    # the submission row.
    student = relationship("Student", lazy="joined")
    homework = relationship("HomeworkAssignment", lazy="joined")
52
+
53
+
54
class HomeworkImage(Base):
    """
    Store each uploaded image record (required by plan).
    We store filename + content_type + optional disk_path.
    If you later want, you can store image bytes too (BLOB), but not needed now.
    """
    __tablename__ = "homework_images"

    id = Column(Integer, primary_key=True, index=True)
    # Submission this image was uploaded with.
    submission_id = Column(Integer, ForeignKey("submissions.id"), nullable=False)

    filename = Column(String(255), nullable=False)
    content_type = Column(String(100), nullable=True)
    disk_path = Column(String(500), nullable=True)  # if you save file to disk later

    # Filled in by the database at insert time.
    created_at = Column(DateTime(timezone=True), server_default=func.now())
70
+
71
+
72
class Result(Base):
    """OCR and grading output for one image of a submission."""
    __tablename__ = "results"

    id = Column(Integer, primary_key=True, index=True)
    submission_id = Column(Integer, ForeignKey("submissions.id"), nullable=False)

    # Keep your existing filename field (ties to image filename)
    filename = Column(String(255), nullable=False)

    # Raw OCR text, plus the segmented answers and the validation report
    # stored as JSON strings.
    extracted_text = Column(Text, nullable=True)
    segmented_answers_json = Column(Text, nullable=True)
    validation_json = Column(Text, nullable=True)

    created_at = Column(DateTime(timezone=True), server_default=func.now())

    __table_args__ = (
        # Prevent duplicate results for same submission+filename
        UniqueConstraint("submission_id", "filename", name="uq_result_submission_filename"),
    )
91
+
92
+
93
class AuditLog(Base):
    """A log entry (INFO or ERROR), optionally tied to a submission."""
    __tablename__ = "audit_logs"

    id = Column(Integer, primary_key=True, index=True)
    # Nullable, so an entry does not have to belong to a submission.
    submission_id = Column(Integer, ForeignKey("submissions.id"), nullable=True)

    level = Column(String(20), default="INFO")  # INFO/ERROR
    message = Column(Text, nullable=False)

    # Filled in by the database at insert time.
    created_at = Column(DateTime(timezone=True), server_default=func.now())