Moncey10 committed on
Commit
b370ada
·
1 Parent(s): 15bc429

new update

Browse files
Files changed (1) hide show
  1. app.py +218 -5
app.py CHANGED
@@ -249,8 +249,8 @@ def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: fl
249
  def infer_question_type_from_prompt(prompt: str) -> str:
250
  p = _norm(prompt)
251
 
252
- # Explicit markers (recommended)
253
- if re.search(r"\btype\s*:\s*mcq\b", p) or re.search(r"\bquestion_type\s*:\s*mcq\b", p):
254
  return "mcq"
255
  if re.search(r"\btype\s*:\s*narrative\b", p) or re.search(r"\bquestion_type\s*:\s*narrative\b", p):
256
  return "narrative"
@@ -262,6 +262,88 @@ def infer_question_type_from_prompt(prompt: str) -> str:
262
  return "narrative"
263
 
264
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  def extract_mcq_choice(text: str) -> str:
266
  """
267
  Extract chosen option from student text:
@@ -555,7 +637,20 @@ async def homework_validate(
555
  policy = level_policy(student_level)
556
 
557
  # 1) Infer question_type from prompt automatically (NO EXTRA FIELD)
558
- question_type = infer_question_type_from_prompt(prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
  # 2) Extract student text
561
  student_info = await extract_text_from_upload(student_file)
@@ -597,9 +692,127 @@ async def homework_validate(
597
  }
598
 
599
  # =========================================================
600
- # ✅ MCQ CHECK (deterministic, no Gemini)
601
  # =========================================================
602
- if question_type == "mcq":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
603
  correct = extract_correct_mcq_from_prompt(prompt)
604
  chosen = extract_mcq_choice(student_text)
605
 
 
249
  def infer_question_type_from_prompt(prompt: str) -> str:
250
  p = _norm(prompt)
251
 
252
+ # Explicit markers - check for (mcq) first since it's common in parentheses
253
+ if re.search(r"\(mcq\)", p) or re.search(r"\btype\s*:\s*mcq\b", p) or re.search(r"\bquestion_type\s*:\s*mcq\b", p):
254
  return "mcq"
255
  if re.search(r"\btype\s*:\s*narrative\b", p) or re.search(r"\bquestion_type\s*:\s*narrative\b", p):
256
  return "narrative"
 
262
  return "narrative"
263
 
264
 
265
# Header of a question line: "Q1:", "Q 2.", "Question 3:" (case-insensitive).
_QUESTION_HEADER_RE = re.compile(r'^(Q\s*\d+|Question\s*\d+)[.:]\s*(.*)', re.IGNORECASE)

# "Correct:", "Correct Answer:", "Answer:" plus their "(s)" plural forms.
# Captures everything after the label; option-letter cleanup happens separately
# so that answers such as "Apple" do not lose their first letter.
_ANSWER_LABEL_RE = re.compile(
    r'(?:Correct\s*(?:Answer)?|Answer)(?:\(s\))?[:.]\s*(.+)', re.IGNORECASE
)

# The whole answer is just an option letter, optionally followed by "." or ")".
_BARE_OPTION_RE = re.compile(r'([A-D])\s*[.)]?', re.IGNORECASE)

# Leading option label such as "B. ", "B) " or "B " in front of the answer text.
_OPTION_PREFIX_RE = re.compile(r'^[A-D](?:\s*[.)]\s*|\s+)', re.IGNORECASE)


def _detect_question_type(line_lower: str):
    """Return 'mcq' or 'narrative' if the lower-cased line carries a type marker, else None."""
    if '(mcq)' in line_lower or 'multiple choice' in line_lower or 'type: mcq' in line_lower:
        return 'mcq'
    # 'type: narrative' is already covered by the plain 'narrative' substring test.
    if 'narrative' in line_lower:
        return 'narrative'
    return None


def _clean_correct_answer(raw: str) -> str:
    """Normalise the text found after an answer label.

    "B", "B." and "B)" yield the letter as written; "B. Paris" / "B Paris"
    yield "Paris"; anything else (e.g. "Apple") is returned unchanged.
    """
    raw = raw.strip()
    bare = _BARE_OPTION_RE.fullmatch(raw)
    if bare:
        return bare.group(1)
    return _OPTION_PREFIX_RE.sub('', raw, count=1).strip()


def parse_questions_from_prompt(prompt: str) -> List[Dict[str, Any]]:
    """
    Parse individual questions from the prompt, detecting MCQ vs Narrative for each.

    The prompt is scanned line by line. A line starting with "Q<n>:" / "Q<n>." /
    "Question <n>:" opens a new question; every following non-empty line is
    appended (space-joined) to that question's text until the next header.
    Lines that appear before the first header are ignored.

    Returns a list of dicts with keys:
        qid            - the header label as written (e.g. "Q1", "Question 2")
        type           - 'mcq', 'narrative', or None when no marker was found
        question       - the question text, continuation lines included
        correct_answer - for MCQ only: first answer found on a continuation
                         line after an "Answer:" / "Correct:" /
                         "Correct Answer(s):" label, else None

    If no question header is found at all, a single 'Q1' entry covering the
    whole prompt is returned, typed via infer_question_type_from_prompt().
    """
    questions: List[Dict[str, Any]] = []
    current = None  # the question dict currently being filled, if any

    for raw_line in prompt.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        header = _QUESTION_HEADER_RE.match(line)
        if header:
            # A new header closes the question collected so far.
            if current is not None:
                questions.append(current)
            current = {
                'qid': header.group(1).strip(),
                'type': None,
                'question': header.group(2).strip(),
                'correct_answer': None,
            }
        elif current is not None:
            current['question'] += ' ' + line
        else:
            # Preamble before the first question header carries no state forward.
            continue

        # The first type marker seen for a question wins.
        if current['type'] is None:
            current['type'] = _detect_question_type(line.lower())

        # Answers are only looked for on continuation lines of MCQ questions;
        # the header line itself is not scanned.
        if header is None and current['type'] == 'mcq' and not current['correct_answer']:
            answer = _ANSWER_LABEL_RE.search(line)
            if answer:
                current['correct_answer'] = _clean_correct_answer(answer.group(1))

    # Don't forget the last question.
    if current is not None:
        questions.append(current)

    # If no questions were parsed, fall back to the single-question behavior.
    if not questions:
        qtype = infer_question_type_from_prompt(prompt)
        return [{'qid': 'Q1', 'type': qtype, 'question': prompt, 'correct_answer': None}]

    return questions
345
+
346
+
347
  def extract_mcq_choice(text: str) -> str:
348
  """
349
  Extract chosen option from student text:
 
637
  policy = level_policy(student_level)
638
 
639
  # 1) Infer question_type from prompt automatically (NO EXTRA FIELD)
640
+ # Try to parse mixed questions first
641
+ parsed_questions = parse_questions_from_prompt(prompt)
642
+ has_mcq = any(q.get('type') == 'mcq' for q in parsed_questions)
643
+ has_narrative = any(q.get('type') == 'narrative' for q in parsed_questions)
644
+
645
+ # Determine overall question type for backwards compatibility
646
+ if has_mcq and has_narrative:
647
+ question_type = "mixed"
648
+ elif has_mcq:
649
+ question_type = "mcq"
650
+ elif has_narrative:
651
+ question_type = "narrative"
652
+ else:
653
+ question_type = infer_question_type_from_prompt(prompt)
654
 
655
  # 2) Extract student text
656
  student_info = await extract_text_from_upload(student_file)
 
692
  }
693
 
694
  # =========================================================
695
+ # ✅ MIXED QUESTION TYPES CHECK (MCQ + Narrative)
696
  # =========================================================
697
+ if question_type == "mixed":
698
+ # Process each question type separately and combine results
699
+ mcq_results = []
700
+ narrative_results = []
701
+
702
+ # Extract MCQ answers from student text for each MCQ question
703
+ for q in parsed_questions:
704
+ if q.get('type') == 'mcq':
705
+ # Try to find answer for this specific question in student's text
706
+ # Use the question text to help locate the answer
707
+ q_text = q.get('question', '')
708
+ chosen = extract_mcq_choice(student_text)
709
+ correct = q.get('correct_answer') or extract_correct_mcq_from_prompt(q.get('question', ''))
710
+
711
+ if correct and chosen:
712
+ is_correct = (chosen.lower().strip() == correct.lower().strip())
713
+ mcq_results.append({
714
+ 'qid': q.get('qid'),
715
+ 'correct': is_correct,
716
+ 'chosen': chosen,
717
+ 'correct_answer': correct
718
+ })
719
+
720
+ # For narrative questions, use AI to generate reference
721
+ narrative_questions = [q for q in parsed_questions if q.get('type') == 'narrative']
722
+
723
+ if narrative_questions and gemini_client:
724
+ # Combine narrative questions into one prompt for AI
725
+ narrative_prompt_text = "\n".join([
726
+ f"{q.get('qid')}: {q.get('question')}" for q in narrative_questions
727
+ ])
728
+
729
+ ai_prompt = (
730
+ f"STUDENT_LEVEL: {student_level}\n"
731
+ f"QUESTIONS:\n{narrative_prompt_text}\n\n"
732
+ 'Return ONLY valid JSON with keys: {"ai_reference_answer": string, "key_points": [string, ...]}.'
733
+ )
734
+
735
+ response_text = generate_gemini_response(
736
+ prompt=ai_prompt,
737
+ system_prompt=(
738
+ "Generate correct reference answers for homework evaluation. "
739
+ "Keep it aligned with the student level. Output strict JSON only."
740
+ ),
741
+ max_tokens=650,
742
+ temperature=0.3,
743
+ )
744
+
745
+ if response_text:
746
+ try:
747
+ m = re.search(r'\{.*\}', response_text, flags=re.S)
748
+ payload = json.loads(m.group(0) if m else response_text)
749
+
750
+ ai_reference_answer = (payload.get("ai_reference_answer") or "").strip()
751
+ key_points = payload.get("key_points") or []
752
+
753
+ if isinstance(key_points, list):
754
+ key_points = [str(x).strip() for x in key_points if str(x).strip()]
755
+
756
+ sim = cosine_sim(student_text, ai_reference_answer)
757
+ covered, missing, coverage = keypoint_coverage(
758
+ student_text, key_points, kp_threshold=policy["kp_thr"]
759
+ )
760
+
761
+ final = policy["w_sim"] * sim + policy["w_cov"] * coverage
762
+ match_pct = int(round(final * 100))
763
+
764
+ narrative_results = {
765
+ 'similarity': sim,
766
+ 'coverage': coverage,
767
+ 'match_percentage': match_pct,
768
+ 'key_points_covered': covered,
769
+ 'key_points_missing': missing
770
+ }
771
+ except Exception as e:
772
+ narrative_results = {'error': str(e)}
773
+
774
+ # Calculate combined score
775
+ total_mcq = len(mcq_results)
776
+ correct_mcq = sum(1 for r in mcq_results if r.get('correct'))
777
+ mcq_score = (correct_mcq / total_mcq * 100) if total_mcq > 0 else 0
778
+
779
+ narrative_score = narrative_results.get('match_percentage', 0) if narrative_results else 0
780
+
781
+ # Weight: 50% MCQ, 50% Narrative (if both exist)
782
+ if total_mcq > 0 and narrative_results and 'error' not in narrative_results:
783
+ final_score = int((mcq_score + narrative_score) / 2)
784
+ elif total_mcq > 0:
785
+ final_score = mcq_score
786
+ elif narrative_results and 'error' not in narrative_results:
787
+ final_score = narrative_score
788
+ else:
789
+ final_score = 0
790
+
791
+ # Determine status
792
+ if final_score >= policy["verified"]:
793
+ status = "Verified"
794
+ elif final_score >= policy["partial"]:
795
+ status = "Partial"
796
+ else:
797
+ status = "Needs Review"
798
+
799
+ return {
800
+ "student_id": student_id,
801
+ "homework_id": homework_id,
802
+ "sub_institute_id": sub_institute_id,
803
+ "syear": syear,
804
+ "question_type": "mixed",
805
+ "student_level": student_level,
806
+ "status": status,
807
+ "match_percentage": final_score,
808
+ "ai_generated_remark": None,
809
+ "rule_based_remark": f"MCQ: {correct_mcq}/{total_mcq} correct. Narrative score: {narrative_score}%.",
810
+ "llm_used": bool(narrative_results and 'error' not in narrative_results),
811
+ "student_extracted_text": student_text,
812
+ "mcq_results": mcq_results,
813
+ "narrative_results": narrative_results,
814
+ "extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
815
+ }
816
  correct = extract_correct_mcq_from_prompt(prompt)
817
  chosen = extract_mcq_choice(student_text)
818