new update
Browse files
app.py
CHANGED
|
@@ -249,8 +249,8 @@ def keypoint_coverage(student_text: str, key_points: List[str], kp_threshold: fl
|
|
| 249 |
def infer_question_type_from_prompt(prompt: str) -> str:
|
| 250 |
p = _norm(prompt)
|
| 251 |
|
| 252 |
-
# Explicit markers (
|
| 253 |
-
if re.search(r"\btype\s*:\s*mcq\b", p) or re.search(r"\bquestion_type\s*:\s*mcq\b", p):
|
| 254 |
return "mcq"
|
| 255 |
if re.search(r"\btype\s*:\s*narrative\b", p) or re.search(r"\bquestion_type\s*:\s*narrative\b", p):
|
| 256 |
return "narrative"
|
|
@@ -262,6 +262,88 @@ def infer_question_type_from_prompt(prompt: str) -> str:
|
|
| 262 |
return "narrative"
|
| 263 |
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
def extract_mcq_choice(text: str) -> str:
|
| 266 |
"""
|
| 267 |
Extract chosen option from student text:
|
|
@@ -555,7 +637,20 @@ async def homework_validate(
|
|
| 555 |
policy = level_policy(student_level)
|
| 556 |
|
| 557 |
# 1) Infer question_type from prompt automatically (NO EXTRA FIELD)
|
| 558 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
|
| 560 |
# 2) Extract student text
|
| 561 |
student_info = await extract_text_from_upload(student_file)
|
|
@@ -597,9 +692,127 @@ async def homework_validate(
|
|
| 597 |
}
|
| 598 |
|
| 599 |
# =========================================================
|
| 600 |
-
# ✅
|
| 601 |
# =========================================================
|
| 602 |
-
if question_type == "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 603 |
correct = extract_correct_mcq_from_prompt(prompt)
|
| 604 |
chosen = extract_mcq_choice(student_text)
|
| 605 |
|
|
|
|
| 249 |
def infer_question_type_from_prompt(prompt: str) -> str:
|
| 250 |
p = _norm(prompt)
|
| 251 |
|
| 252 |
+
# Explicit markers - check for (mcq) first since it's common in parentheses
|
| 253 |
+
if re.search(r"\(mcq\)", p) or re.search(r"\btype\s*:\s*mcq\b", p) or re.search(r"\bquestion_type\s*:\s*mcq\b", p):
|
| 254 |
return "mcq"
|
| 255 |
if re.search(r"\btype\s*:\s*narrative\b", p) or re.search(r"\bquestion_type\s*:\s*narrative\b", p):
|
| 256 |
return "narrative"
|
|
|
|
| 262 |
return "narrative"
|
| 263 |
|
| 264 |
|
| 265 |
+
def parse_questions_from_prompt(prompt: str) -> List[Dict[str, Any]]:
    """
    Split a homework prompt into individual questions and classify each one.

    A question starts on a line beginning with a header such as "Q1:", "Q2.",
    "Question 3:" (case-insensitive). Every following non-blank line is
    appended to that question's text until the next header.

    Returns a list of dicts with the keys:
      - 'qid':            the header label as written (e.g. "Q1", "Question 2")
      - 'type':           'mcq', 'narrative', or None when no marker was found
      - 'question':       the question text (header label removed, continuation
                          lines joined with single spaces)
      - 'correct_answer': for MCQ questions, the first answer found on a
                          continuation line ("Correct Answer: ...",
                          "Correct: ...", "Answer: ..."); otherwise None.
                          When the answer starts with an option letter
                          (A-D), only that letter is returned.

    If no question header is found at all, falls back to a single 'Q1' entry
    whose type comes from infer_question_type_from_prompt(prompt).
    """
    questions: List[Dict[str, Any]] = []
    current = None  # dict for the question currently being accumulated

    for raw_line in prompt.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        header = _PQ_HEADER_RE.match(line)
        if header:
            # A new header closes the previous question.
            if current is not None:
                questions.append(current)
            current = {
                'qid': header.group(1).strip(),
                # Type markers are looked up on the whole header line.
                'type': _pq_detect_type(line.lower()),
                'question': header.group(2).strip(),
                'correct_answer': None,
            }
            continue

        # Text before the first header belongs to no question; ignore it.
        if current is None:
            continue

        current['question'] += ' ' + line

        # The first type marker seen for a question wins.
        if current['type'] is None:
            current['type'] = _pq_detect_type(line.lower())

        # Only MCQ questions carry a correct answer; the first one found wins.
        if current['type'] == 'mcq' and not current['correct_answer']:
            current['correct_answer'] = _pq_extract_correct(line)

    # Don't forget the last question.
    if current is not None:
        questions.append(current)

    # If no questions were parsed, fall back to whole-prompt inference.
    if not questions:
        qtype = infer_question_type_from_prompt(prompt)
        return [{'qid': 'Q1', 'type': qtype, 'question': prompt, 'correct_answer': None}]

    return questions


# Question header at the start of a line: "Q1:", "Q 2.", "Question 3:", ...
_PQ_HEADER_RE = re.compile(r'^(Q\s*\d+|Question\s*\d+)[.:]\s*(.*)', re.IGNORECASE)

# "Correct Answer:", "Correct Answers:", "Correct Answer(s):", "Correct:", "Answer:"
# followed by the answer text (captured whole; the option letter is split off
# afterwards so it is never silently discarded).
_PQ_ANSWER_RE = re.compile(
    r'(?:Correct\s*(?:Answer(?:\(s\)|s)?)?|Answer(?:\(s\)|s)?)[:.]\s*(.+)',
    re.IGNORECASE,
)

# A standalone option letter at the start of the answer: "B", "b)", "(C)",
# "B. Paris", "B - Paris". The lookahead keeps words such as "Delhi" intact.
_PQ_OPTION_LETTER_RE = re.compile(r'^\(?([A-Da-d])\)?(?=[.):\s-]|$)')


def _pq_detect_type(line_lower):
    """Return 'mcq' / 'narrative' if the lower-cased line has a type marker, else None."""
    if '(mcq)' in line_lower or 'multiple choice' in line_lower or 'type: mcq' in line_lower:
        return 'mcq'
    # 'type: narrative' is covered by the plain substring check.
    if 'narrative' in line_lower:
        return 'narrative'
    return None


def _pq_extract_correct(line):
    """Return the correct answer stated on this line, or None if there is none."""
    found = _PQ_ANSWER_RE.search(line)
    if not found:
        return None
    answer = found.group(1).strip()
    letter = _PQ_OPTION_LETTER_RE.match(answer)
    # Prefer the option letter when one leads the answer; otherwise keep the text.
    return letter.group(1) if letter else (answer or None)
|
| 345 |
+
|
| 346 |
+
|
| 347 |
def extract_mcq_choice(text: str) -> str:
|
| 348 |
"""
|
| 349 |
Extract chosen option from student text:
|
|
|
|
| 637 |
policy = level_policy(student_level)
|
| 638 |
|
| 639 |
# 1) Infer question_type from prompt automatically (NO EXTRA FIELD)
|
| 640 |
+
# Try to parse mixed questions first
|
| 641 |
+
parsed_questions = parse_questions_from_prompt(prompt)
|
| 642 |
+
has_mcq = any(q.get('type') == 'mcq' for q in parsed_questions)
|
| 643 |
+
has_narrative = any(q.get('type') == 'narrative' for q in parsed_questions)
|
| 644 |
+
|
| 645 |
+
# Determine overall question type for backwards compatibility
|
| 646 |
+
if has_mcq and has_narrative:
|
| 647 |
+
question_type = "mixed"
|
| 648 |
+
elif has_mcq:
|
| 649 |
+
question_type = "mcq"
|
| 650 |
+
elif has_narrative:
|
| 651 |
+
question_type = "narrative"
|
| 652 |
+
else:
|
| 653 |
+
question_type = infer_question_type_from_prompt(prompt)
|
| 654 |
|
| 655 |
# 2) Extract student text
|
| 656 |
student_info = await extract_text_from_upload(student_file)
|
|
|
|
| 692 |
}
|
| 693 |
|
| 694 |
# =========================================================
|
| 695 |
+
# ✅ MIXED QUESTION TYPES CHECK (MCQ + Narrative)
|
| 696 |
# =========================================================
|
| 697 |
+
if question_type == "mixed":
|
| 698 |
+
# Process each question type separately and combine results
|
| 699 |
+
mcq_results = []
|
| 700 |
+
narrative_results = []
|
| 701 |
+
|
| 702 |
+
# Extract MCQ answers from student text for each MCQ question
|
| 703 |
+
for q in parsed_questions:
|
| 704 |
+
if q.get('type') == 'mcq':
|
| 705 |
+
# Try to find answer for this specific question in student's text
|
| 706 |
+
# Use the question text to help locate the answer
|
| 707 |
+
q_text = q.get('question', '')
|
| 708 |
+
chosen = extract_mcq_choice(student_text)
|
| 709 |
+
correct = q.get('correct_answer') or extract_correct_mcq_from_prompt(q.get('question', ''))
|
| 710 |
+
|
| 711 |
+
if correct and chosen:
|
| 712 |
+
is_correct = (chosen.lower().strip() == correct.lower().strip())
|
| 713 |
+
mcq_results.append({
|
| 714 |
+
'qid': q.get('qid'),
|
| 715 |
+
'correct': is_correct,
|
| 716 |
+
'chosen': chosen,
|
| 717 |
+
'correct_answer': correct
|
| 718 |
+
})
|
| 719 |
+
|
| 720 |
+
# For narrative questions, use AI to generate reference
|
| 721 |
+
narrative_questions = [q for q in parsed_questions if q.get('type') == 'narrative']
|
| 722 |
+
|
| 723 |
+
if narrative_questions and gemini_client:
|
| 724 |
+
# Combine narrative questions into one prompt for AI
|
| 725 |
+
narrative_prompt_text = "\n".join([
|
| 726 |
+
f"{q.get('qid')}: {q.get('question')}" for q in narrative_questions
|
| 727 |
+
])
|
| 728 |
+
|
| 729 |
+
ai_prompt = (
|
| 730 |
+
f"STUDENT_LEVEL: {student_level}\n"
|
| 731 |
+
f"QUESTIONS:\n{narrative_prompt_text}\n\n"
|
| 732 |
+
'Return ONLY valid JSON with keys: {"ai_reference_answer": string, "key_points": [string, ...]}.'
|
| 733 |
+
)
|
| 734 |
+
|
| 735 |
+
response_text = generate_gemini_response(
|
| 736 |
+
prompt=ai_prompt,
|
| 737 |
+
system_prompt=(
|
| 738 |
+
"Generate correct reference answers for homework evaluation. "
|
| 739 |
+
"Keep it aligned with the student level. Output strict JSON only."
|
| 740 |
+
),
|
| 741 |
+
max_tokens=650,
|
| 742 |
+
temperature=0.3,
|
| 743 |
+
)
|
| 744 |
+
|
| 745 |
+
if response_text:
|
| 746 |
+
try:
|
| 747 |
+
m = re.search(r'\{.*\}', response_text, flags=re.S)
|
| 748 |
+
payload = json.loads(m.group(0) if m else response_text)
|
| 749 |
+
|
| 750 |
+
ai_reference_answer = (payload.get("ai_reference_answer") or "").strip()
|
| 751 |
+
key_points = payload.get("key_points") or []
|
| 752 |
+
|
| 753 |
+
if isinstance(key_points, list):
|
| 754 |
+
key_points = [str(x).strip() for x in key_points if str(x).strip()]
|
| 755 |
+
|
| 756 |
+
sim = cosine_sim(student_text, ai_reference_answer)
|
| 757 |
+
covered, missing, coverage = keypoint_coverage(
|
| 758 |
+
student_text, key_points, kp_threshold=policy["kp_thr"]
|
| 759 |
+
)
|
| 760 |
+
|
| 761 |
+
final = policy["w_sim"] * sim + policy["w_cov"] * coverage
|
| 762 |
+
match_pct = int(round(final * 100))
|
| 763 |
+
|
| 764 |
+
narrative_results = {
|
| 765 |
+
'similarity': sim,
|
| 766 |
+
'coverage': coverage,
|
| 767 |
+
'match_percentage': match_pct,
|
| 768 |
+
'key_points_covered': covered,
|
| 769 |
+
'key_points_missing': missing
|
| 770 |
+
}
|
| 771 |
+
except Exception as e:
|
| 772 |
+
narrative_results = {'error': str(e)}
|
| 773 |
+
|
| 774 |
+
# Calculate combined score
|
| 775 |
+
total_mcq = len(mcq_results)
|
| 776 |
+
correct_mcq = sum(1 for r in mcq_results if r.get('correct'))
|
| 777 |
+
mcq_score = (correct_mcq / total_mcq * 100) if total_mcq > 0 else 0
|
| 778 |
+
|
| 779 |
+
narrative_score = narrative_results.get('match_percentage', 0) if narrative_results else 0
|
| 780 |
+
|
| 781 |
+
# Weight: 50% MCQ, 50% Narrative (if both exist)
|
| 782 |
+
if total_mcq > 0 and narrative_results and 'error' not in narrative_results:
|
| 783 |
+
final_score = int((mcq_score + narrative_score) / 2)
|
| 784 |
+
elif total_mcq > 0:
|
| 785 |
+
final_score = mcq_score
|
| 786 |
+
elif narrative_results and 'error' not in narrative_results:
|
| 787 |
+
final_score = narrative_score
|
| 788 |
+
else:
|
| 789 |
+
final_score = 0
|
| 790 |
+
|
| 791 |
+
# Determine status
|
| 792 |
+
if final_score >= policy["verified"]:
|
| 793 |
+
status = "Verified"
|
| 794 |
+
elif final_score >= policy["partial"]:
|
| 795 |
+
status = "Partial"
|
| 796 |
+
else:
|
| 797 |
+
status = "Needs Review"
|
| 798 |
+
|
| 799 |
+
return {
|
| 800 |
+
"student_id": student_id,
|
| 801 |
+
"homework_id": homework_id,
|
| 802 |
+
"sub_institute_id": sub_institute_id,
|
| 803 |
+
"syear": syear,
|
| 804 |
+
"question_type": "mixed",
|
| 805 |
+
"student_level": student_level,
|
| 806 |
+
"status": status,
|
| 807 |
+
"match_percentage": final_score,
|
| 808 |
+
"ai_generated_remark": None,
|
| 809 |
+
"rule_based_remark": f"MCQ: {correct_mcq}/{total_mcq} correct. Narrative score: {narrative_score}%.",
|
| 810 |
+
"llm_used": bool(narrative_results and 'error' not in narrative_results),
|
| 811 |
+
"student_extracted_text": student_text,
|
| 812 |
+
"mcq_results": mcq_results,
|
| 813 |
+
"narrative_results": narrative_results,
|
| 814 |
+
"extraction": {"student": {k: v for k, v in student_info.items() if k != "text"}},
|
| 815 |
+
}
|
| 816 |
correct = extract_correct_mcq_from_prompt(prompt)
|
| 817 |
chosen = extract_mcq_choice(student_text)
|
| 818 |
|