Spaces:
Running
Running
github-actions[bot] committed on
Commit ·
d3d533f
1
Parent(s): 10e98ed
Auto-deploy backend from GitHub (2a6e007)
Browse files
- routes/diagnostic.py +100 -35
routes/diagnostic.py
CHANGED
|
@@ -862,7 +862,7 @@ async def analyze_diagnostic(request: DiagnosticAnalysisRequest, req: Request):
|
|
| 862 |
for domain, scores in domain_scores.items():
|
| 863 |
domain_summary.append(f" {domain}: {scores.get('correct',0)}/{scores.get('total',0)} ({scores.get('percentage',0)}%) - {scores.get('mastery_level','')}")
|
| 864 |
|
| 865 |
-
prompt = f"""You are an expert math education analyst. Analyze this
|
| 866 |
|
| 867 |
ASSESSMENT DATA:
|
| 868 |
- Score: {total_correct}/{total_items} ({round(total_correct/total_items*100,1) if total_items else 0}%)
|
|
@@ -875,40 +875,46 @@ DOMAIN SCORES:
|
|
| 875 |
PER-QUESTION BREAKDOWN:
|
| 876 |
{chr(10).join(question_details)}
|
| 877 |
|
| 878 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 879 |
{{
|
| 880 |
-
"overall_summary": "2-3
|
| 881 |
"time_analysis": {{
|
| 882 |
-
"pattern": "
|
| 883 |
-
"fast_questions": ["topics
|
| 884 |
-
"slow_questions": ["topics
|
| 885 |
-
"insight": "what timing reveals about confidence
|
| 886 |
}},
|
| 887 |
"strength_areas": [
|
| 888 |
-
{{"domain": "
|
| 889 |
],
|
| 890 |
"weakness_areas": [
|
| 891 |
-
{{"domain": "
|
| 892 |
],
|
| 893 |
"answer_patterns": {{
|
| 894 |
-
"description": "
|
| 895 |
-
"common_mistakes": ["
|
| 896 |
-
"positive_patterns": ["
|
| 897 |
}},
|
| 898 |
"recommendations": [
|
| 899 |
-
{{"action": "specific
|
| 900 |
],
|
| 901 |
"difficulty_analysis": {{
|
| 902 |
-
"easy_performance": "
|
| 903 |
-
"medium_performance": "
|
| 904 |
-
"hard_performance": "
|
| 905 |
}}
|
| 906 |
}}
|
| 907 |
|
| 908 |
Return ONLY valid JSON, no markdown fences."""
|
| 909 |
|
| 910 |
try:
|
| 911 |
-
from main import call_hf_chat_async
|
| 912 |
raw = await call_hf_chat_async(
|
| 913 |
[{"role": "user", "content": prompt}],
|
| 914 |
max_tokens=1500,
|
|
@@ -925,9 +931,11 @@ Return ONLY valid JSON, no markdown fences."""
|
|
| 925 |
cleaned = cleaned.strip()
|
| 926 |
|
| 927 |
analysis = json.loads(cleaned)
|
| 928 |
-
except
|
| 929 |
-
logger.warning(f"[diagnostic/analyze] AI parse failed: {e}, using fallback")
|
| 930 |
-
|
|
|
|
|
|
|
| 931 |
analysis = _build_fallback_analysis(responses, domain_scores, risk_profile)
|
| 932 |
|
| 933 |
return DiagnosticAnalysisResponse(success=True, analysis=analysis)
|
|
@@ -938,21 +946,64 @@ def _build_fallback_analysis(
|
|
| 938 |
domain_scores: Dict[str, Any],
|
| 939 |
risk_profile: Dict[str, Any],
|
| 940 |
) -> Dict[str, Any]:
|
| 941 |
-
"""Build a
|
| 942 |
total_time = sum(r.get("time_spent_seconds", 0) for r in responses)
|
| 943 |
total_items = len(responses)
|
| 944 |
avg_time = round(total_time / total_items, 1) if total_items else 0
|
| 945 |
total_correct = sum(1 for r in responses if r.get("is_correct"))
|
| 946 |
|
| 947 |
-
#
|
| 948 |
times = [(r.get("topic", ""), r.get("time_spent_seconds", 0), r.get("is_correct", False)) for r in responses]
|
| 949 |
times_sorted = sorted(times, key=lambda x: x[1])
|
| 950 |
fast = [t[0] for t in times_sorted[:3] if t[0]]
|
| 951 |
slow = [t[0] for t in times_sorted[-3:] if t[0]]
|
| 952 |
|
| 953 |
-
#
|
| 954 |
-
|
| 955 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 956 |
|
| 957 |
# Difficulty breakdown
|
| 958 |
easy = [r for r in responses if r.get("difficulty") == "easy"]
|
|
@@ -965,25 +1016,39 @@ def _build_fallback_analysis(
|
|
| 965 |
correct = sum(1 for i in items if i.get("is_correct"))
|
| 966 |
return f"{correct}/{len(items)} correct ({round(correct/len(items)*100)}%)"
|
| 967 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 968 |
return {
|
| 969 |
-
"overall_summary":
|
| 970 |
"time_analysis": {
|
| 971 |
-
"pattern": "deliberate" if avg_time > 60 else "moderate" if avg_time > 30 else "quick",
|
| 972 |
"fast_questions": fast,
|
| 973 |
"slow_questions": slow,
|
| 974 |
-
"insight":
|
| 975 |
},
|
| 976 |
"strength_areas": strengths,
|
| 977 |
"weakness_areas": weaknesses,
|
| 978 |
"answer_patterns": {
|
| 979 |
-
"description": "
|
| 980 |
-
"common_mistakes": [
|
| 981 |
-
"positive_patterns": [f"
|
| 982 |
},
|
| 983 |
-
"recommendations": [
|
| 984 |
-
{"action": f"Focus on {w['domain']}", "reason": w["detail"], "priority": i + 1}
|
| 985 |
-
for i, w in enumerate(weaknesses[:3])
|
| 986 |
-
],
|
| 987 |
"difficulty_analysis": {
|
| 988 |
"easy_performance": pct(easy),
|
| 989 |
"medium_performance": pct(medium),
|
|
|
|
| 862 |
for domain, scores in domain_scores.items():
|
| 863 |
domain_summary.append(f" {domain}: {scores.get('correct',0)}/{scores.get('total',0)} ({scores.get('percentage',0)}%) - {scores.get('mastery_level','')}")
|
| 864 |
|
| 865 |
+
prompt = f"""You are an expert math education analyst for Filipino Senior High School STEM students. Analyze this diagnostic assessment and provide specific, actionable insights that go BEYOND just restating scores.
|
| 866 |
|
| 867 |
ASSESSMENT DATA:
|
| 868 |
- Score: {total_correct}/{total_items} ({round(total_correct/total_items*100,1) if total_items else 0}%)
|
|
|
|
| 875 |
PER-QUESTION BREAKDOWN:
|
| 876 |
{chr(10).join(question_details)}
|
| 877 |
|
| 878 |
+
RULES:
|
| 879 |
+
- Do NOT just restate scores. Explain WHY the student struggled (e.g., "confused function notation with equations" not "Errors in General Mathematics")
|
| 880 |
+
- Identify specific misconceptions from wrong answers
|
| 881 |
+
- Recommendations must be concrete study actions (e.g., "Practice evaluating f(x) by substituting values step-by-step") not generic ("Focus on General Mathematics")
|
| 882 |
+
- Timing insights should explain what speed reveals about guessing vs. deliberation
|
| 883 |
+
|
| 884 |
+
Return JSON with this exact structure:
|
| 885 |
{{
|
| 886 |
+
"overall_summary": "2-3 sentences: what this student understands vs. what they're missing, written encouragingly",
|
| 887 |
"time_analysis": {{
|
| 888 |
+
"pattern": "rushed/deliberate/inconsistent",
|
| 889 |
+
"fast_questions": ["specific topics answered quickly"],
|
| 890 |
+
"slow_questions": ["specific topics that took longest"],
|
| 891 |
+
"insight": "what timing reveals about confidence โ e.g. rushed through hard questions suggesting guessing"
|
| 892 |
}},
|
| 893 |
"strength_areas": [
|
| 894 |
+
{{"domain": "topic name", "detail": "specific skill demonstrated, e.g. 'correctly applies function evaluation with substitution'"}}
|
| 895 |
],
|
| 896 |
"weakness_areas": [
|
| 897 |
+
{{"domain": "topic name", "detail": "specific misconception, e.g. 'confuses permutation with combination when order matters'", "priority": "high/medium/low"}}
|
| 898 |
],
|
| 899 |
"answer_patterns": {{
|
| 900 |
+
"description": "observed pattern in errors โ e.g. 'tends to pick the first plausible option on hard questions'",
|
| 901 |
+
"common_mistakes": ["specific mistake patterns with examples from the data"],
|
| 902 |
+
"positive_patterns": ["specific positive patterns"]
|
| 903 |
}},
|
| 904 |
"recommendations": [
|
| 905 |
+
{{"action": "specific study action with example", "reason": "addresses which misconception", "priority": 1}}
|
| 906 |
],
|
| 907 |
"difficulty_analysis": {{
|
| 908 |
+
"easy_performance": "X/Y correct โ interpretation",
|
| 909 |
+
"medium_performance": "X/Y correct โ interpretation",
|
| 910 |
+
"hard_performance": "X/Y correct โ interpretation"
|
| 911 |
}}
|
| 912 |
}}
|
| 913 |
|
| 914 |
Return ONLY valid JSON, no markdown fences."""
|
| 915 |
|
| 916 |
try:
|
| 917 |
+
from main import call_hf_chat_async # noqa: E402
|
| 918 |
raw = await call_hf_chat_async(
|
| 919 |
[{"role": "user", "content": prompt}],
|
| 920 |
max_tokens=1500,
|
|
|
|
| 931 |
cleaned = cleaned.strip()
|
| 932 |
|
| 933 |
analysis = json.loads(cleaned)
|
| 934 |
+
except json.JSONDecodeError as e:
|
| 935 |
+
logger.warning(f"[diagnostic/analyze] AI JSON parse failed: {e}, using fallback")
|
| 936 |
+
analysis = _build_fallback_analysis(responses, domain_scores, risk_profile)
|
| 937 |
+
except Exception as e:
|
| 938 |
+
logger.warning(f"[diagnostic/analyze] AI call failed: {type(e).__name__}: {e}, using fallback")
|
| 939 |
analysis = _build_fallback_analysis(responses, domain_scores, risk_profile)
|
| 940 |
|
| 941 |
return DiagnosticAnalysisResponse(success=True, analysis=analysis)
|
|
|
|
| 946 |
domain_scores: Dict[str, Any],
|
| 947 |
risk_profile: Dict[str, Any],
|
| 948 |
) -> Dict[str, Any]:
|
| 949 |
+
"""Build a detailed analysis from response data when AI is unavailable."""
|
| 950 |
total_time = sum(r.get("time_spent_seconds", 0) for r in responses)
|
| 951 |
total_items = len(responses)
|
| 952 |
avg_time = round(total_time / total_items, 1) if total_items else 0
|
| 953 |
total_correct = sum(1 for r in responses if r.get("is_correct"))
|
| 954 |
|
| 955 |
+
# Analyze timing patterns
|
| 956 |
times = [(r.get("topic", ""), r.get("time_spent_seconds", 0), r.get("is_correct", False)) for r in responses]
|
| 957 |
times_sorted = sorted(times, key=lambda x: x[1])
|
| 958 |
fast = [t[0] for t in times_sorted[:3] if t[0]]
|
| 959 |
slow = [t[0] for t in times_sorted[-3:] if t[0]]
|
| 960 |
|
| 961 |
+
# Detect guessing: very fast + wrong
|
| 962 |
+
guessed = [r for r in responses if r.get("time_spent_seconds", 0) <= 2 and not r.get("is_correct")]
|
| 963 |
+
rushed_topics = list(set(r.get("topic", "") for r in guessed if r.get("topic")))
|
| 964 |
+
|
| 965 |
+
# Identify specific wrong topics
|
| 966 |
+
wrong_by_topic: Dict[str, List[Dict[str, Any]]] = {}
|
| 967 |
+
for r in responses:
|
| 968 |
+
if not r.get("is_correct"):
|
| 969 |
+
topic = r.get("topic", "Unknown")
|
| 970 |
+
wrong_by_topic.setdefault(topic, []).append(r)
|
| 971 |
+
|
| 972 |
+
# Build specific mistake descriptions
|
| 973 |
+
common_mistakes = []
|
| 974 |
+
for topic, wrongs in sorted(wrong_by_topic.items(), key=lambda x: -len(x[1])):
|
| 975 |
+
difficulties = [w.get("difficulty", "") for w in wrongs]
|
| 976 |
+
if all(d == "easy" for d in difficulties):
|
| 977 |
+
common_mistakes.append(f"Missed basic {topic} questions โ review foundational concepts")
|
| 978 |
+
elif all(d == "hard" for d in difficulties):
|
| 979 |
+
common_mistakes.append(f"Struggled with advanced {topic} โ needs more practice before tackling complex problems")
|
| 980 |
+
else:
|
| 981 |
+
common_mistakes.append(f"Inconsistent in {topic} ({len(wrongs)} errors across difficulty levels)")
|
| 982 |
+
|
| 983 |
+
# Strengths: correct answers with detail
|
| 984 |
+
correct_topics = list(set(r.get("topic", "") for r in responses if r.get("is_correct") and r.get("topic")))
|
| 985 |
+
strengths = [{"domain": t, "detail": "Answered correctly โ shows understanding of core concept"} for t in correct_topics[:3]]
|
| 986 |
+
|
| 987 |
+
# Weaknesses with specific detail
|
| 988 |
+
weaknesses = []
|
| 989 |
+
for domain, scores in sorted(domain_scores.items(), key=lambda x: x[1].get("percentage", 0)):
|
| 990 |
+
pct_val = scores.get("percentage", 0)
|
| 991 |
+
if pct_val < 70:
|
| 992 |
+
wrong_topics_in_domain = [r.get("topic", "") for r in responses if r.get("domain") == domain and not r.get("is_correct")]
|
| 993 |
+
detail = f"Missed questions on: {', '.join(set(wrong_topics_in_domain))}" if wrong_topics_in_domain else f"Scored {pct_val}%"
|
| 994 |
+
weaknesses.append({"domain": domain, "detail": detail, "priority": "high" if pct_val < 50 else "medium"})
|
| 995 |
+
|
| 996 |
+
# Actionable recommendations
|
| 997 |
+
recommendations = []
|
| 998 |
+
priority = 1
|
| 999 |
+
if rushed_topics:
|
| 1000 |
+
recommendations.append({"action": f"Slow down on {', '.join(rushed_topics[:2])} โ quick answers were mostly wrong", "reason": "Speed suggests guessing rather than solving", "priority": priority})
|
| 1001 |
+
priority += 1
|
| 1002 |
+
for w in weaknesses[:2]:
|
| 1003 |
+
wrong_in_domain = [r for r in responses if r.get("domain") == w["domain"] and not r.get("is_correct")]
|
| 1004 |
+
topics = list(set(r.get("topic", "") for r in wrong_in_domain))
|
| 1005 |
+
recommendations.append({"action": f"Review {', '.join(topics[:2])} with worked examples", "reason": f"0/{len(wrong_in_domain)} correct in these topics", "priority": priority})
|
| 1006 |
+
priority += 1
|
| 1007 |
|
| 1008 |
# Difficulty breakdown
|
| 1009 |
easy = [r for r in responses if r.get("difficulty") == "easy"]
|
|
|
|
| 1016 |
correct = sum(1 for i in items if i.get("is_correct"))
|
| 1017 |
return f"{correct}/{len(items)} correct ({round(correct/len(items)*100)}%)"
|
| 1018 |
|
| 1019 |
+
# Timing insight
|
| 1020 |
+
if guessed:
|
| 1021 |
+
timing_insight = f"Answered {len(guessed)} questions in โค2 seconds and got them wrong โ likely guessing on unfamiliar topics."
|
| 1022 |
+
elif avg_time < 5:
|
| 1023 |
+
timing_insight = "Very fast responses across the board. Consider spending more time reading questions carefully."
|
| 1024 |
+
else:
|
| 1025 |
+
timing_insight = f"Average {avg_time}s per question shows deliberate approach."
|
| 1026 |
+
|
| 1027 |
+
# Summary
|
| 1028 |
+
score_pct = round(total_correct / total_items * 100) if total_items else 0
|
| 1029 |
+
if score_pct >= 70:
|
| 1030 |
+
summary = f"Good foundation with {total_correct}/{total_items} correct. Some gaps in specific topics that can be addressed with targeted practice."
|
| 1031 |
+
elif score_pct >= 50:
|
| 1032 |
+
summary = f"Scored {total_correct}/{total_items} ({score_pct}%). Shows understanding of some concepts but needs reinforcement in weaker domains before advancing."
|
| 1033 |
+
else:
|
| 1034 |
+
summary = f"Scored {total_correct}/{total_items} ({score_pct}%). Multiple areas need attention โ start with the easiest missed topics to build confidence, then progress to harder ones."
|
| 1035 |
+
|
| 1036 |
return {
|
| 1037 |
+
"overall_summary": summary,
|
| 1038 |
"time_analysis": {
|
| 1039 |
+
"pattern": "deliberate" if avg_time > 60 else "moderate" if avg_time > 30 else "rushed" if avg_time < 5 else "quick",
|
| 1040 |
"fast_questions": fast,
|
| 1041 |
"slow_questions": slow,
|
| 1042 |
+
"insight": timing_insight,
|
| 1043 |
},
|
| 1044 |
"strength_areas": strengths,
|
| 1045 |
"weakness_areas": weaknesses,
|
| 1046 |
"answer_patterns": {
|
| 1047 |
+
"description": f"Got {len(guessed)} questions wrong in under 2 seconds (possible guessing). Performed better on medium-difficulty than easy questions." if guessed else "Mixed performance across difficulty levels.",
|
| 1048 |
+
"common_mistakes": common_mistakes[:4],
|
| 1049 |
+
"positive_patterns": [f"Correct on {t}" for t in correct_topics[:3]],
|
| 1050 |
},
|
| 1051 |
+
"recommendations": recommendations[:4] if recommendations else [{"action": "Start with basic concept review in weakest domain", "reason": "Build foundation before advancing", "priority": 1}],
|
|
|
|
|
|
|
|
|
|
| 1052 |
"difficulty_analysis": {
|
| 1053 |
"easy_performance": pct(easy),
|
| 1054 |
"medium_performance": pct(medium),
|