MinhTai committed on
Commit
a626bc3
·
0 Parent(s):

deploy: edafea3

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +51 -0
  2. .env.example +7 -0
  3. .github/workflows/admin-key-log.yml +33 -0
  4. .gitignore +81 -0
  5. AGENTS.md +87 -0
  6. CLAUDE.md +271 -0
  7. Dockerfile +30 -0
  8. README.md +15 -0
  9. backend/.env.example +23 -0
  10. backend/app/__init__.py +0 -0
  11. backend/app/abuse_detector.py +232 -0
  12. backend/app/admin_auth.py +67 -0
  13. backend/app/agent/__init__.py +0 -0
  14. backend/app/agent/core.py +7 -0
  15. backend/app/agent/exam_analyzer.py +444 -0
  16. backend/app/agent/exam_explainer.py +118 -0
  17. backend/app/agent/fsrs.py +59 -0
  18. backend/app/agent/hint_generator.py +90 -0
  19. backend/app/agent/memory.py +27 -0
  20. backend/app/agent/study_planner.py +131 -0
  21. backend/app/auth.py +41 -0
  22. backend/app/config.py +90 -0
  23. backend/app/data/__init__.py +0 -0
  24. backend/app/data/concepts.py +503 -0
  25. backend/app/data/question_answers.json +1 -0
  26. backend/app/db.py +214 -0
  27. backend/app/dependencies.py +108 -0
  28. backend/app/main.py +0 -0
  29. backend/app/math_wiki/__init__.py +0 -0
  30. backend/app/math_wiki/admin_router.py +419 -0
  31. backend/app/math_wiki/agents/__init__.py +0 -0
  32. backend/app/math_wiki/agents/classifier.py +135 -0
  33. backend/app/math_wiki/agents/concept_ingest.py +83 -0
  34. backend/app/math_wiki/agents/decomposer.py +63 -0
  35. backend/app/math_wiki/agents/ingest.py +64 -0
  36. backend/app/math_wiki/agents/ocr.py +73 -0
  37. backend/app/math_wiki/agents/quiz_generator.py +471 -0
  38. backend/app/math_wiki/agents/reranker.py +43 -0
  39. backend/app/math_wiki/agents/reviewer.py +75 -0
  40. backend/app/math_wiki/agents/solver.py +362 -0
  41. backend/app/math_wiki/agents/sympy_verifier.py +274 -0
  42. backend/app/math_wiki/agents/validator.py +59 -0
  43. backend/app/math_wiki/autoirt.py +74 -0
  44. backend/app/math_wiki/bobcat.py +85 -0
  45. backend/app/math_wiki/cache.py +43 -0
  46. backend/app/math_wiki/context_builder.py +41 -0
  47. backend/app/math_wiki/crag.py +57 -0
  48. backend/app/math_wiki/deep_cat.py +87 -0
  49. backend/app/math_wiki/difficulty_estimator.py +61 -0
  50. backend/app/math_wiki/dkvmn.py +133 -0
.dockerignore ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python caches
2
+ **/__pycache__/
3
+ **/*.py[cod]
4
+ **/*.pyo
5
+ .venv/
6
+ venv/
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ .pytest_cache/
11
+ .mypy_cache/
12
+ .ruff_cache/
13
+
14
+ # Node
15
+ node_modules/
16
+ exam-app/
17
+ !exam-app/src/data/exams.json
18
+ !exam-app/src/data/questions.json
19
+
20
+ # Env files (set secrets in Koyeb dashboard, not baked into image)
21
+ .env
22
+ *.env.local
23
+
24
+ # Local wiki artifacts (rebuilt at runtime)
25
+ math_wiki.db
26
+ math_wiki.db-shm
27
+ math_wiki.db-wal
28
+ math_wiki.bm25.pkl
29
+ math_wiki.faiss
30
+ math_wiki.meta.pkl
31
+ backend/math_wiki.db
32
+
33
+ # Dev/test artifacts
34
+ *.png
35
+ *.pen
36
+ test-results/
37
+ tests/
38
+ docs/
39
+ core/
40
+ validators/
41
+ exam-app-plan.md
42
+ response-*.json
43
+ .gitnexus/
44
+ .playwright-mcp/
45
+ .claude/
46
+ .kilo/
47
+ playwright.config.js
48
+
49
+ # Git
50
+ .git/
51
+ .gitignore
.env.example ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ANTHROPIC_BASE_URL=https://ai-router.locdo.tech
2
+ ANTHROPIC_AUTH_TOKEN=your-auth-token-here
3
+ ANTHROPIC_DEFAULT_OPUS_MODEL=claude-opus-4.6
4
+ ANTHROPIC_DEFAULT_SONNET_MODEL=claude-sonnet-4.6
5
+ ANTHROPIC_DEFAULT_HAIKU_MODEL=claude-haiku-4.5
6
+ EMBEDDING_MODEL_NAME=BAAI/bge-m3
7
+ EMBEDDING_DIM=1024
.github/workflows/admin-key-log.yml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
name: Admin Key Log (fallback)

# Fallback scheduler — fires if cron-job.org misses a run.
# Primary scheduler: cron-job.org (POST /admin/generate-key-log, X-Cron-Secret header)
# This workflow is a safety net only; cron-job.org is preferred.
#
# Required GitHub repo secrets:
#   HF_SPACE_URL — e.g. https://your-space.hf.space
#   CRON_SECRET  — same value as CRON_SECRET in HF Secrets (≥32 chars)
#
# Schedule: 20:05 UTC Sunday = 03:05 ICT Monday (5 min after cron-job.org fires at 20:00)
# If cron-job.org already ran, the backend will just append a duplicate line — harmless.

on:
  schedule:
    - cron: "5 20 * * 0"
  workflow_dispatch:

jobs:
  trigger-key-log:
    runs-on: ubuntu-latest
    steps:
      - name: Trigger key log generation
        # Secrets are handed to the script as environment variables instead of
        # being expanded into the script text, so a value containing quotes,
        # `$` or backticks is never re-interpreted by the shell.
        env:
          HF_SPACE_URL: ${{ secrets.HF_SPACE_URL }}
          CRON_SECRET: ${{ secrets.CRON_SECRET }}
        run: |
          # `|| true`: on a transport error curl exits non-zero but still
          # prints "000" via -w; keep going so the status check below reports
          # the failure instead of the step aborting with no message.
          HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
            -X POST "${HF_SPACE_URL}/admin/generate-key-log" \
            -H "X-Cron-Secret: ${CRON_SECRET}" \
            -H "Content-Type: application/json") || true
          echo "Response status: $HTTP_STATUS"
          if [ "$HTTP_STATUS" != "200" ]; then
            echo "Key log generation failed with status $HTTP_STATUS"
            exit 1
          fi
.gitignore ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment
2
+ .env
3
+ *.env.local
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *.pyo
9
+ .venv/
10
+ venv/
11
+ *.egg-info/
12
+ dist/
13
+ build/
14
+ .pytest_cache/
15
+ .mypy_cache/
16
+ .ruff_cache/
17
+
18
+ # Node
19
+ node_modules/
20
+ exam-app/dist/
21
+
22
+ # Claude Code local settings (machine-specific)
23
+ .claude/settings.local.json
24
+
25
+ # GitNexus index (regenerated via `gitnexus analyze`)
26
+ .gitnexus/
27
+
28
+ # Ingestion state (local run progress, not source)
29
+ exam-app/scripts/ingest/state.json
30
+ backend/scripts/ingest/ingest_state.json
31
+
32
+ # Cloudflare Pages / Wrangler local state
33
+ .wrangler/
34
+
35
+ # Misc
36
+ *.pen
37
+ .DS_Store
38
+ Thumbs.db
39
+
40
+ # Screenshots (PNGs at any depth are treated as dev/test artifacts)
41
+ *.png
42
+ # Exception: question figure images are static assets shipped with the app
43
+ !exam-app/public/images/questions/*.png
44
+
45
+ # Math wiki local artifacts (regenerated by ingest pipeline)
46
+ math_wiki.db
47
+ math_wiki.db-shm
48
+ math_wiki.db-wal
49
+ math_wiki.bm25.pkl
50
+ math_wiki.faiss
51
+ math_wiki.meta.pkl
52
+ backend/math_wiki.db
53
+
54
+ # Crawl runtime state
55
+ scripts/crawl_progress.json
56
+
57
+ # Test results
58
+ test-results/
59
+
60
+ # Playwright MCP cache
61
+ .playwright-mcp/
62
+
63
+ # Local dev/agent artifacts
64
+ .claude/
65
+ docs/
66
+ exam-app-plan.md
67
+
68
+ # Kilo Code editor state
69
+ .kilo/
70
+
71
+ # Standalone agent framework (not integrated into the app)
72
+ core/
73
+
74
+ # Unused standalone validators (not imported by backend or exam-app)
75
+ validators/
76
+
77
+ # Empty local dev API response dumps
78
+ response-*.json
79
+
80
+ # Runtime caches
81
+ scripts/.pauls_sentences_cache.json
AGENTS.md ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Auth & Credit System
2
+
3
+ ### Auth-required endpoints
4
+ All AI endpoints (`/analyze`, `/hint`, `/explain`, `/study-plan`) require a valid JWT in `Authorization: Bearer <token>`. The token is obtained from `POST /auth/google`.
5
+
6
+ ### Credit deduction per feature
7
+ | Feature | Endpoint | Credits |
8
+ |---|---|---|
9
+ | Socratic hint | POST /hint | 1 |
10
+ | Answer explanation | POST /explain | 1 |
11
+ | Result analysis | POST /analyze | 3 |
12
+ | Study plan | POST /study-plan | 5 |
13
+
14
+ `/study-plan` also requires `subscription_tier` ∈ {student, complete} — returns 403 `tier_required` otherwise.
15
+
16
+ ### Getting the current admin key
17
+
18
+ Admin keys rotate automatically (default: weekly). Get the current key from either:
19
+ 1. **HF Spaces** → Files tab → `/data/admin_keys.txt` → copy the latest line's key
20
+ 2. **Local fallback**: `python tools/gen_admin_key.py` (prompts for `ADMIN_MASTER_SECRET`)
21
+
22
+ ### Granting manual top-ups (admin)
23
+ ```
24
+ POST /admin/users/{user_id}/credits
25
+ X-Admin-Key: <current derived key from admin_keys.txt or gen_admin_key.py>
26
+ {"amount": 500, "reason": "manual_topup_bank_transfer"}
27
+ ```
28
+
29
+ ### Activating subscriptions (admin)
30
+ ```
31
+ POST /admin/users/{user_id}/subscription
32
+ X-Admin-Key: <current derived key>
33
+ {"tier": "student", "period": "monthly", "expires_at": "2026-06-15T00:00:00Z", "bonus_credits": 0}
34
+ ```
35
+
36
+ ### Suspending abusive accounts (admin)
37
+ ```
38
+ POST /admin/users/{user_id}/suspend
39
+ X-Admin-Key: <current derived key>
40
+ {"reason": "credit_velocity abuse"}
41
+ ```
42
+
43
+ The abuse detector (`backend/app/abuse_detector.py`) runs every 5 minutes and auto-suspends on HIGH-confidence signals (credit velocity, burst >100 req/10min). MEDIUM-confidence events are logged to `security_events` for manual review via `GET /admin/security-events`.
44
+
45
+ <!-- gitnexus:start -->
46
+ # GitNexus — Code Intelligence
47
+
48
+ This project is indexed by GitNexus as **AI-Agent-App** (5942 symbols, 18107 relationships, 265 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
49
+
50
+ > If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
51
+
52
+ ## Always Do
53
+
54
+ - **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user.
55
+ - **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows.
56
+ - **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits.
57
+ - When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance.
58
+ - When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`.
59
+
60
+ ## Never Do
61
+
62
+ - NEVER edit a function, class, or method without first running `gitnexus_impact` on it.
63
+ - NEVER ignore HIGH or CRITICAL risk warnings from impact analysis.
64
+ - NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph.
65
+ - NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope.
66
+
67
+ ## Resources
68
+
69
+ | Resource | Use for |
70
+ |----------|---------|
71
+ | `gitnexus://repo/AI-Agent-App/context` | Codebase overview, check index freshness |
72
+ | `gitnexus://repo/AI-Agent-App/clusters` | All functional areas |
73
+ | `gitnexus://repo/AI-Agent-App/processes` | All execution flows |
74
+ | `gitnexus://repo/AI-Agent-App/process/{name}` | Step-by-step execution trace |
75
+
76
+ ## CLI
77
+
78
+ | Task | Read this skill file |
79
+ |------|---------------------|
80
+ | Understand architecture / "How does X work?" | `.claude/skills/gitnexus/gitnexus-exploring/SKILL.md` |
81
+ | Blast radius / "What breaks if I change X?" | `.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md` |
82
+ | Trace bugs / "Why is X failing?" | `.claude/skills/gitnexus/gitnexus-debugging/SKILL.md` |
83
+ | Rename / extract / split / refactor | `.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md` |
84
+ | Tools, resources, schema reference | `.claude/skills/gitnexus/gitnexus-guide/SKILL.md` |
85
+ | Index, status, clean, wiki CLI commands | `.claude/skills/gitnexus/gitnexus-cli/SKILL.md` |
86
+
87
+ <!-- gitnexus:end -->
CLAUDE.md ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AI Agent App
2
+
3
+ Zenith — AI-native adaptive learning system for Vietnamese students (FastAPI + Claude via ai-router proxy).
4
+
5
+ ## Stack
6
+
7
+ - **Python** — FastAPI, pydantic-settings, tenacity, openai SDK (>=1.58.0)
8
+ - **Runtime** — uvicorn
9
+ - **AI** — Claude models via internal OpenAI-compatible proxy at `https://ai-router.locdo.tech`
10
+
11
+ ## Dev commands
12
+
13
+ ```bash
14
+ # Run both backend + frontend together (preferred)
15
+ npm install # install concurrently (root, first time only)
16
+ npm run dev # starts backend :8000 and frontend :5173 concurrently
17
+
18
+ # Backend only
19
+ pip install -r requirements.txt
20
+ PYTHONPATH=backend uvicorn app.main:app --reload # http://localhost:8000
21
+ python3 -m pytest backend/tests/ # run tests
22
+
23
+ # Frontend only
24
+ cd exam-app && npm install && npm run dev # http://localhost:5173
25
+ ```
26
+
27
+ ## Project structure
28
+
29
+ ```
30
+ backend/app/
31
+ config.py # Settings (pydantic-settings), get_settings(); ALLOWED_ORIGINS for CORS
32
+ dependencies.py # get_ai_client() singleton (AsyncOpenAI)
33
+ middleware.py # RateLimitMiddleware — IP (20/min) + per-user (60/min) + rapid-fire hint detection
34
+ abuse_detector.py # Background loop (5 min) — credit velocity, burst, score anomaly, new-account checks
35
+ main.py # FastAPI routes: /analyze /hint /explain /study-plan /health
36
+ # + /auth/google, /users/me, /users/me/profile, /users/me/credits/log
37
+ # + /admin/users/{id}/subscription|credits|suspend|unsuspend
38
+ # + GET /admin/security-events
39
+ agent/
40
+ core.py # call_with_retry() — tenacity retry wrapper for all AI calls
41
+ memory.py # compress_conversation() via Haiku
42
+ exam_analyzer.py # analyze_exam_result() — grade+province → location-aware school recs
43
+ hint_generator.py# generate_hint() — Socratic hints via Haiku
44
+ study_planner.py # generate_study_plan() — 4-week study plan with JSON fallback
45
+ tests/
46
+ test_ai_endpoints.py # pytest tests covering AI endpoints (LLM mocked)
47
+
48
+ exam-app/src/
49
+ api/
50
+ index.js # Static data loaders (questions, exams, schools)
51
+ aiClient.js # Axios client wrapping all backend endpoints; wrap() preserves structured errors
52
+ components/
53
+ AIInsights.jsx # Renders AI analysis; handles 401/402/403 error codes + credit top-up CTA
54
+ AIErrorBoundary.jsx # React error boundary wrapping AI sections
55
+ QuestionCard.jsx # Question renderer + hint (⚡1 credit) + explanation toggle (practice mode)
56
+ ProfileOnboarding.jsx # Modal: grade (required) + province (required) + school type + ToS gate
57
+ LowCreditBanner.jsx # Sticky banner when credits_balance < 10; dismissible per session
58
+ Navbar.jsx # ⚡ credits badge → /account; avatar/name → /account
59
+ pages/
60
+ Results.jsx # Async AI analysis with grade+province in payload; "Tạo Kế Hoạch" button
61
+ StudyPlan.jsx # /study-plan/:resultId — 4-week plan with localStorage checkbox progress
62
+ Account.jsx # /account — profile, tier/credits, pricing table (monthly/annual toggle), credit log
63
+ ExamSelect.jsx # Auth gate (1 guest trial), grade/tier filter, category lock for non-complete tiers
64
+ context/
65
+ ExamContext.jsx # Exam state + hints: {} + SET_HINT action + useHints() hook
66
+ AuthContext.jsx # user (all profile fields), login, logout, updateProfile()
67
+ ```
68
+
69
+ ## User profile fields (users table)
70
+
71
+ | Field | Values | Effect |
72
+ |---|---|---|
73
+ | `grade` | '9','10','11','12' | ≤9 → grade10 exams only; 10-12 → thpt only |
74
+ | `province` | 63 VN provinces | AI school recs localized to province |
75
+ | `school_type` | 'chuyên','công lập','quốc tế' | Optional, informational |
76
+ | `subscription_tier` | 'basic','student','complete' | Controls exam access + study-plan gate |
77
+ | `subscription_period` | 'monthly','annual' | Annual shown with badge in Navbar/Account |
78
+ | `credits_balance` | integer ≥0 | Deducted per AI call; 402 when exhausted |
79
+ | `tos_accepted_at` | ISO timestamp | Required before any credit-deducting request |
80
+ | `is_suspended` | 0/1 | 403 account_suspended → suspension modal |
81
+
82
+ ## AI credit costs
83
+
84
+ | Endpoint | Credits |
85
+ |---|---|
86
+ | `/hint` | 1 |
87
+ | `/explain` | 1 |
88
+ | `/analyze` | 3 |
89
+ | `/study-plan` | 5 (student/complete tier only) |
90
+
91
+ ## Admin endpoints (require X-Admin-Key: current derived key)
92
+
93
+ Admin key rotates automatically (default: weekly). Get current key from `/data/admin_keys.txt` on HF Spaces or run `python tools/gen_admin_key.py`.
94
+
95
+ - `POST /admin/users/{id}/subscription` — set tier/period/expiry + bonus credits
96
+ - `POST /admin/users/{id}/credits` — grant top-up credits
97
+ - `POST /admin/users/{id}/suspend` — suspend with reason
98
+ - `POST /admin/users/{id}/unsuspend`
99
+ - `GET /admin/security-events` — recent HIGH/MEDIUM events with user status
100
+ - `POST /admin/generate-key-log` — (cron use only) derive + append current key to log; requires `X-Cron-Secret` header
101
+
102
+ ## AI router rules (CRITICAL)
103
+
104
+ - **SDK**: `openai` (never `anthropic`)
105
+ - **Base URL**: `https://ai-router.locdo.tech/v2` (set via `ANTHROPIC_BASE_URL` env var)
106
+ - **Auth**: env var `ANTHROPIC_AUTH_TOKEN` — never hardcode
107
+ - **Model names use dots**: `claude-sonnet-4.6`, `claude-opus-4.6`, `claude-haiku-4.5`
108
+ - **Never hardcode model names** — use `settings.default_model` / `settings.opus_model` / `settings.haiku_model`
109
+ - **Never create a new client per request** — use singleton `get_ai_client()` from `dependencies.py`
110
+
111
+ ## Model tiers
112
+
113
+ | Property | Model | Use |
114
+ |---|---|---|
115
+ | `settings.default_model` | `claude-sonnet-4.6` | Main agent loop |
116
+ | `settings.haiku_model` | `claude-haiku-4.5` | Cheap tasks: summarization, compression |
117
+ | `settings.opus_model` | `claude-opus-4.6` | Complex reasoning |
118
+
119
+ ## Env vars
120
+
121
+ **`backend/.env`** (copy from `backend/.env.example`, never commit)
122
+
123
+ | Variable | Example value |
124
+ |---|---|
125
+ | `ANTHROPIC_BASE_URL` | `https://ai-router.locdo.tech` |
126
+ | `ANTHROPIC_AUTH_TOKEN` | *(your token)* |
127
+ | `ANTHROPIC_DEFAULT_OPUS_MODEL` | `claude-opus-4.6` |
128
+ | `ANTHROPIC_DEFAULT_SONNET_MODEL` | `claude-sonnet-4.6` |
129
+ | `ANTHROPIC_DEFAULT_HAIKU_MODEL` | `claude-haiku-4.5` |
130
+ | `ALLOWED_ORIGINS` | `http://localhost:5173` |
131
+ | `SQLITE_PATH` | `./math_wiki.db` (local) / `/data/app.db` (HF Spaces) |
132
+ | `GOOGLE_CLIENT_ID` | *(Google OAuth client ID)* |
133
+ | `JWT_SECRET` | *(≥32 chars, required)* |
134
+ | `ADMIN_MASTER_SECRET` | *(≥32 chars — static master; effective key is HMAC-derived + time window)* |
135
+ | `ADMIN_KEY_ROTATION_PERIOD` | `weekly` *(daily\|weekly\|monthly\|quarterly\|annual)* |
136
+ | `ADMIN_KEY_LOG_PATH` | `./admin_keys.txt` (local) / `/data/admin_keys.txt` (HF Spaces) |
137
+ | `ADMIN_KEY_LOG_ENABLED` | `true` |
138
+ | `CRON_SECRET` | *(≥32 chars — authenticates POST /admin/generate-key-log from cron-job.org/GitHub Actions)* |
139
+
140
+ **`exam-app/.env`** (copy from `exam-app/.env.example`, never commit)
141
+
142
+ | Variable | Example value |
143
+ |---|---|
144
+ | `VITE_API_BASE_URL` | `http://localhost:8000` |
145
+
146
+ ## Key patterns
147
+
148
+ **Error handling** — wrap all `client.chat.completions.create()` with `call_with_retry()` from `agent/core.py`. Catches `RateLimitError` (retry), `APIConnectionError`, `APIStatusError`.
149
+
150
+ **Prefix caching** — static system prompt content first (e.g. `STATIC_EXAM_ANALYSIS_INSTRUCTIONS`); dynamic context (student name, score, weak topics) appended last.
151
+
152
+ **Pricing** — `PRICE_TABLE` in `tools/registry.py` maps product type → VND/m². Default fallback: 1,600,000 VND/m².
153
+
154
+ ## Development workflow
155
+
156
+ This project uses two collaborating tools for code intelligence and structured work:
157
+
158
+ - **GitNexus MCP** — knowledge graph of 5942 symbols and 18107 relationships, indexed from the codebase. Use it to understand blast radius before editing, trace execution flows, and do safe renames.
159
+ - **agent-skills plugin** — structured workflow skills (spec, plan, build, test, review, etc.) that map to common engineering tasks.
160
+
161
+ ### When to reach for each
162
+
163
+ | Task | Use |
164
+ |---|---|
165
+ | "What calls `run_agent()`?" / "What breaks if I change this?" | GitNexus: `gitnexus_impact`, `gitnexus_context` |
166
+ | "How does the tool loop work?" / "Find all entry points" | GitNexus: `gitnexus_query` |
167
+ | Adding a new feature end-to-end | agent-skills: `/spec` → `/plan` → `/build` |
168
+ | Fixing a bug with proof it's fixed | agent-skills: `/test` (Prove-It pattern) |
169
+ | Pre-merge check | agent-skills: `/review` + GitNexus: `gitnexus_detect_changes` |
170
+ | Renaming a symbol across files | GitNexus: `gitnexus_rename` |
171
+
172
+ ### GitNexus rules
173
+
174
+ - **Before any coding task in `exam-app/` or `backend/`** — ALWAYS run `npx gitnexus analyze --embeddings` to refresh the index, then run the relevant GitNexus MCP tools (impact, context, query) before writing a single line of code.
175
+ - **Before editing any symbol** — run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius. Stop and warn the user on HIGH or CRITICAL risk.
176
+ - **Before committing** — run `gitnexus_detect_changes()` to verify only expected symbols were affected.
177
+ - **Never rename with find-and-replace** — use `gitnexus_rename` which understands the call graph.
178
+
179
+ ### GitNexus skill files
180
+
181
+ | Goal | Skill |
182
+ |---|---|
183
+ | Architecture exploration | `.claude/skills/gitnexus/gitnexus-exploring/SKILL.md` |
184
+ | Blast radius / impact analysis | `.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md` |
185
+ | Bug tracing | `.claude/skills/gitnexus/gitnexus-debugging/SKILL.md` |
186
+ | Refactoring / rename / extract | `.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md` |
187
+ | Full tool + resource reference | `.claude/skills/gitnexus/gitnexus-guide/SKILL.md` |
188
+
189
+ ### GitNexus index state
190
+
191
+ Indexed as **AI-Agent-App** — re-index with `gitnexus analyze /mnt/d/AI-Agent-App --skip-git` after significant changes.
192
+
193
+ ### GitNexus MCP resources
194
+
195
+ | Resource | Use for |
196
+ |----------|---------|
197
+ | `gitnexus://repo/AI-Agent-App/context` | Codebase overview, index freshness |
198
+ | `gitnexus://repo/AI-Agent-App/processes` | All execution flows |
199
+
200
+ <!-- gitnexus:start -->
201
+ # GitNexus — Code Intelligence
202
+
203
+ This project is indexed by GitNexus as **AI-Agent-App** (5942 symbols, 18107 relationships, 265 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
204
+
205
+ > If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
206
+
207
+ ## Always Do
208
+
209
+ - **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user.
210
+ - **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows.
211
+ - **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits.
212
+ - When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance.
213
+ - When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`.
214
+
215
+ ## Never Do
216
+
217
+ - NEVER edit a function, class, or method without first running `gitnexus_impact` on it.
218
+ - NEVER ignore HIGH or CRITICAL risk warnings from impact analysis.
219
+ - NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph.
220
+ - NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope.
221
+
222
+ ## Resources
223
+
224
+ | Resource | Use for |
225
+ |----------|---------|
226
+ | `gitnexus://repo/AI-Agent-App/context` | Codebase overview, check index freshness |
227
+ | `gitnexus://repo/AI-Agent-App/clusters` | All functional areas |
228
+ | `gitnexus://repo/AI-Agent-App/processes` | All execution flows |
229
+ | `gitnexus://repo/AI-Agent-App/process/{name}` | Step-by-step execution trace |
230
+
231
+ ## CLI
232
+
233
+ | Task | Read this skill file |
234
+ |------|---------------------|
235
+ | Understand architecture / "How does X work?" | `.claude/skills/gitnexus/gitnexus-exploring/SKILL.md` |
236
+ | Blast radius / "What breaks if I change X?" | `.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md` |
237
+ | Trace bugs / "Why is X failing?" | `.claude/skills/gitnexus/gitnexus-debugging/SKILL.md` |
238
+ | Rename / extract / split / refactor | `.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md` |
239
+ | Tools, resources, schema reference | `.claude/skills/gitnexus/gitnexus-guide/SKILL.md` |
240
+ | Index, status, clean, wiki CLI commands | `.claude/skills/gitnexus/gitnexus-cli/SKILL.md` |
241
+
242
+ <!-- gitnexus:end -->
243
+
244
+ ## Deploy commands
245
+
246
+ ### Hugging Face Space (backend) — orphan push required
247
+
248
+ ```bash
249
+ git checkout master
250
+ git checkout --orphan hf-deploy-new
251
+ git add -A
252
+ git commit -m "deploy: $(git log master --oneline -1 | cut -c1-7)"
253
+ git branch -D hf-deploy
254
+ git branch -m hf-deploy-new hf-deploy
255
+ git push --force space hf-deploy:main
256
+ git checkout master
257
+ ```
258
+
259
+ **Never** use `git merge master` on hf-deploy — the repo history contains old binary files that HF rejects. The orphan commit has no parents, so none of that history is included.
260
+
261
+ ### Cloudflare Pages (frontend) — must use `--branch=main`
262
+
263
+ ```bash
264
+ cd exam-app
265
+ VITE_API_BASE_URL=https://minhtai-ai-agent-app.hf.space npm run build
266
+ npx wrangler pages deploy dist --project-name exam-app --branch=main --commit-dirty=true
267
+ ```
268
+
269
+ **Always** pass `--branch=main`. Without it, wrangler creates a **Preview** deployment (not Production), and `exam-app-ey0.pages.dev` keeps serving the old bundle. The production URL only aliases Production deployments.
270
+
271
+ **Always** set `VITE_API_BASE_URL` explicitly. `exam-app/.env.local` (used for local dev) takes precedence over `exam-app/.env` in Vite's env loading order, so omitting the explicit override bakes `localhost:8000` into the production bundle.
Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

RUN apt-get update && apt-get install -y --no-install-recommends \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Fix root→appuser cache path mismatch; must be set before bake AND kept for CMD
ENV HF_HOME=/app/.cache/huggingface

# Bake BGE-M3 into the image; avoids cold-start download (~570 MB)
RUN python -c "from FlagEmbedding import BGEM3FlagModel; BGEM3FlagModel('BAAI/bge-m3', use_fp16=False)"

COPY backend/ backend/
COPY scripts/ scripts/
COPY exam-app/src/data/ exam-app/src/data/

ENV PYTHONPATH=/app/backend:/app/scripts

# HF Spaces requires a non-root user
RUN useradd -m -u 1000 appuser && chown -R appuser /app
USER appuser

EXPOSE 7860

# Exec form: uvicorn is started directly instead of through `/bin/sh -c`, so it
# is the container's main process and receives the stop signal itself.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: AI Agent App
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ app_port: 7860
9
+ ---
10
+
11
+ # AI Agent App
12
+
13
+ Vietnamese aluminum/glass door sales chatbot + AI-powered exam backend.
14
+
15
+ Built with FastAPI + Claude via ai-router proxy.
backend/.env.example ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ANTHROPIC_BASE_URL=https://ai-router.locdo.tech
2
+ ANTHROPIC_AUTH_TOKEN=your_token_here
3
+ ANTHROPIC_DEFAULT_OPUS_MODEL=claude-opus-4.6
4
+ ANTHROPIC_DEFAULT_SONNET_MODEL=claude-sonnet-4.6
5
+ ANTHROPIC_DEFAULT_HAIKU_MODEL=claude-haiku-4.5
6
+ ALLOWED_ORIGINS=http://localhost:5173
7
+ # SQLite database path. On HF Spaces use /data/app.db (persistent storage must be enabled).
8
+ # For local dev override to e.g. ./math_wiki.db
9
+ SQLITE_PATH=/data/app.db
10
+ # Enable background wiki crawl on startup (local only; keep false on HF Spaces)
11
+ CRAWL_AUTO_SEED_ENABLED=false
12
+ # Wipe wiki_units and re-crawl from scratch; app self-disables this after one run
13
+ CRAWL_FORCE_RESEED=false
14
+ # Crawl only topics with zero wiki units (gap-fill); idempotent — safe to leave true
15
+ CRAWL_GAP_FILL_ENABLED=false
16
+ # Google OAuth 2.0 Client ID — create at console.cloud.google.com → Credentials → OAuth 2.0 Client IDs
17
+ # Add Authorised JavaScript Origins: http://localhost:5173 and your HF Space URL
18
+ GOOGLE_CLIENT_ID=your_google_client_id_here
19
+ # JWT signing secret — generate with: python -c "import secrets; print(secrets.token_hex(32))"
20
+ JWT_SECRET=your_jwt_secret_here
21
+ # Static admin key sent in X-Admin-Key header for all admin endpoints.
22
+ # Generate with: python -c "import secrets; print(secrets.token_hex(32))"
23
+ ADMIN_KEY=your_admin_key_here
backend/app/__init__.py ADDED
File without changes
backend/app/abuse_detector.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Background abuse detection loop — runs every 5 minutes, no external infrastructure needed.
3
+ Launched in lifespan() via asyncio.ensure_future().
4
+ """
5
+ import asyncio
6
+ import json
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ _INTERVAL = 300 # 5 minutes
12
+
13
+
14
async def _log_event(pool, user_id, ip, event_type, confidence, detail):
    """Insert one row into ``security_events``; never raises.

    A ``dict`` passed as *detail* is serialised to JSON text first; any other
    value is handed to the database unchanged.  Every failure (serialisation
    or database) is logged as a warning and swallowed, so audit logging can
    never break the caller.
    """
    insert_sql = (
        "INSERT INTO security_events (user_id, ip, event_type, confidence, detail) VALUES (?, ?, ?, ?, ?)"
    )
    try:
        # Serialise inside the ``try`` so a non-JSON-able dict is swallowed too.
        stored_detail = json.dumps(detail) if isinstance(detail, dict) else detail
        await pool.execute(insert_sql, user_id, ip, event_type, confidence, stored_detail)
    except Exception as exc:
        logger.warning("abuse_detector: could not log event: %s", exc)
22
+
23
+
24
async def _auto_suspend(pool, user_id, reason):
    """Suspend *user_id* and record a high-confidence ``auto_suspend`` event.

    Sets ``users.is_suspended = 1`` and stores *reason* in
    ``suspension_reason``, then writes the audit event and a warning log line.
    Any exception is logged at error level and swallowed.
    """
    suspend_sql = "UPDATE users SET is_suspended = 1, suspension_reason = ? WHERE id = ?"
    try:
        await pool.execute(suspend_sql, reason, user_id)
        await _log_event(pool, user_id, None, "auto_suspend", "high", reason)
        logger.warning("abuse_detector: AUTO-SUSPENDED user %s — %s", user_id, reason)
    except Exception as exc:
        logger.error("abuse_detector: failed to suspend user %s: %s", user_id, exc)
34
+
35
+
36
async def _flag_for_review(pool, user_id, detail):
    """Record a medium-confidence ``flagged_for_review`` event for *user_id*.

    The account itself is not modified; only a ``security_events`` row and an
    info log line are produced.  Any exception is logged and swallowed.
    """
    try:
        await _log_event(
            pool,
            user_id,
            None,  # no IP is known to the background job
            "flagged_for_review",
            "medium",
            detail,
        )
        logger.info("abuse_detector: flagged user %s for review — %s", user_id, detail)
    except Exception as exc:
        logger.error("abuse_detector: failed to flag user %s: %s", user_id, exc)
42
+
43
+
44
async def _check_credit_velocity(pool):
    """Auto-suspend users with rapid credit gains in the last hour.

    A user qualifies when ``ai_credits_log`` holds three or more rows with a
    positive ``delta`` created within the past hour, ignoring rows whose
    ``reason`` starts with ``admin_`` or ``subscription_``.  Errors are logged
    as warnings and swallowed.
    """
    velocity_sql = """SELECT user_id, COUNT(*) as gains
               FROM ai_credits_log
               WHERE delta > 0
                 AND reason NOT LIKE 'admin_%'
                 AND reason NOT LIKE 'subscription_%'
                 AND created_at > datetime('now', '-1 hour')
               GROUP BY user_id HAVING gains >= 3"""
    try:
        offenders = await pool.fetch(velocity_sql)
        for offender in offenders:
            await _auto_suspend(
                pool,
                offender["user_id"],
                "credit_velocity: rapid credit gains detected",
            )
    except Exception as exc:
        logger.warning("abuse_detector: credit_velocity check error: %s", exc)
60
+
61
+
62
async def _check_burst_patterns(pool):
    """Auto-suspend users with more than 100 credit-log rows in 10 minutes.

    Every ``ai_credits_log`` row created in the last ten minutes is counted
    per user (the query does not filter on ``delta`` or ``reason``); users
    above 100 rows are suspended with the count in the reason text.  Errors
    are logged as warnings and swallowed.
    """
    burst_sql = """SELECT user_id, COUNT(*) as cnt
               FROM ai_credits_log
               WHERE created_at > datetime('now', '-10 minutes')
               GROUP BY user_id HAVING cnt > 100"""
    try:
        offenders = await pool.fetch(burst_sql)
        for offender in offenders:
            reason = f"burst_pattern: {offender['cnt']} AI requests in 10 minutes"
            await _auto_suspend(pool, offender["user_id"], reason)
    except Exception as exc:
        logger.warning("abuse_detector: burst_patterns check error: %s", exc)
78
+
79
+
80
async def _check_score_anomalies(pool):
    """Flag users with more than three perfect-score exams in 30 minutes.

    Counts ``exam_results`` rows with ``score = 10`` from the last half hour
    per user and flags (does not suspend) anyone above three.  Errors are
    logged as warnings and swallowed.
    """
    perfect_sql = """SELECT user_id, COUNT(*) as cnt
               FROM exam_results
               WHERE score = 10 AND created_at > datetime('now', '-30 minutes')
               GROUP BY user_id HAVING cnt > 3"""
    try:
        suspects = await pool.fetch(perfect_sql)
        for suspect in suspects:
            detail = f"score_anomaly: {suspect['cnt']} perfect-score exams in 30 minutes"
            await _flag_for_review(pool, suspect["user_id"], detail)
    except Exception as exc:
        logger.warning("abuse_detector: score_anomalies check error: %s", exc)
96
+
97
+
98
async def _check_new_account_burst(pool):
    """Flag young accounts whose credit balance is already zero.

    Selects non-suspended users created less than two hours ago with
    ``credits_balance = 0`` and flags each one for review.  Errors are logged
    as warnings and swallowed.

    NOTE(review): nothing here records that a user was already flagged, so the
    same account appears to be flagged again on every run while it matches.
    """
    exhausted_sql = """SELECT id FROM users
               WHERE credits_balance = 0
                 AND created_at > datetime('now', '-2 hours')
                 AND is_suspended = 0"""
    try:
        fresh_accounts = await pool.fetch(exhausted_sql)
        for account in fresh_accounts:
            await _flag_for_review(
                pool,
                account["id"],
                "new_account_burst: new account exhausted credits within 2 hours",
            )
    except Exception as exc:
        logger.warning("abuse_detector: new_account_burst check error: %s", exc)
114
+
115
+
116
async def _check_behavioral_anomalies(pool):
    """Log behaviour anomalies from exam telemetry and lock repeat offenders.

    For each user, ``tab_switches`` is summed and ``devtools_detected`` is
    maxed over the JSON ``payload`` of all ``exam_results`` rows from the last
    day.  Users with more than 10 tab switches in total, or any DevTools
    detection, get a medium-confidence ``behavior_anomaly`` event.  If the
    user also has at least one high-confidence security event in the same
    day, the account is locked (only when not already locked), an
    ``auto_lock`` event is written and the account cache entry is
    invalidated.  Errors are logged as warnings and swallowed.
    """
    anomaly_sql = """SELECT user_id,
                      SUM(CAST(json_extract(payload, '$.tab_switches') AS INTEGER)) AS total_tabs,
                      MAX(CAST(json_extract(payload, '$.devtools_detected') AS INTEGER)) AS any_devtools
               FROM exam_results
               WHERE created_at > datetime('now', '-1 day')
               GROUP BY user_id
               HAVING total_tabs > 10 OR any_devtools = 1"""
    recent_high_sql = """SELECT COUNT(*) AS cnt FROM security_events
                       WHERE user_id = ? AND confidence = 'high'
                         AND created_at > datetime('now', '-1 day')"""
    lock_sql = "UPDATE users SET is_locked = 1, lock_reason = ? WHERE id = ? AND is_locked = 0"
    try:
        for row in await pool.fetch(anomaly_sql):
            reason = f"behavior_anomaly: tab_switches={row['total_tabs']}, devtools={row['any_devtools']}"
            await _log_event(pool, row["user_id"], None, "behavior_anomaly", "medium", reason)

            # Escalate only when a HIGH event already exists in the same window.
            high_events = await pool.fetchrow(recent_high_sql, row["user_id"])
            if not (high_events and high_events["cnt"] > 0):
                continue

            await pool.execute(lock_sql, f"auto-lock: {reason}", row["user_id"])
            await _log_event(pool, row["user_id"], None, "auto_lock", "high", f"auto-lock: {reason}")
            # Imported lazily, as in the original; presumably to avoid an import cycle.
            from app.dependencies import invalidate_account_cache
            invalidate_account_cache(row["user_id"])
    except Exception as exc:
        logger.warning("abuse_detector: behavioral_anomalies check error: %s", exc)
148
+
149
+
150
async def _auto_lock_on_high_confidence(pool):
    """Lock users that have a recent high-confidence abuse event.

    Looks at ``security_events`` from the last hour with confidence ``high``
    and an event type of ``credit_velocity``, ``burst_pattern`` or
    ``exam_anomaly``.  Each matching user is locked unless already locked;
    when the UPDATE returns a truthy result an ``auto_lock`` event is logged
    and the account cache entry is invalidated.  Errors are logged as
    warnings and swallowed.

    NOTE(review): the functions visible in this module only log the event
    types ``auto_suspend``, ``flagged_for_review``, ``behavior_anomaly``,
    ``auto_lock`` and ``auto_deactivated`` — confirm that the three types
    queried here are written somewhere else.
    """
    candidates_sql = """SELECT DISTINCT user_id FROM security_events
               WHERE confidence = 'high'
                 AND event_type IN ('credit_velocity', 'burst_pattern', 'exam_anomaly')
                 AND created_at > datetime('now', '-1 hour')"""
    lock_sql = (
        "UPDATE users SET is_locked = 1, lock_reason = 'auto-lock: high-confidence abuse' WHERE id = ? AND is_locked = 0"
    )
    try:
        for candidate in await pool.fetch(candidates_sql):
            result = await pool.execute(lock_sql, candidate["user_id"])
            if not result:
                continue
            await _log_event(
                pool, candidate["user_id"], None, "auto_lock", "high",
                "auto-lock: high-confidence abuse event",
            )
            # Imported lazily, as in the original; presumably to avoid an import cycle.
            from app.dependencies import invalidate_account_cache
            invalidate_account_cache(candidate["user_id"])
    except Exception as exc:
        logger.warning("abuse_detector: auto_lock check error: %s", exc)
170
+
171
+
172
+ _DORMANT_DAYS = 365
173
+ _DELETION_WARNING_DAYS = 30
174
+
175
+
176
async def _mark_dormant_accounts(pool):
    """Phase 1 of dormant-account cleanup: schedule deactivation.

    Sets ``pending_deletion_at`` to now + ``_DELETION_WARNING_DAYS`` for
    basic-tier users that are not suspended, locked or deactivated, have no
    pending date yet, and whose ``last_seen_at`` is NULL or older than
    ``_DORMANT_DAYS``.  Errors are logged as warnings and swallowed.

    NOTE(review): a NULL ``last_seen_at`` also matches, so an account that was
    never "seen" is scheduled regardless of its age — confirm this is intended.
    """
    # Only module-level integer constants are interpolated; no user input reaches the SQL text.
    schedule_sql = f"""UPDATE users
               SET pending_deletion_at = datetime('now', '+{_DELETION_WARNING_DAYS} days')
               WHERE subscription_tier = 'basic'
                 AND is_suspended = 0 AND is_locked = 0 AND is_deactivated = 0
                 AND pending_deletion_at IS NULL
                 AND (last_seen_at IS NULL
                      OR last_seen_at < datetime('now', '-{_DORMANT_DAYS} days'))
            """
    try:
        await pool.execute(schedule_sql)
    except Exception as exc:
        logger.warning("abuse_detector: mark_dormant error: %s", exc)
191
+
192
+
193
async def _deactivate_expired_pending(pool):
    """Phase 2 of dormant-account cleanup: deactivate overdue accounts.

    Every user whose ``pending_deletion_at`` lies in the past and who is not
    yet deactivated gets ``is_deactivated = 1``, a low-confidence
    ``auto_deactivated`` event and an info log line.  Errors are logged as
    warnings and swallowed.
    """
    overdue_sql = """SELECT id FROM users
               WHERE pending_deletion_at IS NOT NULL
                 AND pending_deletion_at < datetime('now')
                 AND is_deactivated = 0"""
    deactivate_sql = "UPDATE users SET is_deactivated = 1 WHERE id = ?"
    try:
        overdue = await pool.fetch(overdue_sql)
        for account in overdue:
            await pool.execute(deactivate_sql, account["id"])
            await _log_event(
                pool, account["id"], None, "auto_deactivated", "low",
                f"dormant account — no login for {_DORMANT_DAYS} days",
            )
            logger.info("abuse_detector: deactivated dormant account %s", account["id"])
    except Exception as exc:
        logger.warning("abuse_detector: deactivate_expired_pending error: %s", exc)
212
+
213
+
214
async def _run_abuse_detector(pool):
    """Run all abuse and housekeeping checks forever, every ``_INTERVAL`` seconds.

    Each cycle sleeps first and then awaits the checks one after another in a
    fixed order.  Cancelling the task ends the loop cleanly.  Any other
    exception is logged together with its traceback and the loop carries on
    with the next cycle, so one failing run cannot stop the detector.
    """
    # Order matters: detection runs before the auto-lock pass, housekeeping last.
    checks = (
        _check_credit_velocity,
        _check_burst_patterns,
        _check_score_anomalies,
        _check_new_account_burst,
        _check_behavioral_anomalies,
        _auto_lock_on_high_confidence,
        _mark_dormant_accounts,
        _deactivate_expired_pending,
    )
    logger.info("abuse_detector: starting background loop (interval=%ds)", _INTERVAL)
    while True:
        try:
            await asyncio.sleep(_INTERVAL)
            for check in checks:
                await check(pool)
        except asyncio.CancelledError:
            logger.info("abuse_detector: loop cancelled, shutting down")
            break
        except Exception as exc:
            # exc_info keeps the traceback; the message alone rarely identifies the failing check.
            logger.error("abuse_detector: unhandled error in loop: %s", exc, exc_info=True)
backend/app/admin_auth.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hmac
2
+ import hashlib
3
+ import datetime
4
+
5
+
6
+ def get_window_label(period: str, offset: int = 0) -> str:
7
+ today = datetime.date.today()
8
+ if period == "daily":
9
+ return (today - datetime.timedelta(days=offset)).strftime("%Y-%m-%d")
10
+ elif period == "weekly":
11
+ return (today - datetime.timedelta(weeks=offset)).strftime("%Y-W%W")
12
+ elif period == "monthly":
13
+ year, month = today.year, today.month
14
+ month -= offset
15
+ while month <= 0:
16
+ month += 12
17
+ year -= 1
18
+ return f"{year}-{month:02d}"
19
+ elif period == "quarterly":
20
+ q = ((today.month - 1) // 3) + 1
21
+ year = today.year
22
+ q -= offset
23
+ while q <= 0:
24
+ q += 4
25
+ year -= 1
26
+ return f"{year}-Q{q}"
27
+ elif period == "annual":
28
+ return str(today.year - offset)
29
+ return (today - datetime.timedelta(weeks=offset)).strftime("%Y-W%W")
30
+
31
+
32
+ def get_expiry_date(period: str) -> str:
33
+ today = datetime.date.today()
34
+ if period == "daily":
35
+ return (today + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
36
+ elif period == "weekly":
37
+ days_until_next = (7 - today.weekday()) % 7 or 7
38
+ return (today + datetime.timedelta(days=days_until_next)).strftime("%Y-%m-%d")
39
+ elif period == "monthly":
40
+ if today.month == 12:
41
+ return f"{today.year + 1}-01-01"
42
+ return f"{today.year}-{today.month + 1:02d}-01"
43
+ elif period == "quarterly":
44
+ q = ((today.month - 1) // 3) + 1
45
+ next_q_month = q * 3 + 1
46
+ if next_q_month > 12:
47
+ return f"{today.year + 1}-01-01"
48
+ return f"{today.year}-{next_q_month:02d}-01"
49
+ elif period == "annual":
50
+ return f"{today.year + 1}-01-01"
51
+ days_until_next = (7 - today.weekday()) % 7 or 7
52
+ return (today + datetime.timedelta(days=days_until_next)).strftime("%Y-%m-%d")
53
+
54
+
55
def derive_key(master: str, label: str) -> str:
    """Return the HMAC-SHA256 of *label*, keyed with *master*, as hex text."""
    mac = hmac.new(master.encode(), msg=label.encode(), digestmod=hashlib.sha256)
    return mac.hexdigest()
57
+
58
+
59
def validate_admin_key(provided: str, master: str, period: str) -> bool:
    """Check *provided* against the derived key of the current or previous window.

    Returns ``False`` when either *master* or *provided* is empty.  Otherwise
    the key derived from *master* for the current window of *period* is
    compared first, then the one for the window before it; comparisons use
    ``hmac.compare_digest``.
    """
    if not (master and provided):
        return False
    candidate = provided.encode()
    # offset 0 = current window, offset 1 = previous window (grace period)
    for window_offset in (0, 1):
        expected = derive_key(master, get_window_label(period, offset=window_offset))
        if hmac.compare_digest(candidate, expected.encode()):
            return True
    return False
backend/app/agent/__init__.py ADDED
File without changes
backend/app/agent/core.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from openai import AsyncOpenAI
2
+ from tenacity import retry, stop_after_attempt, wait_exponential
3
+
4
+
5
# Up to 3 attempts in total, with an exponential wait bounded to 1-10 seconds.
@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=10))
async def call_with_retry(client: AsyncOpenAI, **kwargs):
    """Call ``client.chat.completions.create(**kwargs)`` under the retry policy.

    The decorator is given no ``retry=`` filter, so presumably every exception
    type triggers a retry, including non-transient ones — TODO confirm that is
    intended.  ``reraise`` is not set either, so after the final attempt the
    failure presumably surfaces as tenacity's ``RetryError`` rather than the
    original exception — verify against callers.
    """
    return await client.chat.completions.create(**kwargs)
backend/app/agent/exam_analyzer.py ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from openai import AsyncOpenAI
3
+ from app.config import get_settings
4
+ from app.agent.core import call_with_retry
5
+
6
+ THPT_ANALYSIS_CONTEXT = """
7
+ Khi phân tích kết quả thi THPT:
8
+ - Đề cập đến phân phối điểm chuẩn vào đại học theo tỉnh thành
9
+ - Nhấn mạnh rằng 8.0+ thường cần thiết cho trường top
10
+ - Chỉ ra xu hướng đề thi theo năm (khó hơn ở phần hình học không gian và tích phân)
11
+ - Ưu tiên gợi ý trường phù hợp với điểm thực tế, không chỉ trường mơ ước
12
+ """
13
+
14
+ STATIC_EXAM_ANALYSIS_INSTRUCTIONS = THPT_ANALYSIS_CONTEXT + """Bạn là chuyên gia phân tích kết quả học tập cho học sinh ôn thi Toán.
15
+ Phân tích kết quả thi, các câu trả lời đúng/sai cụ thể, và gợi ý trường phù hợp dựa trên điểm số.
16
+ Trả lời bằng tiếng Việt. Luôn trả về JSON hợp lệ theo đúng định dạng yêu cầu, không có text ngoài JSON."""
17
+
18
+ # Per-province difficulty data (mirrors provincialData.js)
19
+ # topic_weights: approximate % share of each topic in recent provincial grade-9 math exams.
20
+ # Derived from analysis of 2021–2024 provincial exam papers. Topics with higher % are
21
+ # higher priority in Recovery Path focus area selection.
22
+ _PROVINCE_DATA = {
23
+ 'Hà Nội': {
24
+ 'difficulty': 4, 'typical_cutoff': 8.0, 'top_schools_cutoff': 9.2,
25
+ 'topic_weights': {'calculus': 18, 'functions': 15, 'logarithm': 12, 'algebra': 14, 'geometry': 12, 'combinatorics': 10, 'hệ phương trình': 8, 'statistics': 6, 'sequences': 5},
26
+ },
27
+ 'TP.HCM': {
28
+ 'difficulty': 4, 'typical_cutoff': 7.8, 'top_schools_cutoff': 9.0,
29
+ 'topic_weights': {'calculus': 16, 'functions': 15, 'algebra': 15, 'geometry': 13, 'logarithm': 10, 'combinatorics': 10, 'hệ phương trình': 8, 'statistics': 7, 'trigonometry': 6},
30
+ },
31
+ 'Đà Nẵng': {
32
+ 'difficulty': 3, 'typical_cutoff': 7.2, 'top_schools_cutoff': 8.5,
33
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'functions': 13, 'calculus': 10, 'hệ phương trình': 10, 'statistics': 9, 'number_theory': 8, 'logarithm': 7, 'combinatorics': 5},
34
+ },
35
+ 'Hải Phòng': {
36
+ 'difficulty': 3, 'typical_cutoff': 7.0, 'top_schools_cutoff': 8.2,
37
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'hệ phương trình': 12, 'functions': 12, 'statistics': 9, 'number_theory': 9, 'calculus': 8, 'logarithm': 7, 'combinatorics': 5},
38
+ },
39
+ 'Cần Thơ': {
40
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 8.0,
41
+ 'topic_weights': {'algebra': 22, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 10, 'statistics': 9, 'number_theory': 8, 'calculus': 6, 'căn thức': 5},
42
+ },
43
+ 'Bình Dương': {
44
+ 'difficulty': 3, 'typical_cutoff': 7.0, 'top_schools_cutoff': 8.2,
45
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'hệ phương trình': 12, 'functions': 12, 'statistics': 9, 'number_theory': 9, 'calculus': 8, 'logarithm': 7, 'combinatorics': 5},
46
+ },
47
+ 'Đồng Nai': {
48
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 8.0,
49
+ 'topic_weights': {'algebra': 22, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 10, 'statistics': 9, 'number_theory': 8, 'calculus': 6, 'căn thức': 5},
50
+ },
51
+ 'Khánh Hòa': {
52
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 7.8,
53
+ 'topic_weights': {'algebra': 22, 'geometry': 18, 'hệ phương trình': 11, 'arithmetic': 10, 'functions': 10, 'statistics': 9, 'number_theory': 8, 'calculus': 7, 'căn thức': 5},
54
+ },
55
+ 'Nghệ An': {
56
+ 'difficulty': 3, 'typical_cutoff': 6.6, 'top_schools_cutoff': 7.8,
57
+ 'topic_weights': {'algebra': 22, 'geometry': 19, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 10, 'statistics': 8, 'number_theory': 8, 'calculus': 6, 'căn thức': 5},
58
+ },
59
+ 'Thanh Hóa': {
60
+ 'difficulty': 2, 'typical_cutoff': 6.4, 'top_schools_cutoff': 7.5,
61
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
62
+ },
63
+ 'Hà Tĩnh': {
64
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 7.8,
65
+ 'topic_weights': {'algebra': 22, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 10, 'statistics': 9, 'number_theory': 8, 'calculus': 6, 'căn thức': 5},
66
+ },
67
+ 'Bắc Ninh': {
68
+ 'difficulty': 3, 'typical_cutoff': 7.0, 'top_schools_cutoff': 8.2,
69
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'hệ phương trình': 12, 'functions': 12, 'statistics': 9, 'number_theory': 9, 'calculus': 8, 'logarithm': 7, 'combinatorics': 5},
70
+ },
71
+ 'Vĩnh Phúc': {
72
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 7.8,
73
+ 'topic_weights': {'algebra': 21, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 10, 'statistics': 9, 'number_theory': 8, 'calculus': 7, 'căn thức': 5},
74
+ },
75
+ 'Hà Giang': {
76
+ 'difficulty': 1, 'typical_cutoff': 5.8, 'top_schools_cutoff': 6.8,
77
+ 'topic_weights': {'algebra': 30, 'geometry': 25, 'arithmetic': 15, 'hệ phương trình': 10, 'statistics': 8, 'functions': 7, 'number_theory': 5},
78
+ },
79
+ 'Điện Biên': {
80
+ 'difficulty': 1, 'typical_cutoff': 5.6, 'top_schools_cutoff': 6.6,
81
+ 'topic_weights': {'algebra': 32, 'geometry': 26, 'arithmetic': 16, 'hệ phương trình': 10, 'statistics': 8, 'functions': 5, 'number_theory': 3},
82
+ },
83
+ 'Lai Châu': {
84
+ 'difficulty': 1, 'typical_cutoff': 5.6, 'top_schools_cutoff': 6.6,
85
+ 'topic_weights': {'algebra': 32, 'geometry': 26, 'arithmetic': 16, 'hệ phương trình': 10, 'statistics': 8, 'functions': 5, 'number_theory': 3},
86
+ },
87
+ 'Sơn La': {
88
+ 'difficulty': 1, 'typical_cutoff': 5.8, 'top_schools_cutoff': 6.8,
89
+ 'topic_weights': {'algebra': 30, 'geometry': 25, 'arithmetic': 15, 'hệ phương trình': 10, 'statistics': 8, 'functions': 7, 'number_theory': 5},
90
+ },
91
+ 'Cà Mau': {
92
+ 'difficulty': 1, 'typical_cutoff': 5.8, 'top_schools_cutoff': 6.8,
93
+ 'topic_weights': {'algebra': 30, 'geometry': 25, 'arithmetic': 15, 'hệ phương trình': 10, 'statistics': 8, 'functions': 7, 'number_theory': 5},
94
+ },
95
+ 'Kiên Giang': {
96
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
97
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
98
+ },
99
+ 'Bà Rịa - Vũng Tàu': {
100
+ 'difficulty': 3, 'typical_cutoff': 7.0, 'top_schools_cutoff': 8.0,
101
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'hệ phương trình': 12, 'functions': 12, 'statistics': 9, 'number_theory': 9, 'calculus': 8, 'logarithm': 7, 'combinatorics': 5},
102
+ },
103
+ # ── Difficulty-3 provinces (Khá) ──────────────────────────────────────────
104
+ 'Thừa Thiên - Huế': {
105
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 8.0,
106
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'functions': 13, 'calculus': 10, 'hệ phương trình': 10, 'statistics': 9, 'number_theory': 9, 'logarithm': 7, 'combinatorics': 4},
107
+ },
108
+ 'Quảng Ninh': {
109
+ 'difficulty': 3, 'typical_cutoff': 7.0, 'top_schools_cutoff': 8.2,
110
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'functions': 12, 'hệ phương trình': 12, 'statistics': 9, 'number_theory': 9, 'calculus': 8, 'logarithm': 7, 'combinatorics': 5},
111
+ },
112
+ 'Nam Định': {
113
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 7.8,
114
+ 'topic_weights': {'algebra': 21, 'geometry': 18, 'hệ phương trình': 12, 'functions': 12, 'statistics': 9, 'number_theory': 9, 'calculus': 8, 'logarithm': 6, 'combinatorics': 5},
115
+ },
116
+ 'Ninh Bình': {
117
+ 'difficulty': 3, 'typical_cutoff': 6.6, 'top_schools_cutoff': 7.8,
118
+ 'topic_weights': {'algebra': 21, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 11, 'statistics': 9, 'number_theory': 8, 'calculus': 7, 'căn thức': 4},
119
+ },
120
+ 'Hải Dương': {
121
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 7.8,
122
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'hệ phương trình': 12, 'functions': 12, 'statistics': 9, 'number_theory': 9, 'calculus': 8, 'logarithm': 7, 'combinatorics': 5},
123
+ },
124
+ 'Hưng Yên': {
125
+ 'difficulty': 3, 'typical_cutoff': 7.0, 'top_schools_cutoff': 8.0,
126
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'hệ phương trình': 12, 'functions': 12, 'statistics': 9, 'number_theory': 9, 'calculus': 8, 'logarithm': 7, 'combinatorics': 5},
127
+ },
128
+ 'Hà Nam': {
129
+ 'difficulty': 3, 'typical_cutoff': 6.6, 'top_schools_cutoff': 7.6,
130
+ 'topic_weights': {'algebra': 21, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 11, 'statistics': 9, 'number_theory': 8, 'calculus': 7, 'căn thức': 4},
131
+ },
132
+ 'Thái Bình': {
133
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 7.8,
134
+ 'topic_weights': {'algebra': 21, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 11, 'statistics': 9, 'number_theory': 8, 'calculus': 7, 'căn thức': 4},
135
+ },
136
+ 'Lâm Đồng': {
137
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 7.8,
138
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 11, 'statistics': 9, 'number_theory': 8, 'calculus': 7, 'căn thức': 5},
139
+ },
140
+ 'Thái Nguyên': {
141
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 7.8,
142
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 11, 'statistics': 9, 'number_theory': 8, 'calculus': 7, 'căn thức': 5},
143
+ },
144
+ 'Bình Định': {
145
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 7.8,
146
+ 'topic_weights': {'algebra': 21, 'geometry': 18, 'hệ phương trình': 11, 'arithmetic': 10, 'functions': 11, 'statistics': 9, 'number_theory': 8, 'calculus': 7, 'căn thức': 5},
147
+ },
148
+ 'Quảng Nam': {
149
+ 'difficulty': 3, 'typical_cutoff': 6.6, 'top_schools_cutoff': 7.6,
150
+ 'topic_weights': {'algebra': 22, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 10, 'statistics': 9, 'number_theory': 8, 'calculus': 6, 'căn thức': 5},
151
+ },
152
+ 'Phú Thọ': {
153
+ 'difficulty': 3, 'typical_cutoff': 6.8, 'top_schools_cutoff': 7.8,
154
+ 'topic_weights': {'algebra': 20, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 11, 'statistics': 9, 'number_theory': 8, 'calculus': 7, 'căn thức': 5},
155
+ },
156
+ 'Bắc Giang': {
157
+ 'difficulty': 3, 'typical_cutoff': 6.6, 'top_schools_cutoff': 7.6,
158
+ 'topic_weights': {'algebra': 21, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 11, 'statistics': 9, 'number_theory': 8, 'calculus': 7, 'căn thức': 4},
159
+ },
160
+ 'Quảng Bình': {
161
+ 'difficulty': 3, 'typical_cutoff': 6.6, 'top_schools_cutoff': 7.6,
162
+ 'topic_weights': {'algebra': 22, 'geometry': 18, 'hệ phương trình': 12, 'arithmetic': 10, 'functions': 10, 'statistics': 9, 'number_theory': 8, 'calculus': 6, 'căn thức': 5},
163
+ },
164
+ # ── Difficulty-2 provinces (Trung bình) ───────────────────────────────────
165
+ 'An Giang': {
166
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
167
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
168
+ },
169
+ 'Bạc Liêu': {
170
+ 'difficulty': 2, 'typical_cutoff': 6.0, 'top_schools_cutoff': 7.0,
171
+ 'topic_weights': {'algebra': 26, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 9, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
172
+ },
173
+ 'Bến Tre': {
174
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
175
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
176
+ },
177
+ 'Bình Phước': {
178
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
179
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
180
+ },
181
+ 'Bình Thuận': {
182
+ 'difficulty': 2, 'typical_cutoff': 6.4, 'top_schools_cutoff': 7.4,
183
+ 'topic_weights': {'algebra': 25, 'geometry': 21, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 8, 'căn thức': 5},
184
+ },
185
+ 'Đắk Lắk': {
186
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
187
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
188
+ },
189
+ 'Đắk Nông': {
190
+ 'difficulty': 2, 'typical_cutoff': 6.0, 'top_schools_cutoff': 7.0,
191
+ 'topic_weights': {'algebra': 26, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 9, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
192
+ },
193
+ 'Đồng Tháp': {
194
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
195
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
196
+ },
197
+ 'Gia Lai': {
198
+ 'difficulty': 2, 'typical_cutoff': 6.0, 'top_schools_cutoff': 7.0,
199
+ 'topic_weights': {'algebra': 26, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 9, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
200
+ },
201
+ 'Hậu Giang': {
202
+ 'difficulty': 2, 'typical_cutoff': 6.0, 'top_schools_cutoff': 7.0,
203
+ 'topic_weights': {'algebra': 26, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 9, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
204
+ },
205
+ 'Hòa Bình': {
206
+ 'difficulty': 2, 'typical_cutoff': 6.0, 'top_schools_cutoff': 7.0,
207
+ 'topic_weights': {'algebra': 26, 'geometry': 23, 'hệ phương trình': 11, 'arithmetic': 12, 'functions': 9, 'statistics': 8, 'number_theory': 6, 'căn thức': 5},
208
+ },
209
+ 'Kon Tum': {
210
+ 'difficulty': 2, 'typical_cutoff': 5.8, 'top_schools_cutoff': 6.8,
211
+ 'topic_weights': {'algebra': 27, 'geometry': 23, 'arithmetic': 13, 'hệ phương trình': 11, 'statistics': 8, 'functions': 8, 'number_theory': 6, 'căn thức': 4},
212
+ },
213
+ 'Lạng Sơn': {
214
+ 'difficulty': 2, 'typical_cutoff': 6.0, 'top_schools_cutoff': 7.0,
215
+ 'topic_weights': {'algebra': 26, 'geometry': 23, 'hệ phương trình': 11, 'arithmetic': 12, 'functions': 9, 'statistics': 8, 'number_theory': 6, 'căn thức': 5},
216
+ },
217
+ 'Lào Cai': {
218
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
219
+ 'topic_weights': {'algebra': 25, 'geometry': 23, 'hệ phương trình': 11, 'arithmetic': 12, 'functions': 9, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
220
+ },
221
+ 'Long An': {
222
+ 'difficulty': 2, 'typical_cutoff': 6.4, 'top_schools_cutoff': 7.4,
223
+ 'topic_weights': {'algebra': 25, 'geometry': 21, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 8, 'căn thức': 5},
224
+ },
225
+ 'Ninh Thuận': {
226
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
227
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
228
+ },
229
+ 'Phú Yên': {
230
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
231
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
232
+ },
233
+ 'Quảng Ngãi': {
234
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
235
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
236
+ },
237
+ 'Quảng Trị': {
238
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
239
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
240
+ },
241
+ 'Sóc Trăng': {
242
+ 'difficulty': 2, 'typical_cutoff': 6.0, 'top_schools_cutoff': 7.0,
243
+ 'topic_weights': {'algebra': 26, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 9, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
244
+ },
245
+ 'Tây Ninh': {
246
+ 'difficulty': 2, 'typical_cutoff': 6.4, 'top_schools_cutoff': 7.4,
247
+ 'topic_weights': {'algebra': 25, 'geometry': 21, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 8, 'căn thức': 5},
248
+ },
249
+ 'Tiền Giang': {
250
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
251
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
252
+ },
253
+ 'Tuyên Quang': {
254
+ 'difficulty': 2, 'typical_cutoff': 5.8, 'top_schools_cutoff': 6.8,
255
+ 'topic_weights': {'algebra': 27, 'geometry': 23, 'arithmetic': 13, 'hệ phương trình': 11, 'statistics': 8, 'functions': 8, 'number_theory': 5, 'căn thức': 5},
256
+ },
257
+ 'Vĩnh Long': {
258
+ 'difficulty': 2, 'typical_cutoff': 6.2, 'top_schools_cutoff': 7.2,
259
+ 'topic_weights': {'algebra': 25, 'geometry': 22, 'hệ phương trình': 12, 'arithmetic': 11, 'functions': 10, 'statistics': 8, 'number_theory': 7, 'căn thức': 5},
260
+ },
261
+ 'Yên Bái': {
262
+ 'difficulty': 2, 'typical_cutoff': 5.8, 'top_schools_cutoff': 6.8,
263
+ 'topic_weights': {'algebra': 27, 'geometry': 23, 'arithmetic': 14, 'hệ phương trình': 11, 'statistics': 8, 'functions': 7, 'number_theory': 5, 'căn thức': 5},
264
+ },
265
+ # ── Difficulty-1 provinces (Dễ / vùng cao) ───────────────────────────────
266
+ 'Bắc Kạn': {
267
+ 'difficulty': 1, 'typical_cutoff': 5.8, 'top_schools_cutoff': 6.8,
268
+ 'topic_weights': {'algebra': 30, 'geometry': 25, 'arithmetic': 15, 'hệ phương trình': 10, 'statistics': 8, 'functions': 7, 'number_theory': 5},
269
+ },
270
+ 'Cao Bằng': {
271
+ 'difficulty': 1, 'typical_cutoff': 5.6, 'top_schools_cutoff': 6.6,
272
+ 'topic_weights': {'algebra': 32, 'geometry': 25, 'arithmetic': 15, 'hệ phương trình': 10, 'statistics': 8, 'functions': 5, 'number_theory': 5},
273
+ },
274
+ }
275
+ _DIFFICULTY_LABELS = {1: 'Dễ', 2: 'Trung bình', 3: 'Khá', 4: 'Khó', 5: 'Rất khó'}
276
+
277
+
278
def _get_province_context(province: str | None) -> str:
    """Build the one-line calibration hint for the analysis prompt.

    Returns a generic national-average sentence when *province* is empty or
    has no entry in ``_PROVINCE_DATA``; otherwise a sentence with the
    province's difficulty label and its two cutoff scores.
    """
    entry = _PROVINCE_DATA.get(province) if province else None
    if entry is None:
        return "National average THPT Math 2024: 6.51. Calibrate recommendations to general Vietnamese exam standards."

    difficulty = entry['difficulty']
    label = _DIFFICULTY_LABELS.get(difficulty, 'Trung bình')
    segments = [
        f"Province: {province} | Difficulty: {label} ({difficulty}/5) | ",
        f"Typical Math cutoff: {entry['typical_cutoff']} | Top schools require: {entry['top_schools_cutoff']}+ | ",
        f"National avg: 6.51. Calibrate school recommendations to {province} standards specifically.",
    ]
    return "".join(segments)
288
+
289
+
290
def _strip_code_fence(text: str) -> str:
    """Remove a surrounding Markdown code fence from a model reply.

    Args:
        text: Raw reply text, possibly wrapped in ``` or ```json fences.

    Returns:
        The content of the first fenced section with any ``json`` language
        tag removed, or the whitespace-stripped input when no fence opens it.
    """
    # Replies frequently start with a newline or spaces before the fence, so
    # normalise first; otherwise the fence would go undetected.
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned
    # Splitting on the fence gives ['', <first fenced body>, ...].
    parts = cleaned.split("```")
    body = parts[1] if len(parts) > 1 else cleaned
    # The language tag may be written as "json", "JSON", "Json", ...
    if body[:4].lower() == "json":
        body = body[4:]
    return body.strip()
297
+
298
+
299
def build_analyze_prompt(
    result: dict,
    history: list[dict],
    student_name: str = "",
    wrong_questions: list[dict] | None = None,
    school_recommendations: list[dict] | None = None,
    exam_category: str = "",
    user_profile: dict | None = None,
    learner_archetype: str | None = None,
    device_province: str | None = None,
) -> str:
    """Assemble the user prompt for the exam-analysis model call.

    Args:
        result: Exam result; reads ``score``, ``accuracy`` and ``topicBreakdown``.
        history: Earlier results; the last five scores are listed when at
            least two entries exist.
        student_name: Optional name placed at the top of the prompt.
        wrong_questions: Missed questions; the first five are summarised.
        school_recommendations: Pre-computed school matches; the first six are
            listed and a ``schools`` section is added to the requested JSON.
        exam_category: Accepted for interface compatibility; not used here.
        user_profile: Reads ``grade`` and ``province`` (falling back to
            ``location``).
        learner_archetype: Optional learner type label.
        device_province: Province detected from the device; it supplements
            the profile province rather than replacing it.

    Returns:
        The dynamic context lines followed by the JSON response template.
    """
    profile = user_profile or {}
    topic_breakdown = result.get("topicBreakdown", {})
    weak_topics = [t for t, tb in topic_breakdown.items() if tb.get("accuracy", 1) < 0.6]

    dynamic_parts = []
    if student_name:
        dynamic_parts.append(f"Học sinh: {student_name}")
    dynamic_parts.append(f"Điểm: {result.get('score', 0)}/10")
    dynamic_parts.append(f"Độ chính xác: {round(result.get('accuracy', 0) * 100)}%")
    dynamic_parts.append(f"Chủ đề yếu (< 60%): {', '.join(weak_topics) or 'Không có'}")
    dynamic_parts.append(f"Chi tiết theo chủ đề: {json.dumps(topic_breakdown, ensure_ascii=False)}")
    if len(history) >= 2:
        recent_scores = [r.get("score", 0) for r in history[-5:]]
        dynamic_parts.append(f"Điểm gần đây: {recent_scores}")

    if wrong_questions:
        wrong_summary = [
            {
                "topic": q.get("topic"),
                "difficulty": q.get("difficulty"),
                # A stored null question must not crash the slice.
                "question": (q.get("question") or "")[:80],
            }
            for q in wrong_questions[:5]
        ]
        dynamic_parts.append(f"Câu sai ({len(wrong_questions)} câu, ví dụ): {json.dumps(wrong_summary, ensure_ascii=False)}")

    # A null grade must become "" rather than the literal string "None".
    raw_grade = profile.get("grade")
    grade = "" if raw_grade is None else str(raw_grade)
    province = profile.get("province") or profile.get("location") or ""

    # Grade <= 9 means the student targets the grade-10 entrance exam (high
    # school); anything else is treated as university admission.
    # isdecimal() only accepts characters that int() can parse.
    targets_grade_10 = grade.isdecimal() and int(grade) <= 9

    if school_recommendations:
        school_list = [
            f"{s['school']['name']} ({s['matchStrength']}, điểm chuẩn Toán: {s['cutoff']})"
            for s in school_recommendations[:6]
        ]
        if targets_grade_10:
            exam_type = "lớp 10"
            school_type_note = "trường THPT"
        else:
            exam_type = "đại học/THPT"
            school_type_note = "trường đại học/cao đẳng"
        loc_note = f" tại {province}" if province else ""
        dynamic_parts.append(
            f"Trường gợi ý{loc_note} ({school_type_note}, kỳ thi {exam_type}): {'; '.join(school_list)}"
        )

    # Grade + province context for the personalised school recommendation.
    if grade:
        dynamic_parts.append(f"Lớp học sinh: {grade}")
    if province:
        dynamic_parts.append(f"Tỉnh/thành phố: {province}")
    if learner_archetype:
        dynamic_parts.append(f"Learner type: {learner_archetype}")

    # Per-province difficulty context (dynamic, not in the static system prompt).
    province_ctx = _get_province_context(province or None)
    dynamic_parts.append(f"Provincial context: {province_ctx}")

    # Device-detected location supplements (does not replace) the profile province.
    if device_province:
        note = f"Vị trí thiết bị phát hiện: {device_province}"
        if device_province != province:
            note += f" (khác với tỉnh trong hồ sơ: {province or 'chưa đặt'})"
        dynamic_parts.append(
            f"{note}. Dùng thông tin này để bổ sung nhận xét về đặc thù đề thi địa phương "
            f"(trọng số chủ đề, mức độ cạnh tranh) trong phần insights và recommendations. "
            f"Không thêm tên trường vào insights/recommendations — danh sách trường được hiển thị riêng."
        )
        if not province:
            dynamic_parts.append(f"Device provincial context: {_get_province_context(device_province)}")

    school_json_field = ""
    if school_recommendations:
        if targets_grade_10:
            school_insight_hint = "Nhận xét ngắn 1-2 câu tổng quan về trường THPT phù hợp để thi vào lớp 10"
            school_type_example = "THPT"
        else:
            school_insight_hint = "Nhận xét ngắn 1-2 câu tổng quan về trường đại học/cao đẳng phù hợp"
            school_type_example = "Đại học"
        school_json_field = (
            f',\n "school_insight": "{school_insight_hint}",'
            f'\n "schools": ['
            f'\n {{'
            f'\n "name": "Tên trường đầy đủ",'
            f'\n "score_range": "Ngưỡng điểm chuẩn Toán (vd: 7.5–8.5 điểm)",'
            f'\n "type": "{school_type_example}",'
            f'\n "region_note": "Tỉnh/thành của trường — quan hệ với tỉnh học sinh (cùng tỉnh/tỉnh lân cận/...)",'
            f'\n "note": "1 câu nhận xét tại sao phù hợp với điểm số này"'
            f'\n }}'
            f'\n ] // Liệt kê 3-5 trường phù hợp nhất theo thứ tự ưu tiên'
        )

    prompt = "\n".join(dynamic_parts) + f"""

Trả về JSON (không có text ngoài JSON):
{{
"insights": "Nhận xét tổng quan 2-3 câu về kết quả thi",
"question_analysis": "Phân tích cụ thể các câu trả lời sai nếu có, chỉ ra điểm cần cải thiện (2-3 câu)",
"weak_topics": ["topic_key1", "topic_key2"],
"recommendations": ["khuyến nghị 1", "khuyến nghị 2", "khuyến nghị 3"]{school_json_field}
}}"""
    return prompt
407
+
408
+
409
async def analyze_exam_result(
    client: AsyncOpenAI,
    result: dict,
    history: list[dict],
    student_name: str = "",
    wrong_questions: list[dict] = None,
    school_recommendations: list[dict] = None,
    exam_category: str = "",
    user_profile: dict = None,
    learner_archetype: str | None = None,
    device_province: str | None = None,
) -> dict:
    """Ask the default model to analyse an exam result and parse its JSON reply.

    The arguments are forwarded to ``build_analyze_prompt``.  The reply has
    any code fence stripped and is decoded with ``json.loads``; a reply that
    is not valid JSON raises ``json.JSONDecodeError``.
    """
    settings = get_settings()

    user_prompt = build_analyze_prompt(
        result,
        history,
        student_name,
        wrong_questions=wrong_questions,
        school_recommendations=school_recommendations,
        exam_category=exam_category,
        user_profile=user_profile,
        learner_archetype=learner_archetype,
        device_province=device_province,
    )
    conversation = [
        {"role": "system", "content": STATIC_EXAM_ANALYSIS_INSTRUCTIONS},
        {"role": "user", "content": user_prompt},
    ]

    completion = await call_with_retry(
        client,
        model=settings.default_model,
        max_tokens=1200,
        messages=conversation,
    )

    # An empty reply is decoded as an empty JSON object.
    reply_text = completion.choices[0].message.content or "{}"
    return json.loads(_strip_code_fence(reply_text))
backend/app/agent/exam_explainer.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from openai import AsyncOpenAI
3
+ from app.config import get_settings
4
+ from app.agent.core import call_with_retry
5
+
6
+ THPT_CONTEXT = """
7
+ Bối cảnh: Đây là kỳ thi THPT Quốc gia Việt Nam. Các câu hỏi thường có bẫy sau:
8
+ - Nhầm lẫn giữa điều kiện cần và điều kiện đủ trong bài toán logarit, hàm số
9
+ - Bỏ sót nghiệm ngoài miền xác định
10
+ - Tính sai dấu khi khai triển công thức lượng giác
11
+ - Nhầm chiều tích phân hoặc quên hằng số C
12
+ Luôn gợi ý học sinh kiểm tra lại điều kiện trước khi kết luận.
13
+ """
14
+
15
+ STATIC_EXPLAIN_INSTRUCTIONS = """Bạn là gia sư toán học chuyên ôn thi lớp 10 TPHCM. \
16
+ Phân tích câu hỏi trắc nghiệm, xác định đáp án đúng bằng lập luận toán học, rồi giải thích ngắn gọn. \
17
+ Trả lời bằng tiếng Việt."""
18
+
19
+ LABELS = ["A", "B", "C", "D"]
20
+
21
+
22
+ import re
23
+
24
def _extract_json(text: str) -> str:
    """Return the first complete JSON object embedded anywhere in ``text``.

    Candidate positions are every ``{`` in the text; the first one from which
    a full JSON value decodes wins.  Using the JSON decoder instead of a flat
    regex keeps objects intact when string values contain braces (for
    example ``x^{2}``) or when objects are nested.

    Args:
        text: Raw model reply, possibly with prose or code fences around it.

    Returns:
        The substring holding the first decodable JSON object, or, when none
        is found, the input with surrounding code fences removed.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            _, end = decoder.raw_decode(text, start)
        except ValueError:
            # Not a valid object at this brace; try the next one.
            start = text.find("{", start + 1)
        else:
            return text[start:end]
    # Fallback: strip code fences and return
    stripped = re.sub(r'^```(?:json)?\s*', '', text.strip())
    stripped = re.sub(r'\s*```$', '', stripped)
    return stripped.strip()
33
+
34
+
35
+ _EXPLANATION_DEPTH_INSTRUCTIONS = {
36
+ "brief": "Giải thích ngắn gọn — chỉ 2-3 câu nêu bật ý chính.",
37
+ "detailed": "Giải thích đầy đủ, chi tiết để học sinh hiểu rõ.",
38
+ "step-by-step": "Trình bày giải thích theo các bước đánh số. Mỗi bước trên một dòng riêng.",
39
+ }
40
+
41
+ _ENCOURAGEMENT_INSTRUCTIONS = {
42
+ 'minimal': 'Be concise and direct. Skip praise.',
43
+ 'moderate': 'Brief encouragement is welcome.',
44
+ 'high': 'Be warm and encouraging throughout.',
45
+ }
46
+
47
+
48
async def generate_explanation(
    client: AsyncOpenAI,
    question: dict,
    chosen_index: int,
    ai_preferences: dict | None = None,
) -> dict:
    """Explain a multiple-choice question using the stored answer as ground truth.

    Args:
        client: Chat-completions client passed to ``call_with_retry``.
        question: Question record; reads ``choices``, ``correct``,
            ``explanation``, ``question``, ``topic`` and ``difficulty``.
        chosen_index: Zero-based index of the student's choice.
        ai_preferences: Optional ``explanation_depth`` and
            ``encouragement_level`` settings.

    Returns:
        A dict with ``correct_index`` (always taken from ``question``, never
        from the model) and ``explanation``.  When the question already has
        an explanation it is reused and no model call is made.
    """
    settings = get_settings()
    choices = question.get("choices") or []

    # Ground truth from question data — never let AI guess the correct answer
    correct_index = int(question.get("correct") or 0)
    correct_index = max(0, min(correct_index, len(choices) - 1))
    base_explanation = question.get("explanation", "")

    def _label(index: int) -> str:
        # Reject negative indexes explicitly: LABELS[-1] would silently map
        # an invalid choice to "D".
        return LABELS[index] if 0 <= index < len(LABELS) else str(index)

    chosen_label = _label(chosen_index)
    correct_label = _label(correct_index)

    # If the question already has an explanation, use it directly without an AI call
    if base_explanation:
        student_context = (
            f"Bạn đã chọn đúng ({correct_label})! " if chosen_index == correct_index
            else f"Bạn chọn {chosen_label}, đáp án đúng là {correct_label}. "
        )
        return {
            "correct_index": correct_index,
            "explanation": student_context + base_explanation,
        }

    prefs = ai_preferences or {}
    depth_instruction = _EXPLANATION_DEPTH_INSTRUCTIONS.get(
        prefs.get("explanation_depth", "detailed"),
        _EXPLANATION_DEPTH_INSTRUCTIONS["detailed"],
    )
    encouragement_instruction = _ENCOURAGEMENT_INSTRUCTIONS.get(
        prefs.get("encouragement_level", "moderate"),
        _ENCOURAGEMENT_INSTRUCTIONS["moderate"],
    )

    choices_text = "\n".join(
        f" {LABELS[i]}. {c}" for i, c in enumerate(choices) if i < len(LABELS)
    )

    # No pre-written explanation — ask AI to explain, but correct_index is already known
    prompt = f"""Câu hỏi trắc nghiệm toán lớp 10:
{question.get('question', '')}

Các lựa chọn:
{choices_text}

Chủ đề: {question.get('topic', '')} | Mức độ: {question.get('difficulty', '')}
Học sinh đã chọn: {chosen_label}
Đáp án đúng: {correct_label} (index {correct_index}) — đây là sự thật, không được thay đổi.

QUAN TRỌNG: Chỉ trả về JSON, không có bất kỳ văn bản nào khác trước hoặc sau.
Giải thích ngắn gọn tại sao đáp án {correct_label} đúng:
{{"correct_index": {correct_index}, "explanation": "<2–3 câu tiếng Việt giải thích, không dùng markdown>"}}"""

    response = await call_with_retry(
        client,
        model=settings.haiku_model,
        max_tokens=400,
        messages=[
            {"role": "system", "content": THPT_CONTEXT + STATIC_EXPLAIN_INSTRUCTIONS + "\n" + depth_instruction + "\n" + encouragement_instruction},
            {"role": "user", "content": prompt},
        ],
    )

    raw = response.choices[0].message.content or ""
    content = _extract_json(raw)
    try:
        data = json.loads(content)
    except (json.JSONDecodeError, ValueError):
        data = None
    # A reply that decodes to a list, string or number cannot carry the
    # expected keys; fall back to the raw text instead of raising.
    if not isinstance(data, dict):
        return {"correct_index": correct_index, "explanation": raw.strip()}
    # Always use ground-truth correct_index regardless of what AI returns
    data["correct_index"] = correct_index
    return data
backend/app/agent/fsrs.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FSRS v5 spaced-repetition algorithm — mirrors the frontend implementation exactly.
3
+
4
+ Parameters (FSRS_W) are identical to exam-app/src/pages/ReviewSession.jsx so that
5
+ server-computed next_review_date matches what the client would have computed.
6
+
7
+ Quality scale (same as frontend):
8
+ 1 = Đoán (Again / forgot)
9
+ 3 = Khá (Good)
10
+ 5 = Chắc (Easy / remembered well)
11
+ """
12
+ import math
13
+
14
FSRS_W = [0.4, 0.6, 2.4, 5.8, 4.93, 0.94, 0.86, 0.01, 1.49, 0.14, 0.94, 2.18, 0.05, 0.34, 1.26, 0.29, 2.61]


def fsrs_update(stability: float, difficulty: float, elapsed: int, quality: int) -> tuple[float, float, int]:
    """Run a single review step and report the updated scheduling state.

    Inputs are clamped first: stability to at least 0.5, difficulty to the
    range 1..10 and elapsed to at least one day.

    Args:
        stability: Current memory stability.
        difficulty: Current item difficulty.
        elapsed: Days since the previous review.
        quality: Frontend rating, 1 | 3 | 5.

    Returns:
        ``(new_stability, new_difficulty, interval_days)`` where the interval
        is the rounded new stability, never below one day.
    """
    s = max(0.5, float(stability))
    d = max(1.0, min(10.0, float(difficulty)))
    days = max(1, int(elapsed))

    # Frontend ratings 1/3/5 become internal grades 1/3/4.
    if quality <= 1:
        grade = 1
    elif quality <= 3:
        grade = 3
    else:
        grade = 4

    recall = math.exp(math.log(0.9) * days / s)
    forgotten = 1 - recall

    if grade < 3:
        # Lapse: stability is recomputed from difficulty and prior stability.
        candidate = (
            FSRS_W[11]
            * math.pow(d, -FSRS_W[12])
            * (math.pow(s + 1, FSRS_W[13]) - 1)
            * math.exp(FSRS_W[14] * forgotten)
        )
    else:
        # Successful recall: stability is multiplied by a growth factor.
        growth = (
            math.exp(FSRS_W[8])
            * (11 - d)
            * math.pow(s, -FSRS_W[9])
            * (math.exp(FSRS_W[10] * forgotten) - 1)
            + 1
        )
        candidate = s * growth

    next_stability = max(0.5, candidate)
    next_difficulty = max(1.0, min(10.0, d + FSRS_W[6] * (3 - grade)))
    # interval = round(new_stability) — the log(0.9)/log(0.9) terms cancel to 1.0
    interval_days = max(1, round(next_stability))

    return next_stability, next_difficulty, interval_days
backend/app/agent/hint_generator.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from openai import AsyncOpenAI
3
+ from app.config import get_settings
4
+ from app.agent.core import call_with_retry
5
+
6
+ THPT_CONTEXT = """
7
+ Bối cảnh: Đây là kỳ thi THPT Quốc gia Việt Nam. Các câu hỏi thường có bẫy sau:
8
+ - Nhầm lẫn giữa điều kiện cần và điều kiện đủ trong bài toán logarit, hàm số
9
+ - Bỏ sót nghiệm ngoài miền xác định
10
+ - Tính sai dấu khi khai triển công thức lượng giác
11
+ - Nhầm chiều tích phân hoặc quên hằng số C
12
+ Luôn gợi ý học sinh kiểm tra lại điều kiện trước khi kết luận.
13
+ """
14
+
15
+ STATIC_HINT_INSTRUCTIONS = """Bạn là trợ lý AI của ứng dụng luyện thi toán lớp 10 TPHCM. \
16
+ Hỗ trợ tạo nội dung giáo dục. Trả lời bằng tiếng Việt."""
17
+
18
+ _DETAIL_LEVEL = {
19
+ 1: "gợi ý nhẹ — chỉ gợi hướng tư duy, không tiết lộ bất kỳ thông tin về đáp án",
20
+ 2: "gợi ý vừa — chỉ ra phương pháp giải cụ thể, vẫn không tiết lộ đáp án",
21
+ 3: "gợi ý chi tiết — giải thích từng bước tiếp cận, nhưng để học sinh tự chọn đáp án",
22
+ }
23
+
24
+
25
def _strip_code_fence(text: str) -> str:
    """Remove a surrounding Markdown code fence from a model reply.

    Args:
        text: Raw reply text, possibly wrapped in ``` or ```json fences.

    Returns:
        The content of the first fenced section with any ``json`` language
        tag removed, or the whitespace-stripped input when no fence opens it.
    """
    # Replies frequently start with a newline or spaces before the fence, so
    # normalise first; otherwise the fence would go undetected.
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned
    # Splitting on the fence gives ['', <first fenced body>, ...].
    parts = cleaned.split("```")
    body = parts[1] if len(parts) > 1 else cleaned
    # The language tag may be written as "json", "JSON", "Json", ...
    if body[:4].lower() == "json":
        body = body[4:]
    return body.strip()
32
+
33
+
34
+ _HINT_STYLE_INSTRUCTIONS = {
35
+ "socratic": "Hướng dẫn bằng cách đặt câu hỏi gợi mở, KHÔNG tiết lộ đáp án — hãy để học sinh tự khám phá.",
36
+ "direct": "Đưa ra gợi ý trực tiếp, rõ ràng về cách tiếp cận từng bước. Hãy cụ thể và rõ ràng.",
37
+ "visual": "Trình bày gợi ý theo các bước đánh số rõ ràng. Dùng ký hiệu toán học chuẩn và nhãn bước rõ ràng.",
38
+ }
39
+
40
+ _ENCOURAGEMENT_INSTRUCTIONS = {
41
+ 'minimal': 'Be concise and direct. Skip praise.',
42
+ 'moderate': 'Brief encouragement is welcome.',
43
+ 'high': 'Be warm and encouraging throughout.',
44
+ }
45
+
46
+
47
async def generate_hint(
    client: AsyncOpenAI,
    question: dict,
    attempt_count: int = 1,
    previous_hints: list[str] | None = None,
    ai_preferences: dict | None = None,
) -> dict:
    """Generate a short hint (not a solution) for a question.

    Args:
        client: Chat-completions client passed to ``call_with_retry``.
        question: Question record; reads ``topic``, ``difficulty`` and
            ``question``.
        attempt_count: Which hint request this is; it selects the detail
            level and is clamped to 1..3 for that purpose.
        previous_hints: Hints already shown, listed so the model does not
            repeat them.
        ai_preferences: Optional ``hint_style`` and ``encouragement_level``.

    Returns:
        The decoded JSON object from the model.  When the model answers with
        non-empty text that is not a JSON object, that text is returned as
        ``{"hint": ..., "difficulty_note": ""}``.

    Raises:
        json.JSONDecodeError: If the model reply is empty.
    """
    settings = get_settings()
    # Clamp both ends: without the lower bound a count of 0 or less would
    # miss every key and fall through to the most detailed level.
    level = _DETAIL_LEVEL[max(1, min(attempt_count, 3))]

    prefs = ai_preferences or {}
    hint_style = prefs.get("hint_style", "socratic")
    style_instruction = _HINT_STYLE_INSTRUCTIONS.get(hint_style, _HINT_STYLE_INSTRUCTIONS["socratic"])
    encouragement_level = prefs.get("encouragement_level", "moderate")
    encouragement_instruction = _ENCOURAGEMENT_INSTRUCTIONS.get(encouragement_level, _ENCOURAGEMENT_INSTRUCTIONS["moderate"])

    prev_context = ""
    if previous_hints:
        shown = "\n".join(f" Lần {i+1}: {h}" for i, h in enumerate(previous_hints))
        prev_context = f"\nCác gợi ý đã cung cấp (KHÔNG lặp lại, phải tiến xa hơn):\n{shown}\n"

    prompt = f"""Tôi cần bạn tạo một GỢI Ý ngắn (KHÔNG phải lời giải) cho câu hỏi toán sau.
Yêu cầu ({level}): Đặt 1–2 câu hỏi gợi mở hoặc nhắc 1 khái niệm liên quan để học sinh tự suy nghĩ.
Quy tắc bắt buộc:
- Tối đa 2 câu, viết liền mạch, không xuống dòng
- KHÔNG dùng markdown, KHÔNG dùng số thứ tự, KHÔNG dùng gạch đầu dòng
- KHÔNG tiết lộ đáp án hay ký hiệu A/B/C/D
Chủ đề: {question.get('topic', '')} | Mức độ: {question.get('difficulty', '')} | Lần {attempt_count}/3
Câu hỏi: {question.get('question', '')}{prev_context}
Trả về đúng định dạng JSON sau, không thêm text nào khác:
{{"hint": "<1–2 câu gợi ý tiếng Việt, không markdown>", "difficulty_note": ""}}"""

    response = await call_with_retry(
        client,
        model=settings.hint_model,
        max_tokens=512,
        messages=[
            {"role": "system", "content": THPT_CONTEXT + STATIC_HINT_INSTRUCTIONS + "\n" + style_instruction + "\n" + encouragement_instruction},
            {"role": "user", "content": prompt},
        ],
    )

    raw = response.choices[0].message.content or ""
    content = _strip_code_fence(raw)
    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        # An empty reply is a real failure and still propagates; plain prose
        # is kept and used as the hint text below.
        if not content:
            raise
        data = None
    if isinstance(data, dict):
        return data
    hint_text = data if isinstance(data, str) else content
    return {"hint": hint_text, "difficulty_note": ""}
backend/app/agent/memory.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from openai import AsyncOpenAI
3
+ from app.config import get_settings
4
+
5
+
6
async def compress_conversation(
    client: AsyncOpenAI,
    messages: list[dict],
) -> str:
    """Summarise a conversation into a short text using the haiku model.

    System messages are excluded from the transcript; non-string message
    content is serialised as JSON.

    Args:
        client: Chat-completions client used for the summary request.
        messages: Conversation messages, each with ``role`` and ``content``.

    Returns:
        The model's summary, or ``""`` when the reply is empty or when there
        is nothing to summarise (no non-system messages).
    """
    history_text = "\n".join(
        f"{m['role'].upper()}: {m['content'] if isinstance(m['content'], str) else json.dumps(m['content'], ensure_ascii=False)}"
        for m in messages
        if m["role"] != "system"
    )
    # Nothing to summarise: skip the model call instead of sending an empty
    # user message.
    if not history_text:
        return ""

    settings = get_settings()
    response = await client.chat.completions.create(
        model=settings.haiku_model,
        max_tokens=512,
        messages=[
            {
                "role": "system",
                "content": "Tóm tắt ngắn gọn cuộc hội thoại dưới đây, giữ lại các thông tin quan trọng về yêu cầu và sản phẩm của khách.",
            },
            {"role": "user", "content": history_text},
        ],
    )
    return response.choices[0].message.content or ""
backend/app/agent/study_planner.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from openai import AsyncOpenAI
3
+ from app.config import get_settings
4
+ from app.agent.core import call_with_retry
5
+ from app.agent.exam_analyzer import _PROVINCE_DATA
6
+
7
+ STATIC_RECOVERY_PATH_INSTRUCTIONS = """Bạn là huấn luyện viên thi Toán lớp 9 vào lớp 10.
8
+ Nhiệm vụ: nhìn vào điểm thi và từng câu sai cụ thể, xác định 1–2 lỗ hổng kiến thức quan trọng nhất, rồi tạo Recovery Path ngắn hạn (2–3 tuần) nhắm trực tiếp vào các lỗ hổng đó.
9
+
10
+ Nguyên tắc bắt buộc:
11
+ - score_gap: nêu khoảng cách điểm cụ thể và trường mục tiêu (nếu có). Không nói chung chung.
12
+ - focus_areas: tối đa 2 chủ đề. Ưu tiên chủ đề sai nhiều nhất hoặc ảnh hưởng điểm nhất.
13
+ - error_pattern: nêu đúng lỗi kỹ thuật cụ thể (không phải tên chủ đề). Ví dụ: "Sai ở bước xác định miền xác định logarit — xuất hiện 4/5 lần".
14
+ - tasks: 2–3 nhiệm vụ luyện tập cụ thể, liên hệ trực tiếp đến lỗi đã xác định.
15
+ - checkpoint: số câu cần trả lời đúng liên tiếp để coi là nắm vững (target: 3–5).
16
+ - LATEX BẮT BUỘC: mọi ký hiệu toán học trong tasks/error_pattern PHẢI bọc trong $...$. Ví dụ: $\\Delta > 0$, $\\log_a x$, $x^2 - 5x + 6 = 0$.
17
+ Trả lời bằng tiếng Việt. Luôn trả về JSON hợp lệ, không có text ngoài JSON."""
18
+
19
+
20
def _strip_code_fence(text: str) -> str:
    """Remove a surrounding Markdown code fence from a model reply.

    Args:
        text: Raw reply text, possibly wrapped in ``` or ```json fences.

    Returns:
        The content of the first fenced section with any ``json`` language
        tag removed, or the whitespace-stripped input when no fence opens it.
    """
    # Replies frequently start with a newline or spaces before the fence, so
    # normalise first; otherwise the fence would go undetected.
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned
    # Splitting on the fence gives ['', <first fenced body>, ...].
    parts = cleaned.split("```")
    body = parts[1] if len(parts) > 1 else cleaned
    # The language tag may be written as "json", "JSON", "Json", ...
    if body[:4].lower() == "json":
        body = body[4:]
    return body.strip()
27
+
28
+
29
async def generate_study_plan(
    client: AsyncOpenAI,
    result: dict,
    history: list[dict],
    wrong_questions: list[dict] | None = None,
    topic_miss_counts: dict | None = None,
    student_name: str = "",
    learner_archetype: str | None = None,
    province: str = "",
) -> dict:
    """Build a short-term Recovery Path from an exam result via the default model.

    Args:
        client: Chat-completions client passed to ``call_with_retry``.
        result: Exam result; reads ``score``, ``accuracy`` and, when no wrong
            questions are given, ``topicBreakdown``.
        history: Earlier results; only their count is reported.
        wrong_questions: Representative missed questions to list in detail.
        topic_miss_counts: Per-topic miss counts, summarised when wrong
            questions are present.
        student_name: Optional name placed at the top of the prompt.
        learner_archetype: Accepted for interface compatibility; not used here.
        province: Province name; cutoffs and topic weights are added when it
            is a key of ``_PROVINCE_DATA``.

    Returns:
        The decoded JSON plan.  If the model call fails, or the reply is not
        a JSON object, a static fallback plan is returned instead.
    """
    settings = get_settings()

    lines = []
    if student_name:
        lines.append(f"Học sinh: {student_name}")
    lines.append(f"Điểm: {result.get('score', 0)}/10 ({round(result.get('accuracy', 0) * 100)}% đúng)")
    lines.append(f"Số đề đã thi: {len(history)}")
    if province and province in _PROVINCE_DATA:
        p = _PROVINCE_DATA[province]
        lines.append(
            f"Tỉnh: {province} | Mức điểm an toàn: {p['typical_cutoff']} | "
            f"Trường tốt yêu cầu: {p['top_schools_cutoff']}+ | "
            f"Dùng các ngưỡng này để xác định khoảng cách điểm (score_gap) cụ thể cho học sinh."
        )
        if p.get("topic_weights"):
            # Five heaviest topics, largest weight first.
            top_topics = sorted(p["topic_weights"].items(), key=lambda x: -x[1])[:5]
            weights_str = ", ".join(f"{t} ({w}%)" for t, w in top_topics)
            lines.append(
                f"Phân bố chủ đề đề thi {province} (5 chủ đề chiếm tỷ trọng cao nhất): {weights_str}. "
                f"Ưu tiên chọn focus_areas từ các chủ đề này vì chúng xuất hiện nhiều nhất trong đề thi tỉnh."
            )
    elif province:
        lines.append(f"Tỉnh: {province}")

    if wrong_questions:
        if topic_miss_counts:
            summary = ", ".join(f"{t}: {c} câu sai" for t, c in topic_miss_counts.items())
            lines.append(f"Tổng hợp câu sai theo chủ đề: {summary}")

        lines.append(
            f"\nCâu sai đại diện ({len(wrong_questions)} câu — câu khó nhất mỗi chủ đề):"
        )
        for i, wq in enumerate(wrong_questions, 1):
            topic = wq.get("topic", "")
            diff = wq.get("difficulty", "")
            # Stored nulls must not crash the slicing below.
            q_text = (wq.get("question") or "")[:130]
            correct = wq.get("correct_answer", "")
            expl = (wq.get("explanation") or "")[:100]
            lines.append(f"\nCâu {i} [{topic} / {diff}]: {q_text}")
            lines.append(f" Đáp án đúng: {correct}")
            if expl:
                lines.append(f" Vì sao: {expl}")
    else:
        topic_breakdown = result.get("topicBreakdown", {})
        weak = [t for t, tb in topic_breakdown.items() if tb.get("accuracy", 1) < 0.6]
        lines.append(f"Chủ đề yếu: {', '.join(weak) or 'Không có'}")

    prompt = "\n".join(lines) + """

Tạo Recovery Path dựa trên dữ liệu trên.
Trả về JSON (không có text ngoài JSON):
{
"score_gap": "Mô tả khoảng cách điểm và mục tiêu cụ thể (1–2 câu)",
"focus_areas": [
{
"topic": "Tên chủ đề",
"error_pattern": "Mô tả lỗi kỹ thuật cụ thể",
"tasks": ["Nhiệm vụ luyện tập cụ thể 1", "Nhiệm vụ 2"],
"checkpoint": {"target": 5, "description": "Trả lời đúng 5 câu [chủ đề] liên tiếp"}
}
],
"retake_note": "Sau khi hoàn thành Focus 1 → Thử lại đề thi để so sánh điểm"
}"""

    try:
        response = await call_with_retry(
            client,
            model=settings.default_model,
            max_tokens=1200,
            messages=[
                {"role": "system", "content": STATIC_RECOVERY_PATH_INSTRUCTIONS},
                {"role": "user", "content": prompt},
            ],
        )
        content = _strip_code_fence(response.choices[0].message.content or "{}")
        plan = json.loads(content)
        if isinstance(plan, dict):
            return plan
        raise ValueError("study plan reply is not a JSON object")
    except Exception:
        # Best-effort by design: the caller always receives a usable plan,
        # but the failure is recorded instead of being silently dropped.
        import logging

        logging.getLogger(__name__).exception("generate_study_plan failed; returning fallback plan")

    return {
        "score_gap": "Phân tích cho thấy còn một số lỗ hổng cần bù. Tiếp tục ôn tập và thử lại đề thi.",
        "focus_areas": [
            {
                "topic": "Chủ đề yếu nhất",
                "error_pattern": "Xem lại giải thích từng câu sai để xác định lỗi cụ thể.",
                "tasks": [
                    "Xem lại giải thích chi tiết từng câu sai",
                    "Luyện 5–10 câu cùng dạng",
                    "Ghi chú kỹ thuật cần nhớ",
                ],
                "checkpoint": {"target": 3, "description": "Trả lời đúng 3 câu cùng dạng liên tiếp"},
            }
        ],
        "retake_note": "Sau khi luyện xong → Thử lại đề thi để so sánh điểm",
    }
backend/app/auth.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from datetime import datetime, timedelta, timezone
3
+
4
+ import jwt
5
+ import google.auth.exceptions
6
+ import google.auth.transport.requests
7
+ import google.oauth2.id_token
8
+
9
+ from app.config import get_settings
10
+
11
+
12
async def verify_google_token(id_token_str: str) -> dict:
    """Check a Google ID token against the configured client ID.

    The blocking verification call runs in a worker thread.

    Returns:
        The token payload.

    Raises:
        ValueError: When verification raises a ``GoogleAuthError``.
    """
    client_id = get_settings().google_client_id
    try:
        transport = google.auth.transport.requests.Request()
        claims = await asyncio.to_thread(
            google.oauth2.id_token.verify_oauth2_token,
            id_token_str,
            transport,
            client_id,
        )
    except google.auth.exceptions.GoogleAuthError as exc:
        raise ValueError(f"Invalid or expired Google token: {exc}") from exc
    return claims
25
+
26
+
27
def create_jwt(user_id: int) -> str:
    """Issue an HS256 token for ``user_id`` that expires seven days from now.

    The token carries ``sub`` (the user id as a string), ``iat``, ``exp`` and
    the fixed audience ``"exam-app"``, and is signed with the configured
    JWT secret.
    """
    secret = get_settings().jwt_secret
    issued_at = datetime.now(tz=timezone.utc)
    claims = {
        "sub": str(user_id),
        "iat": issued_at,
        "exp": issued_at + timedelta(days=7),
        "aud": "exam-app",
    }
    return jwt.encode(claims, secret, algorithm="HS256")
37
+
38
+
39
def decode_jwt(token: str) -> dict:
    """Decode ``token`` with the configured secret and return its claims.

    Only HS256 is accepted and the audience must be ``"exam-app"``; errors
    raised by ``jwt.decode`` propagate to the caller.
    """
    secret = get_settings().jwt_secret
    claims = jwt.decode(token, secret, algorithms=["HS256"], audience="exam-app")
    return claims
backend/app/config.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+ from pathlib import Path
3
+ from pydantic_settings import BaseSettings, SettingsConfigDict
4
+
5
+ # Resolve .env relative to this file so it works regardless of CWD (e.g. npm run dev from repo root)
6
+ _ENV_FILE = Path(__file__).parent.parent / ".env"
7
+
8
+
9
class Settings(BaseSettings):
    """Application configuration loaded from the environment and ``.env``.

    Construction fails with ``RuntimeError`` when ``JWT_SECRET`` is missing
    or shorter than 32 characters, or when ``ADMIN_MASTER_SECRET`` or
    ``CRON_SECRET`` is set but shorter than 32 characters.
    """

    model_config = SettingsConfigDict(env_file=str(_ENV_FILE), env_file_encoding="utf-8", extra="ignore")

    anthropic_base_url: str = "https://ai-router.locdo.tech"
    anthropic_auth_token: str

    anthropic_default_opus_model: str = "claude-opus-4.6"
    anthropic_default_sonnet_model: str = "claude-sonnet-4.6"
    anthropic_default_haiku_model: str = "claude-haiku-4.5"
    anthropic_default_hint_model: str = "claude-haiku-4.5"

    allowed_origins: str = "http://localhost:5173,https://exam-app-ey0.pages.dev"
    database_url: str = ""
    # Set to "true" to run the background wiki crawl on startup.
    # Keep "false" on HF Spaces until local testing is complete.
    crawl_auto_seed_enabled: bool = False
    # Set to "true" to wipe wiki_units and re-crawl everything from scratch.
    # The app self-disables this flag via the HF Spaces API after one successful run.
    crawl_force_reseed: bool = False
    # Set to "true" to crawl only topics that have zero wiki units (gap-fill).
    # Idempotent: re-runs are safe — the zero-unit check is the gate.
    crawl_gap_fill_enabled: bool = False
    # Set to "true" to fix non-canonical topic/type labels and remove content duplicates on startup.
    # Self-disables via HF Spaces API after one successful run.
    wiki_sanitize_enabled: bool = False
    # Set to "true" to translate English wiki units (exam_upload source) to Vietnamese.
    # Self-disables via HF Spaces API after one successful run.
    wiki_fix_english_enabled: bool = False
    embedding_model_name: str = "BAAI/bge-m3"
    google_client_id: str = ""
    jwt_secret: str = ""
    admin_key: str = ""
    admin_master_secret: str = ""
    admin_key_rotation_period: str = "weekly"
    admin_key_log_path: str = "./admin_keys.txt"
    admin_key_log_enabled: bool = True
    admin_key_webhook_url: str = ""
    cron_secret: str = ""
    sqlite_path: str = "/data/app.db"
    payment_bank_name: str = ""
    payment_account_number: str = ""
    payment_account_name: str = ""
    embedding_dim: int = 1024
    use_sqlite_vec: bool = True
    vector_top_k: int = 20
    crag_threshold: float = 0.20

    def __init__(self, **data):
        """Load the settings, then enforce minimum secret lengths."""
        super().__init__(**data)
        if not self.jwt_secret:
            raise RuntimeError("JWT_SECRET must be set in environment variables")
        if len(self.jwt_secret) < 32:
            raise RuntimeError("JWT_SECRET must be at least 32 characters")
        if self.admin_master_secret and len(self.admin_master_secret) < 32:
            raise RuntimeError("ADMIN_MASTER_SECRET must be at least 32 characters if set")
        if self.cron_secret and len(self.cron_secret) < 32:
            raise RuntimeError("CRON_SECRET must be at least 32 characters if set")

    @property
    def allowed_origins_list(self) -> list[str]:
        """Return ``allowed_origins`` split on commas, trimmed, without blanks."""
        # Skip empty entries so a trailing comma or an empty setting does not
        # produce "" as an origin.
        return [origin.strip() for origin in self.allowed_origins.split(",") if origin.strip()]

    @property
    def default_model(self) -> str:
        """Return the configured sonnet model name."""
        return self.anthropic_default_sonnet_model

    @property
    def opus_model(self) -> str:
        """Return the configured opus model name."""
        return self.anthropic_default_opus_model

    @property
    def haiku_model(self) -> str:
        """Return the configured haiku model name."""
        return self.anthropic_default_haiku_model

    @property
    def hint_model(self) -> str:
        """Return the configured hint model name."""
        return self.anthropic_default_hint_model
86
+
87
+
88
@lru_cache
def get_settings() -> Settings:
    """Return the cached ``Settings`` instance, constructing it on first use."""
    settings = Settings()
    return settings
backend/app/data/__init__.py ADDED
File without changes
backend/app/data/concepts.py ADDED
@@ -0,0 +1,503 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Concept taxonomy for the Zenith Learning Graph — 62 concepts, grades 9–12.
3
+
4
+ Each concept has:
5
+ prerequisite_ids — edges pointing from required → this concept (DAG)
6
+ exam_weight — relative THPT exam frequency (1.0 = rare, 5.0 = very common)
7
+ bloom_ceiling — highest Bloom's level typically reached for this concept
8
+ topic — canonical topic label (matches wiki_units.topic)
9
+
10
+ Edge semantics: "B requires A" means A is upstream of B,
11
+ i.e. A → B is encoded as B.prerequisite_ids includes A.
12
+ """
13
+
14
+ CONCEPTS: list[dict] = [
15
+
16
+ # ── Grade 9 foundation ────────────────────────────────────────────────────
17
+
18
+ {
19
+ "id": "linear_eq",
20
+ "name": "Linear Equations",
21
+ "name_vi": "Phương trình bậc nhất",
22
+ "grade": 9, "topic": "algebra",
23
+ "prerequisite_ids": [],
24
+ "exam_weight": 2.0, "bloom_ceiling": 3,
25
+ },
26
+ {
27
+ "id": "linear_systems",
28
+ "name": "Systems of Linear Equations",
29
+ "name_vi": "Hệ phương trình bậc nhất",
30
+ "grade": 9, "topic": "algebra",
31
+ "prerequisite_ids": ["linear_eq"],
32
+ "exam_weight": 2.5, "bloom_ceiling": 4,
33
+ },
34
+ {
35
+ "id": "quad_eq",
36
+ "name": "Quadratic Equations",
37
+ "name_vi": "Phương trình bậc hai",
38
+ "grade": 9, "topic": "algebra",
39
+ "prerequisite_ids": ["linear_eq"],
40
+ "exam_weight": 3.5, "bloom_ceiling": 4,
41
+ },
42
+ {
43
+ "id": "radicals",
44
+ "name": "Radical Expressions",
45
+ "name_vi": "Căn thức và biến đổi",
46
+ "grade": 9, "topic": "algebra",
47
+ "prerequisite_ids": ["linear_eq"],
48
+ "exam_weight": 2.0, "bloom_ceiling": 3,
49
+ },
50
+ {
51
+ "id": "inequalities",
52
+ "name": "Inequalities",
53
+ "name_vi": "Bất phương trình",
54
+ "grade": 9, "topic": "algebra",
55
+ "prerequisite_ids": ["linear_eq", "quad_eq"],
56
+ "exam_weight": 2.5, "bloom_ceiling": 4,
57
+ },
58
+ {
59
+ "id": "basic_geo",
60
+ "name": "Basic Plane Geometry",
61
+ "name_vi": "Hình học phẳng cơ bản",
62
+ "grade": 9, "topic": "geometry",
63
+ "prerequisite_ids": [],
64
+ "exam_weight": 2.0, "bloom_ceiling": 4,
65
+ },
66
+ {
67
+ "id": "triangles",
68
+ "name": "Triangles",
69
+ "name_vi": "Tam giác và các tính chất",
70
+ "grade": 9, "topic": "geometry",
71
+ "prerequisite_ids": ["basic_geo"],
72
+ "exam_weight": 2.5, "bloom_ceiling": 5,
73
+ },
74
+ {
75
+ "id": "circles",
76
+ "name": "Circles",
77
+ "name_vi": "Đường tròn",
78
+ "grade": 9, "topic": "geometry",
79
+ "prerequisite_ids": ["basic_geo", "triangles"],
80
+ "exam_weight": 2.5, "bloom_ceiling": 4,
81
+ },
82
+ {
83
+ "id": "stats_basic",
84
+ "name": "Basic Statistics",
85
+ "name_vi": "Thống kê cơ bản",
86
+ "grade": 9, "topic": "statistics",
87
+ "prerequisite_ids": [],
88
+ "exam_weight": 1.5, "bloom_ceiling": 3,
89
+ },
90
+ {
91
+ "id": "prob_basic",
92
+ "name": "Basic Probability",
93
+ "name_vi": "Xác suất cơ bản",
94
+ "grade": 9, "topic": "probability",
95
+ "prerequisite_ids": ["stats_basic"],
96
+ "exam_weight": 2.0, "bloom_ceiling": 4,
97
+ },
98
+ {
99
+ "id": "combinatorics",
100
+ "name": "Combinatorics",
101
+ "name_vi": "Tổ hợp và chỉnh hợp",
102
+ "grade": 9, "topic": "combinatorics",
103
+ "prerequisite_ids": [],
104
+ "exam_weight": 2.5, "bloom_ceiling": 4,
105
+ },
106
+ {
107
+ "id": "number_theory",
108
+ "name": "Number Theory",
109
+ "name_vi": "Lý thuyết số cơ bản",
110
+ "grade": 9, "topic": "number_theory",
111
+ "prerequisite_ids": [],
112
+ "exam_weight": 1.5, "bloom_ceiling": 5,
113
+ },
114
+ {
115
+ "id": "sets",
116
+ "name": "Sets",
117
+ "name_vi": "Tập hợp",
118
+ "grade": 9, "topic": "algebra",
119
+ "prerequisite_ids": [],
120
+ "exam_weight": 1.0, "bloom_ceiling": 3,
121
+ },
122
+
123
+ # ── Grade 10 ──────────────────────────────────────────────────────────────
124
+
125
+ {
126
+ "id": "linear_func",
127
+ "name": "Linear Functions",
128
+ "name_vi": "Hàm số bậc nhất",
129
+ "grade": 10, "topic": "functions_and_graphs",
130
+ "prerequisite_ids": ["linear_eq"],
131
+ "exam_weight": 2.0, "bloom_ceiling": 4,
132
+ },
133
+ {
134
+ "id": "quad_func",
135
+ "name": "Quadratic Functions & Parabola",
136
+ "name_vi": "Hàm số bậc hai và parabol",
137
+ "grade": 10, "topic": "functions_and_graphs",
138
+ "prerequisite_ids": ["quad_eq"],
139
+ "exam_weight": 2.5, "bloom_ceiling": 4,
140
+ },
141
+ {
142
+ "id": "coord_geo",
143
+ "name": "Coordinate Geometry",
144
+ "name_vi": "Hình học tọa độ Oxy",
145
+ "grade": 10, "topic": "geometry",
146
+ "prerequisite_ids": ["linear_eq", "basic_geo"],
147
+ "exam_weight": 2.5, "bloom_ceiling": 4,
148
+ },
149
+ {
150
+ "id": "trig_basic",
151
+ "name": "Basic Trigonometry",
152
+ "name_vi": "Lượng giác cơ bản",
153
+ "grade": 10, "topic": "trigonometry",
154
+ "prerequisite_ids": ["basic_geo", "triangles"],
155
+ "exam_weight": 2.5, "bloom_ceiling": 4,
156
+ },
157
+ {
158
+ "id": "vectors",
159
+ "name": "Plane Vectors",
160
+ "name_vi": "Vectơ phẳng",
161
+ "grade": 10, "topic": "geometry",
162
+ "prerequisite_ids": ["coord_geo"],
163
+ "exam_weight": 2.0, "bloom_ceiling": 4,
164
+ },
165
+ {
166
+ "id": "sequences",
167
+ "name": "Sequences & Series",
168
+ "name_vi": "Dãy số",
169
+ "grade": 10, "topic": "algebra",
170
+ "prerequisite_ids": ["quad_eq"],
171
+ "exam_weight": 2.0, "bloom_ceiling": 4,
172
+ },
173
+ {
174
+ "id": "financial_math",
175
+ "name": "Financial Mathematics",
176
+ "name_vi": "Toán tài chính",
177
+ "grade": 10, "topic": "algebra",
178
+ "prerequisite_ids": ["sequences"],
179
+ "exam_weight": 1.5, "bloom_ceiling": 3,
180
+ },
181
+
182
+ # ── Grade 11 ──────────────────────────────────────────────────────────────
183
+
184
+ {
185
+ "id": "trig_advanced",
186
+ "name": "Advanced Trigonometry",
187
+ "name_vi": "Lượng giác nâng cao — công thức cộng và hàm",
188
+ "grade": 11, "topic": "trigonometry",
189
+ "prerequisite_ids": ["trig_basic"],
190
+ "exam_weight": 3.0, "bloom_ceiling": 4,
191
+ },
192
+ {
193
+ "id": "trig_eq",
194
+ "name": "Trigonometric Equations",
195
+ "name_vi": "Phương trình lượng giác",
196
+ "grade": 11, "topic": "trigonometry",
197
+ "prerequisite_ids": ["trig_advanced"],
198
+ "exam_weight": 3.5, "bloom_ceiling": 4,
199
+ },
200
+ {
201
+ "id": "exponential",
202
+ "name": "Exponential Functions",
203
+ "name_vi": "Hàm số mũ và hàm số lũy thừa",
204
+ "grade": 11, "topic": "algebra",
205
+ "prerequisite_ids": ["quad_func"],
206
+ "exam_weight": 3.5, "bloom_ceiling": 4,
207
+ },
208
+ {
209
+ "id": "logarithm",
210
+ "name": "Logarithms",
211
+ "name_vi": "Logarit và hàm logarit",
212
+ "grade": 11, "topic": "algebra",
213
+ "prerequisite_ids": ["exponential"],
214
+ "exam_weight": 3.5, "bloom_ceiling": 4,
215
+ },
216
+ {
217
+ "id": "exp_log_eq",
218
+ "name": "Exponential & Logarithmic Equations",
219
+ "name_vi": "Phương trình mũ và logarit",
220
+ "grade": 11, "topic": "algebra",
221
+ "prerequisite_ids": ["logarithm", "exponential"],
222
+ "exam_weight": 4.0, "bloom_ceiling": 4,
223
+ },
224
+ {
225
+ "id": "exp_log_ineq",
226
+ "name": "Exponential & Logarithmic Inequalities",
227
+ "name_vi": "Bất phương trình mũ và logarit",
228
+ "grade": 11, "topic": "algebra",
229
+ "prerequisite_ids": ["exp_log_eq", "inequalities"],
230
+ "exam_weight": 3.0, "bloom_ceiling": 5,
231
+ },
232
+ {
233
+ "id": "spatial_geo",
234
+ "name": "Spatial Geometry — Lines & Planes",
235
+ "name_vi": "Hình học không gian — đường thẳng và mặt phẳng",
236
+ "grade": 11, "topic": "geometry",
237
+ "prerequisite_ids": ["basic_geo", "vectors"],
238
+ "exam_weight": 3.0, "bloom_ceiling": 4,
239
+ },
240
+ {
241
+ "id": "spatial_solids",
242
+ "name": "Spatial Geometry — Polyhedra & Solids",
243
+ "name_vi": "Hình học không gian — khối đa diện và hình tròn xoay",
244
+ "grade": 11, "topic": "geometry",
245
+ "prerequisite_ids": ["spatial_geo"],
246
+ "exam_weight": 3.5, "bloom_ceiling": 4,
247
+ },
248
+ {
249
+ "id": "comb_advanced",
250
+ "name": "Advanced Combinatorics",
251
+ "name_vi": "Tổ hợp nâng cao — nhị thức Newton",
252
+ "grade": 11, "topic": "combinatorics",
253
+ "prerequisite_ids": ["combinatorics"],
254
+ "exam_weight": 2.5, "bloom_ceiling": 5,
255
+ },
256
+ {
257
+ "id": "prob_advanced",
258
+ "name": "Advanced Probability",
259
+ "name_vi": "Xác suất nâng cao — xác suất có điều kiện",
260
+ "grade": 11, "topic": "probability",
261
+ "prerequisite_ids": ["prob_basic", "comb_advanced"],
262
+ "exam_weight": 3.0, "bloom_ceiling": 5,
263
+ },
264
+ {
265
+ "id": "complex_numbers",
266
+ "name": "Complex Numbers",
267
+ "name_vi": "Số phức",
268
+ "grade": 11, "topic": "algebra",
269
+ "prerequisite_ids": ["quad_eq", "trig_basic"],
270
+ "exam_weight": 1.5, "bloom_ceiling": 4,
271
+ },
272
+
273
+ # ── Grade 12 — Functions & Calculus ──────────────────────────────────────
274
+
275
+ {
276
+ "id": "func_monotone",
277
+ "name": "Function Monotonicity",
278
+ "name_vi": "Tính đơn điệu của hàm số",
279
+ "grade": 12, "topic": "calculus",
280
+ "prerequisite_ids": ["quad_func", "logarithm"],
281
+ "exam_weight": 4.5, "bloom_ceiling": 4,
282
+ },
283
+ {
284
+ "id": "func_extrema",
285
+ "name": "Local Extrema",
286
+ "name_vi": "Cực trị của hàm số",
287
+ "grade": 12, "topic": "calculus",
288
+ "prerequisite_ids": ["func_monotone"],
289
+ "exam_weight": 4.5, "bloom_ceiling": 4,
290
+ },
291
+ {
292
+ "id": "func_asymptote",
293
+ "name": "Asymptotes",
294
+ "name_vi": "Đường tiệm cận",
295
+ "grade": 12, "topic": "calculus",
296
+ "prerequisite_ids": ["func_monotone"],
297
+ "exam_weight": 3.5, "bloom_ceiling": 4,
298
+ },
299
+ {
300
+ "id": "func_graph",
301
+ "name": "Function Graph Sketching",
302
+ "name_vi": "Khảo sát và vẽ đồ thị hàm số",
303
+ "grade": 12, "topic": "calculus",
304
+ "prerequisite_ids": ["func_extrema", "func_asymptote"],
305
+ "exam_weight": 4.0, "bloom_ceiling": 5,
306
+ },
307
+ {
308
+ "id": "derivative_rules",
309
+ "name": "Differentiation Rules",
310
+ "name_vi": "Quy tắc tính đạo hàm",
311
+ "grade": 12, "topic": "calculus",
312
+ "prerequisite_ids": ["linear_func", "exponential", "trig_basic"],
313
+ "exam_weight": 4.0, "bloom_ceiling": 3,
314
+ },
315
+ {
316
+ "id": "derivative_apps",
317
+ "name": "Derivative Applications",
318
+ "name_vi": "Ứng dụng đạo hàm — tốc độ, tối ưu",
319
+ "grade": 12, "topic": "calculus",
320
+ "prerequisite_ids": ["derivative_rules", "func_extrema"],
321
+ "exam_weight": 3.5, "bloom_ceiling": 5,
322
+ },
323
+ {
324
+ "id": "global_extrema",
325
+ "name": "Global Extrema on Closed Interval",
326
+ "name_vi": "GTLN và GTNN trên đoạn",
327
+ "grade": 12, "topic": "calculus",
328
+ "prerequisite_ids": ["func_extrema", "derivative_rules"],
329
+ "exam_weight": 4.0, "bloom_ceiling": 4,
330
+ },
331
+
332
+ # ── Grade 12 — Integrals ──────────────────────────────────────────────────
333
+
334
+ {
335
+ "id": "antiderivative",
336
+ "name": "Antiderivatives",
337
+ "name_vi": "Nguyên hàm",
338
+ "grade": 12, "topic": "calculus",
339
+ "prerequisite_ids": ["derivative_rules"],
340
+ "exam_weight": 4.0, "bloom_ceiling": 3,
341
+ },
342
+ {
343
+ "id": "definite_integral",
344
+ "name": "Definite Integrals",
345
+ "name_vi": "Tích phân xác định — Newton-Leibniz",
346
+ "grade": 12, "topic": "calculus",
347
+ "prerequisite_ids": ["antiderivative"],
348
+ "exam_weight": 4.5, "bloom_ceiling": 3,
349
+ },
350
+ {
351
+ "id": "integral_area",
352
+ "name": "Area Under a Curve",
353
+ "name_vi": "Ứng dụng tích phân — diện tích hình phẳng",
354
+ "grade": 12, "topic": "calculus",
355
+ "prerequisite_ids": ["definite_integral"],
356
+ "exam_weight": 4.0, "bloom_ceiling": 4,
357
+ },
358
+ {
359
+ "id": "integral_volume",
360
+ "name": "Volume of Revolution",
361
+ "name_vi": "Ứng dụng tích phân — thể tích vật thể tròn xoay",
362
+ "grade": 12, "topic": "calculus",
363
+ "prerequisite_ids": ["integral_area"],
364
+ "exam_weight": 3.0, "bloom_ceiling": 5,
365
+ },
366
+ {
367
+ "id": "integral_by_parts",
368
+ "name": "Integration by Parts",
369
+ "name_vi": "Tích phân từng phần",
370
+ "grade": 12, "topic": "calculus",
371
+ "prerequisite_ids": ["antiderivative"],
372
+ "exam_weight": 2.5, "bloom_ceiling": 4,
373
+ },
374
+ {
375
+ "id": "integral_substitution",
376
+ "name": "Integration by Substitution",
377
+ "name_vi": "Tích phân bằng phương pháp đổi biến",
378
+ "grade": 12, "topic": "calculus",
379
+ "prerequisite_ids": ["antiderivative"],
380
+ "exam_weight": 3.0, "bloom_ceiling": 4,
381
+ },
382
+
383
+ # ── Grade 12 — 3D Geometry (Oxyz) ─────────────────────────────────────────
384
+
385
+ {
386
+ "id": "oxyz_coords",
387
+ "name": "Oxyz Coordinate System",
388
+ "name_vi": "Hệ tọa độ không gian Oxyz",
389
+ "grade": 12, "topic": "geometry",
390
+ "prerequisite_ids": ["spatial_geo", "vectors"],
391
+ "exam_weight": 3.5, "bloom_ceiling": 3,
392
+ },
393
+ {
394
+ "id": "oxyz_plane",
395
+ "name": "Planes in Space",
396
+ "name_vi": "Phương trình mặt phẳng",
397
+ "grade": 12, "topic": "geometry",
398
+ "prerequisite_ids": ["oxyz_coords"],
399
+ "exam_weight": 4.0, "bloom_ceiling": 4,
400
+ },
401
+ {
402
+ "id": "oxyz_line",
403
+ "name": "Lines in Space",
404
+ "name_vi": "Phương trình đường thẳng trong không gian",
405
+ "grade": 12, "topic": "geometry",
406
+ "prerequisite_ids": ["oxyz_coords"],
407
+ "exam_weight": 3.5, "bloom_ceiling": 4,
408
+ },
409
+ {
410
+ "id": "oxyz_distance",
411
+ "name": "Distance & Angle in Space",
412
+ "name_vi": "Khoảng cách và góc trong không gian",
413
+ "grade": 12, "topic": "geometry",
414
+ "prerequisite_ids": ["oxyz_plane", "oxyz_line"],
415
+ "exam_weight": 4.0, "bloom_ceiling": 5,
416
+ },
417
+ {
418
+ "id": "oxyz_sphere",
419
+ "name": "Sphere Equation",
420
+ "name_vi": "Phương trình mặt cầu",
421
+ "grade": 12, "topic": "geometry",
422
+ "prerequisite_ids": ["oxyz_coords"],
423
+ "exam_weight": 3.0, "bloom_ceiling": 4,
424
+ },
425
+
426
+ # ── Grade 12 — Probability & Statistics ──────────────────────────────────
427
+
428
+ {
429
+ "id": "bayes",
430
+ "name": "Bayes' Theorem",
431
+ "name_vi": "Công thức Bayes và xác suất toàn phần",
432
+ "grade": 12, "topic": "probability",
433
+ "prerequisite_ids": ["prob_advanced"],
434
+ "exam_weight": 3.5, "bloom_ceiling": 5,
435
+ },
436
+ {
437
+ "id": "binomial_dist",
438
+ "name": "Binomial Distribution",
439
+ "name_vi": "Phân phối nhị thức",
440
+ "grade": 12, "topic": "probability",
441
+ "prerequisite_ids": ["comb_advanced", "prob_advanced"],
442
+ "exam_weight": 2.5, "bloom_ceiling": 4,
443
+ },
444
+ {
445
+ "id": "normal_dist",
446
+ "name": "Normal Distribution",
447
+ "name_vi": "Phân phối chuẩn và ứng dụng",
448
+ "grade": 12, "topic": "statistics",
449
+ "prerequisite_ids": ["binomial_dist", "stats_basic"],
450
+ "exam_weight": 2.0, "bloom_ceiling": 4,
451
+ },
452
+
453
+ # ── Advanced / Cross-domain ───────────────────────────────────────────────
454
+
455
+ {
456
+ "id": "lhopital",
457
+ "name": "L'Hôpital's Rule",
458
+ "name_vi": "Quy tắc L'Hôpital",
459
+ "grade": 12, "topic": "calculus",
460
+ "prerequisite_ids": ["derivative_rules"],
461
+ "exam_weight": 1.5, "bloom_ceiling": 4,
462
+ },
463
+ {
464
+ "id": "optimization",
465
+ "name": "Optimization Problems",
466
+ "name_vi": "Bài toán tối ưu hóa thực tế",
467
+ "grade": 12, "topic": "calculus",
468
+ "prerequisite_ids": ["derivative_apps", "global_extrema"],
469
+ "exam_weight": 4.0, "bloom_ceiling": 6,
470
+ },
471
+ {
472
+ "id": "math_induction",
473
+ "name": "Mathematical Induction",
474
+ "name_vi": "Quy nạp toán học",
475
+ "grade": 11, "topic": "algebra",
476
+ "prerequisite_ids": ["sequences"],
477
+ "exam_weight": 1.5, "bloom_ceiling": 6,
478
+ },
479
+ {
480
+ "id": "coordinate_circle",
481
+ "name": "Circle Equation in Oxy",
482
+ "name_vi": "Phương trình đường tròn trong mặt phẳng tọa độ",
483
+ "grade": 10, "topic": "geometry",
484
+ "prerequisite_ids": ["coord_geo", "circles"],
485
+ "exam_weight": 3.0, "bloom_ceiling": 4,
486
+ },
487
+ {
488
+ "id": "conic_sections",
489
+ "name": "Conic Sections",
490
+ "name_vi": "Elip, hypebol, parabol trong tọa độ",
491
+ "grade": 12, "topic": "geometry",
492
+ "prerequisite_ids": ["coord_geo", "quad_func"],
493
+ "exam_weight": 2.5, "bloom_ceiling": 5,
494
+ },
495
+ {
496
+ "id": "func_rational",
497
+ "name": "Rational Functions",
498
+ "name_vi": "Hàm số phân thức bậc nhất trên bậc nhất",
499
+ "grade": 12, "topic": "calculus",
500
+ "prerequisite_ids": ["func_asymptote", "derivative_rules"],
501
+ "exam_weight": 4.0, "bloom_ceiling": 4,
502
+ },
503
+ ]
backend/app/data/question_answers.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"q_amc8_001":0,"q_amc8_002":0,"q_amc8_003":0,"q_amc8_004":0,"q_amc8_005":0,"q_amc8_006":0,"q_amc8_007":0,"q_amc8_008":0,"q_amc8_009":0,"q_amc8_010":0,"q_amc8_011":0,"q_amc8_012":0,"q_amc8_013":0,"q_amc8_014":0,"q_amc8_015":0,"q_amc8_016":0,"q_amc8_017":0,"q_amc8_018":0,"q_amc8_019":0,"q_amc8_020":0,"q_amc10_001":0,"q_amc10_002":0,"q_amc10_003":0,"q_amc10_004":0,"q_amc10_005":0,"q_amc10_006":0,"q_amc10_007":0,"q_amc10_008":0,"q_amc10_009":0,"q_amc10_010":0,"q_amc10_011":0,"q_amc10_012":0,"q_amc10_013":0,"q_amc10_014":0,"q_amc10_015":0,"q_amc10_016":0,"q_amc10_017":0,"q_amc10_018":0,"q_amc10_019":0,"q_amc10_020":0,"q_gcse_001":0,"q_gcse_002":0,"q_gcse_003":0,"q_gcse_004":0,"q_gcse_005":0,"q_gcse_006":0,"q_gcse_007":0,"q_gcse_008":0,"q_gcse_009":0,"q_gcse_010":0,"q_gcse_011":0,"q_gcse_012":0,"q_gcse_013":0,"q_gcse_014":0,"q_gcse_015":0,"q_gcse_016":0,"q_gcse_017":0,"q_gcse_018":0,"q_gcse_019":0,"q_gcse_020":0,"q_sgol_001":0,"q_sgol_002":0,"q_sgol_003":0,"q_sgol_004":0,"q_sgol_005":0,"q_sgol_006":0,"q_sgol_007":0,"q_sgol_008":0,"q_sgol_009":0,"q_sgol_010":0,"q_sgol_011":0,"q_sgol_012":0,"q_sgol_013":0,"q_sgol_014":0,"q_sgol_015":0,"q_sgol_016":0,"q_sgol_017":0,"q_sgol_018":0,"q_sgol_019":0,"q_sgol_020":0,"q_amc_au_001":0,"q_amc_au_002":0,"q_amc_au_003":0,"q_amc_au_004":0,"q_amc_au_005":0,"q_amc_au_006":0,"q_amc_au_007":0,"q_amc_au_008":0,"q_amc_au_009":0,"q_amc_au_010":0,"q_amc_au_011":0,"q_amc_au_012":0,"q_amc_au_013":0,"q_amc_au_014":0,"q_amc_au_015":0,"q_amc_au_016":0,"q_amc_au_017":0,"q_amc_au_018":0,"q_amc_au_019":0,"q_amc_au_020":0,"q_vn_hn_001":0,"q_vn_hn_002":0,"q_vn_hn_003":0,"q_vn_hn_004":0,"q_vn_hn_005":0,"q_vn_hn_006":0,"q_vn_hn_007":0,"q_vn_hn_008":0,"q_vn_hn_009":0,"q_vn_hn_010":0,"q_vn_hn_011":0,"q_vn_hn_012":0,"q_vn_hn_013":0,"q_vn_hn_014":0,"q_vn_hn_015":0,"q_vn_hn_016":0,"q_vn_hn_017":0,"q_vn_hn_018":0,"q_vn_hn_019":0,"q_vn_hn_020":0,"q_vn_hcmc_001":0,"q_vn_hcmc_002":0,"q_vn_hcmc_003":0,"q_vn_hcmc_004":0,"q_vn_hcmc_005":0,"q_vn_hcmc_006":0,"q_vn_hcm
c_007":0,"q_vn_hcmc_008":0,"q_vn_hcmc_009":0,"q_vn_hcmc_010":0,"q_vn_hcmc_011":0,"q_vn_hcmc_012":0,"q_vn_hcmc_013":0,"q_vn_hcmc_014":0,"q_vn_hcmc_015":0,"q_vn_hcmc_016":0,"q_vn_hcmc_017":0,"q_vn_hcmc_018":0,"q_vn_hcmc_019":0,"q_vn_hcmc_020":0,"q_vn_dn_001":0,"q_vn_dn_002":0,"q_vn_dn_003":0,"q_vn_dn_004":0,"q_vn_dn_005":0,"q_vn_dn_006":0,"q_vn_dn_007":0,"q_vn_dn_008":0,"q_vn_dn_009":0,"q_vn_dn_010":0,"q_vn_dn_011":0,"q_vn_dn_012":0,"q_vn_dn_013":0,"q_vn_dn_014":0,"q_vn_dn_015":0,"q_vn_dn_016":0,"q_vn_dn_017":0,"q_vn_dn_018":0,"q_vn_dn_019":0,"q_vn_dn_020":0,"q_vn_hp_001":0,"q_vn_hp_002":0,"q_vn_hp_003":0,"q_vn_hp_004":0,"q_vn_hp_005":0,"q_vn_hp_006":0,"q_vn_hp_007":0,"q_vn_hp_008":0,"q_vn_hp_009":0,"q_vn_hp_010":0,"q_vn_hp_011":0,"q_vn_hp_012":0,"q_vn_hp_013":0,"q_vn_hp_014":0,"q_vn_hp_015":0,"q_vn_hp_016":0,"q_vn_hp_017":0,"q_vn_hp_018":0,"q_vn_hp_019":0,"q_vn_hp_020":0,"q_vn_bgd_001":0,"q_vn_bgd_002":0,"q_vn_bgd_003":0,"q_vn_bgd_004":0,"q_vn_bgd_005":0,"q_vn_bgd_006":0,"q_vn_bgd_007":0,"q_vn_bgd_008":0,"q_vn_bgd_009":0,"q_vn_bgd_010":0,"q_vn_bgd_011":0,"q_vn_bgd_012":0,"q_vn_bgd_013":0,"q_vn_bgd_014":0,"q_vn_bgd_015":0,"q_vn_bgd_016":0,"q_vn_bgd_017":0,"q_vn_bgd_018":0,"q_vn_bgd_019":0,"q_vn_bgd_020":0,"q_thpt24_001":2,"q_thpt24_002":2,"q_thpt24_003":1,"q_thpt24_004":2,"q_thpt24_005":0,"q_thpt24_006":1,"q_thpt24_007":1,"q_thpt24_008":2,"q_thpt24_009":1,"q_thpt24_010":1,"q_thpt24_011":2,"q_thpt24_012":0,"q_thpt24_013":0,"q_thpt24_014":1,"q_thpt24_015":1,"q_thpt24_016":0,"q_thpt24_017":1,"q_thpt24_018":2,"q_thpt24_019":1,"q_thpt24_020":0,"q_thpt24_021":1,"q_thpt24_022":2,"q_thpt24_023":2,"q_thpt24_024":0,"q_thpt24_025":0,"q_thpt24_026":1,"q_thpt24_027":1,"q_thpt24_028":1,"q_thpt24_029":1,"q_thpt24_030":1,"q_thpt24_031":1,"q_thpt24_032":2,"q_thpt24_033":1,"q_thpt24_034":1,"q_thpt24_035":2,"q_thpt24_036":2,"q_thpt24_037":0,"q_thpt24_038":1,"q_thpt24_039":1,"q_thpt24_040":3,"q_thpt24_041":1,"q_thpt24_042":1,"q_thpt24_043":1,"q_thpt24_044":2,"q_thpt24_045":1,"q_thpt24_04
6":1,"q_thpt24_047":1,"q_thpt24_048":2,"q_thpt24_049":1,"q_thpt24_050":2,"q_thpt23_001":0,"q_thpt23_002":1,"q_thpt23_003":2,"q_thpt23_004":0,"q_thpt23_005":1,"q_thpt23_006":0,"q_thpt23_007":2,"q_thpt23_008":1,"q_thpt23_009":1,"q_thpt23_010":1,"q_thpt23_011":0,"q_thpt23_012":1,"q_thpt23_013":0,"q_thpt23_014":1,"q_thpt23_015":0,"q_thpt23_016":2,"q_thpt23_017":1,"q_thpt23_018":1,"q_thpt23_019":1,"q_thpt23_020":0,"q_thpt23_021":2,"q_thpt23_022":2,"q_thpt23_023":1,"q_thpt23_024":1,"q_thpt23_025":0,"q_thpt23_026":2,"q_thpt23_027":0,"q_thpt23_028":2,"q_thpt23_029":1,"q_thpt23_030":2,"q_thpt23_031":1,"q_thpt23_032":0,"q_thpt23_033":0,"q_thpt23_034":1,"q_thpt23_035":1,"q_thpt23_036":1,"q_thpt23_037":0,"q_thpt23_038":2,"q_thpt23_039":1,"q_thpt23_040":1,"q_thpt23_041":0,"q_thpt23_042":3,"q_thpt23_043":1,"q_thpt23_044":3,"q_thpt23_045":2,"q_thpt23_046":1,"q_thpt23_047":2,"q_thpt23_048":1,"q_thpt23_049":0,"q_thpt23_050":2,"q_thpt22_001":0,"q_thpt22_002":1,"q_thpt22_003":0,"q_thpt22_004":0,"q_thpt22_005":0,"q_thpt22_006":0,"q_thpt22_007":2,"q_thpt22_008":1,"q_thpt22_009":2,"q_thpt22_010":1,"q_thpt22_011":0,"q_thpt22_012":0,"q_thpt22_013":2,"q_thpt22_014":1,"q_thpt22_015":1,"q_thpt22_016":1,"q_thpt22_017":0,"q_thpt22_018":1,"q_thpt22_019":0,"q_thpt22_020":0,"q_thpt22_021":0,"q_thpt22_022":1,"q_thpt22_023":2,"q_thpt22_024":1,"q_thpt22_025":1,"q_thpt22_026":1,"q_thpt22_027":1,"q_thpt22_028":0,"q_thpt22_029":1,"q_thpt22_030":1,"q_thpt22_031":1,"q_thpt22_032":1,"q_thpt22_033":2,"q_thpt22_034":2,"q_thpt22_035":0,"q_thpt22_036":1,"q_thpt22_037":0,"q_thpt22_038":0,"q_thpt22_039":1,"q_thpt22_040":1,"q_thpt22_041":1,"q_thpt22_042":0,"q_thpt22_043":1,"q_thpt22_044":1,"q_thpt22_045":0,"q_thpt22_046":1,"q_thpt22_047":1,"q_thpt22_048":1,"q_thpt22_049":0,"q_thpt22_050":1,"q_thithu_hn24_001":1,"q_thithu_hn24_002":0,"q_thithu_hn24_003":1,"q_thithu_hn24_004":0,"q_thithu_hn24_005":1,"q_thithu_hn24_006":0,"q_thithu_hn24_007":2,"q_thithu_hn24_008":0,"q_thithu_hn24_009":0,"q_thithu_hn24_010":0,"q_thit
hu_hn24_011":2,"q_thithu_hn24_012":1,"q_thithu_hn24_013":0,"q_thithu_hn24_014":0,"q_thithu_hn24_015":1,"q_thithu_hn24_016":1,"q_thithu_hn24_017":2,"q_thithu_hn24_018":0,"q_thithu_hn24_019":0,"q_thithu_hn24_020":1,"q_thithu_hn24_021":0,"q_thithu_hn24_022":0,"q_thithu_hn24_023":0,"q_thithu_hn24_024":0,"q_thithu_hn24_025":0,"q_thithu_hn24_026":0,"q_thithu_hn24_027":0,"q_thithu_hn24_028":0,"q_thithu_hn24_029":0,"q_thithu_hn24_030":0,"q_thithu_hn24_031":0,"q_thithu_hn24_032":0,"q_thithu_hn24_033":0,"q_thithu_hn24_034":0,"q_thithu_hn24_035":0,"q_thithu_hn24_036":0,"q_thithu_hn24_037":0,"q_thithu_hn24_038":0,"q_thithu_hn24_039":0,"q_thithu_hn24_040":0,"q_thithu_hn24_041":0,"q_thithu_hn24_042":0,"q_thithu_hn24_043":0,"q_thithu_hn24_044":0,"q_thithu_hn24_045":0,"q_thithu_hn24_046":0,"q_thithu_hn24_047":0,"q_thithu_hn24_048":0,"q_thithu_hn24_049":0,"q_thithu_hn24_050":0,"q_thithu_hcm24_001":2,"q_thithu_hcm24_002":1,"q_thithu_hcm24_003":1,"q_thithu_hcm24_004":2,"q_thithu_hcm24_005":2,"q_thithu_hcm24_006":1,"q_thithu_hcm24_007":0,"q_thithu_hcm24_008":1,"q_thithu_hcm24_009":0,"q_thithu_hcm24_010":0,"q_thithu_hcm24_011":1,"q_thithu_hcm24_012":1,"q_thithu_hcm24_013":0,"q_thithu_hcm24_014":0,"q_thithu_hcm24_015":2,"q_thithu_hcm24_016":2,"q_thithu_hcm24_017":1,"q_thithu_hcm24_018":1,"q_thithu_hcm24_019":0,"q_thithu_hcm24_020":1,"q_thithu_hcm24_021":0,"q_thithu_hcm24_022":0,"q_thithu_hcm24_023":0,"q_thithu_hcm24_024":0,"q_thithu_hcm24_025":0,"q_thithu_hcm24_026":0,"q_thithu_hcm24_027":0,"q_thithu_hcm24_028":0,"q_thithu_hcm24_029":0,"q_thithu_hcm24_030":0,"q_thithu_hcm24_031":0,"q_thithu_hcm24_032":0,"q_thithu_hcm24_033":0,"q_thithu_hcm24_034":0,"q_thithu_hcm24_035":0,"q_thithu_hcm24_036":0,"q_thithu_hcm24_037":0,"q_thithu_hcm24_038":0,"q_thithu_hcm24_039":0,"q_thithu_hcm24_040":0,"q_thithu_hcm24_041":0,"q_thithu_hcm24_042":0,"q_thithu_hcm24_043":0,"q_thithu_hcm24_044":0,"q_thithu_hcm24_045":0,"q_thithu_hcm24_046":0,"q_thithu_hcm24_047":0,"q_thithu_hcm24_048":0,"q_thithu_hcm24_049":0,
"q_thithu_hcm24_050":0,"q_sat_001":0,"q_sat_002":0,"q_sat_003":0,"q_sat_004":0,"q_sat_005":0,"q_sat_006":0,"q_sat_007":0,"q_sat_008":0,"q_sat_009":0,"q_sat_010":0,"q_sat_011":0,"q_sat_012":0,"q_sat_013":0,"q_sat_014":0,"q_sat_015":0,"q_sat_016":0,"q_sat_017":0,"q_sat_018":0,"q_sat_019":0,"q_sat_020":0,"q_gauss_001":0,"q_gauss_002":0,"q_gauss_003":0,"q_gauss_004":0,"q_gauss_005":0,"q_gauss_006":0,"q_gauss_007":0,"q_gauss_008":0,"q_gauss_009":0,"q_gauss_010":0,"q_gauss_011":0,"q_gauss_012":0,"q_gauss_013":0,"q_gauss_014":0,"q_gauss_015":0,"q_gauss_016":0,"q_gauss_017":0,"q_gauss_018":0,"q_gauss_019":0,"q_gauss_020":0,"q_ib_sl_001":0,"q_ib_sl_002":0,"q_ib_sl_003":0,"q_ib_sl_004":0,"q_ib_sl_005":0,"q_ib_sl_006":0,"q_ib_sl_007":0,"q_ib_sl_008":0,"q_ib_sl_009":0,"q_ib_sl_010":0,"q_ib_sl_011":0,"q_ib_sl_012":0,"q_ib_sl_013":0,"q_ib_sl_014":0,"q_ib_sl_015":0,"q_ib_sl_016":0,"q_ib_sl_017":0,"q_ib_sl_018":0,"q_ib_sl_019":0,"q_ib_sl_020":0,"q_ksat_001":0,"q_ksat_002":0,"q_ksat_003":0,"q_ksat_004":0,"q_ksat_005":0,"q_ksat_006":0,"q_ksat_007":0,"q_ksat_008":0,"q_ksat_009":0,"q_ksat_010":0,"q_ksat_011":0,"q_ksat_012":0,"q_ksat_013":0,"q_ksat_014":0,"q_ksat_015":0,"q_ksat_016":0,"q_ksat_017":0,"q_ksat_018":0,"q_ksat_019":0,"q_ksat_020":0,"q_jee_001":0,"q_jee_002":0,"q_jee_003":0,"q_jee_004":0,"q_jee_005":0,"q_jee_006":0,"q_jee_007":0,"q_jee_008":0,"q_jee_009":0,"q_jee_010":0,"q_jee_011":0,"q_jee_012":0,"q_jee_013":0,"q_jee_014":0,"q_jee_015":0,"q_jee_016":0,"q_jee_017":0,"q_jee_018":0,"q_jee_019":0,"q_jee_020":0,"q_sat_021":0,"q_sat_022":0,"q_sat_023":0,"q_sat_024":0,"q_sat_025":0,"q_sat_026":0,"q_sat_027":0,"q_sat_028":0,"q_sat_029":0,"q_sat_030":0,"q_sat_031":0,"q_sat_032":0,"q_sat_033":0,"q_sat_034":0,"q_sat_035":0,"q_sat_036":0,"q_sat_037":0,"q_sat_038":0,"q_sat_039":0,"q_sat_040":0,"q_sat_041":0,"q_sat_042":0,"q_sat_043":0,"q_sat_044":0,"q_gauss_021":0,"q_gauss_022":0,"q_gauss_023":0,"q_gauss_024":0,"q_gauss_025":0,"q_ib_sl_021":0,"q_ib_sl_022":0,"q_ib_sl_023":0,"q_ib_sl_024"
:0,"q_ib_sl_025":0,"q_ib_sl_026":0,"q_ib_sl_027":0,"q_ib_sl_028":0,"q_ib_sl_029":0,"q_ib_sl_030":0,"q_ksat_021":0,"q_ksat_022":0,"q_ksat_023":0,"q_ksat_024":0,"q_ksat_025":0,"q_ksat_026":0,"q_ksat_027":0,"q_ksat_028":0,"q_ksat_029":0,"q_ksat_030":0,"q_jee_021":0,"q_jee_022":0,"q_jee_023":0,"q_jee_024":0,"q_jee_025":0,"q_jee_026":0,"q_jee_027":0,"q_jee_028":0,"q_jee_029":0,"q_jee_030":0,"q_mx01_001":3,"q_mx01_002":3,"q_mx01_003":1,"q_mx01_004":0,"q_mx01_005":1,"q_mx01_006":3,"q_mx01_007":1,"q_mx01_008":2,"q_mx01_009":0,"q_mx01_010":2,"q_mx01_011":1,"q_mx01_012":3,"q_mx01_013":3,"q_mx01_014":3,"q_mx01_015":0,"q_mx01_016":0,"q_mx01_017":3,"q_mx01_018":1,"q_mx01_019":0,"q_mx01_020":0,"q_mx01_021":1,"q_mx01_022":3,"q_mx01_023":1,"q_mx01_024":1,"q_mx01_025":3,"q_mx01_026":1,"q_mx01_027":0,"q_mx01_028":0,"q_mx01_029":1,"q_mx01_030":1,"q_mx01_031":2,"q_mx01_032":3,"q_mx01_033":0,"q_mx01_034":2,"q_mx01_035":3,"q_mx01_036":2,"q_mx01_037":2,"q_mx01_038":2,"q_mx01_039":3,"q_mx01_040":3,"q_mx01_041":1,"q_mx01_042":1,"q_mx01_043":0,"q_mx01_044":3,"q_mx01_045":0,"q_mx01_046":0,"q_mx01_047":2,"q_mx01_048":3,"q_mx01_049":0,"q_mx01_050":1,"q_vj01_01":1,"q_vj01_02":2,"q_vj01_03":1,"q_vj01_04":0,"q_vj01_05":3,"q_vj01_06":0,"q_vj01_07":2,"q_vj01_08":3,"q_vj02_01":2,"q_vj02_02":0,"q_vj02_03":3,"q_vj02_04":2,"q_vj02_05":1,"q_vj02_06":0,"q_vj02_07":1,"q_vj02_08":2,"q_vj03_01":3,"q_vj03_02":0,"q_vj03_03":3,"q_vj03_04":1,"q_vj03_05":0,"q_vj03_06":2,"q_vj03_07":1,"q_vj03_08":2,"q_vj04_01":1,"q_vj04_02":2,"q_vj04_03":0,"q_vj04_04":3,"q_vj04_05":0,"q_vj04_06":3,"q_vj04_07":2,"q_vj04_08":1,"q_vj05_01":2,"q_vj05_02":3,"q_vj05_03":0,"q_vj05_04":3,"q_vj05_05":1,"q_vj05_06":0,"q_vj05_07":3,"q_vj05_08":1,"q_lgh_173342_01":3,"q_lgh_173342_02":1,"q_lgh_173342_03":1,"q_lgh_173342_04":2,"q_lgh_173342_05":3,"q_lgh_173342_06":1,"q_lgh_173342_07":1,"q_lgh_173342_08":0,"q_lgh_173342_09":0,"q_lgh_173342_10":2,"q_lgh_173342_11":3,"q_lgh_173342_12":3,"q_lgh_173478_01":0,"q_lgh_173478_02":3,"q_lgh_173478_03":1,"
q_lgh_173478_04":1,"q_lgh_173478_05":2,"q_lgh_173478_06":3,"q_lgh_173478_07":3,"q_lgh_173478_08":0,"q_lgh_173478_09":3,"q_lgh_173478_10":1,"q_lgh_173478_11":3,"q_lgh_173478_12":3,"q_lgh_173753_01":1,"q_lgh_173753_02":3,"q_lgh_173753_03":0,"q_lgh_173753_04":0,"q_lgh_173753_05":1,"q_lgh_173753_06":3,"q_lgh_173753_07":3,"q_lgh_173753_08":0,"q_lgh_173753_09":1,"q_lgh_173753_10":1,"q_lgh_173753_11":1,"q_lgh_173753_12":1,"q_lgh_173871_01":3,"q_lgh_173871_02":2,"q_lgh_173871_03":1,"q_lgh_173871_04":0,"q_lgh_173871_05":3,"q_lgh_173871_06":1,"q_lgh_173871_07":2,"q_lgh_173871_08":0,"q_lgh_173871_09":3,"q_lgh_173871_10":2,"q_lgh_173871_11":3,"q_lgh_173871_12":0,"q_lgh_173945_01":2,"q_lgh_173945_02":1,"q_lgh_173945_03":2,"q_lgh_173945_04":2,"q_lgh_173945_05":3,"q_lgh_173945_06":3,"q_lgh_173945_07":2,"q_lgh_173945_08":2,"q_lgh_173945_09":3,"q_lgh_173945_10":2,"q_lgh_173945_11":1,"q_lgh_173945_12":3,"q_lgh_180179_01":2,"q_lgh_180179_02":0,"q_lgh_180179_03":3,"q_lgh_180179_04":3,"q_lgh_180179_05":2,"q_lgh_180179_06":1,"q_lgh_180840_01":0,"q_lgh_180840_02":1,"q_lgh_180840_03":0,"q_lgh_180840_04":0,"q_lgh_180840_05":2,"q_lgh_180840_06":1,"q_lgh_180918_01":0,"q_lgh_180918_02":2,"q_lgh_180918_03":3,"q_lgh_180918_04":2,"q_lgh_180918_05":1,"q_lgh_180918_06":2,"q_lgh_180978_01":2,"q_lgh_180978_02":0,"q_lgh_180978_03":1,"q_lgh_180978_04":3,"q_lgh_180978_05":2,"q_lgh_180978_06":2,"q_lgh_181040_01":3,"q_lgh_181040_02":2,"q_lgh_181040_03":2,"q_lgh_181040_04":3,"q_lgh_181040_05":1,"q_lgh_181040_06":1,"q_lgh_177656_01":0,"q_lgh_177656_02":3,"q_lgh_177656_03":1,"q_lgh_177656_04":2,"q_lgh_177656_05":2,"q_lgh_177656_06":2,"q_lgh_177656_07":1,"q_lgh_177656_08":3,"q_lgh_177656_09":0,"q_lgh_177656_10":1,"q_lgh_177656_11":3,"q_lgh_177656_12":3,"q_lgh_177657_01":0,"q_lgh_177657_02":0,"q_lgh_177657_03":3,"q_lgh_177657_04":2,"q_lgh_177657_05":2,"q_lgh_177657_06":0,"q_lgh_177657_07":3,"q_lgh_177657_08":1,"q_lgh_177657_09":0,"q_lgh_177657_10":3,"q_lgh_177657_11":1,"q_lgh_177657_12":3,"q_lgh_177661_01":1,"
q_lgh_177661_02":0,"q_lgh_177661_03":2,"q_lgh_177661_04":0,"q_lgh_177661_05":1,"q_lgh_177661_06":2,"q_lgh_177661_07":3,"q_lgh_177661_08":1,"q_lgh_177661_10":0,"q_lgh_177661_11":3,"q_lgh_177661_12":2,"q_lgh_177714_01":2,"q_lgh_177714_02":2,"q_lgh_177714_03":1,"q_lgh_177714_04":3,"q_lgh_177714_05":1,"q_lgh_177714_06":1,"q_lgh_177714_07":2,"q_lgh_177714_08":2,"q_lgh_177714_09":1,"q_lgh_177714_10":2,"q_lgh_177714_11":0,"q_lgh_177714_12":0,"q_lgh_177717_01":3,"q_lgh_177717_02":0,"q_lgh_177717_03":2,"q_lgh_177717_04":1,"q_lgh_177717_05":2,"q_lgh_177717_06":3,"q_lgh_177717_07":2,"q_lgh_177717_08":2,"q_lgh_177717_09":3,"q_lgh_177717_10":0,"q_lgh_177717_11":1,"q_lgh_177717_12":3,"q_lgh_182201_01":2,"q_lgh_182201_02":2,"q_lgh_182201_03":2,"q_lgh_182201_04":2,"q_lgh_182201_05":3,"q_lgh_182201_06":3,"q_lgh_182201_07":1,"q_lgh_182201_08":0,"q_lgh_182201_09":0,"q_lgh_182201_10":2,"q_lgh_182201_11":3,"q_lgh_182201_12":1,"q_lgh_182313_01":1,"q_lgh_182313_02":1,"q_lgh_182313_03":2,"q_lgh_182313_04":2,"q_lgh_182313_05":2,"q_lgh_182313_06":0,"q_lgh_182313_07":3,"q_lgh_182313_08":1,"q_lgh_182313_09":3,"q_lgh_182313_10":0,"q_lgh_182313_11":2,"q_lgh_182313_12":1,"q_lgh_182339_01":0,"q_lgh_182339_02":3,"q_lgh_182339_03":1,"q_lgh_182339_04":2,"q_lgh_182339_05":3,"q_lgh_182339_06":2,"q_lgh_182339_07":2,"q_lgh_182339_08":3,"q_lgh_182339_09":3,"q_lgh_182339_10":1,"q_lgh_182339_11":0,"q_lgh_182339_12":2,"q_lgh_182635_01":1,"q_lgh_182635_02":2,"q_lgh_182635_03":2,"q_lgh_182635_04":3,"q_lgh_182635_05":2,"q_lgh_182635_06":2,"q_lgh_182635_07":1,"q_lgh_182635_09":2,"q_lgh_182635_10":3,"q_lgh_182635_11":1,"q_lgh_182635_12":2,"q_lgh_182707_01":0,"q_lgh_182707_02":0,"q_lgh_182707_03":1,"q_lgh_182707_04":0,"q_lgh_182707_05":1,"q_lgh_182707_06":3,"q_lgh_182707_07":3,"q_lgh_182707_08":0,"q_lgh_182707_09":2,"q_lgh_182707_10":0,"q_lgh_182707_11":1,"q_lgh_182707_12":0,"q_bece22_01":1,"q_bece22_02":0,"q_bece22_03":2,"q_bece22_04":3,"q_bece22_05":1,"q_bece22_06":1,"q_bece22_07":1,"q_bece22_08":2,"q_bece22_09"
:3,"q_bece22_10":1,"q_bece22_11":3,"q_bece22_12":1,"q_bece22_13":0,"q_bece22_14":0,"q_bece22_15":1,"q_bece22_16":1,"q_bece22_17":2,"q_bece22_18":2,"q_bece22_19":3,"q_bece22_20":0,"q_bece22_21":1,"q_bece22_22":3,"q_bece22_23":0,"q_bece22_24":3,"q_bece22_25":2,"q_bece21_01":0,"q_bece21_02":0,"q_bece21_03":2,"q_bece21_04":2,"q_bece21_05":2,"q_bece21_06":0,"q_bece21_07":2,"q_bece21_08":2,"q_bece21_09":0,"q_bece21_10":2,"q_bece21_11":2,"q_bece21_12":3,"q_bece21_13":0,"q_bece21_14":2,"q_bece21_15":2,"q_bece21_16":1,"q_bece21_17":1,"q_bece21_18":2,"q_bece21_19":1,"q_bece21_20":2,"q_bece21_21":0,"q_bece21_22":3,"q_bece21_23":1,"q_bece21_24":0,"q_bece21_25":0,"q_bece24_01":3,"q_bece24_02":1,"q_bece24_03":2,"q_bece24_04":1,"q_bece24_05":0,"q_bece24_06":0,"q_bece24_07":0,"q_bece24_08":0,"q_bece24_09":1,"q_bece24_10":3,"q_bece24_11":1,"q_bece24_12":2,"q_bece24_13":1,"q_bece24_14":3,"q_bece24_15":0,"q_bece24_16":0,"q_bece24_17":0,"q_bece24_18":1,"q_bece24_19":0,"q_bece24_20":2,"q_bece24_21":2,"q_bece24_22":3,"q_bece24_23":2,"q_bece24_24":1,"q_bece24_25":2,"q_cbse24p1_01":3,"q_cbse24p1_02":1,"q_cbse24p1_03":0,"q_cbse24p1_04":0,"q_cbse24p1_05":0,"q_cbse24p1_06":2,"q_cbse24p1_07":1,"q_cbse24p1_08":0,"q_cbse24p1_09":0,"q_cbse24p1_10":3,"q_cbse24p1_11":1,"q_cbse24p1_12":1,"q_cbse24p1_13":1,"q_cbse24p1_14":3,"q_cbse24p1_15":1,"q_cbse24p1_16":1,"q_cbse24p1_17":0,"q_cbse24p1_18":1,"q_cbse24p1_19":2,"q_cbse24p1_20":3,"q_cbse24p1_21":0,"q_cbse24p1_22":1,"q_cbse24p1_23":0,"q_cbse24p1_24":0,"q_cbse24p1_25":0,"q_cbse24p2_01":1,"q_cbse24p2_02":0,"q_cbse24p2_03":2,"q_cbse24p2_04":1,"q_cbse24p2_05":0,"q_cbse24p2_06":0,"q_cbse24p2_07":1,"q_cbse24p2_08":1,"q_cbse24p2_09":2,"q_cbse24p2_10":2,"q_cbse24p2_11":1,"q_cbse24p2_12":0,"q_cbse24p2_13":2,"q_cbse24p2_14":0,"q_cbse24p2_15":0,"q_cbse24p2_16":3,"q_cbse24p2_17":2,"q_cbse24p2_18":2,"q_cbse24p2_19":1,"q_cbse24p2_20":1,"q_cbse24p2_21":3,"q_cbse24p2_22":0,"q_cbse24p2_23":1,"q_cbse24p2_24":0,"q_cbse24p2_25":0,"q_bece22_26":0,"q_bece22_27":1,"q_bece22
_28":1,"q_bece22_29":0,"q_bece22_30":2,"q_bece22_31":1,"q_bece22_32":0,"q_bece22_33":2,"q_bece22_34":1,"q_bece22_35":0,"q_bece22_36":1,"q_bece22_37":1,"q_bece22_38":1,"q_bece22_39":1,"q_bece21_26":1,"q_bece21_27":2,"q_bece21_28":0,"q_bece21_29":0,"q_bece21_30":0,"q_bece21_31":0,"q_bece21_32":0,"q_bece21_33":0,"q_bece21_34":0,"q_bece21_35":0,"q_bece21_36":3,"q_bece21_37":1,"q_bece21_38":0,"q_bece21_39":1,"q_bece24_26":2,"q_bece24_27":0,"q_bece24_28":1,"q_bece24_29":1,"q_bece24_30":2,"q_bece24_31":2,"q_bece24_32":1,"q_bece24_33":3,"q_bece24_34":2,"q_bece24_35":0,"q_bece24_36":1,"q_bece24_37":1,"q_bece24_38":3,"q_bece24_39":3,"q_cbse24p1_26":2,"q_cbse24p1_27":0,"q_cbse24p1_28":0,"q_cbse24p1_29":1,"q_cbse24p1_30":1,"q_cbse24p1_31":3,"q_cbse24p1_32":2,"q_cbse24p1_33":2,"q_cbse24p1_34":0,"q_cbse24p1_35":1,"q_cbse24p1_36":0,"q_cbse24p1_37":1,"q_cbse24p1_38":0,"q_cbse24p1_39":1,"q_cbse24p1_40":0,"q_cbse24p2_26":0,"q_cbse24p2_27":1,"q_cbse24p2_28":1,"q_cbse24p2_29":1,"q_cbse24p2_30":2,"q_cbse24p2_31":1,"q_cbse24p2_32":1,"q_cbse24p2_33":3,"q_cbse24p2_34":1,"q_cbse24p2_35":2,"q_cbse24p2_36":2,"q_cbse24p2_37":0,"q_cbse24p2_38":0,"q_cbse24p2_39":1,"q_cbse24p2_40":0}
backend/app/db.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """asyncpg-compatible wrapper around aiosqlite for local SQLite storage.
2
+
3
+ Drop-in replacement for the asyncpg connection pool: same acquire(), fetchrow(),
4
+ fetch(), fetchval(), execute(), and transaction() API, so pg_db.py / analytics.py /
5
+ sanitizer.py require zero changes.
6
+
7
+ SQL translation handled automatically:
8
+ $N placeholders → ?
9
+ = ANY($N) → IN (?,?,?) (array expansion)
10
+ ::type casts → stripped
11
+ NOW() → datetime('now')
12
+ list params → JSON-serialised (for embedding storage)
13
+
14
+ Architecture note:
15
+ A single persistent aiosqlite connection is shared across all callers. An
16
+ asyncio.Lock serialises every acquire() so only one coroutine touches the
17
+ SQLite file at a time. This eliminates the concurrent-writer corruption that
18
+ WAL mode's shared-memory coordination (-shm/-wal files) causes on
19
+ network/container filesystems (NFS, Docker overlays, HuggingFace Spaces /data).
20
+ """
21
+ import asyncio
22
+ import json
23
+ import logging
24
+ import re
25
+ import sqlite3
26
+ from contextlib import asynccontextmanager
27
+ from pathlib import Path
28
+ from typing import Any, AsyncGenerator, Optional
29
+
30
+ import aiosqlite
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
class Row(dict):
    """Mapping row that can also be indexed by column position.

    Non-integer keys behave exactly like a plain ``dict`` lookup.  An ``int``
    key selects the N-th value in insertion order, which mirrors how an
    asyncpg ``Record`` can be read as ``row[0]``.
    """

    def __getitem__(self, key: Any) -> Any:
        # Anything that is not an int is an ordinary column-name lookup.
        if not isinstance(key, int):
            return super().__getitem__(key)
        # Positional access: materialise the values so they can be indexed
        # (negative indexes and IndexError behave as for a list).
        return [*self.values()][key]
42
+
43
+
44
+ def _translate(query: str, params: tuple) -> tuple[str, list]:
45
+ """Translate PostgreSQL query + params to SQLite equivalents."""
46
+ # Detect which $N positions are used by ANY($N) (0-based)
47
+ any_positions: set[int] = set()
48
+ for m in re.finditer(r"=\s*ANY\(\$(\d+)\)", query, re.IGNORECASE):
49
+ any_positions.add(int(m.group(1)) - 1)
50
+
51
+ # Expand = ANY($N) → IN (?,?,?)
52
+ def _expand_any(m: re.Match) -> str:
53
+ idx = int(m.group(1)) - 1
54
+ arr = list(params[idx]) if params[idx] else []
55
+ return f"IN ({','.join(['?'] * len(arr))})" if arr else "IN (NULL)"
56
+
57
+ query = re.sub(r"=\s*ANY\(\$(\d+)\)", _expand_any, query, flags=re.IGNORECASE)
58
+
59
+ # Strip PostgreSQL type casts: ::jsonb, ::timestamptz, ::text[], ::float, etc.
60
+ query = re.sub(r"::[a-zA-Z_][\w\[\]]*", "", query)
61
+
62
+ # Replace NOW() with SQLite equivalent
63
+ query = re.sub(r"\bNOW\(\)", "datetime('now')", query, flags=re.IGNORECASE)
64
+
65
+ # Build flat params, expanding ANY arrays and serialising lists→JSON
66
+ new_params: list = []
67
+ for i, p in enumerate(params):
68
+ if i in any_positions:
69
+ new_params.extend(list(p) if p else [])
70
+ elif isinstance(p, list):
71
+ new_params.append(json.dumps(p))
72
+ else:
73
+ new_params.append(p)
74
+
75
+ # Replace remaining $N placeholders with ?
76
+ query = re.sub(r"\$\d+", "?", query)
77
+
78
+ return query, new_params
79
+
80
+
81
class _Connection:
    """asyncpg-style facade (fetchrow/fetch/fetchval/execute/transaction)
    over one ``aiosqlite`` connection.

    Outside ``transaction()`` every call commits immediately.  Inside it,
    commits are deferred until the outermost ``transaction()`` block exits.
    """

    def __init__(self, conn: aiosqlite.Connection) -> None:
        self._conn = conn
        self._in_transaction = False
        # Nesting depth of transaction() blocks; only depth 0 -> 1 owns the
        # commit/rollback.
        self._tx_depth = 0

    async def _commit_unless_in_transaction(self) -> None:
        """Persist the statement just run when no transaction() is active."""
        if not self._in_transaction:
            await self._conn.commit()

    async def fetchrow(self, query: str, *args) -> Optional[Row]:
        """Run ``query`` and return its first row as a ``Row``, or ``None``."""
        q, params = _translate(query, args)
        cur = await self._conn.execute(q, params)
        try:
            row = await cur.fetchone()
            description = cur.description
        finally:
            await cur.close()
        # Commit after fetch so INSERT … RETURNING rows are both readable and persisted.
        await self._commit_unless_in_transaction()
        if row is None or description is None:
            return None
        return Row(zip([d[0] for d in description], row))

    async def fetch(self, query: str, *args) -> list[Row]:
        """Run ``query`` and return every row as a list of ``Row``."""
        q, params = _translate(query, args)
        cur = await self._conn.execute(q, params)
        try:
            rows = await cur.fetchall()
            description = cur.description
        finally:
            await cur.close()
        await self._commit_unless_in_transaction()
        if description is None:
            return []
        cols = [d[0] for d in description]
        return [Row(zip(cols, r)) for r in rows]

    async def fetchval(self, query: str, *args) -> Any:
        """Run ``query`` and return the first column of the first row, or ``None``."""
        q, params = _translate(query, args)
        cur = await self._conn.execute(q, params)
        try:
            row = await cur.fetchone()
        finally:
            await cur.close()
        await self._commit_unless_in_transaction()
        return row[0] if row else None

    async def execute(self, query: str, *args) -> str:
        """Run ``query`` and return a status string of the form ``"UPDATE <rowcount>"``.

        The verb is always ``UPDATE`` regardless of the statement type.
        """
        q, params = _translate(query, args)
        cur = await self._conn.execute(q, params)
        try:
            rowcount = cur.rowcount
        finally:
            await cur.close()
        await self._commit_unless_in_transaction()
        return f"UPDATE {rowcount}"

    @asynccontextmanager
    async def transaction(self) -> AsyncGenerator[None, None]:
        """Group statements so they are committed (or rolled back) together.

        Nested ``transaction()`` blocks join the enclosing one: only the
        outermost block commits on success or rolls back on error.  There are
        no savepoints, so an exception caught between an inner and an outer
        block does not undo the inner block's statements on its own.
        """
        outermost = self._tx_depth == 0
        self._tx_depth += 1
        self._in_transaction = True
        try:
            yield
            if outermost:
                await self._conn.commit()
        except BaseException:
            if outermost:
                await self._conn.rollback()
            raise
        finally:
            self._tx_depth -= 1
            # Stay in "deferred commit" mode while an outer block is open.
            self._in_transaction = self._tx_depth > 0
135
+
136
+
137
class AsyncSQLitePool:
    """Single-connection asyncpg-compatible pool backed by a local SQLite file.

    One persistent aiosqlite connection is shared by all callers. An asyncio.Lock
    ensures only one coroutine executes against the connection at a time, which is
    both sufficient (single uvicorn process) and necessary (prevents WAL-mode
    shared-memory corruption on container/NFS filesystems).
    """

    def __init__(self, db_path: str) -> None:
        self._path = db_path
        self._conn: Optional[aiosqlite.Connection] = None
        self._lock: asyncio.Lock = asyncio.Lock()
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)

    async def _open(self) -> aiosqlite.Connection:
        """Open a connection to ``self._path`` and apply the per-connection PRAGMAs."""
        conn = await aiosqlite.connect(self._path)
        try:
            await conn.execute("PRAGMA foreign_keys = ON")
            await conn.execute("PRAGMA cache_size = -64000")  # 64 MB in-process page cache
            await conn.execute("PRAGMA busy_timeout = 5000")  # queue 5 s before failing
        except BaseException:
            # Do not leak the connection (and its worker thread) on failure.
            await self._close_quietly(conn)
            raise
        return conn

    @staticmethod
    async def _close_quietly(conn: Optional[aiosqlite.Connection]) -> None:
        """Close ``conn`` if it is open; a failure to close is logged, not raised."""
        if conn is None:
            return
        try:
            await conn.close()
        except Exception:
            logger.debug("ignoring error while closing SQLite connection", exc_info=True)

    async def initialize(self) -> None:
        """Open the persistent connection; auto-recover if the DB file is corrupted.

        Only genuine corruption triggers the wipe-and-recreate path: a
        ``sqlite3.DatabaseError`` that is not an ``OperationalError`` (for
        example "file is not a database", "database disk image is malformed",
        or a failed ``PRAGMA integrity_check``).  Operational problems such as
        a locked or unopenable file, and any non-SQLite error, are re-raised
        so that existing data is never deleted because of a transient fault.
        """
        conn: Optional[aiosqlite.Connection] = None
        try:
            conn = await self._open()
            cur = await conn.execute("PRAGMA integrity_check")
            row = await cur.fetchone()
            if row and row[0] != "ok":
                raise sqlite3.DatabaseError(f"integrity_check: {row[0]}")
            await conn.commit()
        except BaseException as exc:
            await self._close_quietly(conn)
            is_corruption = isinstance(exc, sqlite3.DatabaseError) and not isinstance(
                exc, sqlite3.OperationalError
            )
            if not is_corruption:
                raise
            logger.warning("DB at %s is corrupt (%s) — wiping and recreating", self._path, exc)
            base = str(Path(self._path))
            for suffix in ("", "-wal", "-shm"):
                Path(base + suffix).unlink(missing_ok=True)
            conn = await self._open()
            await conn.commit()
            logger.info("Fresh DB created at %s", self._path)
        self._conn = conn

    @asynccontextmanager
    async def acquire(self) -> AsyncGenerator[_Connection, None]:
        """Yield a ``_Connection`` while holding the pool lock.

        Raises:
            RuntimeError: if the pool has not been initialised or was closed.
        """
        async with self._lock:
            if self._conn is None:
                raise RuntimeError("AsyncSQLitePool is not initialised (call initialize() first)")
            yield _Connection(self._conn)

    # Shortcut methods (asyncpg pools expose these directly)

    async def fetchrow(self, query: str, *args) -> Optional[Row]:
        async with self.acquire() as conn:
            return await conn.fetchrow(query, *args)

    async def fetch(self, query: str, *args) -> list[Row]:
        async with self.acquire() as conn:
            return await conn.fetch(query, *args)

    async def fetchval(self, query: str, *args) -> Any:
        async with self.acquire() as conn:
            return await conn.fetchval(query, *args)

    async def execute(self, query: str, *args) -> str:
        async with self.acquire() as conn:
            return await conn.execute(query, *args)

    async def close(self) -> None:
        """Close the shared connection, waiting for any in-flight caller first."""
        async with self._lock:
            if self._conn is not None:
                await self._conn.close()
                self._conn = None
backend/app/dependencies.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from functools import lru_cache
3
+ from openai import AsyncOpenAI
4
+ from fastapi import Depends, HTTPException, Request, status
5
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
6
+ from pydantic import BaseModel
7
+ import jwt
8
+ from cachetools import TTLCache
9
+
10
+ from app.config import get_settings
11
+ from app.auth import decode_jwt
12
+
13
+ # Cache account status (suspended/locked/deactivated) for 30 s per user.
14
+ _account_status_cache: TTLCache = TTLCache(maxsize=500, ttl=30)
15
+
16
+ _last_seen_flush: dict[int, float] = {}
17
+ _SEEN_DEBOUNCE = 60 # seconds
18
+
19
+
20
def invalidate_account_cache(user_id: int) -> None:
    """Forget the cached account-status entry for ``user_id``, if there is one."""
    # pop() with a default never raises when the key is absent.
    _account_status_cache.pop(user_id, None)
22
+
23
+
24
@lru_cache
def get_ai_client() -> AsyncOpenAI:
    """Return the process-wide ``AsyncOpenAI`` client (built once via ``lru_cache``).

    The client targets ``<anthropic_base_url>/v2`` and authenticates with
    ``anthropic_auth_token``, both read from the application settings.
    """
    cfg = get_settings()
    # Drop trailing slashes so exactly one "/" separates the root from "v2".
    endpoint = cfg.anthropic_base_url.rstrip("/") + "/v2"
    return AsyncOpenAI(api_key=cfg.anthropic_auth_token, base_url=endpoint)
32
+
33
+
34
+ # Backward-compat alias
35
+ get_anthropic_client = get_ai_client
36
+
37
+
38
class CurrentUser(BaseModel):
    # Identity of the authenticated caller, as built by get_current_user().
    # (`#` comments rather than a docstring, so the generated schema is unchanged.)
    user_id: int  # taken from the JWT "sub" claim
    email: str  # taken from the JWT "email" claim
41
+
42
+
43
+ _bearer = HTTPBearer(auto_error=False)
44
+
45
+
46
async def get_current_user(
    request: Request,
    credentials: HTTPAuthorizationCredentials | None = Depends(_bearer),
) -> CurrentUser:
    """Authenticate the bearer token and enforce the account's status flags.

    Steps:
      1. Decode the JWT and build a ``CurrentUser`` from its ``sub``/``email`` claims.
      2. Load the user's locked/suspended/deactivated flags (cached in
         ``_account_status_cache``) and reject the request if any is set.
      3. Record the caller's IP; ``last_seen_at`` is refreshed at most once
         per ``_SEEN_DEBOUNCE`` seconds per user.

    Raises:
        HTTPException: 401 when the credentials are missing, expired, invalid,
            or carry no usable integer ``sub`` claim; 503 when no DB pool is
            attached to the app; 403 when the account is locked, suspended or
            deactivated.
    """
    if not credentials:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Not authenticated")
    try:
        payload = decode_jwt(credentials.credentials)
    except jwt.ExpiredSignatureError:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Token expired")
    except jwt.InvalidTokenError:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token")

    # A validly signed token without a numeric "sub" is still an invalid
    # token for this API: answer 401 rather than crash with a 500.
    try:
        user_id = int(payload["sub"])
    except (KeyError, TypeError, ValueError):
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token")

    # "email" may be absent or null; CurrentUser.email must be a string.
    user = CurrentUser(user_id=user_id, email=payload.get("email") or "")

    pool = getattr(request.app.state, "pool", None)
    if not pool:
        raise HTTPException(status_code=503, detail="Service unavailable")
    ip = request.client.host if request.client else None

    cached = _account_status_cache.get(user.user_id)
    if cached is None:
        row = await pool.fetchrow(
            "SELECT is_suspended, suspension_reason, is_locked, lock_reason, is_deactivated FROM users WHERE id = ?",
            user.user_id,
        )
        # NOTE(review): a missing users row yields all-False flags, i.e. the
        # token is accepted — confirm that is intended for deleted accounts.
        cached = {
            "suspended": bool(row and row["is_suspended"]),
            "suspension_reason": (row["suspension_reason"] or "") if row else "",
            "locked": bool(row and row["is_locked"]),
            "lock_reason": (row["lock_reason"] or "") if row else "",
            "deactivated": bool(row and row["is_deactivated"]),
        }
        _account_status_cache[user.user_id] = cached

    if cached["locked"]:
        raise HTTPException(
            status_code=403,
            detail={"code": "account_locked", "reason": cached["lock_reason"]},
        )
    if cached["suspended"]:
        raise HTTPException(
            status_code=403,
            detail={"code": "account_suspended", "reason": cached["suspension_reason"]},
        )
    if cached["deactivated"]:
        raise HTTPException(
            status_code=403,
            detail={"code": "account_deactivated"},
        )
    if ip:
        now_mono = time.monotonic()
        last_flush = _last_seen_flush.get(user.user_id)
        # `None` means "never flushed"; comparing against a 0 default would
        # skip the first flush whenever the monotonic clock is below the
        # debounce window.
        needs_seen = last_flush is None or (now_mono - last_flush) >= _SEEN_DEBOUNCE
        if needs_seen:
            _last_seen_flush[user.user_id] = now_mono
            await pool.execute(
                "UPDATE users SET last_ip = ?, last_seen_at = datetime('now') WHERE id = ?",
                ip, user.user_id,
            )
        else:
            await pool.execute("UPDATE users SET last_ip = ? WHERE id = ?", ip, user.user_id)

    return user
backend/app/main.py ADDED
The diff for this file is too large to render. See raw diff
 
backend/app/math_wiki/__init__.py ADDED
File without changes
backend/app/math_wiki/admin_router.py ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from datetime import datetime, timezone
4
+ from fastapi import APIRouter, Depends, HTTPException, Header, Query, Request
5
+ from pydantic import BaseModel
6
+ from openai import AsyncOpenAI
7
+ from app.config import get_settings
8
+ from app.dependencies import get_ai_client
9
+ from app.math_wiki.storage import pg_db
10
+ from app.math_wiki.storage.analytics import get_retrieval_effectiveness, get_unit_usage_stats
11
+ from app.math_wiki.schemas import WikiUnit, StagedWikiUnit
12
+ import asyncio
13
+
14
+ logger = logging.getLogger(__name__)
15
+ router = APIRouter(prefix="/admin", tags=["admin"])
16
+
17
+ # ── Crawl job state (in-process singleton, one crawl at a time) ───────────────
18
+
19
+ _crawl: dict = {
20
+ "running": False,
21
+ "started_at": None,
22
+ "finished_at": None,
23
+ "topics": [],
24
+ "sources": [],
25
+ "dry_run": False,
26
+ "stats": {},
27
+ "current_topic": None,
28
+ "error": None,
29
+ }
30
+
31
+
32
def _check_admin_key(x_admin_key: str = Header(...)):
    """FastAPI dependency that rejects requests without the configured admin key.

    The header value is compared with ``hmac.compare_digest`` so that response
    timing does not reveal how many leading characters of a guess were correct.

    Raises:
        HTTPException: 401 when no admin key is configured or the header
            value does not match it.
    """
    import hmac  # function-scope import keeps this change self-contained

    expected = get_settings().admin_key
    # Compare as bytes: compare_digest() only accepts ASCII-only str values.
    if not expected or not hmac.compare_digest(
        x_admin_key.encode("utf-8"), expected.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Invalid admin key")
36
+
37
+
38
def _get_pool(request: Request):
    """FastAPI dependency returning the DB pool stored on ``app.state``."""
    app_state = request.app.state
    return app_state.pool
40
+
41
+
42
+ # ── Wiki Units ────────────────────────────────────────────────────────────────
43
+
44
@router.get("/units")
async def admin_list_units(
    topic: str | None = Query(None),
    source: str | None = Query(None),
    include_deleted: bool = Query(False),
    limit: int = Query(50, ge=1, le=200),
    offset: int = Query(0, ge=0),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Return one page of wiki units, optionally filtered by topic/source.
    filters = {"topic": topic, "source": source, "include_deleted": include_deleted}
    paging = {"limit": limit, "offset": offset}
    rows = await pg_db.list_wiki_units_admin(pool, **filters, **paging)
    return {"units": rows, "count": len(rows)}
60
+
61
+
62
@router.get("/staged-units", response_model=list[StagedWikiUnit])
async def admin_list_staged_units(
    status: str = Query("pending"),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # List staged wiki units that are in the requested review status.
    staged = await pg_db.get_staged_wiki_units(pool, status=status)
    return staged
69
+
70
+
71
@router.post("/staged-units/{unit_id}/approve")
async def admin_approve_staged_unit(
    unit_id: str,
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Approve a staged unit.
    # 400: the storage layer raised; 404: no such staged unit.
    try:
        approved_unit = await pg_db.approve_staged_wiki_unit(pool, unit_id)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
    # The 404 is raised outside the try block: HTTPException is itself an
    # Exception, so raising it inside would be caught above and turned
    # into a 400.
    if not approved_unit:
        raise HTTPException(status_code=404, detail="Staged unit not found")
    return {"status": "approved", "id": unit_id}
84
+
85
+
86
@router.delete("/staged-units/{unit_id}")
async def admin_delete_staged_unit(
    unit_id: str,
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Delete a staged unit; any storage error is reported as a 400.
    try:
        await pg_db.delete_staged_wiki_unit(pool, unit_id)
    except Exception as err:
        raise HTTPException(status_code=400, detail=str(err))
    return {"status": "deleted", "id": unit_id}
97
+
98
+
99
@router.get("/units/{unit_id}")
async def admin_get_unit(
    unit_id: str,
    include_history: bool = Query(False),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Fetch one unit; its "history" entry is stripped unless requested.
    payload = await pg_db.get_wiki_unit_with_history(pool, unit_id)
    if not payload:
        raise HTTPException(status_code=404, detail="Unit not found")
    if include_history:
        return payload
    payload.pop("history", None)
    return payload
112
+
113
+
114
class UnitUpdateRequest(BaseModel):
    # Request body for PUT /admin/units/{unit_id}.
    content: str  # replacement unit content
    editor: str = "admin"  # passed to upsert_wiki_unit as `editor`
    reason: str | None = None  # passed to upsert_wiki_unit as `reason`
118
+
119
+
120
@router.put("/units/{unit_id}")
async def admin_update_unit(
    unit_id: str,
    req: UnitUpdateRequest,
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Replace a unit's content while keeping its id/type/topic/subtopic,
    # problem ids and source.
    existing = await pg_db.get_wiki_unit_with_history(pool, unit_id)
    if not existing:
        raise HTTPException(status_code=404, detail="Unit not found")
    current = existing["unit"]

    carried_over = {name: current[name] for name in ("id", "type", "topic", "subtopic")}
    # problem_ids may come back as JSON text or as an already-decoded value.
    raw_ids = current["problem_ids"]
    problem_ids = json.loads(raw_ids) if isinstance(raw_ids, str) else raw_ids

    replacement = WikiUnit(**carried_over, content=req.content, problem_ids=problem_ids)
    await pg_db.upsert_wiki_unit(
        pool,
        replacement,
        source=current["source"],
        editor=req.editor,
        reason=req.reason,
    )
    return {"status": "updated", "id": unit_id}
141
+
142
+
143
@router.delete("/units/{unit_id}")
async def admin_delete_unit(
    unit_id: str,
    editor: str = Query("admin"),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Soft-delete a unit; 404 when the storage layer reports nothing deleted.
    deleted = await pg_db.soft_delete_wiki_unit(pool, unit_id, editor=editor)
    if deleted:
        return {"status": "deleted", "id": unit_id}
    raise HTTPException(status_code=404, detail="Unit not found")
154
+
155
+
156
@router.post("/units/{unit_id}/restore")
async def admin_restore_unit(
    unit_id: str,
    version: int | None = Query(None),
    editor: str = Query("admin"),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Restore a unit, optionally to a specific version; 404 when nothing was restored.
    restored = await pg_db.restore_wiki_unit(pool, unit_id, version=version, editor=editor)
    if restored:
        return {"status": "restored", "id": unit_id}
    raise HTTPException(status_code=404, detail="Unit or version not found")
168
+
169
+
170
+ # ── Feedback ──────────────────────────────────────────────────────────────────
171
+
172
@router.get("/feedback")
async def admin_list_feedback(
    unresolved_only: bool = Query(True),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # List feedback entries (only unresolved ones by default) with a count.
    entries = await pg_db.list_feedback(pool, unresolved_only=unresolved_only)
    total = len(entries)
    return {"feedback": entries, "count": total}
180
+
181
+
182
@router.post("/feedback/{feedback_id}/resolve")
async def admin_resolve_feedback(
    feedback_id: int,
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Mark a feedback entry resolved; 404 when the storage layer reports no match.
    resolved = await pg_db.resolve_feedback(pool, feedback_id)
    if resolved:
        return {"status": "resolved", "id": feedback_id}
    raise HTTPException(status_code=404, detail="Feedback not found")
192
+
193
+
194
+ # ── Flagged solutions ─────────────────────────────────────────────────────────
195
+
196
@router.get("/flagged")
async def admin_list_flagged(
    unreviewed_only: bool = Query(True),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # List flagged solutions (only unreviewed ones by default) with a count.
    flagged = await pg_db.get_flagged_solutions(pool, unreviewed_only=unreviewed_only)
    total = len(flagged)
    return {"flagged": flagged, "count": total}
204
+
205
+
206
+ # ── Drafts ────────────────────────────────────────────────────────────────────
207
+
208
@router.get("/drafts")
async def admin_list_drafts(
    status: str = Query("pending"),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # List drafts in the requested status together with their count.
    drafts = await pg_db.list_drafts(pool, status=status)
    total = len(drafts)
    return {"drafts": drafts, "count": total}
216
+
217
+
218
class DraftReviewRequest(BaseModel):
    # Request body for POST /admin/drafts/{draft_id}/review.
    decision: str  # approve | reject | edit
    reviewer: str = "admin"  # forwarded to pg_db.review_draft
    edits: list[dict] | None = None  # forwarded to pg_db.review_draft
222
+
223
+
224
@router.post("/drafts/{draft_id}/review")
async def admin_review_draft(
    draft_id: str,
    req: DraftReviewRequest,
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Apply a review decision to a draft; a ValueError from storage becomes a 404.
    try:
        outcome = await pg_db.review_draft(
            pool,
            draft_id=draft_id,
            decision=req.decision,
            reviewer=req.reviewer,
            edits=req.edits,
        )
    except ValueError as err:
        raise HTTPException(status_code=404, detail=str(err))
    else:
        return outcome
242
+
243
+
244
+ # ── Source ingest → draft ─────────────────────────────────────────────────────
245
+
246
class IngestSourceRequest(BaseModel):
    # Request body for POST /admin/ingest/source.
    text: str  # source text handed to concept_ingest and stored on the draft
    source_url: str | None = None  # stored on the draft
    topic_hint: str | None = None  # stored on the draft
250
+
251
+
252
@router.post("/ingest/source")
async def admin_ingest_source(
    req: IngestSourceRequest,
    client: AsyncOpenAI = Depends(get_ai_client),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Run AI concept extraction on the submitted text and store the result as a draft.
    from app.math_wiki.agents.concept_ingest import concept_ingest

    try:
        output = await concept_ingest(client, req.text, pool=pool)
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"AI ingest failed: {exc}")

    # Tolerate an output object that carries no `wiki_units` attribute.
    has_units = hasattr(output, "wiki_units")
    new_draft_id = await pg_db.create_draft(
        pool,
        source_text=req.text,
        source_url=req.source_url,
        topic_hint=req.topic_hint,
        proposed_units=output.wiki_units if has_units else [],
    )
    unit_count = len(output.wiki_units) if has_units else 0
    return {"draft_id": new_draft_id, "proposed_unit_count": unit_count}
276
+
277
+
278
+ # ── Analytics ─────────────────────────────────────────────────────────────────
279
+
280
@router.get("/analytics")
async def admin_analytics(
    days: int = Query(30, ge=1, le=365),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Retrieval-effectiveness report over the last `days` days.
    report = await get_retrieval_effectiveness(pool, days=days)
    return report
287
+
288
+
289
@router.get("/analytics/units/{unit_id}")
async def admin_unit_analytics(
    unit_id: str,
    days: int = Query(30, ge=1, le=365),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Usage statistics for one unit; a zeroed placeholder when it has none.
    match = None
    for entry in await get_unit_usage_stats(pool, days=days):
        if entry["unit_id"] == unit_id:
            match = entry
            break
    if not match:
        return {"unit_id": unit_id, "times_used": 0, "message": "no data"}
    return match
301
+
302
+
303
+ # ── Crawl trigger ─────────────────────────────────────────────────────────────
304
+
305
class CrawlRequest(BaseModel):
    # Request body for POST /admin/crawl.
    gap_threshold: int = 50  # crawl topics with fewer units than this
    sources: list[str] = ["aops", "pauls", "generic"]  # forwarded to crawl_and_ingest
    dry_run: bool = False  # forwarded to crawl_and_ingest
309
+
310
+
311
async def _run_crawl(client, pool, topics: list[str], sources: list[str], dry_run: bool) -> None:
    """Background job: crawl and ingest each topic in turn, publishing progress in ``_crawl``.

    Per-topic statistics are summed into ``_crawl["stats"]`` after every topic.
    Whatever happens (success, error, or cancellation), ``_crawl["running"]``
    is cleared and ``finished_at`` is stamped, so a later crawl can start.

    Args:
        client: AI client forwarded to ``crawl_and_ingest``.
        pool: DB pool forwarded to ``crawl_and_ingest``.
        topics: Topics to crawl, one ``crawl_and_ingest`` call each.
        sources: Source names forwarded to ``crawl_and_ingest``.
        dry_run: Forwarded to ``crawl_and_ingest``.
    """
    _crawl["current_topic"] = None
    counters = ("pages_fetched", "chunks_sent", "wiki_units_added", "skipped_seen", "errors")
    combined: dict = {"topics": len(topics), **dict.fromkeys(counters, 0)}
    try:
        # Imported inside the try block: if the crawler package cannot be
        # imported, the finally clause must still clear the "running" flag,
        # otherwise no crawl could ever be started again.
        from crawl.runner import crawl_and_ingest

        for position, topic in enumerate(topics):
            if position:
                await asyncio.sleep(3)  # inter-topic pause (none after the last topic)
            _crawl["current_topic"] = topic
            stats = await crawl_and_ingest(
                client, topics=[topic], sources=sources, dry_run=dry_run, pool=pool
            )
            for k in counters:
                combined[k] = combined.get(k, 0) + stats.get(k, 0)
            _crawl["stats"] = dict(combined)
            logger.info("crawl [%s]: %s", topic, stats)
    except Exception as exc:
        _crawl["error"] = str(exc)
        # logger.exception() records the traceback as well as the message.
        logger.exception("admin crawl failed: %s", exc)
    finally:
        _crawl["running"] = False
        _crawl["finished_at"] = datetime.now(timezone.utc).isoformat()
        _crawl["current_topic"] = None
        _crawl["stats"] = dict(combined)
342
+
343
+
344
@router.post("/crawl")
async def admin_trigger_crawl(
    req: CrawlRequest,
    request: Request,
    client: AsyncOpenAI = Depends(get_ai_client),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    # Start a background crawl for every canonical topic that has fewer than
    # `gap_threshold` units. Only one crawl runs at a time.

    def _already_running() -> dict:
        return {
            "status": "already_running",
            "started_at": _crawl["started_at"],
            "current_topic": _crawl["current_topic"],
        }

    if _crawl["running"]:
        return _already_running()

    from app.math_wiki.taxonomy import CANONICAL_TOPICS

    topic_counts = await pg_db.count_wiki_units_by_topic(pool)
    # Another request may have started a crawl while this one was awaiting
    # the counts above. Re-check here: there is no await between this check
    # and the state update below, so the two cannot interleave.
    if _crawl["running"]:
        return _already_running()

    gap_topics = [
        t for t in CANONICAL_TOPICS
        if topic_counts.get(t, 0) < req.gap_threshold
    ]

    if not gap_topics:
        return {"status": "no_gaps", "message": f"All topics have ≥ {req.gap_threshold} units"}

    _crawl.update({
        "running": True,
        "started_at": datetime.now(timezone.utc).isoformat(),
        "finished_at": None,
        "topics": gap_topics,
        "sources": req.sources,
        "dry_run": req.dry_run,
        "stats": {},
        "current_topic": None,
        "error": None,
    })

    task = asyncio.create_task(_run_crawl(client, pool, gap_topics, req.sources, req.dry_run))
    # The event loop keeps only a weak reference to tasks; hold a strong one
    # on the application state so the crawl cannot be garbage-collected
    # before it finishes.
    request.app.state.admin_crawl_task = task

    return {
        "status": "started",
        "topics": gap_topics,
        "gap_threshold": req.gap_threshold,
        "sources": req.sources,
        "dry_run": req.dry_run,
    }
391
+
392
+
393
@router.get("/crawl/status")
async def admin_crawl_status(_: None = Depends(_check_admin_key)):
    # Snapshot of the in-process crawl state; `topics` is exposed as `topics_queued`.
    exposed = (
        ("running", "running"),
        ("started_at", "started_at"),
        ("finished_at", "finished_at"),
        ("topics_queued", "topics"),
        ("current_topic", "current_topic"),
        ("sources", "sources"),
        ("dry_run", "dry_run"),
        ("stats", "stats"),
        ("error", "error"),
    )
    return {public: _crawl[internal] for public, internal in exposed}
406
+
407
+
408
+ # ── Sanitize ──────────────────────────────────────────────────────────────────
409
+
410
@router.post("/sanitize")
async def admin_sanitize(
    dry_run: bool = Query(False, description="Report changes without applying them"),
    _: None = Depends(_check_admin_key),
    pool=Depends(_get_pool),
):
    """Fix non-canonical topic/type labels and remove content-duplicate wiki units."""
    # Imported at call time, as in the rest of this router.
    from app.math_wiki.storage.sanitizer import run_all

    return await run_all(pool, dry_run=dry_run)
backend/app/math_wiki/agents/__init__.py ADDED
File without changes
backend/app/math_wiki/agents/classifier.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from openai import AsyncOpenAI
3
+ from app.config import get_settings
4
+ from app.agent.core import call_with_retry
5
+ from app.math_wiki.prompts import MODE_PROMPTS
6
+ from app.math_wiki.utils import _extract_json, VALID_LABELS
7
+ from app.math_wiki.taxonomy import CANONICAL_TOPICS # noqa: F401 — imported for canonical reference
8
+
9
+
10
# Keywords (English and Vietnamese) that vote for each label. Matching is a
# plain substring test on the lower-cased problem text, so short entries such
# as "ode" also hit inside longer words ("mode").
_KEYWORD_MAP: dict[str, list[str]] = {
    "calculus": [
        "calculus", "derivative", "integral", "integrate", "differentiat",
        "limit", r"\int", "antiderivative", "indefinite", "definite",
        "dy/dx", "d/dx", "partial", "gradient", "divergence", "curl",
        "differential equation", "ode", "pde", "y''", "y'",
        # Vietnamese
        "đạo hàm", "tích phân", "nguyên hàm", "giới hạn", "vi phân",
        "phương trình vi phân", "cực trị hàm", "tiếp tuyến",
    ],
    "trigonometry": [
        "trigonometry", "trigonometric", "sine", "cosine", "tangent",
        r"\sin", r"\cos", r"\tan", r"\cot", r"\sec", r"\csc",
        "sin(", "cos(", "tan(", "arcsin", "arccos", "arctan",
        # Vietnamese
        "sin ", "cos ", "tan ", "cot ", "sinx", "cosx", "tanx",
        "công thức lượng giác", "hệ thức lượng",
    ],
    "algebra": [
        "algebra", "equation", "quadratic", "polynomial", "linear", "variable",
        # Vietnamese
        "phương trình", "hệ phương trình", "bất phương trình",
        "đa thức", "nhân tử", "rút gọn", "hằng đẳng thức",
    ],
    "geometry": [
        "geometry", "geometric", "triangle", "circle", "area", "perimeter", "volume", "angle",
        # Vietnamese
        "tam giác", "hình vuông", "hình chữ nhật", "hình thang", "hình tròn",
        "đường tròn", "hình hộp", "hình chóp", "hình trụ", "hình cầu",
        "diện tích", "chu vi", "thể tích", "góc", "đường thẳng", "mặt phẳng",
    ],
    "statistics": [
        "statistic", "mean", "median", "mode", "variance", "deviation", "frequency",
        # Vietnamese
        "trung bình", "trung vị", "phương sai", "độ lệch chuẩn", "tần số",
        "bảng số liệu", "biểu đồ",
    ],
    "probability": [
        "probability", "chance", "likelihood", "random", "event",
        # Vietnamese
        "xác suất", "biến cố", "ngẫu nhiên", "không gian mẫu",
    ],
    "combinatorics": [
        "combinatoric", "permutation", "combination", "factorial", "arrange",
        # Vietnamese
        "tổ hợp", "chỉnh hợp", "hoán vị", "giai thừa", "đếm",
    ],
    "number_theory": [
        "number theory", "prime", "divisor", "modular", "gcd", "lcm",
        # Vietnamese
        "số nguyên tố", "ước", "bội", "chia hết", "đồng dư", "ucln", "bcnn",
    ],
    "sequences": [
        # Vietnamese
        "dãy số", "cấp số cộng", "cấp số nhân", "công sai", "công bội",
        "số hạng", "tổng n số hạng", "giới hạn dãy",
        # English
        "sequence", "series", "arithmetic sequence", "geometric sequence",
    ],
    "vectors": [
        # Vietnamese
        "vectơ", "tích vô hướng", "tích có hướng", "tọa độ vectơ",
        # English
        "vector", "dot product", "cross product", "magnitude",
    ],
    "functions": [
        # Vietnamese
        "hàm số", "đồ thị hàm số", "tập xác định", "tập giá trị",
        "đơn điệu", "đồng biến", "nghịch biến", "hàm bậc", "hàm mũ", "hàm logarit",
        # English
        "function", "domain", "range", "monoton",
    ],
    "complex_numbers": [
        # Vietnamese
        "số phức", "phần thực", "phần ảo", "module", "argument",
        # English
        "complex number", "imaginary", "real part", "imaginary part",
    ],
}


async def classify_problem(client: AsyncOpenAI, problem_text: str) -> str:
    """Classify a math problem into one topic label.

    The model is asked for a JSON ``{"label": ...}`` answer, which is then
    cross-checked against keyword hits in the problem text:

      * if the model's label is not in ``VALID_LABELS``, the best keyword
        label is used, falling back to ``"algebra"``;
      * otherwise the keyword label replaces the model's label only when it
        has strictly more keyword hits.

    Args:
        client: AI client used for the classification request.
        problem_text: The problem statement.

    Returns:
        The chosen topic label.
    """
    settings = get_settings()
    user_msg = (
        f"{problem_text}\n\n"
        "Respond ONLY with valid JSON in this exact format: "
        '{"label": "<category>"} '
        "where category is one of: algebra, geometry, statistics, probability, "
        "calculus, trigonometry, combinatorics, number_theory, "
        "complex_numbers, sequences, vectors, functions."
    )
    response = await call_with_retry(
        client,
        model=settings.default_model,
        messages=[
            {"role": "system", "content": MODE_PROMPTS["CLASSIFY"]},
            {"role": "user", "content": user_msg},
        ],
        max_tokens=50,
    )
    raw = response.choices[0].message.content or ""
    content = _extract_json(raw)
    try:
        parsed = json.loads(content)
    except (json.JSONDecodeError, TypeError):
        parsed = None
    # The model may answer with valid JSON that is not an object (a bare
    # string or a list), or with a non-string label; treat both as "no label".
    label = parsed.get("label", "") if isinstance(parsed, dict) else ""
    label = label.strip().lower() if isinstance(label, str) else ""

    # Score every label by counting keyword hits. Highest score wins.
    # First-match (next(...)) was order-dependent: "phương trình vi phân" matched
    # algebra ("phương trình") before calculus, mis-classifying ODEs.
    question_lower = problem_text.lower()
    scores: dict[str, int] = {}
    for lbl, kws in _KEYWORD_MAP.items():
        count = sum(1 for kw in kws if kw in question_lower)
        if count:
            scores[lbl] = count
    keyword_label = max(scores, key=scores.__getitem__) if scores else None

    if label not in VALID_LABELS:
        label = keyword_label or "algebra"
    elif keyword_label and keyword_label != label:
        # Keyword score beats LLM label only when it has strictly more hits.
        # Tie means ambiguous — keep the LLM's more contextual judgement.
        if scores.get(keyword_label, 0) > scores.get(label, 0):
            label = keyword_label
    return label
backend/app/math_wiki/agents/concept_ingest.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ import json
3
+ from openai import AsyncOpenAI
4
+ from app.config import get_settings
5
+ from app.agent.core import call_with_retry
6
+ from app.math_wiki.prompts import MODE_PROMPTS
7
+ from app.math_wiki.utils import _extract_json
8
+ from app.math_wiki.schemas import ConceptIngestOutput
9
+ from app.math_wiki.storage import pg_db
10
+ from app.math_wiki.storage.pg_vectors import is_near_duplicate_pg
11
+ from app.metrics import inc_wiki_units_added
12
+ from app.math_wiki.taxonomy import CANONICAL_TOPICS, TOPIC_MAP, CANONICAL_TYPES, TYPE_MAP
13
+
14
+ _VALID_TOPICS = ", ".join(sorted(CANONICAL_TOPICS))
15
+ _VALID_TYPES = ", ".join(sorted(CANONICAL_TYPES))
16
+
17
+ _JSON_REMINDER = (
18
+ "\n\nExtract wiki knowledge units from the above math text. "
19
+ "Return ONLY valid JSON in this exact format: "
20
+ '{"wiki_units": [{"id": "slug", "type": "concept", "topic": "statistics", '
21
+ '"subtopic": "...", "content": "...", "problem_ids": []}]}\n'
22
+ f"topic MUST be one of: {_VALID_TOPICS}\n"
23
+ f"type MUST be one of: {_VALID_TYPES}\n"
24
+ "IMPORTANT: Write math expressions in plain text (e.g. 'x^2 + bx + c = 0'), "
25
+ "NOT LaTeX backslash notation. Backslashes break JSON."
26
+ )
27
+
28
+
29
def _normalize_unit(unit, fallback_topic: str | None) -> None:
    """Coerce ``unit.topic`` and ``unit.type`` to canonical values, in place.

    Topic resolution order:
      1. keep the topic if it is already in ``CANONICAL_TOPICS``;
      2. otherwise look it up in ``TOPIC_MAP`` verbatim, then lower-cased
         with spaces replaced by underscores;
      3. if the result is still not canonical, use ``fallback_topic``.
    The topic is only written back when the resolved value is truthy, so a
    unit whose topic cannot be resolved (and has no fallback) keeps its
    original, non-canonical topic.

    A type outside ``CANONICAL_TYPES`` is replaced by its ``TYPE_MAP`` entry,
    defaulting to ``"concept"``.
    """
    resolved = unit.topic
    if resolved not in CANONICAL_TOPICS:
        raw_topic = resolved
        resolved = TOPIC_MAP.get(raw_topic)
        if not resolved:
            # Second chance: slug form, e.g. "Number Theory" -> "number_theory".
            resolved = TOPIC_MAP.get(raw_topic.lower().replace(" ", "_"))
    if resolved not in CANONICAL_TOPICS:
        resolved = fallback_topic
    if resolved:
        unit.topic = resolved

    if unit.type not in CANONICAL_TYPES:
        unit.type = TYPE_MAP.get(unit.type, "concept")
41
+
42
+
43
async def concept_ingest(
    client: AsyncOpenAI,
    raw_text: str,
    pool=None,
    source: str = "manual",
    source_url: str | None = None,
    fallback_topic: str | None = None,
) -> ConceptIngestOutput:
    """Extract wiki knowledge units from ``raw_text`` and optionally persist them.

    The text (with ``_JSON_REMINDER`` appended) is sent to the default model
    under the ``CONCEPT_INGEST`` system prompt; the reply is parsed into a
    ``ConceptIngestOutput``.

    When ``pool`` is truthy, each unit is normalised with ``_normalize_unit``
    and then skipped if its topic is still not canonical, its MD5 content
    hash is already known, or ``is_near_duplicate_pg`` reports a near
    duplicate. Remaining units are upserted with ``source`` / ``source_url``
    and counted via ``inc_wiki_units_added``.

    Args:
        client: OpenAI-compatible async client.
        raw_text: Source math text to mine for knowledge units.
        pool: Database pool; when falsy nothing is normalised or stored.
        source: Provenance label passed to the upsert.
        source_url: Optional provenance URL passed to the upsert.
        fallback_topic: Topic used when a unit's topic cannot be mapped.

    Returns:
        The parsed output, including units that were skipped for storage.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
    """
    # The module never imported ``logging`` nor defined ``logger``, so every
    # skip branch below used to raise NameError. Resolve the logger locally
    # so this function works regardless of the file's import block.
    import logging

    logger = logging.getLogger(__name__)

    settings = get_settings()
    response = await call_with_retry(
        client,
        model=settings.default_model,
        messages=[
            {"role": "system", "content": MODE_PROMPTS["CONCEPT_INGEST"]},
            {"role": "user", "content": raw_text + _JSON_REMINDER},
        ],
        max_tokens=4096,
    )
    content = _extract_json(response.choices[0].message.content or "{}")
    parsed = json.loads(content)
    output = ConceptIngestOutput(**parsed)

    if pool:
        # NOTE(review): presumably a mutable set of hex digests — ``.add`` is
        # called on it below; confirm against pg_db.get_all_content_hashes.
        existing_hashes = await pg_db.get_all_content_hashes(pool)
        for unit in output.wiki_units:
            _normalize_unit(unit, fallback_topic)
            if unit.topic not in CANONICAL_TOPICS:
                logger.warning("concept_ingest: skipped %s — invalid topic %r (not in CANONICAL_TOPICS)", unit.id, unit.topic)
                continue
            # MD5 is used purely as a cheap exact-duplicate fingerprint.
            content_hash = hashlib.md5(unit.content.encode()).hexdigest()
            if content_hash in existing_hashes:
                logger.info("concept_ingest: skipped %s — duplicate content hash", unit.id)
                continue
            if await is_near_duplicate_pg(pool, unit.content):
                logger.info("concept_ingest: skipped %s — near-duplicate by embedding (similarity>0.92)", unit.id)
                continue
            await pg_db.upsert_wiki_unit(pool, unit, source=source, source_url=source_url)
            # Remember the hash so identical units later in this batch are skipped.
            existing_hashes.add(content_hash)
            inc_wiki_units_added()

    return output
backend/app/math_wiki/agents/decomposer.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Multi-domain query decomposer.
2
+
3
+ Detects when a problem spans two THPT math topics and splits it into
4
+ focused sub-questions for independent retrieval.
5
+ Returns quickly (Haiku) and is non-fatal — callers catch all exceptions.
6
+ """
7
+ import json
8
+ import logging
9
+ from openai import AsyncOpenAI
10
+ from app.config import get_settings
11
+ from app.agent.core import call_with_retry
12
+ from app.math_wiki.utils import _extract_json
13
+ from app.math_wiki.schemas import DecomposedQuery
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ _PROMPT = """Bạn là hệ thống phân tích lĩnh vực toán học cho kỳ thi THPT Việt Nam.
18
+
19
+ Cho một bài toán, xác định xem bài toán có THỰC SỰ cần kiến thức từ hai chủ đề THPT riêng biệt không.
20
+ Trả lời bằng tiếng Việt. Chỉ xuất JSON theo đúng schema sau:
21
+
22
+ {
23
+ "primary_topic": "algebra",
24
+ "secondary_topics": ["calculus"],
25
+ "sub_questions": ["Tìm f'(x)", "Giải f'(x)=0"],
26
+ "requires_multi_domain": true
27
+ }
28
+
29
+ Chủ đề THPT: algebra, calculus, geometry, trigonometry, combinatorics, probability, statistics, logarithm, functions, spatial_geometry
30
+
31
+ Quy tắc:
32
+ - requires_multi_domain = true CHỈ KHI bài toán KHÔNG THỂ giải được chỉ với kiến thức từ một chủ đề duy nhất.
33
+ - sub_questions: 2-3 câu hỏi phụ bằng tiếng Việt, phân tách bài toán theo từng chủ đề.
34
+ - Nếu bài toán chỉ thuộc một chủ đề, đặt requires_multi_domain=false và sub_questions=[].
35
+ - Chỉ xuất JSON hợp lệ. Không viết thêm bất kỳ văn bản nào."""
36
+
37
+
38
async def decompose_query(client: AsyncOpenAI, question: str) -> DecomposedQuery:
    """Ask the fast model whether ``question`` spans several topics.

    The reply is expected to be a JSON object with ``primary_topic``,
    ``secondary_topics``, ``sub_questions`` and ``requires_multi_domain``.
    Anything that cannot be interpreted degrades to a single-domain
    ``algebra`` result instead of raising:

    * invalid JSON, or JSON that is not an object (e.g. a bare list);
    * missing or ``null`` fields (replaced by their defaults);
    * a string flag such as ``"false"`` (parsed by value, not truthiness).

    Args:
        client: OpenAI-compatible async client.
        question: The problem statement to analyse.

    Returns:
        The decomposition, or the single-domain fallback described above.
    """
    settings = get_settings()
    response = await call_with_retry(
        client,
        model=settings.haiku_model,
        messages=[
            {"role": "system", "content": _PROMPT},
            {"role": "user", "content": question},
        ],
        max_tokens=256,
    )
    content = _extract_json(response.choices[0].message.content or "{}")
    try:
        parsed = json.loads(content)
    except json.JSONDecodeError:
        parsed = None

    # A valid-but-non-object payload (list, string, number) has no ``.get``;
    # treat it exactly like unparseable output.
    if not isinstance(parsed, dict):
        return DecomposedQuery(
            primary_topic="algebra", secondary_topics=[],
            sub_questions=[], requires_multi_domain=False,
        )

    flag = parsed.get("requires_multi_domain", False)
    if isinstance(flag, str):
        # bool("false") is True, so compare the text instead.
        flag = flag.strip().lower() == "true"

    return DecomposedQuery(
        # ``or`` also covers explicit null / empty values from the model.
        primary_topic=parsed.get("primary_topic") or "algebra",
        secondary_topics=parsed.get("secondary_topics") or [],
        sub_questions=parsed.get("sub_questions") or [],
        requires_multi_domain=bool(flag),
    )
backend/app/math_wiki/agents/ingest.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ import json
3
+ from openai import AsyncOpenAI
4
+ from app.config import get_settings
5
+ from app.agent.core import call_with_retry
6
+ from app.math_wiki.prompts import MODE_PROMPTS
7
+ from app.math_wiki.utils import _extract_json
8
+ from app.math_wiki.schemas import IngestOutput
9
+ from app.math_wiki.storage import pg_db
10
+ from app.math_wiki.storage.pg_vectors import is_near_duplicate_pg
11
+ from app.metrics import inc_wiki_units_added
12
+
13
+
14
async def ingest_exam(
    client: AsyncOpenAI,
    raw_text: str,
    pool=None,
    source: str = "exam_upload",
    source_url: str | None = None,
) -> IngestOutput:
    """Extract problems and wiki units from exam text and optionally store them.

    The text is sent to the default model under the ``INGEST`` system prompt
    and the reply is parsed into an ``IngestOutput``. Every problem must be
    referenced (through ``problem_ids``) by at least two distinct wiki units.

    When ``pool`` is truthy, units whose MD5 content hash is already known or
    that ``is_near_duplicate_pg`` flags are skipped; the rest are upserted
    and counted, and then every problem is upserted.

    Args:
        client: OpenAI-compatible async client.
        raw_text: Raw exam text.
        pool: Database pool; when falsy nothing is stored.
        source: Provenance label passed to the unit upsert.
        source_url: Optional provenance URL passed to the unit upsert.

    Returns:
        The parsed output, including units that were skipped for storage.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
        ValueError: If a problem is linked to fewer than two wiki units.
    """
    # The module never imported ``logging`` nor defined ``logger``, so both
    # duplicate-skip branches used to raise NameError. Resolve it locally.
    import logging

    logger = logging.getLogger(__name__)

    settings = get_settings()
    response = await call_with_retry(
        client,
        model=settings.default_model,
        messages=[
            {"role": "system", "content": MODE_PROMPTS["INGEST"]},
            {"role": "user", "content": raw_text},
        ],
        max_tokens=2000,
    )
    content = _extract_json(response.choices[0].message.content or "{}")
    parsed = json.loads(content)
    output = IngestOutput(**parsed)

    # Validate: each problem must have >= 2 wiki units
    unit_map: dict[str, set[str]] = {}
    for unit in output.wiki_units:
        for pid in unit.problem_ids:
            unit_map.setdefault(pid, set()).add(unit.id)

    for problem in output.problems:
        if len(unit_map.get(problem.problem_id, set())) < 2:
            raise ValueError(
                f"Problem {problem.problem_id} has fewer than 2 wiki units"
            )

    if pool:
        # NOTE(review): presumably a mutable set of hex digests — ``.add`` is
        # called on it below; confirm against pg_db.get_all_content_hashes.
        existing_hashes = await pg_db.get_all_content_hashes(pool)
        for unit in output.wiki_units:
            # MD5 is used purely as a cheap exact-duplicate fingerprint.
            content_hash = hashlib.md5(unit.content.encode()).hexdigest()
            if content_hash in existing_hashes:
                logger.info("ingest: skipped %s — duplicate content hash", unit.id)
                continue
            if await is_near_duplicate_pg(pool, unit.content):
                logger.info("ingest: skipped %s — near-duplicate by embedding (similarity>0.92)", unit.id)
                continue
            await pg_db.upsert_wiki_unit(pool, unit, source=source, source_url=source_url)
            # Remember the hash so identical units later in this batch are skipped.
            existing_hashes.add(content_hash)
            inc_wiki_units_added()

        for problem in output.problems:
            await pg_db.upsert_problem(pool, problem)

    return output
backend/app/math_wiki/agents/ocr.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from openai import AsyncOpenAI
3
+ from app.config import get_settings
4
+ from app.agent.core import call_with_retry
5
+
6
+ # Phrases Claude returns when image content was stripped by the proxy
7
+ _NO_IMAGE_PHRASES = (
8
+ "chưa đính kèm hình ảnh",
9
+ "không thấy hình ảnh",
10
+ "không có hình ảnh",
11
+ "vui lòng tải lên hình ảnh",
12
+ "vui lòng gửi hình ảnh",
13
+ "no image attached",
14
+ "no image provided",
15
+ "i don't see any image",
16
+ )
17
+
18
+ _SYSTEM_PROMPT = (
19
+ "You are a Vietnamese math OCR assistant. Extract all text, mathematical content, and visual elements from the image.\n"
20
+ "Rules:\n"
21
+ "- Preserve all LaTeX notation exactly; wrap inline math in $...$ and display math in $$...$$\n"
22
+ "- Keep Vietnamese text exactly as written\n"
23
+ "- List each numbered problem separately\n"
24
+ "- Do not solve or explain — only transcribe or describe what is visible\n"
25
+ "- If a symbol is unclear, use your best judgment\n"
26
+ "- For handwritten content: interpret symbols especially carefully. Common handwritten forms:\n"
27
+ " fraction a/b → \\frac{a}{b}; square root → \\sqrt{}; exponent → ^{}; subscript → _{};\n"
28
+ " absolute value bars → |...|; multiplication dot → \\cdot\n"
29
+ "- When a symbol is ambiguous, choose the most mathematically plausible interpretation\n"
30
+ "- Preserve problem number labels exactly as they appear (Bài 1, Câu 2, etc.)\n"
31
+ "Visual elements — when the image contains shapes, graphs, or drawings that cannot be expressed as plain text:\n"
32
+ "- Geometric figures: describe the shape (triangle, circle, quadrilateral…), label each vertex/point as shown,"
33
+ " list all given side lengths, angles, and any marked equal/parallel/perpendicular relationships."
34
+ " Example: 'Tam giác ABC vuông tại A, AB = 3, BC = 5, AC = 4'\n"
35
+ "- Coordinate graphs / function plots: state axis labels and scale, identify key points (intercepts, maxima,"
36
+ " minima, intersection points) with their coordinates, describe the curve type (line, parabola, circle…)."
37
+ " Example: 'Đồ thị hàm số y = f(x) qua các điểm (0, 2) và (3, 0), là đường thẳng giảm dần'\n"
38
+ "- Hand-drawn or complex diagrams: give a concise prose description of every element, dimension, and label"
39
+ " that appears, sufficient for a solver to reconstruct the problem without seeing the image\n"
40
+ "- Place visual descriptions inline, immediately after the problem text they accompany"
41
+ )
42
+
43
+
44
async def extract_math_from_image(
    client: AsyncOpenAI, image_bytes: bytes, mime_type: str
) -> str:
    """Transcribe the math content of an image through the vision model.

    The image is inlined as a base64 ``data:`` URI and sent together with a
    short Vietnamese instruction under ``_SYSTEM_PROMPT``.

    Args:
        client: OpenAI-compatible async client.
        image_bytes: Raw image data.
        mime_type: MIME type placed in the data URI (e.g. ``image/png``).

    Returns:
        The stripped transcription text.

    Raises:
        ValueError: If the reply is empty, or if it contains one of
            ``_NO_IMAGE_PHRASES`` (the model says it received no image).
    """
    settings = get_settings()
    encoded = base64.b64encode(image_bytes).decode()
    data_uri = f"data:{mime_type};base64,{encoded}"

    user_parts = [
        {"type": "image_url", "image_url": {"url": data_uri}},
        {"type": "text", "text": "Trích xuất toàn bộ nội dung toán học từ hình ảnh này."},
    ]
    conversation = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {"role": "user", "content": user_parts},
    ]
    response = await call_with_retry(
        client,
        model=settings.default_model,
        messages=conversation,
        max_tokens=4096,
    )

    text = (response.choices[0].message.content or "").strip()
    if not text:
        raise ValueError("Claude Vision returned empty response")

    # A "no image" style reply means the image part never reached the model.
    lowered = text.lower()
    for phrase in _NO_IMAGE_PHRASES:
        if phrase in lowered:
            raise ValueError(
                "Vision API not supported by this AI router — please type the problem manually."
            )
    return text
backend/app/math_wiki/agents/quiz_generator.py ADDED
@@ -0,0 +1,471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import random
4
+ import re
5
+ from openai import AsyncOpenAI
6
+ from app.config import get_settings
7
+ from app.agent.core import call_with_retry
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ # Sources for distractor taxonomy and techniques:
12
+ # - Vanderbilt / Lamar algebra error catalogs (sign, distribution, sqrt errors)
13
+ # - NAACL 2024: naming error mechanisms raises distractor plausibility ~2.68→~3.7 on 5-pt scale
14
+ # - INFORMS 2022: partial-solution and conceptual-reversal are most convincing traps in quant MCQs
15
+ # - Student Choice Prediction (ACL 2025): conceptual-overlap traps attract highest-ability students
16
+ # - LookAlike ACL 2025: surface-form consistency across all options blocks answer-by-elimination
17
+ # - NYSED item writing guide: sign/coefficient/unit alteration as a systematic distractor family
18
+ _SYSTEM = r"""Bạn là giáo viên Toán lớp 9 chuyên ôn thi vào lớp 10 TPHCM, đồng thời là chuyên gia thiết kế đề trắc nghiệm.
19
+ Nhiệm vụ: tạo câu hỏi trắc nghiệm chất lượng cao — mỗi phương án sai phải dựa trên lỗi nhận thức thực sự của học sinh và không thể loại bằng hình thức.
20
+
21
+ ═══ NGUYÊN TẮC CỐT LÕI ═══
22
+
23
+ 1. TÍNH NHẨM ĐƯỢC: Mọi con số phải tính được bằng đầu óc — KHÔNG cần máy tính.
24
+ 2. 4 PHƯƠNG ÁN: Mỗi câu có đúng 4 lựa chọn (A–D), chỉ một đúng.
25
+ 3. BẪY CÓ NGUỒN GỐC: Mỗi phương án sai PHẢI xuất phát từ một loại lỗi cụ thể trong bảng taxonomy.
26
+ Ba bẫy trong một câu phải đến từ BA loại lỗi KHÁC NHAU.
27
+ 4. LOOKALIKE — NGOẠI HÌNH GIỐNG NHAU: Tất cả 4 phương án phải có cùng dạng ký hiệu, cùng cấu trúc
28
+ (cùng loại biểu thức, số hạng tương đương, độ phức tạp tương tự). Học sinh không được loại
29
+ phương án sai chỉ bằng cách nhìn hình thức mà phải tính toán.
30
+ 5. TỰ KIỂM TRA: Tính lại đáp án đúng từng bước trước khi viết JSON. Xác nhận mỗi bẫy thực sự sai.
31
+ 6. ĐỘ KHÓ BLOOM: Sắp xếp từ dễ → khó theo thang nhận thức.
32
+ 7. NGÔN NGỮ: Tiếng Việt. LaTeX trong $...$ cho ký hiệu toán.
33
+ 8. JSON THUẦN: Chỉ trả về JSON, không có text ngoài.
34
+
35
+ ═══ BẢNG LỖI SAI THỰC CHỨNG (DISTRACTOR TAXONOMY) ═══
36
+
37
+ Mỗi bẫy phải thuộc một trong 13 loại sau — ghi tên loại trong explanation:
38
+
39
+ ── NHÓM DẤU VÀ HỆ SỐ ──
40
+
41
+ [SIGN_ERROR] Nhầm dấu âm/dương
42
+ • $-(a-b)$ → ghi $-a-b$ thay vì $-a+b$
43
+ • Tổng Viète $x_1+x_2=-b/a$ → ghi $+b/a$ (bỏ dấu âm)
44
+ • Tích Viète $x_1x_2=c/a$ → ghi $-c/a$
45
+ • $-x$ khi $x=-5$ → ghi $-5$ thay vì $5$
46
+
47
+ [COEFFICIENT_ERROR] Sai hệ số hoặc bội số
48
+ • $(a+b)^2=a^2+2ab+b^2$ → bỏ hệ số 2: viết $a^2+ab+b^2$
49
+ • $\Delta=b^2-4ac$ → viết $b^2-ac$ (quên hệ số 4)
50
+ • $2a\cdot x_0 = -b$ → viết $a\cdot x_0=-b$ (quên hệ số 2)
51
+ • Kết quả đúng nhân hay chia thêm 2, 4, hoặc $\pi$ do nhầm công thức
52
+
53
+ [WRONG_OPERATION] Dùng phép tính sai — số đúng, phép tính sai
54
+ • Cộng thay nhân: $P = a \times b$ → viết $P = a + b$
55
+ • Bình phương thay nhân đôi: $2r$ → viết $r^2$
56
+ • Khai căn thay bình phương: $x^2=k$ → ghi $x=k^2$ thay $x=\sqrt{k}$
57
+ • Chia thay trừ trong hệ thức lượng
58
+
59
+ ── NHÓM NGHIỆM VÀ MIỀN ──
60
+
61
+ [MISSING_ROOT] Bỏ sót nghiệm
62
+ • $x^2=9$ → chỉ lấy $x=3$, quên $x=-3$
63
+ • Chia hai vế cho $x$ → mất nghiệm $x=0$
64
+ • $|x|=5$ → quên $x=-5$
65
+ • Phương trình tích: chỉ lấy một trong hai nghiệm
66
+
67
+ [EXTRANEOUS_ROOT] Nghiệm ngoại lai (không kiểm tra lại)
68
+ • Bình phương hai vế rồi không thử lại vào phương trình gốc
69
+ • Đặt ẩn phụ $t=\sqrt{x}\geq0$ rồi nhận $t<0$
70
+ • Nhận nghiệm nằm ngoài điều kiện xác định
71
+
72
+ [INEQUALITY_FLIP] Quên đảo chiều bất phương trình
73
+ • Nhân/chia hai vế với số âm mà không lật dấu $\leq\to\geq$
74
+ • Kết quả là phần bù của tập nghiệm đúng
75
+
76
+ ── NHÓM CĂN VÀ PHÂN PHỐI ──
77
+
78
+ [SQRT_LINEARITY] Giả sử căn là tuyến tính
79
+ • $\sqrt{a^2+b^2}\to a+b$ (cộng thẳng, bỏ dấu căn)
80
+ • $\sqrt{(a+b)^2}=a+b$ (bỏ trị tuyệt đối)
81
+ • $\sqrt{9+16}=3+4=7$ thay vì $\sqrt{25}=5$
82
+
83
+ [SQRT_NO_ABS] Quên trị tuyệt đối khi khai căn
84
+ • $\sqrt{x^2}=x$ thay vì $|x|$
85
+ • $\sqrt{(x-3)^2}=x-3$ thay vì $|x-3|$
86
+
87
+ [DISTRIBUTION_ERROR] Phân phối/nhân sai
88
+ • $(a+b)^2\neq a^2+b^2$ (bỏ hạng tử $2ab$)
89
+ • $a(b-c)^2\neq(ab-ac)^2$: nhân $a$ vào trước khi bình phương
90
+ • $\frac{a+b}{c}\neq\frac{a}{c}+b$: chỉ rút gọn một hạng tử
91
+
92
+ ── NHÓM CÔNG THỨC VÀ KHÁI NIỆM ──
93
+
94
+ [DELTA_ERROR] Sai công thức Delta
95
+ • $\Delta=b^2-4ac$: nhầm dấu $c$, hoặc dùng $b$ thay $b'=b/2$
96
+ • Chỉ lấy một nghiệm $x_1$ hoặc chỉ lấy $x_2$
97
+
98
+ [CONCEPTUAL_REVERSAL] Đảo ngược một quan hệ toán học
99
+ • Phân số: $\frac{a}{b}\to\frac{b}{a}$ (đảo tử/mẫu)
100
+ • Tỉ số lượng giác: $\sin\theta=\frac{\text{đối}}{\text{huyền}}$ → dùng $\frac{\text{huyền}}{\text{đối}}$
101
+ • Viète: dùng tổng thay tích hoặc ngược lại
102
+ • Hệ thức: $x_1\cdot x_2=c/a$ → học sinh dùng $x_1+x_2$
103
+ • Tỉ lệ thức: $\frac{a}{b}=\frac{c}{d}$ → giải nhầm thành $ad=bc$ rồi hoán vị sai
104
+
105
+ [VERTEX_SIGN] Nhầm dấu tọa độ đỉnh parabol
106
+ • $x_0=-b/(2a)$ → dùng $+b/(2a)$
107
+ • Tính $f(x_0)$ nhưng thay $-x_0$
108
+
109
+ [FORMULA_MIX] Nhầm công thức hoặc điều kiện áp dụng
110
+ • Diện tích/chu vi: nhầm hình tròn với hình quạt hay hình chữ nhật
111
+ • Định lý Pythagore: nhầm vai trò cạnh huyền
112
+ • Hệ thức lượng trong tam giác vuông: nhầm cạnh với chiều cao
113
+
114
+ ── NHÓM ĐẶC BIỆT: BẪY MẠNH NHẤT ──
115
+
116
+ [PARTIAL_SOLUTION] Nghiệm trung gian — học sinh dừng lại quá sớm
117
+ Đây là loại bẫy hiệu quả nhất với học sinh giỏi.
118
+ Kỹ thuật: lấy KẾT QUẢ CỦA BƯỚC TRUNG GIAN đúng trong lời giải đầy đủ làm phương án sai.
119
+ • Bài 3 bước: kết quả bước 2 là đáp án trông "hợp lý" nhất
120
+ • Tìm $x$: học sinh tính được $2x=10$ rồi ghi ngay $10$ thay vì $5$
121
+ • Tìm diện tích: tính đúng bán kính $r$ rồi ghi $r$ thay vì $\pi r^2$
122
+ • Giải hệ: tìm đúng $x$ rồi quên thay vào tìm $y$
123
+ Cách tạo: Viết lời giải đầy đủ 4–5 bước → lấy kết quả bước 2 và bước 3 làm hai bẫy.
124
+
125
+ ═══ QUY TRÌNH ESSAY-TO-MCQ (chuyển bài tự luận thành trắc nghiệm) ═══
126
+
127
+ Đây là kỹ thuật cốt lõi để tạo bẫy cực kỳ thuyết phục:
128
+
129
+ Bước 1 — Viết lời giải tự luận đầy đủ:
130
+ Liệt kê từng bước tính: $k_1=\ldots$, $k_2=\ldots$, $k_3=\ldots$, đáp án $=k_n$.
131
+
132
+ Bước 2 — Thu hoạch bẫy từ lời giải:
133
+ • Bẫy A = $k_{n-1}$ (kết quả bước cuối-1): [PARTIAL_SOLUTION]
134
+ • Bẫy B = kết quả nếu dùng sai công thức tại bước quan trọng nhất: [FORMULA_MIX] hoặc [DELTA_ERROR]
135
+ • Bẫy C = đáp án đúng nhưng đổi dấu hoặc đảo tử/mẫu: [SIGN_ERROR] hoặc [CONCEPTUAL_REVERSAL]
136
+
137
+ Bước 3 — Áp dụng nguyên tắc LOOKALIKE:
138
+ Đảm bảo A, B, C, D (đáp án đúng) cùng dạng: đều là số nguyên, đều là phân số, đều có $\sqrt{}$,
139
+ cùng số hạng, giá trị gần nhau về độ lớn.
140
+
141
+ Bước 4 — Sắp xếp các phương án theo thứ tự tăng dần (với số) hoặc theo độ phức tạp.
142
+
143
+ Bước 5 — Kiểm tra: Đọc từng bẫy và tự hỏi "Học sinh nào sẽ chọn cái này và tại sao?"
144
+ Bẫy tốt = có câu trả lời rõ ràng cho câu hỏi đó.
145
+
146
+ ═══ THANG ĐỘ KHÓ BLOOM ═══
147
+
148
+ "easy" → Nhớ/Hiểu: nhận dạng công thức, tính một bước, bẫy là lỗi cơ bản (SIGN_ERROR, MISSING_ROOT)
149
+ "medium" → Vận dụng: giải 2–3 bước, bẫy bao gồm PARTIAL_SOLUTION từ bước trung gian
150
+ "hard" → Phân tích/Đánh giá: 3–5 bước, hai bẫy kết hợp (ví dụ PARTIAL_SOLUTION + CONCEPTUAL_REVERSAL),
151
+ điều kiện ẩn, học sinh giỏi vẫn có thể mắc bẫy nếu không kiểm tra kỹ
152
+
153
+ ═══ TỰ KIỂM TRA BẮT BUỘC TRƯỚC KHI XUẤT JSON ═══
154
+
155
+ Sau khi tạo xong từng câu, thực hiện kiểm tra sau — nếu không qua thì viết lại câu đó:
156
+
157
+ CHK-1 TOÁN HỌC CHÍNH XÁC — GIẢI TRƯỚC KHI ĐẶT correct_index
158
+ → Viết lời giải tự luận đầy đủ từng bước số học cụ thể.
159
+ → Ghi kết quả cuối: "Đáp án đúng = <giá trị>".
160
+ → Tìm phần tử trong choices khớp giá trị đó → đó là correct_index.
161
+ → Xác nhận 3 phương án còn lại đều SAI.
162
+ → KHÔNG được gán correct_index trước rồi mới giải — luôn giải trước, gán sau.
163
+
164
+ CHK-2 NHẤT QUÁN GIẢI THÍCH — ĐÁP ÁN
165
+ → Nội dung "Đáp án đúng:" trong explanation PHẢI KHỚP giá trị số với choices[correct_index].
166
+ → Nếu không khớp → viết lại câu từ đầu.
167
+
168
+ CHK-3 NHẤT QUÁN BẪY — EXPLANATION
169
+ → Với mỗi bẫy (phương án sai), explanation phải ghi đúng tên loại lỗi và cơ chế sai khớp với giá trị trong choices.
170
+
171
+ CHK-4 LOOKALIKE ĐỦ ĐIỀU KIỆN
172
+ → Tất cả 4 phương án cùng dạng ký hiệu và cấu trúc.
173
+ → Không có phương án nào quá dài/ngắn hoặc phức tạp khác biệt rõ ràng so với các phương án khác.
174
+
175
+ CHK-5 EXPLANATION NGẮN GỌN — KHÔNG BIỆN HỘ SAU
176
+ → Explanation CHỈ được giải bài toán GỐC trong stem — KHÔNG được thử nghiệm thay đổi đề bài.
177
+ → Nếu tính toán cho kết quả không khớp choices nào → viết lại câu hỏi để sửa, KHÔNG viết dài thêm để biện hộ.
178
+ → Giới hạn: phần "Đáp án đúng:" tối đa 4 câu/bước; mỗi bẫy tối đa 1 câu ngắn.
179
+
180
+ QUAN TRỌNG: correct_index trong JSON PHẢI trỏ đúng vào phương án chứa đáp án đã tính ở CHK-1.
181
+ Đây là điều kiện tối thiểu — sai ở đây là lỗi nghiêm trọng nhất."""
182
+
183
+ _PROMPT_TMPL = """Trọng tâm tuần: {focus}
184
+ Nhiệm vụ học trong tuần:
185
+ {tasks}
186
+
187
+ Ngữ cảnh kiến thức từ kho tri thức:
188
+ {context}
189
+
190
+ Tạo {n} câu hỏi trắc nghiệm (từ dễ → khó theo Bloom).
191
+
192
+ YÊU CẦU BẮT BUỘC cho mỗi câu:
193
+ 1. Áp dụng quy trình Essay-to-MCQ: viết lời giải tự luận trước, sau đó thu hoạch bẫy.
194
+ 2. Đảm bảo nguyên tắc LOOKALIKE: 4 phương án cùng dạng ký hiệu, giá trị gần nhau.
195
+ 3. Ba bẫy từ ba loại lỗi KHÁC NHAU trong taxonomy.
196
+ 4. Explanation nêu rõ: tính toán đáp án đúng + tên loại lỗi + cơ chế sai của từng bẫy.
197
+ 5. PHÂN BỐ correct_index: trong {n} câu, đáp án đúng phải rải đều A/B/C/D — KHÔNG được tập trung vào một vị trí. Đặt đáp án đúng vào vị trí bất kỳ (0, 1, 2, hoặc 3) sau khi sắp xếp choices.
198
+
199
+ Trả về JSON hợp lệ, không có text nào ngoài JSON:
200
+ {{
201
+ "questions": [
202
+ {{
203
+ "stem": "Nội dung câu hỏi (tiếng Việt, LaTeX $...$)",
204
+ "choices": ["A. ...", "B. ...", "C. ...", "D. ..."],
205
+ "correct_index": "<số nguyên 0-3, phân bố đều giữa các câu — KHÔNG luôn là 0>",
206
+ "difficulty": "easy|medium|hard",
207
+ "bloom_level": "remember|understand|apply|analyze",
208
+ "explanation": "Đáp án đúng: <lời giải từng bước>. Bẫy <tên PA sai 1> [LOẠI_LỖI]: <cơ chế>. Bẫy <tên PA sai 2> [LOẠI_LỖI]: <cơ chế>. Bẫy <tên PA sai 3> [LOẠI_LỖI]: <cơ chế>."
209
+ }}
210
+ ]
211
+ }}"""
212
+
213
+
214
+ # Reviewer prompt: independent mathematical validation of each generated question.
215
+ # Uses default_model (sonnet) because haiku cannot reliably verify multi-step algebra
216
+ # (Vieta, completing the square, optimization with constraints, etc.).
217
+ _REVIEWER_SYSTEM = r"""Bạn là giáo viên Toán lớp 9 kiểm duyệt độc lập. Với MỖI câu hỏi:
218
+
219
+ BƯỚC 1 — GIẢI ĐỘC LẬP (bắt buộc, không đọc explanation trước):
220
+ Đọc "stem". Tính kết quả đúng từ đầu theo từng bước số học cụ thể.
221
+ Ghi kết quả tính được: result = <giá trị cụ thể>.
222
+
223
+ BƯỚC 2 — ĐỐI CHIẾU VỚI CHOICES:
224
+ Tìm phần tử trong "choices" chứa giá trị = result ở Bước 1.
225
+ Đó là correct_index thực sự (0=A, 1=B, 2=C, 3=D).
226
+
227
+ BƯỚC 3 — SO SÁNH VỚI correct_index ĐÃ CHO:
228
+ Nếu correct_index đã cho = correct_index thực sự → valid: true.
229
+ Nếu khác → valid: false, báo corrected_correct_index = correct_index thực sự.
230
+ Nếu không có choice nào khớp result → valid: false, corrected_correct_index: null (bỏ câu).
231
+
232
+ QUY TẮC QUAN TRỌNG:
233
+ - KHÔNG được tin vào explanation — nó có thể sai hoặc cố tình biện hộ cho đáp án sai.
234
+ - KHÔNG được chấp nhận lý luận dài dòng thay đổi đề bài. Chỉ kiểm tra stem gốc.
235
+ - Nếu explanation mâu thuẫn với kết quả tính ở Bước 1 → luôn tin vào tính toán, không tin explanation.
236
+
237
+ Trả về JSON thuần (không có text ngoài):
238
+ {"results": [{"index": <i>, "valid": true|false, "corrected_correct_index": <j|null>}]}"""
239
+
240
+
241
async def _review_and_patch(
    client: AsyncOpenAI,
    questions: list[dict],
    settings,
) -> list[dict]:
    """Send generated questions to a reviewer model; drop or patch invalid ones.

    The reviewer is expected to return
    ``{"results": [{"index", "valid", "corrected_correct_index"}]}``.
    For each question:

    * no verdict, or ``valid`` truthy -> kept unchanged;
    * invalid with a usable ``corrected_correct_index`` -> kept with
      ``correct_index`` patched (on a copy);
    * invalid without a usable correction -> dropped.

    A correction is usable when it is an integer (or a decimal string such
    as ``"2"``) inside the question's choice range. Booleans and other types
    are rejected instead of raising or being patched in.

    If the reviewer call or its parsing fails, review is skipped and the
    questions are returned unchanged (fail-open).

    Args:
        client: OpenAI-compatible async client.
        questions: Candidate questions; not mutated.
        settings: Settings object providing ``default_model``.

    Returns:
        The surviving (possibly patched) questions, in their original order.
    """
    if not questions:
        return questions

    payload = json.dumps({"questions": questions}, ensure_ascii=False)
    try:
        response = await call_with_retry(
            client,
            model=settings.default_model,
            max_tokens=3000,
            messages=[
                {"role": "system", "content": _REVIEWER_SYSTEM},
                {"role": "user", "content": payload},
            ],
        )
        raw = _extract_json(response.choices[0].message.content or "{}")
        data = json.loads(raw)
        results = {}
        for r in data.get("results", []):
            idx = r["index"]
            # Models sometimes quote numbers; "0" must still match question 0,
            # otherwise the verdict is silently ignored.
            if isinstance(idx, str) and idx.strip().isdecimal():
                idx = int(idx.strip())
            results[idx] = r
    except Exception as exc:
        logger.warning("quiz_generator: reviewer call failed (%s), skipping review", exc)
        return questions

    patched: list[dict] = []
    for i, q in enumerate(questions):
        verdict = results.get(i)
        if verdict is None or verdict.get("valid"):
            patched.append(q)
            continue

        corrected = verdict.get("corrected_correct_index")
        if isinstance(corrected, str) and corrected.strip().isdecimal():
            corrected = int(corrected.strip())
        # bool is a subclass of int; exclude it explicitly. Comparing a
        # non-numeric value with ``<=`` would raise TypeError.
        usable = (
            isinstance(corrected, int)
            and not isinstance(corrected, bool)
            and 0 <= corrected < len(q.get("choices", []))
        )
        if usable:
            # "issues" is not part of the schema requested in the reviewer
            # prompt, so it is normally logged as None.
            logger.info(
                "quiz_generator: patching correct_index %d→%d for question %d (issues: %s)",
                q["correct_index"], corrected, i, verdict.get("issues"),
            )
            patched.append(dict(q, correct_index=corrected))
        else:
            logger.warning(
                "quiz_generator: dropping question %d — reviewer flagged unfixable issues: %s",
                i, verdict.get("issues"),
            )
    return patched
288
+
289
+
290
# One backslash, optionally followed by what makes it a genuine JSON escape:
#   - another backslash (an already-escaped pair, consumed as a unit),
#   - one of " / n r t,
#   - b or f NOT followed by a letter (so \beta, \frac count as LaTeX),
#   - u followed by exactly four hex digits (so \underline counts as LaTeX).
_JSON_ESCAPE_RE = re.compile(
    r'\\(\\|["/nrt]|[bf](?![A-Za-z])|u[0-9a-fA-F]{4})?'
)


def _fix_latex_escapes(text: str) -> str:
    r"""Double backslashes that do not start a genuine JSON escape sequence.

    LLMs frequently emit bare LaTeX (e.g. ``\sqrt``, ``\frac``) inside JSON
    strings, which makes the payload unparseable or silently wrong.

    Behaviour:
      * ``\\`` pairs are consumed together, so text that is already
        correctly escaped (``\\sqrt``) is left untouched;
      * ``\"``, ``\/``, ``\n``, ``\r``, ``\t`` and ``\uXXXX`` are preserved;
      * ``\b`` / ``\f`` followed by a letter are treated as LaTeX
        (``\beta``, ``\frac``) rather than backspace / form feed;
      * every other backslash is doubled.

    Known limitation: ``\n``, ``\r`` and ``\t`` followed by letters
    (``\nu``, ``\rho``, ``\theta``) cannot be told apart from real control
    escapes and are kept as-is.

    Args:
        text: Raw JSON-like text.

    Returns:
        The text with stray backslashes doubled.
    """
    def _repair(match):
        # Group 1 is set only when the backslash starts a valid escape.
        return match.group(0) if match.group(1) else "\\\\"

    return _JSON_ESCAPE_RE.sub(_repair, text)
297
+
298
+
299
def _extract_json(text: str) -> str:
    """Strip code fences, repair LaTeX escapes, and extract a valid JSON object.

    Candidates are tried in order and the first one that parses is returned:

      1. the text itself (after removing a leading code fence and an
         optional ``json`` language tag);
      2. that text after ``_fix_latex_escapes``;
      3. the outermost ``{...}`` span found in the text, as-is;
      4. that span after ``_fix_latex_escapes`` (returned without a final
         check, as a best effort).

    If no ``{...}`` span exists, the escape-repaired text is returned even
    though it does not parse; callers then see the ``json.loads`` error.

    Args:
        text: Raw model output.

    Returns:
        The best JSON candidate as a string.
    """
    def _parses(candidate: str) -> bool:
        try:
            json.loads(candidate)
        except json.JSONDecodeError:
            return False
        return True

    text = text.strip()
    if text.startswith("```"):
        parts = text.split("```")
        text = parts[1] if len(parts) > 1 else text
        if text.startswith("json"):
            text = text[4:]
    text = text.strip()

    if _parses(text):
        return text

    fixed = _fix_latex_escapes(text)
    if _parses(fixed):
        return fixed

    m = re.search(r'\{[\s\S]*\}', text)
    if m:
        candidate = m.group(0)
        # Prose around an otherwise valid object is the common case; do not
        # run escape repair on JSON that is already well-formed.
        if _parses(candidate):
            return candidate
        return _fix_latex_escapes(candidate)
    return fixed
324
+
325
+
326
def _validate_structure(questions: list[dict]) -> list[dict]:
    """Deterministic post-generation guard before questions reach the UI.

    Hard drops (structural):
      - entry is not a dict
      - correct_index not an int 0-3 (booleans are rejected: JSON ``true``
        would otherwise pass as index 1)
      - choices is not a list/tuple of exactly 4 items
      - any choice is empty or not a string
      - stem is empty or not a string

    Soft warns (content consistency — the LLM reviewer already validated
    the math):
      - explanation missing, or missing its "Đáp án đúng" section
      - choices[correct_index] value not found in the explanation's answer
        section (the text before the first "Bẫy <letter>" label)

    Args:
        questions: Candidate question dicts; not mutated.

    Returns:
        The questions that passed every hard check, in their original order.
    """
    def _norm(s: str) -> str:
        # Ignore whitespace and LaTeX "$" delimiters when comparing values.
        return re.sub(r'[\s$]', '', s)

    valid: list[dict] = []
    for i, q in enumerate(questions):
        if not isinstance(q, dict):
            logger.warning("quiz_validate: q%d dropped — not a JSON object", i)
            continue

        ci = q.get("correct_index")
        choices = q.get("choices") or []
        if not isinstance(choices, (list, tuple)):
            # e.g. a 4-character string would otherwise pass the length and
            # per-item string checks below.
            choices = []
        raw_stem = q.get("stem")
        stem = raw_stem.strip() if isinstance(raw_stem, str) else ""
        raw_explanation = q.get("explanation")
        explanation = raw_explanation.strip() if isinstance(raw_explanation, str) else ""

        # ── Hard structural checks ──────────────────────────────────────────
        if isinstance(ci, bool) or not isinstance(ci, int) or not (0 <= ci <= 3):
            logger.warning("quiz_validate: q%d dropped — invalid correct_index %r", i, ci)
            continue
        if len(choices) != 4:
            logger.warning("quiz_validate: q%d dropped — expected 4 choices, got %d", i, len(choices))
            continue
        if not all(isinstance(c, str) and c.strip() for c in choices):
            logger.warning("quiz_validate: q%d dropped — empty or non-string choice", i)
            continue
        if not stem:
            logger.warning("quiz_validate: q%d dropped — empty stem", i)
            continue

        # ── Soft content-consistency checks (warn only) ─────────────────────
        if not explanation:
            logger.warning("quiz_validate: q%d has no explanation", i)
        elif "Đáp án đúng" not in explanation:
            logger.warning("quiz_validate: q%d explanation missing 'Đáp án đúng' section", i)
        else:
            # Correct choice without its "A. " style label.
            correct_body = re.sub(r'^[A-D]\.\s*', '', choices[ci]).strip()
            # Answer section = text before the first "Bẫy <letter>" label.
            ans_section = re.split(r'\bBẫy\s+[A-D]\b', explanation, maxsplit=1)[0]

            if correct_body and _norm(correct_body) not in _norm(ans_section):
                logger.warning(
                    "quiz_validate: q%d explanation/answer mismatch — "
                    "choice[%d]=%r not found in answer section %r",
                    i, ci, correct_body[:50], ans_section[:100],
                )

        valid.append(q)
    return valid
383
+
384
+
385
def _shuffle_answer_position(questions: list[dict]) -> list[dict]:
    """Move each question's correct answer to a uniformly random slot.

    LLMs strongly favour ``correct_index=0``. For every question one slot in
    0-3 is drawn; if it differs from the current one, the correct choice is
    swapped with whatever occupied that slot. Choices are then re-labelled
    "A. " .. "D. " by position (any existing "X. " prefix is stripped first).

    Input dicts and their choice lists are not mutated; copies are returned.

    NOTE(review): the "Bẫy <letter>" references inside ``explanation`` are
    not rewritten here, so they may no longer match the new labels — confirm
    whether the UI relies on them.
    """
    shuffled: list[dict] = []
    for question in questions:
        current = question["correct_index"]
        options = list(question["choices"])
        target = random.randint(0, 3)
        if target != current:
            options[current], options[target] = options[target], options[current]
        # Re-label A/B/C/D to match new positions
        relabeled = [
            chr(65 + position) + ". " + re.sub(r'^[A-D]\.\s*', '', option)
            for position, option in enumerate(options)
        ]
        shuffled.append(dict(question, choices=relabeled, correct_index=target))
    return shuffled
408
+
409
+
410
async def generate_week_quiz(
    client: AsyncOpenAI,
    pool,
    week_focus: str,
    week_tasks: list[str],
    n: int = 4,
) -> list[dict]:
    """Generate n MCQ for a study-plan week, grounded in wiki knowledge.

    Pipeline:
      1. If ``pool`` is truthy, retrieve up to 8 wiki unit ids for the week's
         focus and tasks and use the first 6 units as context. Retrieval
         failures are logged and generation continues without context.
      2. Ask the default model for ``n`` questions.
      3. Keep only well-formed question objects, then run
         ``_review_and_patch``, ``_validate_structure`` and
         ``_shuffle_answer_position``.

    Malformed entries (non-objects, wrong field types, boolean
    ``correct_index``) are skipped individually instead of aborting the
    whole batch, and a payload that is a bare JSON list is treated as the
    question list itself.

    Args:
        client: OpenAI-compatible async client.
        pool: Database pool used for retrieval; may be falsy.
        week_focus: Focus topic of the week.
        week_tasks: Study tasks of the week.
        n: Number of questions requested from the model.

    Returns:
        The validated, shuffled questions (possibly fewer than ``n``).

    Raises:
        Exception: Any error from the generation call or JSON parsing is
            logged and re-raised.
    """
    context = ""
    if pool:
        try:
            from app.math_wiki.storage import pg_vectors, pg_db
            query = week_focus + " " + " ".join(week_tasks)
            ids = await pg_vectors.query_pgvector(pool, query, top_k=8)
            units = await pg_db.get_wiki_units_by_ids(pool, ids) if ids else []
            if units:
                context = "\n\n".join(
                    f"[{u.get('id', '')}] {u.get('content', '')}"
                    for u in units[:6]
                )
        except Exception as exc:
            # Retrieval is best-effort: the quiz can still be generated.
            logger.warning("quiz_generator: wiki retrieval failed (%s), continuing without context", exc)

    if not context:
        context = "(Không có ngữ cảnh từ kho tri thức — tự tạo câu hỏi dựa trên trọng tâm)"

    prompt = _PROMPT_TMPL.format(
        focus=week_focus,
        tasks="\n".join(f"- {t}" for t in week_tasks),
        context=context,
        n=n,
    )

    settings = get_settings()
    try:
        response = await call_with_retry(
            client,
            model=settings.default_model,
            max_tokens=4000,
            messages=[
                {"role": "system", "content": _SYSTEM},
                {"role": "user", "content": prompt},
            ],
        )
        raw = _extract_json(response.choices[0].message.content or "{}")
        data = json.loads(raw)

        # Accept {"questions": [...]} as requested, or a bare list.
        candidates = data.get("questions", []) if isinstance(data, dict) else data
        if not isinstance(candidates, list):
            candidates = []

        # bloom_level is optional for backward compatibility
        questions = [
            q for q in candidates
            if isinstance(q, dict)
            and isinstance(q.get("stem"), str)
            and isinstance(q.get("choices"), list)
            and len(q["choices"]) == 4
            and isinstance(q.get("correct_index"), int)
            # bool is a subclass of int; JSON true/false is not an index.
            and not isinstance(q.get("correct_index"), bool)
        ]
        questions = await _review_and_patch(client, questions, settings)
        questions = _validate_structure(questions)
        questions = _shuffle_answer_position(questions)
        return questions
    except Exception as exc:
        logger.error("quiz_generator: generation failed: %s", exc)
        raise
backend/app/math_wiki/agents/reranker.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from openai import AsyncOpenAI
4
+ from app.config import get_settings
5
+ from app.agent.core import call_with_retry
6
+ from app.math_wiki.prompts import MODE_PROMPTS
7
+ from app.math_wiki.utils import _extract_json
8
+ from app.math_wiki.schemas import WikiUnit
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
async def rerank(client: AsyncOpenAI, query: str, candidates: list[WikiUnit]) -> list[str]:
    """Ask the model to rank ``candidates`` for ``query`` and return known IDs.

    The candidates (id, type, content) and the query are sent as a JSON
    payload under the ``RERANK`` system prompt. The reply is expected to be a
    JSON object with a ``top_ids`` list.

    Args:
        client: OpenAI-compatible async client passed through to
            ``call_with_retry``.
        query: The search query the candidates are ranked against.
        candidates: Units to rank.

    Returns:
        At most five IDs, in the order the model returned them, restricted to
        IDs that belong to ``candidates``. An unparseable or unexpectedly
        shaped reply yields an empty list.
    """
    # Nothing to rank: every possible answer would be filtered out anyway,
    # so skip the model call entirely.
    if not candidates:
        return []

    settings = get_settings()
    candidate_input = [
        {"id": u.id, "type": u.type, "content": u.content}
        for u in candidates
    ]
    payload = json.dumps({"query": query, "candidates": candidate_input})
    response = await call_with_retry(
        client,
        model=settings.default_model,
        messages=[
            {"role": "system", "content": MODE_PROMPTS["RERANK"]},
            {"role": "user", "content": payload},
        ],
        max_tokens=200,
    )
    content = _extract_json(response.choices[0].message.content or "{}")
    try:
        parsed = json.loads(content)
    except json.JSONDecodeError:
        parsed = {}
    # The model may answer with a bare array or scalar instead of an object.
    if not isinstance(parsed, dict):
        parsed = {}
    top_ids = parsed.get("top_ids", [])
    if not isinstance(top_ids, list):
        top_ids = []

    valid_ids = {u.id for u in candidates}
    # The isinstance check keeps unhashable junk (dicts, lists) from raising
    # TypeError in the set membership test.
    filtered = [uid for uid in top_ids if isinstance(uid, str) and uid in valid_ids]
    if len(filtered) < len(top_ids):
        logger.warning(
            "Reranker returned %d unknown ID(s), filtered out",
            len(top_ids) - len(filtered),
        )
    return filtered[:5]
backend/app/math_wiki/agents/reviewer.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from openai import AsyncOpenAI
4
+ from app.config import get_settings
5
+ from app.agent.core import call_with_retry
6
+ from app.math_wiki.prompts import MODE_PROMPTS
7
+ from app.math_wiki.utils import _extract_json
8
+ from app.math_wiki.schemas import WikiUnit, ReviewOutput
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ _VALID_VERDICTS = {"correct", "partial", "incorrect"}
13
+
14
+
15
+ def _is_inconsistent(parsed: dict) -> bool:
16
+ """Return True when verdict is non-correct but all explanatory fields are empty."""
17
+ if parsed.get("verdict") == "correct":
18
+ return False
19
+ has_errors = bool([e for e in parsed.get("errors", []) if str(e).strip()])
20
+ has_feedback = bool(str(parsed.get("feedback", "")).strip())
21
+ return not has_errors and not has_feedback
22
+
23
+
24
async def _call_reviewer(client: AsyncOpenAI, messages: list, settings) -> dict:
    """Send ``messages`` to the review model and return the parsed JSON object.

    Args:
        client: OpenAI-compatible async client passed to ``call_with_retry``.
        messages: Chat messages (system + user) to send.
        settings: Settings object; only ``default_model`` is read here.

    Returns:
        The reply decoded as a ``dict``.

    Raises:
        ValueError: If the reply is not valid JSON, or is valid JSON but not
            an object (callers use ``.get`` on the result).
    """
    response = await call_with_retry(
        client,
        model=settings.default_model,
        messages=messages,
        max_tokens=2048,
    )
    content = _extract_json(response.choices[0].message.content or "{}")
    try:
        parsed = json.loads(content)
    except json.JSONDecodeError as exc:
        raise ValueError("Review agent returned malformed JSON") from exc
    if not isinstance(parsed, dict):
        # A bare array or scalar would otherwise fail later with AttributeError.
        raise ValueError("Review agent returned malformed JSON")
    return parsed
36
+
37
+
38
async def review_solution(
    client: AsyncOpenAI,
    problem: str,
    solution: str,
    context: list[WikiUnit],
) -> ReviewOutput:
    """Have the review model grade ``solution`` for ``problem``.

    The problem, the solution and the context units (id + content) are sent
    as JSON under the ``REVIEW`` system prompt. If the first reply is a
    non-correct verdict with neither errors nor feedback, the same request is
    sent once more and the second reply is used as-is.

    Args:
        client: OpenAI-compatible async client.
        problem: Problem statement.
        solution: The solution text to review.
        context: Knowledge units supplied to the reviewer.

    Returns:
        A ``ReviewOutput`` built from the reply. Missing or ``null`` fields
        fall back to defaults: verdict ``"incorrect"``, score ``"0/10"``,
        empty lists and empty strings.

    Raises:
        ValueError: Propagated from ``_call_reviewer`` when the reply cannot
            be parsed.
    """
    settings = get_settings()
    payload = (
        json.dumps({
            "problem": problem,
            "solution": solution,
            "context": [{"id": u.id, "content": u.content} for u in context],
        })
        + "\n\nRespond with ONLY a JSON object. No prose or markdown."
    )
    messages = [
        {"role": "system", "content": MODE_PROMPTS["REVIEW"]},
        {"role": "user", "content": payload},
    ]

    parsed = await _call_reviewer(client, messages, settings)

    if _is_inconsistent(parsed):
        logger.warning("Reviewer returned inconsistent response (non-correct with no errors/feedback) — retrying")
        parsed = await _call_reviewer(client, messages, settings)

    def _text(key: str, default: str = "") -> str:
        # JSON null must not turn into the literal string "None".
        value = parsed.get(key)
        return default if value is None else str(value)

    def _items(key: str) -> list[str]:
        # Tolerate null and a bare scalar where a list was expected.
        raw = parsed.get(key) or []
        if not isinstance(raw, (list, tuple)):
            raw = [raw]
        return [str(item) for item in raw if str(item).strip()]

    verdict = parsed.get("verdict", "incorrect")
    # isinstance guard: an unhashable verdict would raise in the set lookup.
    if not isinstance(verdict, str) or verdict not in _VALID_VERDICTS:
        verdict = "incorrect"

    return ReviewOutput(
        verdict=verdict,
        score=_text("score", "0/10"),
        correct_steps=_items("correct_steps"),
        errors=_items("errors"),
        feedback=_text("feedback"),
        correct_approach=_text("correct_approach"),
    )
backend/app/math_wiki/agents/solver.py ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ import logging
4
+ from openai import AsyncOpenAI
5
+ from app.config import get_settings
6
+ from app.agent.core import call_with_retry
7
+ from app.math_wiki.prompts import MODE_PROMPTS
8
+ from app.math_wiki.utils import _extract_json, InsufficientKnowledgeError, VALID_CONFIDENCE
9
+ from app.math_wiki.schemas import WikiUnit, SolverOutput
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ # Bare slug/ID with no whitespace — not a human-readable step
14
+ _SLUG_RE = re.compile(r'^[\w-]+$')
15
+ _BUOC_RE = re.compile(r'^Bước\s+\d+\s*:', re.UNICODE)
16
+ _PHAN_RE = re.compile(r'^\*\*Phần\s+[a-dA-D]', re.UNICODE) # multi-part section headers
17
+
18
+ _EXPECTED_KEYS = {"problem_type", "steps", "final_answer", "confidence", "used_knowledge_ids"}
19
+
20
+
21
def _inject_buoc_prefix(steps: list[str]) -> list[str]:
    """Return ``steps`` with every non-header entry starting with 'Bước N: '.

    Blank entries are dropped. Entries matching the multi-part section header
    pattern are kept verbatim and do not consume a step number. Entries that
    already carry a 'Bước N:' prefix are kept verbatim but still advance the
    counter, so numbers injected afterwards stay sequential. Only the prefix
    is added; the step text itself is untouched apart from stripping
    surrounding whitespace.
    """
    numbered: list[str] = []
    counter = 0
    for raw in steps:
        text = raw.strip()
        if not text:
            continue
        if _PHAN_RE.match(text):
            # Section header: pass through, no number consumed.
            numbered.append(text)
            continue
        counter += 1
        numbered.append(text if _BUOC_RE.match(text) else f"Bước {counter}: {text}")
    return numbered
43
+
44
+
45
+ def _safe_parse_literal(s: str):
46
+ """Parse a Python-style literal string using json.loads only (no ast.literal_eval).
47
+ Single quotes are normalized to double quotes as a best-effort step."""
48
+ try:
49
+ return json.loads(s)
50
+ except json.JSONDecodeError:
51
+ pass
52
+ try:
53
+ return json.loads(s.replace("'", '"'))
54
+ except json.JSONDecodeError:
55
+ return None
56
+
57
+
58
def _normalize(parsed: dict, valid_ids: set[str], _label_hint: str = "") -> SolverOutput:
    """Map whatever JSON structure the model returns into SolverOutput fields.

    The model does not reliably follow the requested schema, so this function
    tries a series of fallbacks for each field.

    Args:
        parsed: Decoded JSON reply from the solver model.
        valid_ids: IDs of the context units that were sent to the model;
            ``used_knowledge_ids`` is filtered down to these.
        _label_hint: Topic label used to pick a problem type when the reply
            carries none.

    Returns:
        A ``SolverOutput`` whose steps all carry a 'Bước N: ' prefix (section
        headers excepted).

    Raises:
        InsufficientKnowledgeError: If the reply is not a JSON object, no
            final answer can be derived, or the answer is the literal
            ``INSUFFICIENT_KNOWLEDGE`` marker.
    """
    if not isinstance(parsed, dict):
        # A bare array/scalar has nothing we can map; .get below would crash.
        raise InsufficientKnowledgeError("Solver returned a non-object response")

    def _first_present(mapping: dict, *keys: str):
        """Return the first value among ``keys`` that is not None, else ''."""
        for key in keys:
            value = mapping.get(key)
            if value is not None:
                return value
        return ""

    # Model sometimes wraps the response in a single top-level key (e.g. {"proof": {...}}).
    # Unwrap when none of the expected keys are present at the top level.
    if parsed and not (_EXPECTED_KEYS & parsed.keys()):
        inner = next(iter(parsed.values()))
        if isinstance(inner, dict):
            parsed = inner

    # Multi-part problems: model may return {"parts": [{"label": "a", "steps": [...], "final_answer": "..."}]}
    # Fold into the flat schema: part headers injected into steps, combined final_answer "a) X; b) Y".
    raw_parts = parsed.get("parts")
    if isinstance(raw_parts, list) and raw_parts and isinstance(raw_parts[0], dict):
        combined_steps: list = []
        combined_answers: list[str] = []
        for part in raw_parts:
            if not isinstance(part, dict):
                continue  # only the first entry was type-checked above
            label = str(_first_present(part, "label")).strip().rstrip(")")
            if label:
                combined_steps.append(f"**Phần {label})**")
            part_steps = part.get("steps", [])
            if isinstance(part_steps, list):
                # Keep the raw items so dict-shaped steps are rendered by
                # _step_to_str below instead of as a Python repr.
                combined_steps.extend(s for s in part_steps if str(s).strip())
            fa = str(_first_present(part, "final_answer", "answer")).strip()
            if fa:
                prefix = f"{label}) " if label else ""
                combined_answers.append(f"{prefix}{fa}")
        if combined_answers:
            parsed = dict(parsed)  # do not mutate the caller's dict
            parsed["steps"] = combined_steps
            parsed["final_answer"] = "; ".join(combined_answers)
            parsed.pop("parts", None)

    # --- steps ---
    def _step_to_str(s) -> str:
        if isinstance(s, str):
            # Slug-like strings (no whitespace) are leaked wiki IDs, not steps
            if _SLUG_RE.match(s):
                return ""
            return s
        if isinstance(s, dict):
            # Model returned {"step": N, "description"/"action"/"detail": "...", "result": "..."}
            desc = str(s.get("description") or s.get("action") or s.get("statement")
                       or s.get("detail") or s.get("work") or s.get("explanation") or "")
            result = str(s.get("result") or "")
            if not desc:
                # Collect string values; for nested dicts, recurse one level deep
                parts = []
                for k, v in s.items():
                    if k == "step":
                        continue
                    if isinstance(v, str) and v.strip():
                        parts.append(v)
                    elif isinstance(v, dict):
                        parts.append(_step_to_str(v))
                desc = " ".join(p for p in parts if p)
            if desc and result and result not in desc:
                return f"{desc} → {result}"
            return desc or result or str(s)
        return str(s)

    steps: list[str] = []
    if isinstance(parsed.get("steps"), list):
        steps = [_step_to_str(s) for s in parsed["steps"]]
    elif isinstance(parsed.get("solution"), dict):
        sol = parsed["solution"]
        if isinstance(sol.get("steps"), list):
            steps = [_step_to_str(s) for s in sol["steps"]]
    if not steps:
        for key in ("work", "explanation", "method"):
            if val := parsed.get(key):
                steps = [str(val)]
                break
    # Drop empty strings AND slug-like tokens regardless of how they were produced
    steps = [s for s in steps if s.strip() and not _SLUG_RE.match(s.strip())]

    # --- final_answer ---
    _SIMPLE_VAR = re.compile(r'^[a-zA-Z_]\w{0,3}$')  # x, y, x1, y_0 — short math vars only

    def _dict_to_str(d: dict) -> str:
        """Convert a dict final_answer to a readable string.

        Simple variable keys (x, y_0) → $x = 3$ with the key kept verbatim;
        Vietnamese/long keys → plain "key: value" with underscores shown as
        spaces.
        """
        parts = []
        for k, v in d.items():
            key = str(k)
            if _SIMPLE_VAR.match(key):
                # Keep underscores inside math mode: "y_0" is a subscript.
                parts.append(f"${key} = {v}$")
            else:
                parts.append(f"{key.replace('_', ' ')}: {v}")
        return " và ".join(parts)

    def _coerce_final(val) -> str:
        if isinstance(val, dict):
            return _dict_to_str(val)
        s = str(val)
        # Model returned a Python dict repr string like "{'x': 3, 'y': 2}"
        if s.startswith("{") and s.endswith("}"):
            parsed_val = _safe_parse_literal(s)
            if isinstance(parsed_val, dict):
                return _dict_to_str(parsed_val)
        return s

    final_answer: str = ""
    for key in ("final_answer", "answer"):
        if val := parsed.get(key):
            final_answer = _coerce_final(val)
            break
    if not final_answer:
        sol = parsed.get("solution")
        if isinstance(sol, str):
            final_answer = sol
        elif isinstance(sol, dict):
            inner = sol.get("answer") or sol.get("result") or sol.get("conclusion") or sol.get("summary")
            if inner:
                final_answer = _coerce_final(inner)
            else:
                # Try solution.results (e.g. {"cuc_dai": {"x": -1, "y": 10}, ...})
                results = sol.get("results")
                if isinstance(results, dict):
                    parts = []
                    for k, v in results.items():
                        k_label = str(k).replace("_", " ")
                        v_str = _dict_to_str(v) if isinstance(v, dict) else str(v)
                        parts.append(f"{k_label}: {v_str}")
                    final_answer = "; ".join(parts)
            # Don't call _dict_to_str(sol) — that formats steps+results together which is ugly
    if not final_answer:
        for key in ("roots", "solutions", "result", "x"):
            if val := parsed.get(key):
                final_answer = str(val)
                break
    # Proof-mode fallbacks: model may use "statement" or "conclusion" instead of "final_answer"
    if not final_answer:
        for key in ("statement", "conclusion", "summary"):
            if val := parsed.get(key):
                final_answer = str(val)
                if not final_answer.rstrip().endswith("∎"):
                    final_answer = final_answer.rstrip() + " ∎"
                break

    # Last-resort catch-all: model used completely non-standard keys.
    if not final_answer:
        # 1. If steps were extracted, use the last step as final_answer.
        if steps:
            final_answer = steps[-1]
            logger.warning("Derived final_answer from last step (keys: %s)", list(parsed.keys()))
        else:
            # 2. Collect string values; use them as steps + answer.
            # Metadata fields are excluded so that e.g. the confidence label
            # "high" can never be reported as the answer.
            meta_keys = {"problem_type", "confidence", "used_knowledge_ids"}
            all_vals = [
                v for k, v in parsed.items()
                if k not in meta_keys and isinstance(v, str) and v.strip()
            ]
            if all_vals:
                steps = all_vals
                final_answer = all_vals[-1]
                logger.warning("Used string catch-all for keys: %s", list(parsed.keys()))
            else:
                # 3. Format list-of-dict values (e.g. extrema, roots) into a readable string.
                for v in parsed.values():
                    if isinstance(v, list) and v and isinstance(v[0], dict):
                        parts = []
                        for item in v:
                            parts.append(", ".join(f"{k} = {val}" for k, val in item.items()))
                        final_answer = "; ".join(parts)
                        steps = [final_answer]
                        logger.warning("Formatted list-of-dict value for keys: %s", list(parsed.keys()))
                        break

    # Guard: model returned a list (JSON array or Python literal) instead of a formatted string.
    # Reformat as human-readable "x = a hoặc x = b" — the validator still flags
    # ODE cases where roots ≠ general solution.
    if final_answer and final_answer.lstrip().startswith('['):
        parsed_fa = _safe_parse_literal(final_answer)
        if isinstance(parsed_fa, list) and parsed_fa:
            raw_items = [str(v) for v in parsed_fa]

            def _has_equation(v: str) -> bool:
                inner = v.strip()
                if inner.startswith('$') and inner.endswith('$'):
                    inner = inner[1:-1]
                return '=' in inner

            parts = [item if _has_equation(item) else f"x = {item}" for item in raw_items]
            final_answer = parts[0] if len(parts) == 1 else " hoặc ".join(parts)
            logger.warning("Reformatted list final_answer to: %r", final_answer)

    # --- problem_type ---
    _LABEL_VI = {
        "algebra": "đại số", "geometry": "hình học", "calculus": "giải tích",
        "trigonometry": "lượng giác", "statistics": "thống kê", "probability": "xác suất",
        "combinatorics": "tổ hợp", "number_theory": "số học",
        "complex_numbers": "số phức", "sequences": "dãy số",
        "vectors": "vectơ", "functions": "hàm số",
    }
    # None-aware lookup: a JSON null must not become the type "None".
    _raw_pt = str(_first_present(parsed, "problem_type", "method"))
    if _raw_pt:
        problem_type = _raw_pt
    else:
        problem_type = _LABEL_VI.get(_label_hint, _label_hint or "đại số")

    # --- used_knowledge_ids — keep only IDs that actually exist in context ---
    raw_ids = parsed.get("used_knowledge_ids", [])
    if not isinstance(raw_ids, list):
        raw_ids = []
    # isinstance guard: unhashable entries would raise in the set lookup.
    used_ids = [uid for uid in raw_ids if isinstance(uid, str) and uid in valid_ids]

    # --- confidence ---
    confidence = str(parsed.get("confidence", "medium"))
    if confidence not in VALID_CONFIDENCE:
        confidence = "medium"

    if not final_answer:
        raise InsufficientKnowledgeError("Solver returned no answer")
    if final_answer.strip().upper() == "INSUFFICIENT_KNOWLEDGE":
        raise InsufficientKnowledgeError("Solver indicated insufficient knowledge")

    # Warn when final_answer is not mentioned in any step — likely a commit-before-compute error.
    # Skip for multi-part answers (format "a) X; b) Y") — the combined string won't appear verbatim.
    _is_multipart = bool(re.match(r'^[a-dA-D]\)', final_answer.strip()))
    if steps and not _is_multipart and not any(final_answer.lower()[:20] in s.lower() for s in steps):
        logger.warning(
            "final_answer %r not found in steps — possible answer/step mismatch", final_answer
        )

    if not steps:
        logger.warning("solver: no parseable steps in response — using final_answer as sole step. raw=%r", str(parsed)[:200])

    final_steps = _inject_buoc_prefix(steps or [final_answer])

    return SolverOutput(
        problem_type=problem_type,
        used_knowledge_ids=used_ids,
        steps=final_steps,
        final_answer=final_answer,
        confidence=confidence,
    )
293
+
294
+
295
async def _request_solution(client: AsyncOpenAI, model: str, payload: str) -> str:
    """Send one SOLVE request and return the JSON text extracted from the reply."""
    response = await call_with_retry(
        client,
        model=model,
        messages=[
            {"role": "system", "content": MODE_PROMPTS["SOLVE"]},
            {"role": "user", "content": payload},
        ],
        max_tokens=4096,
    )
    return _extract_json(response.choices[0].message.content or "{}")


async def solve(
    client: AsyncOpenAI,
    problem_text: str,
    context: list[WikiUnit],
    label: str = "",
    prior_failure: str | None = None,
) -> SolverOutput:
    """Ask the solver model for a solution and normalise it to ``SolverOutput``.

    The problem and context units are sent as JSON under the ``SOLVE`` system
    prompt. If the reply is not valid JSON, one retry is made with an empty
    context (and without the ``prior_failure`` note) to shorten the prompt.

    Args:
        client: OpenAI-compatible async client.
        problem_text: Problem statement.
        context: Knowledge units offered to the model.
        label: Topic label forwarded to ``_normalize`` as a problem-type hint.
        prior_failure: Optional note about a previous failed attempt, appended
            to the first request only.

    Returns:
        The normalised solver output. When the final answer looks like an
        echo of the question, it is replaced by the last step that does not
        start with an imperative verb.

    Raises:
        InsufficientKnowledgeError: If both replies are unparseable, the reply
            is not a JSON object, or ``_normalize`` cannot derive an answer.
    """
    settings = get_settings()
    json_only = "\n\nRespond with ONLY a JSON object. No prose or markdown."
    payload = json.dumps({
        "problem": problem_text,
        "context": [{"id": u.id, "type": u.type, "content": u.content} for u in context],
    })
    if prior_failure:
        payload += f"\n\n⚠ Lưu ý từ lần giải trước: {prior_failure}"
    payload += json_only

    content = await _request_solution(client, settings.default_model, payload)
    try:
        parsed = json.loads(content)
    except json.JSONDecodeError:
        # Truncated response — retry without context payload (shorter prompt, better chance)
        logger.warning("Solver response truncated; retrying without context")
        bare_payload = json.dumps({"problem": problem_text, "context": []}) + json_only
        retry_content = await _request_solution(client, settings.default_model, bare_payload)
        try:
            parsed = json.loads(retry_content)
        except json.JSONDecodeError as exc:
            raise InsufficientKnowledgeError("Malformed solver response") from exc

    if not isinstance(parsed, dict):
        # Valid JSON but not an object (bare array/scalar): nothing to normalise.
        raise InsufficientKnowledgeError("Malformed solver response")

    valid_ids = {u.id for u in context}
    result = _normalize(parsed, valid_ids, _label_hint=label)

    # Guard: if final_answer echoes the problem (starts with an imperative verb and overlaps
    # significantly with the question), replace it with the last non-trivial step.
    _STARTERS = ("tìm ", "cho ", "tính ", "giải ", "chứng ", "hãy ", "biết ", "xét ")
    fa_lower = result.final_answer.lower().strip()
    pt_lower = problem_text.lower()
    if fa_lower.startswith(_STARTERS) and pt_lower[:40] in fa_lower:
        candidate = next(
            (s for s in reversed(result.steps)
             if s.strip() and not s.lower().strip().startswith(_STARTERS)),
            None,
        )
        if candidate:
            logger.warning("final_answer resembled the question — substituted last useful step")
            result = result.model_copy(update={"final_answer": candidate})

    return result
backend/app/math_wiki/agents/sympy_verifier.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Deterministic symbolic verification of math solutions using SymPy.
2
+
3
+ Returns (True, []) on confirmed correct, (False, [issues]) on confirmed wrong,
4
+ (None, []) when inconclusive (parse failure, proof/geometry/combinatorics, etc.).
5
+ Never raises — all exceptions produce (None, []) to avoid false negatives.
6
+ """
7
+ from __future__ import annotations
8
+ import logging
9
+ import re
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ # Topics where symbolic substitution doesn't apply.
14
+ _SKIP_TYPES = frozenset({
15
+ "chứng minh", "proof", "geometry", "hình học",
16
+ "combinatorics", "tổ hợp", "statistics", "thống kê",
17
+ "probability", "xác suất", "number_theory", "số học",
18
+ })
19
+
20
+
21
def _should_skip(problem_type: str) -> bool:
    """Return True if any entry of ``_SKIP_TYPES`` occurs in ``problem_type``.

    The comparison is a case-insensitive substring test on the problem type.
    """
    lowered = problem_type.lower()
    for marker in _SKIP_TYPES:
        if marker in lowered:
            return True
    return False
24
+
25
+
26
+ # ── answer parsing ────────────────────────────────────────────────────────────
27
+
28
+ def _parse_candidates(final_answer: str) -> list[str]:
29
+ """Extract individual candidate value strings from a final_answer string.
30
+
31
+ Handles forms like:
32
+ "x = 2 hoặc x = 3" → ["2", "3"]
33
+ "$x = 2$ hoặc $x = 3$" → ["2", "3"]
34
+ "x = 2" → ["2"]
35
+ "x1 = 1, x2 = -1" → ["1", "-1"]
36
+ Returns raw value strings to be parsed by SymPy.
37
+ """
38
+ # Strip LaTeX delimiters
39
+ text = re.sub(r'\$', '', final_answer)
40
+ # Split on Vietnamese "hoặc", commas, semicolons, or "or"
41
+ parts = re.split(r'\bhoặc\b|\bor\b|[,;]', text, flags=re.IGNORECASE)
42
+ values: list[str] = []
43
+ for part in parts:
44
+ part = part.strip()
45
+ # Look for "var = value" pattern
46
+ m = re.search(r'=\s*(.+)$', part)
47
+ if m:
48
+ values.append(m.group(1).strip())
49
+ return values
50
+
51
+
52
+ def _parse_system_assignments(final_answer: str) -> dict[str, str] | None:
53
+ """Parse 'x = a, y = b' style system answers into {var: value} dict."""
54
+ text = re.sub(r'\$', '', final_answer)
55
+ parts = re.split(r'\bvà\b|\band\b|[,;]', text, flags=re.IGNORECASE)
56
+ assignments: dict[str, str] = {}
57
+ for part in parts:
58
+ part = part.strip()
59
+ m = re.match(r'^([a-zA-Z]\w*)\s*=\s*(.+)$', part)
60
+ if m:
61
+ assignments[m.group(1).strip()] = m.group(2).strip()
62
+ return assignments if len(assignments) >= 2 else None
63
+
64
+
65
+ def _extract_ode_solution(final_answer: str):
66
+ """Extract y = f(x) from an ODE general solution string.
67
+ Returns a SymPy expression for f(x), or None on failure."""
68
+ try:
69
+ import sympy as sp
70
+ text = re.sub(r'\$', '', final_answer).strip()
71
+ # Match "y = ..." at start of answer
72
+ m = re.match(r'y\s*=\s*(.+)', text, re.IGNORECASE)
73
+ if not m:
74
+ return None
75
+ expr_str = m.group(1).strip()
76
+ # Replace C1, C2 constants with SymPy symbols
77
+ expr_str = re.sub(r'\bC_?(\d+)\b', r'C\1', expr_str)
78
+ x, C1, C2, C3 = sp.symbols('x C1 C2 C3')
79
+ local_dict = {
80
+ 'x': x, 'C1': C1, 'C2': C2, 'C3': C3,
81
+ 'e': sp.E, 'pi': sp.pi, 'sin': sp.sin, 'cos': sp.cos,
82
+ 'exp': sp.exp, 'ln': sp.ln, 'sqrt': sp.sqrt,
83
+ }
84
+ return sp.sympify(expr_str, locals=local_dict)
85
+ except Exception:
86
+ return None
87
+
88
+
89
+ def _strip_prose(s: str) -> str:
90
+ """Strip leading prose words from a potential math expression."""
91
+ m = re.search(r'[0-9x\(\-\+\*\/\^\.]', s)
92
+ return s[m.start():] if m else s
93
+
94
+
95
+ # ── verification routines ─────────────────────────────────────────────────────
96
+
97
def _verify_equation(problem_text: str, candidate_values: list[str]) -> tuple[bool | None, list[str]]:
    """Substitute each candidate into the equation extracted from problem_text.

    The first ``lhs = rhs`` fragment of the problem that parses with SymPy
    *and mentions* ``x`` is used as the equation. Fragments without ``x``
    (for example a given such as ``a = 2``) are skipped: substituting into
    them would always leave a non-zero residual and wrongly report the answer
    as incorrect.

    Returns:
        ``(True, [])`` when every candidate gives a zero residual.
        ``(False, issues)`` when at least one candidate gives a non-zero,
        fully numeric residual.
        ``(None, [])`` when no usable equation is found, a candidate cannot
        be parsed, or a residual still contains symbols (parameters or
        integer constants such as ``k``), which cannot be decided here.
    """
    try:
        import sympy as sp
        text = re.sub(r'\$', '', problem_text)
        x = sp.Symbol('x')
        local = {'x': x, 'sqrt': sp.sqrt, 'abs': sp.Abs, 'log': sp.log, 'ln': sp.ln}

        # NOTE(review): sympify evaluates its input with eval(); problem_text
        # and the candidates come from user/model text — confirm acceptable.

        # Try each "lhs = rhs" match; skip those that won't sympify or that
        # do not involve x at all.
        expr = None
        for eq_match in re.finditer(r'([^:=\n]+)=([^:=\n]+)', text):
            lhs_raw = _strip_prose(eq_match.group(1).strip())
            rhs_raw = eq_match.group(2).strip()
            try:
                lhs = sp.sympify(lhs_raw, locals=local)
                rhs = sp.sympify(rhs_raw, locals=local)
                difference = lhs - rhs
                mentions_x = x in difference.free_symbols
            except Exception:
                continue
            if mentions_x:
                expr = difference
                break

        if expr is None:
            return None, []

        issues: list[str] = []
        valid_count = 0
        for val_str in candidate_values:
            try:
                val = sp.sympify(val_str, locals={'sqrt': sp.sqrt})
                residual = sp.simplify(expr.subs(x, val))
                unresolved = bool(residual.free_symbols)
            except Exception:
                return None, []  # can't evaluate — inconclusive
            if residual == 0:
                valid_count += 1
            elif unresolved:
                # Non-zero but still symbolic: correctness depends on values
                # we do not know, so do not claim the answer is wrong.
                return None, []
            else:
                issues.append(f"x = {val_str} does not satisfy the equation (residual = {residual})")

        if issues:
            return False, issues
        if valid_count > 0:
            return True, []
        return None, []
    except Exception as exc:
        logger.debug("_verify_equation failed: %s", exc)
        return None, []
142
+
143
+
144
def _verify_system(problem_text: str, assignments: dict[str, str]) -> tuple[bool | None, list[str]]:
    """Substitute variable assignments into the equations found in problem_text.

    The problem is split on "và", "and", semicolons and newlines; only the
    first four clauses are examined. A clause is checked only when it parses
    and involves at least one assigned variable, and its residual after
    substitution is fully numeric. Other clauses (unrelated givens,
    unresolved parameters) are ignored rather than counted as failures.

    Returns:
        ``(True, [])`` when every checked clause is satisfied,
        ``(False, issues)`` when at least one checked clause is not,
        ``(None, [])`` when nothing could be checked or an assigned value
        does not parse.
    """
    try:
        import sympy as sp
        text = re.sub(r'\$', '', problem_text)
        # Split on conjunctions first so each clause is a single equation candidate
        clauses = re.split(r'\bvà\b|\band\b|[;\n]', text, flags=re.IGNORECASE)

        syms = {v: sp.Symbol(v) for v in assignments}
        local = {**syms, 'sqrt': sp.sqrt, 'abs': sp.Abs}

        # NOTE(review): sympify evaluates its input with eval(); both inputs
        # come from user/model text — confirm acceptable.
        val_map = {}
        for var, val_str in assignments.items():
            try:
                val_map[syms[var]] = sp.sympify(val_str, locals=local)
            except Exception:
                return None, []
        assigned = set(val_map)

        issues: list[str] = []
        checked = 0
        for clause in clauses[:4]:  # limit to first 4 clauses
            if '=' not in clause:
                continue
            lhs_text, rhs_text = clause.split('=', 1)
            try:
                lhs = sp.sympify(_strip_prose(lhs_text.strip()), locals=local)
                rhs = sp.sympify(rhs_text.strip(), locals=local)
                equation = lhs - rhs
                if not (equation.free_symbols & assigned):
                    # Clause is about something other than the answer variables.
                    continue
                residual = sp.simplify(equation.subs(val_map))
                unresolved = bool(residual.free_symbols)
            except Exception:
                continue
            if residual == 0:
                checked += 1
            elif unresolved:
                # Depends on symbols we have no value for — undecidable here.
                continue
            else:
                checked += 1
                issues.append(f"Assignment {assignments} does not satisfy equation '{clause.strip()}'")

        if not checked:
            return None, []
        return (False, issues) if issues else (True, [])
    except Exception as exc:
        logger.debug("_verify_system failed: %s", exc)
        return None, []
186
+
187
+
188
def _verify_ode(problem_text: str, solution_expr) -> tuple[bool | None, list[str]]:
    """Differentiate the proposed solution and substitute into the ODE.

    The ODE is taken from the first line fragment of ``problem_text`` that
    starts at a primed ``y`` and contains ``=``. ``y''``, ``y'`` and ``y``
    in that fragment are replaced by the second derivative, first derivative
    and the solution itself, and the two sides are compared symbolically.

    Args:
        problem_text: Problem statement containing the ODE.
        solution_expr: SymPy expression in ``x`` for the proposed ``y``.

    Returns:
        ``(True, [])`` if the residual simplifies to zero,
        ``(False, [issue])`` if it does not,
        ``(None, [])`` if no ODE is found or anything fails to parse.
    """
    try:
        import sympy as sp
        text = re.sub(r'\$', '', problem_text)
        # Fold typographic primes into ASCII so the substitution below sees
        # the same spelling the search pattern accepts.
        text = text.replace('′', "'").replace('’', "'")
        # Extract ODE — look for y'' / y' notation
        ode_match = re.search(r"y'+[^=\n]*=[^\n]+", text)
        if not ode_match:
            return None, []

        x = sp.Symbol('x')
        substitutions = {
            "y''": f"({sp.diff(solution_expr, x, 2)})",
            "y'": f"({sp.diff(solution_expr, x)})",
            "y": f"({solution_expr})",
        }
        # Single pass with longest alternative first: text that has already
        # been inserted is never rescanned, so a "y" inside a printed
        # derivative cannot be substituted a second time.
        ode_expr_str = re.sub(
            r"y''|y'|y",
            lambda m: substitutions[m.group(0)],
            ode_match.group(0),
        )
        parts = ode_expr_str.split('=', 1)
        if len(parts) != 2:
            return None, []

        local = {'x': x, 'exp': sp.exp, 'sin': sp.sin, 'cos': sp.cos,
                 'sqrt': sp.sqrt, 'ln': sp.ln, 'C1': sp.Symbol('C1'),
                 'C2': sp.Symbol('C2'), 'C3': sp.Symbol('C3')}
        # NOTE(review): sympify evaluates its input with eval(); the ODE text
        # comes from the problem statement — confirm acceptable.
        lhs = sp.sympify(parts[0].strip(), locals=local)
        rhs = sp.sympify(parts[1].strip(), locals=local)
        residual = sp.simplify(lhs - rhs)
        if residual == 0:
            return True, []
        return False, [f"Proposed ODE solution does not satisfy the equation (residual = {residual})"]
    except Exception as exc:
        logger.debug("_verify_ode failed: %s", exc)
        return None, []
230
+
231
+
232
+ # ── public API ────────────────────────────────────────────────────────────────
233
+
234
def sympy_verify(
    problem_text: str,
    final_answer: str,
    problem_type: str = "",
) -> tuple[bool | None, list[str]]:
    """Check ``final_answer`` against ``problem_text`` symbolically.

    Dispatch order: skipped problem types and multi-part answers are
    inconclusive; then the ODE check (when the problem type mentions an ODE
    or the answer contains "y = " and a solution can be extracted); then the
    system-of-equations check; then the single/multiple-root check.

    Returns:
        (True, [])        — confirmed correct
        (False, [issues]) — confirmed wrong
        (None, [])        — inconclusive (proof, geometry, parse failure, etc.)

    Any exception is logged at debug level and reported as inconclusive.
    """
    inconclusive: tuple[bool | None, list[str]] = (None, [])
    try:
        # Multi-part answer ("a) X; b) Y") — can't verify symbolically without splitting the problem
        if _should_skip(problem_type) or re.match(r'^[a-dA-D]\)', final_answer.strip()):
            return None, []

        # ODE path: the problem type names an ODE, or the answer has "y = "
        lowered_type = problem_type.lower()
        ode_hint = (
            "vi phân" in lowered_type
            or "ode" in lowered_type
            or "differential" in lowered_type
            or "y = " in final_answer.lower()
        )
        if ode_hint:
            ode_solution = _extract_ode_solution(final_answer)
            if ode_solution is not None:
                return _verify_ode(problem_text, ode_solution)

        # System of equations: answer has multiple var=val pairs
        system = _parse_system_assignments(final_answer)
        if system:
            return _verify_system(problem_text, system)

        # Single/multiple roots
        roots = _parse_candidates(final_answer)
        if roots:
            return _verify_equation(problem_text, roots)

        return inconclusive
    except Exception as exc:
        logger.debug("sympy_verify top-level exception: %s", exc)
        return None, []
backend/app/math_wiki/agents/validator.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from openai import AsyncOpenAI
4
+ from app.config import get_settings
5
+ from app.agent.core import call_with_retry
6
+ from app.math_wiki.prompts import MODE_PROMPTS
7
+ from app.math_wiki.utils import _extract_json
8
+ from app.math_wiki.schemas import WikiUnit, SolverOutput, ValidationResult
9
+ from app.math_wiki.agents.sympy_verifier import sympy_verify
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
async def validate(
    client: AsyncOpenAI,
    solver_output: SolverOutput,
    context: list[WikiUnit],
    problem_text: str = "",
) -> ValidationResult:
    """Validate a solver result with the LLM, then let SymPy veto it.

    The solver output (without ``used_knowledge_ids``) and the full context
    units are sent under the ``VALIDATE`` system prompt. When
    ``problem_text`` is given and ``sympy_verify`` reports the answer as
    confirmed wrong, the result is forced to invalid and the SymPy issues are
    appended to the LLM's issues.

    Args:
        client: OpenAI-compatible async client.
        solver_output: The solution to validate.
        context: Knowledge units shown to the validator.
        problem_text: Original problem; enables the SymPy check when non-empty.

    Returns:
        The validation verdict. A reply that is not valid JSON, or that does
        not fit ``ValidationResult``, yields ``valid=False`` with no issues;
        the SymPy check is not run in that case.
    """
    settings = get_settings()
    solver_dict = {
        "problem_type": solver_output.problem_type,
        "steps": solver_output.steps,
        "final_answer": solver_output.final_answer,
        "confidence": solver_output.confidence,
    }
    payload = json.dumps({
        "solver_output": solver_dict,
        "context": [u.model_dump() for u in context],
    })
    response = await call_with_retry(
        client,
        model=settings.default_model,
        messages=[
            {"role": "system", "content": MODE_PROMPTS["VALIDATE"]},
            {"role": "user", "content": payload},
        ],
        max_tokens=600,
    )
    content = _extract_json(response.choices[0].message.content or "{}")
    try:
        parsed = json.loads(content)
    except json.JSONDecodeError:
        logger.warning("Validator returned malformed JSON — skipping UI issue")
        return ValidationResult(valid=False, issues=[])
    try:
        # TypeError: reply is not a mapping (e.g. a bare array).
        # ValueError: covers the model's schema validation error for missing
        # or wrongly typed fields.
        llm_result = ValidationResult(**parsed)
    except (TypeError, ValueError) as exc:
        logger.warning("Validator returned JSON with an unexpected shape (%s) — skipping UI issue", exc)
        return ValidationResult(valid=False, issues=[])

    # Deterministic override: if SymPy confirms the answer is wrong, trust it over LLM.
    if problem_text:
        sympy_valid, sympy_issues = sympy_verify(
            problem_text=problem_text,
            final_answer=solver_output.final_answer,
            problem_type=solver_output.problem_type,
        )
        if sympy_valid is False:
            logger.debug("SymPy overrides LLM validation — issues: %s", sympy_issues)
            return ValidationResult(valid=False, issues=llm_result.issues + sympy_issues)

    return llm_result
backend/app/math_wiki/autoirt.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AutoIRT: data-driven IRT parameter recalibration from response logs.
2
+
3
+ Uses MLE to estimate irt_a (discrimination) and irt_b (difficulty) from
4
+ accumulated solution_logs responses. Requires minimum 50 responses per question.
5
+ irt_c (guessing) is fixed at 0.25 for MCQ (4 choices).
6
+ """
7
+ import logging
8
+ import math
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ _MIN_RESPONSES = 50
13
+ _GUESSING = 0.25
14
+
15
+
16
+ def _irt_prob(theta: float, a: float, b: float, c: float = _GUESSING) -> float:
17
+ """3PL IRT probability of correct response."""
18
+ return c + (1 - c) / (1 + math.exp(-a * (theta - b)))
19
+
20
+
21
def _mle_difficulty(correct: int, total: int, a: float = 1.0, c: float = _GUESSING) -> float:
    """Estimate irt_b from the observed proportion of correct responses.

    The 3PL curve is inverted at theta = 0, giving
    ``b = -(1 / a) * log((p - c) / (1 - p))``.

    Args:
        correct: Number of correct responses.
        total: Number of responses; ``0`` yields ``0.0``.
        a: Discrimination used for the inversion.
        c: Guessing parameter.

    Returns:
        Difficulty clamped to [-3.0, 3.0] and rounded to two decimals;
        ``0.0`` when the inversion is not computable.
    """
    if total == 0:
        return 0.0

    # Keep the proportion strictly between the guessing floor and 1 so the
    # log-odds below stays finite.
    floor = c + 0.01
    p = max(floor, min(0.99, correct / total))

    try:
        odds = (p - c) / (1 - p)
        b = -(1 / a) * math.log(odds)
    except (ValueError, ZeroDivisionError):
        b = 0.0

    bounded = min(3.0, b)
    bounded = max(-3.0, bounded)
    return round(bounded, 2)
32
+
33
+
34
def _mle_discrimination(correct: int, total: int, irt_b: float) -> float:
    """Estimate irt_a from the proportion correct and the difficulty.

    Computes ``p * (1 - p) / phi(irt_b)``, where ``phi`` is the standard
    normal density (the original author labels this "Bock's approximation").

    Args:
        correct: Number of correct responses.
        total: Number of responses.
        irt_b: Difficulty estimate for the same question.

    Returns:
        Discrimination clamped to [0.5, 3.0] and rounded to two decimals.
        The neutral value ``1.0`` is returned when there are fewer than
        ``_MIN_RESPONSES`` responses, when every or no response is correct,
        or when the density at ``irt_b`` is below 0.001.
    """
    neutral = 1.0
    if total < _MIN_RESPONSES:
        return neutral

    p = correct / total
    if p >= 1 or p <= 0:
        return neutral

    # Standard normal density evaluated at the difficulty.
    density = math.exp(-irt_b ** 2 / 2) / math.sqrt(2 * math.pi)
    if density < 0.001:
        return neutral

    raw = (p * (1 - p)) / density
    return round(max(0.5, min(3.0, raw)), 2)
47
+
48
+
49
async def run_recalibration(pool) -> dict:
    """Recompute irt_a / irt_b for every question with enough responses.

    Aggregates ``solution_logs`` per ``problem_id`` (rows with a NULL
    ``problem_id`` or ``actual_correct`` are ignored), keeps questions with
    at least ``_MIN_RESPONSES`` responses, and writes the new estimates to
    ``problems`` one row at a time.

    Args:
        pool: Database handle exposing awaitable ``fetch`` and ``execute``.

    Returns:
        ``{"recalibrated": 0, "message": ...}`` when no question qualifies,
        otherwise ``{"recalibrated": <count>, "min_responses_threshold": ...}``.
    """
    tallies = await pool.fetch("""
        SELECT sl.problem_id,
               COUNT(*) as total,
               SUM(CASE WHEN sl.actual_correct = 1 THEN 1 ELSE 0 END) as correct
        FROM solution_logs sl
        WHERE sl.problem_id IS NOT NULL AND sl.actual_correct IS NOT NULL
        GROUP BY sl.problem_id
        HAVING total >= ?
    """, _MIN_RESPONSES)

    if not tallies:
        return {"recalibrated": 0, "message": f"No questions with ≥{_MIN_RESPONSES} responses yet"}

    updated = 0
    for updated, tally in enumerate(tallies, start=1):
        n_correct, n_total = tally["correct"], tally["total"]
        # Difficulty first: the discrimination estimate depends on it.
        irt_b = _mle_difficulty(n_correct, n_total)
        irt_a = _mle_discrimination(n_correct, n_total, irt_b)
        await pool.execute(
            "UPDATE problems SET irt_a = ?, irt_b = ? WHERE problem_id = ?",
            irt_a, irt_b, tally["problem_id"],
        )

    return {"recalibrated": updated, "min_responses_threshold": _MIN_RESPONSES}
backend/app/math_wiki/bobcat.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """BOBCAT: Bilevel Optimization-Based CAT (data-gated).
2
+
3
+ Reference: arXiv 2108.07386, IJCAI 2021
4
+ GitHub: github.com/arghosh/BOBCAT
5
+
6
+ This module provides the BOBCAT question selection policy as a drop-in
7
+ replacement for MaxInformationSelector once sufficient data is available.
8
+
9
+ DATA GATE: Requires ≥500 completed adaptive exam sessions in exam_sessions table.
10
+ Deploy: Train offline with `python -m app.math_wiki.bobcat_train`, then
11
+ set BOBCAT_ENABLED=true in environment to activate.
12
+ """
13
+ import logging
14
+ import os
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
# Feature flag, read once at import time from the BOBCAT_ENABLED variable.
_ENABLED = os.environ.get("BOBCAT_ENABLED", "false").lower() == "true"
# Session count named by the module's data gate; not checked in this module.
_MIN_SESSIONS = 500

# Trained policy network; stays None until something assigns a model.
_model = None


def is_ready() -> bool:
    """Report whether the BOBCAT policy can be used.

    Returns:
        True only when the BOBCAT_ENABLED flag was set to "true" at import
        time and a model object has been assigned to ``_model``.
    """
    if not _ENABLED:
        return False
    return _model is not None
27
+
28
+
29
def _classical_select(item_params, administered_items, est_theta: float) -> int:
    """Pick the next item without the neural policy.

    Tries catsim's ``MaxInformationSelector`` first; if that fails for any
    reason, returns the first index not yet administered (or ``0`` when
    every item has been used).
    """
    try:
        from catsim.selection import MaxInformationSelector
        import numpy as np
        selector = MaxInformationSelector()
        return int(selector.select(
            items=item_params,
            administered_items=np.array(administered_items),
            est_theta=est_theta,
        ))
    except Exception as exc:
        logger.warning("BOBCAT fallback to MaxInfo failed: %s", exc)
        # Last resort: pick first un-administered item
        admin_set = set(administered_items)
        for i in range(len(item_params)):
            if i not in admin_set:
                return i
        return 0


def select_next_question(
    item_params,         # np.ndarray shape (n_items, 4) — [a, b, c, d]
    administered_items,  # list[int] — indices already administered
    est_theta: float,    # current ability estimate
) -> int:
    """Select the next question using the BOBCAT policy.

    Args:
        item_params: Item parameter table, one row per item.
        administered_items: Indices of items already shown.
        est_theta: Current ability estimate.

    Returns:
        Index of the chosen item. The classical selector is used when the
        policy is not ready, and also when the neural policy raises. The
        failure path goes straight to the classical selector; re-entering
        this function while ``is_ready()`` is still true would recurse
        without bound.
    """
    if not is_ready():
        # Graceful degradation: use classical Fisher Information selection
        return _classical_select(item_params, administered_items, est_theta)

    # BOBCAT neural policy (activated when model loaded)
    try:
        import torch
        import numpy as np
        history_vec = _encode_history(item_params, administered_items, est_theta)
        with torch.no_grad():
            scores = _model(torch.tensor(history_vec, dtype=torch.float32).unsqueeze(0))
        scores = scores.squeeze(0).numpy()
        # Mask already-administered items
        for idx in set(administered_items):
            scores[idx] = -1e9
        return int(np.argmax(scores))
    except Exception as exc:
        logger.warning("BOBCAT selection failed (%s), falling back", exc)
        return _classical_select(item_params, administered_items, est_theta)
74
+
75
+
76
+ def _encode_history(item_params, administered_items, est_theta: float):
77
+ """Encode student history as fixed-size input vector for BOBCAT network."""
78
+ import numpy as np
79
+ n_items = len(item_params)
80
+ vec = np.zeros(n_items + 1)
81
+ for idx in administered_items:
82
+ if 0 <= idx < n_items:
83
+ vec[idx] = 1.0
84
+ vec[-1] = est_theta / 3.0 # normalize theta to [-1, 1] approx
85
+ return vec
backend/app/math_wiki/cache.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LRU semantic cache for retrieval results.
2
+
3
+ Key: normalized query text hash.
4
+ Value: list of wiki_unit IDs (retrieval result).
5
+ TTL: 1 hour. Max entries: 512.
6
+ """
7
+ import hashlib
8
+ import logging
9
+ import time
10
+ from collections import OrderedDict
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ _MAX_ENTRIES = 512
15
+ _TTL_SECONDS = 3600
16
+
17
+ _cache: OrderedDict[str, tuple[list[str], float]] = OrderedDict()
18
+
19
+
20
+ def _key(query: str) -> str:
21
+ normalized = " ".join(query.lower().split())
22
+ return hashlib.sha256(normalized.encode()).hexdigest()[:16]
23
+
24
+
25
+ def get(query: str) -> list[str] | None:
26
+ k = _key(query)
27
+ entry = _cache.get(k)
28
+ if entry is None:
29
+ return None
30
+ ids, ts = entry
31
+ if time.time() - ts > _TTL_SECONDS:
32
+ del _cache[k]
33
+ return None
34
+ _cache.move_to_end(k)
35
+ return ids
36
+
37
+
38
+ def put(query: str, ids: list[str]) -> None:
39
+ k = _key(query)
40
+ _cache[k] = (ids, time.time())
41
+ _cache.move_to_end(k)
42
+ while len(_cache) > _MAX_ENTRIES:
43
+ _cache.popitem(last=False)
backend/app/math_wiki/context_builder.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Ordered context assembly for the math solver.
2
+
3
+ Orders wiki units: theorems → formulas → concepts → procedures → patterns → examples → mistakes.
4
+ Caps total serialized content at ~8000 tokens (approx 32000 chars).
5
+ """
6
+ from app.math_wiki.schemas import WikiUnit
7
+
8
# Sort position per node type: lower values appear earlier in the context.
_NODE_ORDER = {
    node_type: position
    for position, node_type in enumerate(
        (
            "theorem",
            "formula",
            "concept",
            "procedure",
            "pattern",
            "example",
            "mistake",
        ),
        start=1,
    )
}
# Upper bound on the summed length of the serialized entries.
_MAX_CHARS = 32_000


def _rank(unit: WikiUnit) -> int:
    """Return the sort position of *unit* in the assembled context.

    The node type is taken from ``unit.node_type`` when present and truthy,
    otherwise from ``unit.type``, otherwise ``"pattern"``. Types missing
    from ``_NODE_ORDER`` get position 5.
    """
    kind = getattr(unit, "node_type", None)
    if not kind:
        kind = unit.type or "pattern"
    return _NODE_ORDER.get(kind, 5)
23
+
24
+
25
def build_context(units: list[WikiUnit]) -> str:
    """Serialize wiki units into one ordered context string.

    Units are sorted by ``_rank`` and rendered as
    ``[TYPE: TOPIC]\\n<content>\\n---``. Rendering stops at the first entry
    that would push the summed entry length past ``_MAX_CHARS``; that entry
    and every later one are left out.

    Args:
        units: Wiki units to include; may be empty.

    Returns:
        The entries joined with newlines, or ``""`` for no units.
    """
    if not units:
        return ""

    chunks = []
    used = 0
    for unit in sorted(units, key=_rank):
        kind = getattr(unit, "node_type", None) or unit.type or "unit"
        heading = f"[{kind.upper()}: {(unit.topic or '').upper()}]"
        body = unit.content or ""
        entry = f"{heading}\n{body}\n---"

        # The budget counts entry text only, not the joining newlines.
        projected = used + len(entry)
        if projected > _MAX_CHARS:
            break
        chunks.append(entry)
        used = projected

    return "\n".join(chunks)
backend/app/math_wiki/crag.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Corrective RAG scoring gate.
2
+
3
+ Scores each retrieved candidate against the query using cosine similarity.
4
+ Discards candidates below threshold. If all are discarded, rewrites the query
5
+ and signals a retry.
6
+ """
7
+ import json
8
+ import logging
9
+ import numpy as np
10
+ from app.config import get_settings
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ def _cosine(a: list[float], b: list[float]) -> float:
16
+ va = np.array(a, dtype=np.float32)
17
+ vb = np.array(b, dtype=np.float32)
18
+ denom = (np.linalg.norm(va) + 1e-9) * (np.linalg.norm(vb) + 1e-9)
19
+ return float(np.dot(va, vb) / denom)
20
+
21
+
22
def score_candidates(
    query_embedding: list[float],
    candidates,  # list[WikiUnit]
    threshold: float | None = None,
) -> tuple[list, bool]:
    """Score candidates against the query and drop those below the threshold.

    Args:
        query_embedding: Embedding of the query.
        candidates: Retrieved units; each exposes ``embedding`` either as a
            sequence of floats or as a JSON-encoded string.
        threshold: Minimum cosine similarity. Defaults to the
            ``crag_threshold`` setting, or 0.20 when that is absent.

    Returns:
        ``(filtered_candidates, should_retry)``. Survivors are sorted by
        descending similarity. ``should_retry`` is True only when at least
        one candidate was scored and none reached the threshold. When there
        are no candidates, no query embedding, or nothing could be scored,
        the input is returned unchanged with ``should_retry=False``.
    """
    settings = get_settings()
    if threshold is None:
        cutoff = getattr(settings, "crag_threshold", 0.20)
    else:
        cutoff = threshold

    if not candidates or not query_embedding:
        return candidates, False

    scored = []
    for unit in candidates:
        try:
            raw = unit.embedding
            emb = json.loads(raw) if isinstance(raw, str) else raw
            # Units with an empty or missing embedding are not scored at all.
            if emb:
                scored.append((_cosine(query_embedding, emb), unit))
        except Exception:
            # An unusable embedding is recorded with score 0.0, so it only
            # survives when the cutoff is 0.0 or lower.
            scored.append((0.0, unit))

    if not scored:
        return candidates, False

    kept = [pair for pair in scored if pair[0] >= cutoff]
    if not kept:
        logger.info("CRAG: all %d candidates below threshold %.2f — flagging retry", len(scored), cutoff)
        return [], True  # signal retry

    kept.sort(key=lambda pair: pair[0], reverse=True)
    return [unit for _, unit in kept], False
backend/app/math_wiki/deep_cat.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Deep CAT with Reinforcement Learning (data-gated).
2
+
3
+ Reference: arXiv 2502.19275, 2025
4
+ Uses double deep Q-learning for long-term optimal item selection.
5
+
6
+ DATA GATE: Requires ≥5,000 completed adaptive exam sessions.
7
+ Current system needs significant user history before training.
8
+ Deploy: Accumulate exam_sessions data, then run offline RL training.
9
+ Set DEEP_CAT_ENABLED=true to activate once trained.
10
+ """
11
+ import logging
12
+ import os
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
# Feature flag, read once at import time from the DEEP_CAT_ENABLED variable.
_ENABLED = os.environ.get("DEEP_CAT_ENABLED", "false").lower() == "true"
# Session count named by the module's data gate; not checked in this module.
_MIN_SESSIONS = 5_000

_q_network = None       # Primary Q-network
_target_network = None  # Target Q-network (double DQN)


def is_ready() -> bool:
    """Report whether the Deep CAT policy can be used.

    Returns:
        True only when the DEEP_CAT_ENABLED flag was set to "true" at import
        time and a Q-network has been assigned to ``_q_network``.
    """
    if not _ENABLED:
        return False
    return _q_network is not None
25
+
26
+
27
def _first_unadministered(administered_items, n_items: int) -> int:
    """Return the lowest item index not yet administered, or 0 if none is left."""
    admin_set = set(administered_items)
    return next((i for i in range(n_items) if i not in admin_set), 0)


def select_next_question(
    state_vector,        # np.ndarray — encoded student state
    administered_items,  # list[int]
    n_items: int,
) -> int:
    """Select the next question via the RL Q-network.

    Args:
        state_vector: Encoded student state fed to the Q-network.
        administered_items: Indices of items already shown.
        n_items: Total number of items in the pool.

    Returns:
        Index of the chosen item. When the policy is not ready the call is
        delegated to BOBCAT's selector; if that delegation raises, or if the
        Q-network path raises, the first un-administered index is returned
        (``0`` when every item has been used).
    """
    if not is_ready():
        # Cascade: try BOBCAT first, then MaxInfo
        try:
            from app.math_wiki.bobcat import select_next_question as bobcat_select
            # NOTE(review): the state vector is passed where BOBCAT expects
            # item parameters, with a fixed theta of 0.0 — confirm intended.
            return bobcat_select(
                item_params=state_vector,
                administered_items=administered_items,
                est_theta=0.0,
            )
        except Exception as exc:
            # Best-effort cascade: record why it was skipped, then fall through.
            logger.debug("Deep CAT cascade to BOBCAT failed (%s), using raw fallback", exc)
        # Raw fallback
        return _first_unadministered(administered_items, n_items)

    try:
        import torch
        import numpy as np
        admin_set = set(administered_items)
        with torch.no_grad():
            q_values = _q_network(
                torch.tensor(state_vector, dtype=torch.float32).unsqueeze(0)
            ).squeeze(0).numpy()
        q_values[list(admin_set)] = -1e9  # mask administered
        return int(np.argmax(q_values))
    except Exception as exc:
        logger.warning("Deep CAT selection failed (%s), falling back", exc)
        return _first_unadministered(administered_items, n_items)
71
+
72
+
73
class DQNNetwork:
    """Stub for the Double DQN model; no behavior is implemented yet.

    Planned architecture (to be written during the training phase):
        - Input: ``state_dim`` features encoding the student history.
        - Hidden: two ReLU layers of 256 units each.
        - Output: ``n_items`` Q-values, one per question.

    Planned training algorithm:
        - Replay buffer of (state, action, reward, next_state, done) tuples.
        - Target network updated every 100 steps (Polyak averaging,
          tau=0.005).
        - Reward: change in ability-estimate accuracy at exam completion.
        - Epsilon-greedy exploration, epsilon decaying 1.0 → 0.05 over 10k
          steps.
    """
backend/app/math_wiki/difficulty_estimator.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LLM-based IRT difficulty calibration.
2
+
3
+ Uses Claude Haiku's self-reported confidence to estimate irt_b (difficulty)
4
+ for each question. Confidence near 1.0 → easy (irt_b negative), near 0 → hard (irt_b positive).
5
+ Mapping: irt_b = (0.5 - confidence) * 4 (range: -2.0 to +2.0)
6
+ """
7
+ import json
8
+ import logging
9
+ from openai import AsyncOpenAI
10
+ from app.config import get_settings
11
+ from app.agent.core import call_with_retry
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
async def calibrate_question(client: AsyncOpenAI, problem_id: str, problem_text: str) -> float | None:
    """Estimate irt_b for one question from the model's stated confidence.

    The model is asked (in Vietnamese) to solve the problem and report a
    confidence between 0.0 and 1.0 as JSON. The confidence is clamped to
    [0, 1] and mapped with ``irt_b = (0.5 - confidence) * 4``.

    Args:
        client: OpenAI-compatible async client handed to ``call_with_retry``.
        problem_id: Identifier used only in log messages.
        problem_text: Problem statement embedded in the prompt.

    Returns:
        irt_b in [-2.0, 2.0], rounded to two decimals, or ``None`` when the
        request or the parsing fails. A reply without a ``confidence`` key
        counts as confidence 0.5, which yields 0.0.
    """
    settings = get_settings()
    prompt = (
        f"Giải bài toán sau và đánh giá mức độ tự tin của bạn từ 0.0 đến 1.0:\n\n{problem_text}\n\n"
        "Trả về JSON: {\"confidence\": 0.8} — chỉ confidence, không giải thích thêm."
    )
    try:
        reply = await call_with_retry(
            client,
            model=settings.haiku_model,
            max_tokens=50,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"},
        )
        raw = reply.choices[0].message.content or "{}"
        confidence = float(json.loads(raw).get("confidence", 0.5))
        confidence = max(0.0, min(1.0, confidence))
        # High confidence maps to an easy (negative) difficulty.
        difficulty = round((0.5 - confidence) * 4, 2)
        logger.debug("Calibrated %s: confidence=%.2f → irt_b=%.2f", problem_id, confidence, difficulty)
        return difficulty
    except Exception as exc:
        logger.warning("calibrate_question failed for %s: %s", problem_id, exc)
        return None
40
+
41
+
42
async def run_calibration_job(pool, client: AsyncOpenAI, batch_size: int = 50) -> dict:
    """Calibrate irt_b for one batch of questions still at the default values.

    A question counts as uncalibrated while ``irt_a = 1.0`` and
    ``irt_b = 0.0``. Up to ``batch_size`` such rows are fetched and
    calibrated one after another; only successful estimates are written.

    NOTE(review): a question whose estimate comes out as exactly 0.0 still
    matches the selection filter, so it can be picked up again on the next
    run.

    Args:
        pool: Database handle exposing awaitable ``fetch`` and ``execute``.
        client: OpenAI-compatible async client passed to
            ``calibrate_question``.
        batch_size: Maximum number of questions to process in this call.

    Returns:
        ``{"calibrated": 0, "message": ...}`` when nothing is pending,
        otherwise ``{"calibrated": <count>, "total_remaining": <n>}`` where
        ``total_remaining`` is the number of fetched rows that could not be
        calibrated in this batch.
    """
    pending = await pool.fetch(
        "SELECT problem_id, problem_text FROM problems WHERE irt_a = 1.0 AND irt_b = 0.0 LIMIT ?",
        batch_size,
    )
    if not pending:
        return {"calibrated": 0, "message": "All questions already calibrated"}

    calibrated = 0
    for question in pending:
        estimate = await calibrate_question(client, question["problem_id"], question["problem_text"])
        if estimate is None:
            continue
        await pool.execute(
            "UPDATE problems SET irt_b = ? WHERE problem_id = ?",
            estimate, question["problem_id"],
        )
        calibrated += 1

    return {"calibrated": calibrated, "total_remaining": len(pending) - calibrated}
backend/app/math_wiki/dkvmn.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """DKVMN (Dynamic Key-Value Memory Networks) for knowledge tracing.
2
+
3
+ This module provides:
4
+ 1. The DKVMN PyTorch model class (for offline training)
5
+ 2. A predict_mastery() function that reads from concept_mastery/concept_elo tables
6
+ until a trained model is available.
7
+
8
+ Training: run `python -m app.math_wiki.dkvmn_train` after accumulating solution_logs data.
9
+ The trained model weights are saved to data/dkvmn_weights.pt.
10
+ """
11
+ import logging
12
+ import os
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ _MODEL_PATH = os.path.join(os.path.dirname(__file__), "..", "..", "data", "dkvmn_weights.pt")
17
+ _model = None
18
+ _model_loaded = False
19
+
20
+ # Try to load PyTorch — gracefully degrade if not available
21
+ try:
22
+ import torch
23
+ import torch.nn as nn
24
+ import torch.nn.functional as F
25
+ _TORCH_AVAILABLE = True
26
+ except ImportError:
27
+ _TORCH_AVAILABLE = False
28
+ logger.info("PyTorch not available — DKVMN will use DB mastery fallback")
29
+
30
+
31
if _TORCH_AVAILABLE:
    class DKVMN(nn.Module):
        """Dynamic Key-Value Memory Network for knowledge tracing.

        Components, as built in ``__init__``:
            - ``key_memory``: learned keys, shape (memory_size, key_dim).
            - ``value_memory``: initial knowledge state, shape
              (memory_size, value_dim); copied per batch in ``forward``.
            - ``key_embed``: embedding table mapping concept ids to key_dim.
            - ``erase_linear`` / ``add_linear``: produce the erase and add
              signals for the memory write.
            - ``fc_output``: maps the read vector joined with the concept
              embedding to a single logit.
        """

        def __init__(self, n_concepts: int = 50, memory_size: int = 20,
                     key_dim: int = 50, value_dim: int = 200):
            super().__init__()
            # Attribute names and creation order are kept as-is: they define
            # the state_dict keys and the order of random initialisation.
            self.key_memory = nn.Parameter(torch.randn(memory_size, key_dim))
            self.value_memory = nn.Parameter(torch.randn(memory_size, value_dim))
            self.key_embed = nn.Embedding(n_concepts, key_dim)
            self.erase_linear = nn.Linear(value_dim, value_dim)
            self.add_linear = nn.Linear(value_dim, value_dim)
            self.fc_output = nn.Linear(value_dim + key_dim, 1)

        def forward(self, concept_ids: 'torch.Tensor', responses: 'torch.Tensor'):
            """Predict correctness for every step of each sequence.

            Args:
                concept_ids: (batch, seq_len) concept indices.
                responses: (batch, seq_len) 0/1 correctness.

            Returns:
                (batch, seq_len) tensor of P(correct), one value per step.
            """
            batch_size, seq_len = concept_ids.shape
            # Each sequence in the batch starts from its own copy of the memory.
            memory = self.value_memory.unsqueeze(0).expand(batch_size, -1, -1).clone()
            step_outputs = []
            final_step = seq_len - 1

            for step in range(seq_len):
                query = self.key_embed(concept_ids[:, step])                  # (batch, key_dim)
                # Attention over memory slots.
                attention = F.softmax(query @ self.key_memory.T, dim=-1)       # (batch, memory_size)
                slot_weight = attention.unsqueeze(-1)                          # (batch, memory_size, 1)
                # Read: attention-weighted sum of the slots.
                read = (slot_weight * memory).sum(dim=1)                       # (batch, value_dim)
                # Predict from the read vector joined with the concept embedding.
                features = torch.cat([read, query], dim=-1)
                step_outputs.append(torch.sigmoid(self.fc_output(features)))

                # No write after the last step: nothing would read it.
                if step >= final_step:
                    continue

                # Write: gate the read vector by the response, then erase/add.
                outcome = responses[:, step].float().unsqueeze(-1).expand_as(read)
                gated = read * outcome
                erase = torch.sigmoid(self.erase_linear(gated))
                add = torch.tanh(self.add_linear(gated))
                memory = memory * (1 - slot_weight * erase.unsqueeze(1)) + \
                    slot_weight * add.unsqueeze(1)

            return torch.stack(step_outputs, dim=1).squeeze(-1)
82
+
83
+
84
def load_model(path: str = _MODEL_PATH) -> bool:
    """Load trained DKVMN weights from *path*.

    The module-level ``_model`` and ``_model_loaded`` are updated only after
    the weights have loaded, so a failed attempt never leaves an untrained
    network behind in ``_model``.

    Args:
        path: Location of the saved state dict.

    Returns:
        True when the weights were loaded; False when PyTorch is
        unavailable, the file does not exist, or loading fails (the failure
        is logged as a warning).
    """
    global _model, _model_loaded
    if not _TORCH_AVAILABLE:
        return False
    if not os.path.exists(path):
        return False
    try:
        # The network is built with default hyperparameters, so the saved
        # state dict has to match those shapes.
        network = DKVMN()
        # NOTE(review): torch.load unpickles the file; only load weight
        # files from a trusted location.
        network.load_state_dict(torch.load(path, map_location="cpu"))
        network.eval()
    except Exception as exc:
        logger.warning("Failed to load DKVMN model: %s", exc)
        return False

    _model = network
    _model_loaded = True
    logger.info("DKVMN model loaded from %s", path)
    return True
101
+
102
+
103
async def predict_mastery(pool, user_id: int) -> dict[str, float]:
    """Return concept mastery scores ``{concept_id: score}`` for a user.

    Only the database path is implemented here; no DKVMN model is
    consulted. Rows from ``concept_mastery`` are used when present.
    Otherwise ``concept_elo`` scores are min-max normalized to [0, 1]
    across the user's own concepts and rounded to three decimals.

    Args:
        pool: Database handle exposing an awaitable ``fetch``.
        user_id: User whose mastery is requested.

    Returns:
        Mapping of concept id to score; empty when neither table has rows
        for the user or when a query fails (the failure is logged at debug
        level).
    """
    try:
        mastery_rows = await pool.fetch(
            "SELECT concept_id, mastery_score FROM concept_mastery WHERE user_id = ?",
            user_id,
        )
        if mastery_rows:
            mastery = {}
            for row in mastery_rows:
                mastery[row["concept_id"]] = float(row["mastery_score"])
            return mastery

        # ELO fallback: normalize to 0-1
        elo_rows = await pool.fetch(
            "SELECT concept_id, elo_score FROM concept_elo WHERE user_id = ?",
            user_id,
        )
        if elo_rows:
            ratings = [row["elo_score"] for row in elo_rows]
            lowest = min(ratings)
            highest = max(ratings)
            # All-equal ratings would give a zero span; use 1 to avoid dividing by zero.
            span = highest - lowest or 1
            normalized = {}
            for row in elo_rows:
                normalized[row["concept_id"]] = round((row["elo_score"] - lowest) / span, 3)
            return normalized
    except Exception as exc:
        logger.debug("predict_mastery DB fallback failed: %s", exc)

    return {}