github-actions[bot] committed on
Commit
de4b0cd
·
1 Parent(s): eba7c64

🚀 Auto-deploy backend from GitHub (10d155e)

Browse files
config/env.sample CHANGED
@@ -1,6 +1,6 @@
1
  # Inference provider selection
2
  # CI trigger marker: keep this file touchable to force backend deploy workflow runs when needed.
3
- INFERENCE_PROVIDER=hf_inference
4
  INFERENCE_PRO_ENABLED=true
5
  INFERENCE_PRO_PROVIDER=hf_inference
6
  INFERENCE_GPU_PROVIDER=hf_inference
 
1
  # Inference provider selection
2
  # CI trigger marker: keep this file touchable to force backend deploy workflow runs when needed.
3
+ INFERENCE_PROVIDER=deepseek
4
  INFERENCE_PRO_ENABLED=true
5
  INFERENCE_PRO_PROVIDER=hf_inference
6
  INFERENCE_GPU_PROVIDER=hf_inference
config/models.yaml CHANGED
@@ -1,55 +1,85 @@
1
  models:
2
  primary:
3
- id: Qwen/Qwen3-32B
4
- description: Global default instruction model for interactive Grade 11-12 math tutoring
5
- max_new_tokens: 640
6
- temperature: 0.25
7
  top_p: 0.9
8
 
9
- backup:
10
- - id: meta-llama/Meta-Llama-3-70B-Instruct
11
- description: High-quality model used for harder multi-step prompts
12
- max_new_tokens: 768
13
- temperature: 0.3
14
- top_p: 0.9
15
- - id: google/gemma-2-2b-it
16
- description: Secondary backup with broad instruction coverage
17
- max_new_tokens: 384
18
- temperature: 0.2
19
- top_p: 0.9
 
 
 
 
 
 
 
 
 
20
 
21
- experimental:
22
- - id: mistralai/Mistral-7B-Instruct-v0.3
23
- notes: Prompt/procedure experimentation
24
- - id: meta-llama/Meta-Llama-3-8B-Instruct
25
- notes: Baseline comparison against legacy deployment
26
 
27
  routing:
28
  task_model_map:
29
- # Keep all task defaults aligned to Qwen3-32B.
30
- # Hard prompts can still escalate via runtime policy in inference_client.
31
- chat: Qwen/Qwen3-32B
32
- verify_solution: Qwen/Qwen3-32B
33
- lesson_generation: Qwen/Qwen3-32B
34
- quiz_generation: Qwen/Qwen3-32B
35
- learning_path: Qwen/Qwen3-32B
36
- daily_insight: Qwen/Qwen3-32B
37
- risk_classification: Qwen/Qwen3-32B
38
- risk_narrative: Qwen/Qwen3-32B
 
39
 
40
  task_fallback_model_map:
41
- chat: [] # Chat is strict-primary only (no fallback chain)
 
42
  verify_solution:
43
- - meta-llama/Meta-Llama-3-70B-Instruct # Higher-capacity fallback
44
- - meta-llama/Llama-3.1-8B-Instruct # Second fallback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  task_provider_map:
47
- # All tasks use hf_inference router (Qwen/Qwen3-32B natively supported)
48
- chat: hf_inference
49
- verify_solution: hf_inference
50
- lesson_generation: hf_inference
51
- quiz_generation: hf_inference
52
- learning_path: hf_inference
53
- daily_insight: hf_inference
54
- risk_narrative: hf_inference
55
- risk_classification: hf_inference
 
 
 
1
  models:
2
  primary:
3
+ id: deepseek-chat
4
+ description: Default DeepSeek chat model — all chat tasks, quizzes, lessons, reasoning
5
+ max_new_tokens: 800
6
+ temperature: 0.7
7
  top_p: 0.9
8
 
9
+ rag_primary:
10
+ id: deepseek-reasoner
11
+ description: DeepSeek reasoner — extended reasoning for complex RAG tasks
12
+ max_new_tokens: 1800
13
+ temperature: 0.2
14
+ top_p: 0.9
15
+ enable_thinking_tasks:
16
+ - rag_lesson
17
+ - verify_solution
18
+ - risk_narrative
19
+ no_thinking_tasks:
20
+ - chat
21
+ - quiz_generation
22
+ - learning_path
23
+ - daily_insight
24
+
25
+ embedding:
26
+ id: BAAI/bge-small-en-v1.5
27
+ description: Embedding model for RAG retrieval — curriculum vectorstore ingestion and semantic search
28
+ note: Not part of the generation pipeline. Read from EMBEDDING_MODEL env var only. Not swappable via admin panel.
29
 
30
+ model_capabilities:
31
+ sequential_only:
32
+ - deepseek-reasoner
33
+ supports_thinking:
34
+ - deepseek-reasoner
35
 
36
  routing:
37
  task_model_map:
38
+ chat: deepseek-chat
39
+ verify_solution: deepseek-reasoner
40
+ lesson_generation: deepseek-chat
41
+ quiz_generation: deepseek-chat
42
+ learning_path: deepseek-chat
43
+ daily_insight: deepseek-chat
44
+ risk_classification: deepseek-chat
45
+ risk_narrative: deepseek-reasoner
46
+ rag_lesson: deepseek-reasoner
47
+ rag_problem: deepseek-chat
48
+ rag_analysis_context: deepseek-chat
49
 
50
  task_fallback_model_map:
51
+ chat:
52
+ - deepseek-chat
53
  verify_solution:
54
+ - deepseek-chat
55
+ lesson_generation:
56
+ - deepseek-chat
57
+ quiz_generation:
58
+ - deepseek-chat
59
+ learning_path:
60
+ - deepseek-chat
61
+ daily_insight:
62
+ - deepseek-chat
63
+ risk_classification:
64
+ - deepseek-chat
65
+ risk_narrative:
66
+ - deepseek-chat
67
+ rag_lesson:
68
+ - deepseek-chat
69
+ rag_problem:
70
+ - deepseek-chat
71
+ rag_analysis_context:
72
+ - deepseek-chat
73
 
74
  task_provider_map:
75
+ chat: deepseek
76
+ verify_solution: deepseek
77
+ lesson_generation: deepseek
78
+ quiz_generation: deepseek
79
+ learning_path: deepseek
80
+ daily_insight: deepseek
81
+ risk_classification: deepseek
82
+ risk_narrative: deepseek
83
+ rag_lesson: deepseek
84
+ rag_problem: deepseek
85
+ rag_analysis_context: deepseek
rag/firebase_storage_loader.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Firebase Storage PDF loader for curriculum ingestion.
3
+ Downloads PDFs from Firebase Storage and extracts text for ChromaDB indexing.
4
+ """
5
+
6
from __future__ import annotations

import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
12
+
13
+ logger = logging.getLogger("mathpulse.fb_storage_loader")
14
+
15
+ _FIREBASE_INITIALIZED = False
16
+
17
+
18
def _init_firebase_storage() -> Tuple[Any, Any]:
    """Return ``(storage_module, bucket)`` for the default Firebase app.

    The default app is initialized on first use. Credentials are looked up in
    this order: the ``FIREBASE_SERVICE_ACCOUNT_JSON`` env var, the mounted
    secret file ``/secret/FIREBASE_SERVICE_ACCOUNT_JSON``, the file named by
    ``FIREBASE_SERVICE_ACCOUNT_FILE``, and finally application-default
    credentials. The bucket name comes from ``FIREBASE_STORAGE_BUCKET``.

    Returns:
        ``(firebase_admin.storage, bucket)`` on success, or ``(None, None)``
        when ``firebase_admin`` is missing, initialization fails, or the
        bucket cannot be obtained. This function logs instead of raising.
    """
    global _FIREBASE_INITIALIZED

    try:
        import firebase_admin
        from firebase_admin import credentials, storage
    except ImportError:
        logger.warning("firebase_admin not installed")
        return None, None

    if _FIREBASE_INITIALIZED:
        try:
            return storage, storage.bucket()
        except Exception as exc:
            logger.warning("Firebase storage unavailable: %s", exc)
            # Drop the cached flag so the next call re-checks the app state.
            _FIREBASE_INITIALIZED = False
            return None, None

    # Use the public API rather than the private ``firebase_admin._apps``:
    # ``get_app()`` raises ValueError when no *default* app exists, and the
    # default app is the one ``storage.bucket()`` relies on.
    try:
        firebase_admin.get_app()
    except ValueError:
        pass
    else:
        _FIREBASE_INITIALIZED = True
        try:
            return storage, storage.bucket()
        except Exception as exc:
            logger.warning("Firebase storage bucket unavailable: %s", exc)
            return None, None

    sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON") or _read_service_account_secret()
    sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")

    try:
        if sa_json:
            import json as _json

            creds = credentials.Certificate(_json.loads(sa_json))
        elif sa_file and Path(sa_file).exists():
            creds = credentials.Certificate(sa_file)
        else:
            creds = credentials.ApplicationDefault()

        firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
        _FIREBASE_INITIALIZED = True
        return storage, storage.bucket()
    except Exception as exc:
        logger.warning("Firebase init failed: %s", exc)
        return None, None


def _read_service_account_secret(
    secret_path: str = "/secret/FIREBASE_SERVICE_ACCOUNT_JSON",
) -> Optional[str]:
    """Read the service-account JSON from the HF Spaces secret mount, if present.

    A missing file is the normal case and yields ``None`` silently. Any other
    read or decode failure is logged and then treated the same way, so a
    broken mount is visible in the logs instead of being swallowed.
    """
    try:
        return Path(secret_path).read_text(encoding="utf-8").strip()
    except FileNotFoundError:
        return None
    except (OSError, UnicodeError) as exc:
        logger.warning("Could not read Firebase secret at %s: %s", secret_path, exc)
        return None
76
+
77
+
78
def download_pdf_from_storage(storage_path: str, dest_path: Optional[str] = None) -> Optional[bytes]:
    """Fetch one object from Firebase Storage and hand back its raw bytes.

    Args:
        storage_path: Object name inside the bucket.
        dest_path: Optional local file path; when given, the bytes are also
            written there (parent directories are created as needed).

    Returns:
        The downloaded bytes, or ``None`` when storage is unavailable, the
        object does not exist, or any step (including the local save) fails.
    """
    bucket = _init_firebase_storage()[1]
    if bucket is None:
        logger.warning("Firebase Storage not available, skipping download")
        return None

    try:
        remote = bucket.blob(storage_path)
        if not remote.exists():
            logger.warning("Blob does not exist: %s", storage_path)
            return None

        payload = remote.download_as_bytes()
        logger.info("Downloaded %s (%d bytes)", storage_path, len(payload))

        if dest_path:
            target = Path(dest_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(payload)
            logger.info("Saved to %s", dest_path)
    except Exception as err:
        logger.error("Failed to download %s: %s", storage_path, err)
        return None

    return payload
103
+
104
+
105
def list_curriculum_blobs(prefix: str = "curriculum/") -> List[Dict[str, Any]]:
    """List the PDF objects stored under ``prefix`` in Firebase Storage.

    Only objects whose name ends with ``.pdf`` are reported; everything else
    under the prefix is skipped.

    Args:
        prefix: Object-name prefix to list.

    Returns:
        One dict per PDF with keys ``name``, ``size``, ``updated`` (string
        form of the blob's update time, or ``None``) and ``download_url``.
        Returns an empty list when Firebase Storage is unavailable.
    """
    from urllib.parse import quote

    _, bucket = _init_firebase_storage()
    if bucket is None:
        return []

    base_url = f"https://storage.googleapis.com/{bucket.name}"
    return [
        {
            "name": blob.name,
            "size": blob.size,
            "updated": str(blob.updated) if blob.updated else None,
            # Percent-encode the object name so names containing spaces or
            # other reserved characters still yield a valid URL; "/" is kept
            # because it separates the pseudo-folders in the object name.
            # NOTE(review): this URL form presumably only works for publicly
            # readable objects - confirm against the bucket's access rules.
            "download_url": f"{base_url}/{quote(blob.name, safe='/~')}",
        }
        for blob in bucket.list_blobs(prefix=prefix)
        if blob.name.endswith(".pdf")
    ]
122
+
123
+
124
+ # NOTE: Curriculum guide PDFs (shaping papers) are stored in Firebase Storage
125
+ # for system reference but are NOT included in RAG ingestion because they
126
+ # contain only learning objectives and course descriptions — insufficient
127
+ # content for lesson generation (typically <10 chunks each).
128
+ #
129
+ # Only SDO teaching modules (full lesson content with examples and problems)
130
+ # are included in the RAG pipeline.
131
+
132
# Registry of the PDFs listed for RAG ingestion, keyed by their object path in
# Firebase Storage. Each entry carries:
#   subject        - human-readable subject name
#   subjectId      - short subject identifier
#   type           - document kind (every entry here is "sdo_module")
#   content_domain - content domain label ("general" or "statistics")
#   quarter        - quarter number as an int
#   storage_path   - the same object path as the entry's key
PDF_METADATA: Dict[str, dict] = {
    # General Mathematics Q1 — SDO Navotas teaching module (100 pages, ~117k chars)
    "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": {
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "type": "sdo_module",
        "content_domain": "general",
        "quarter": 1,
        "storage_path": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
    },
    # General Mathematics Q2 — Interest & Annuities modules (~27-35 pages each)
    "curriculum/general_math/genmath_q2_mod1_simpleandcompoundinterests_v2.pdf": {
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "type": "sdo_module",
        "content_domain": "general",
        "quarter": 2,
        "storage_path": "curriculum/general_math/genmath_q2_mod1_simpleandcompoundinterests_v2.pdf",
    },
    "curriculum/general_math/genmath_q2_mod2_interestmaturityfutureandpresentvaluesinsimpleandcompoundinterests_v2.pdf": {
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "type": "sdo_module",
        "content_domain": "general",
        "quarter": 2,
        "storage_path": "curriculum/general_math/genmath_q2_mod2_interestmaturityfutureandpresentvaluesinsimpleandcompoundinterests_v2.pdf",
    },
    "curriculum/general_math/genmath_q2_mod3_SolvingProblemsInvolvingSimpleandCompoundInterest_v2.pdf": {
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "type": "sdo_module",
        "content_domain": "general",
        "quarter": 2,
        "storage_path": "curriculum/general_math/genmath_q2_mod3_SolvingProblemsInvolvingSimpleandCompoundInterest_v2.pdf",
    },
    "curriculum/general_math/genmath_q2_mod4_simpleandgeneralannuities_v2.pdf": {
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "type": "sdo_module",
        "content_domain": "general",
        "quarter": 2,
        "storage_path": "curriculum/general_math/genmath_q2_mod4_simpleandgeneralannuities_v2.pdf",
    },
    # Statistics and Probability — Full textbook (331 pages, ~607k chars)
    "curriculum/stat_prob/Full.pdf": {
        "subject": "Statistics and Probability",
        "subjectId": "stats-prob",
        "type": "sdo_module",
        "content_domain": "statistics",
        "quarter": 1,
        "storage_path": "curriculum/stat_prob/Full.pdf",
    },
}
scripts/download_vectorstore_from_firebase.py CHANGED
@@ -14,7 +14,7 @@ logger = logging.getLogger("mathpulse.download_vectorstore")
14
 
15
  sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
16
 
17
- from backend.rag.firebase_storage_loader import _init_firebase_storage
18
 
19
  REMOTE_PREFIX = "vectorstore/"
20
  LOCAL_DEST_DIR = Path("/app/datasets/vectorstore")
 
14
 
15
  sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
16
 
17
+ from hf_space_test.rag.firebase_storage_loader import _init_firebase_storage
18
 
19
  REMOTE_PREFIX = "vectorstore/"
20
  LOCAL_DEST_DIR = Path("/app/datasets/vectorstore")
services/inference_client.py CHANGED
@@ -100,6 +100,11 @@ class InferenceClient:
100
  # Featherless AI for Qwen math models (used as fallback when HF router fails)
101
  self.featherless_api_key = os.getenv("FEATHERLESS_API_KEY", "")
102
  self.featherless_chat_url = os.getenv("FEATHERLESS_CHAT_URL", "https://api.featherless.ai/openai/v1/chat/completions")
 
 
 
 
 
103
 
104
  self.local_space_url = _normalize_local_space_url(
105
  os.getenv("INFERENCE_LOCAL_SPACE_URL", "http://127.0.0.1:7860")
@@ -591,8 +596,11 @@ class InferenceClient:
591
  route = self._resolve_route_label(provider, req.task_type)
592
  if provider == "local_space":
593
  return self._call_local_space(req, provider=provider, route=route, fallback_depth=fallback_depth)
594
-
595
- # All models use HF inference router directly (including Qwen/Qwen3-32B)
 
 
 
596
  return self._call_hf_inference(req, provider=provider, route=route, fallback_depth=fallback_depth)
597
 
598
  def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
@@ -931,6 +939,79 @@ class InferenceClient:
931
  self._bump_metric("requests_ok", 1)
932
  return text
933
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
934
  def _call_local_space(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
935
  target_model = req.model or self.default_model
936
  url = f"{self.local_space_url.rstrip('/')}{self.local_generate_path}"
 
100
  # Featherless AI for Qwen math models (used as fallback when HF router fails)
101
  self.featherless_api_key = os.getenv("FEATHERLESS_API_KEY", "")
102
  self.featherless_chat_url = os.getenv("FEATHERLESS_CHAT_URL", "https://api.featherless.ai/openai/v1/chat/completions")
103
+
104
+ # DeepSeek API (primary inference provider)
105
+ self.deepseek_api_key = os.getenv("DEEPSEEK_API_KEY", "")
106
+ self.deepseek_base_url = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com").rstrip("/")
107
+ self.deepseek_chat_url = f"{self.deepseek_base_url}/v1/chat/completions"
108
 
109
  self.local_space_url = _normalize_local_space_url(
110
  os.getenv("INFERENCE_LOCAL_SPACE_URL", "http://127.0.0.1:7860")
 
596
  route = self._resolve_route_label(provider, req.task_type)
597
  if provider == "local_space":
598
  return self._call_local_space(req, provider=provider, route=route, fallback_depth=fallback_depth)
599
+
600
+ if provider == "deepseek":
601
+ return self._call_deepseek(req, provider=provider, route=route, fallback_depth=fallback_depth)
602
+
603
+ # All other providers use HF inference router
604
  return self._call_hf_inference(req, provider=provider, route=route, fallback_depth=fallback_depth)
605
 
606
  def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
 
939
  self._bump_metric("requests_ok", 1)
940
  return text
941
 
942
def _call_deepseek(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
    """Send one chat-completion request to the DeepSeek API and return the reply text.

    The request goes to ``self.deepseek_chat_url`` (an OpenAI-compatible
    endpoint) through ``self._post_with_retry``.

    Args:
        req: The inference request; ``model``, ``messages``, ``max_new_tokens``,
            ``temperature``, ``top_p``, ``task_type`` and ``request_tag`` are read.
        provider: Provider label used for timeout selection, retries and logs.
        route: Route label forwarded to the retry helper and the call log.
        fallback_depth: Position in the fallback chain, forwarded for logging.

    Returns:
        The text extracted from the response by ``self._extract_text``.

    Raises:
        RuntimeError: If ``DEEPSEEK_API_KEY`` is not configured or the API
            answers with a non-200 status.
        ValueError: Presumably, if a 200 response body is not valid JSON
            (whatever ``resp.json()`` raises is re-raised unchanged).
    """
    if not self.deepseek_api_key:
        raise RuntimeError("DEEPSEEK_API_KEY is not set")

    target_model = req.model or self.default_model
    url = self.deepseek_chat_url

    # Log only the part of the model id before any ":" suffix; the full id
    # is still what gets sent to the API.
    model_base = target_model.split(":", 1)[0]
    LOGGER.debug(
        "📌 Calling DeepSeek: task=%s model=%s route=%s depth=%s",
        req.task_type,
        model_base,
        route,
        fallback_depth,
    )

    payload: Dict[str, object] = {
        "model": target_model,
        "messages": req.messages,
        "stream": False,
        "max_tokens": req.max_new_tokens or self.default_max_new_tokens,
        "temperature": req.temperature,
        "top_p": req.top_p,
    }
    headers = {
        "Authorization": f"Bearer {self.deepseek_api_key}",
        "Content-Type": "application/json",
        "X-MathPulse-Task": (req.task_type or "default").strip().lower(),
    }

    timeout = self._timeout_for(req, provider)

    resp, latency_ms, retry_attempt = self._post_with_retry(
        url,
        headers=headers,
        payload=payload,
        timeout=timeout,
        provider=provider,
        model=target_model,
        task_type=req.task_type,
        request_tag=req.request_tag,
        fallback_depth=fallback_depth,
        route=route,
    )
    self._bump_bucket("status_code_counts", str(resp.status_code), 1)
    if resp.status_code != 200:
        self._bump_metric("requests_error", 1)
        raise RuntimeError(f"DeepSeek API error {resp.status_code}: {resp.text}")

    try:
        data = resp.json()
    except ValueError:
        # A 200 with an unparseable body is still a failed request; count it
        # before letting the original exception propagate.
        self._bump_metric("requests_error", 1)
        raise
    text = self._extract_text(data)

    # Prefer the token counts reported in the response's "usage" object over
    # a whitespace word count, which is not a token count.
    usage = data.get("usage") if isinstance(data, dict) else None
    if not isinstance(usage, dict):
        usage = {}
    input_tokens = usage.get("prompt_tokens")
    output_tokens = usage.get("completion_tokens")

    LOGGER.info(
        "✅ DeepSeek success: task=%s model=%s latency=%.0fms tokens_out=%s",
        req.task_type,
        model_base,
        latency_ms,
        output_tokens if output_tokens is not None else f"~{len(text.split())} words",
    )

    log_model_call(
        LOGGER,
        provider=provider,
        model=target_model,
        endpoint=url,
        latency_ms=latency_ms,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        status="ok",
        task_type=req.task_type,
        request_tag=req.request_tag,
        retry_attempt=retry_attempt,
        fallback_depth=fallback_depth,
        route=route,
    )
    self._bump_metric("requests_ok", 1)
    return text
1014
+
1015
  def _call_local_space(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
1016
  target_model = req.model or self.default_model
1017
  url = f"{self.local_space_url.rstrip('/')}{self.local_generate_path}"