Spaces:

Deign86
/

mathpulse-api-v3test

Running

App Files Files Community

github-actions[bot] committed on 18 days ago

Commit

de4b0cd

1 Parent(s): eba7c64

🚀 Auto-deploy backend from GitHub (10d155e)

Browse files

Files changed (5) hide show

config/env.sample +1 -1
config/models.yaml +72 -42
rag/firebase_storage_loader.py +184 -0
scripts/download_vectorstore_from_firebase.py +1 -1
services/inference_client.py +83 -2

config/env.sample CHANGED Viewed

@@ -1,6 +1,6 @@
 # Inference provider selection
 # CI trigger marker: keep this file touchable to force backend deploy workflow runs when needed.
-INFERENCE_PROVIDER=hf_inference
 INFERENCE_PRO_ENABLED=true
 INFERENCE_PRO_PROVIDER=hf_inference
 INFERENCE_GPU_PROVIDER=hf_inference

 # Inference provider selection
 # CI trigger marker: keep this file touchable to force backend deploy workflow runs when needed.
+INFERENCE_PROVIDER=deepseek
 INFERENCE_PRO_ENABLED=true
 INFERENCE_PRO_PROVIDER=hf_inference
 INFERENCE_GPU_PROVIDER=hf_inference

config/models.yaml CHANGED Viewed

@@ -1,55 +1,85 @@
 models:
   primary:
-    id: Qwen/Qwen3-32B
-    description: Global default instruction model for interactive Grade 11-12 math tutoring
-    max_new_tokens: 640
-    temperature: 0.25
     top_p: 0.9
-  backup:
-    - id: meta-llama/Meta-Llama-3-70B-Instruct
-      description: High-quality model used for harder multi-step prompts
-      max_new_tokens: 768
-      temperature: 0.3
-      top_p: 0.9
-    - id: google/gemma-2-2b-it
-      description: Secondary backup with broad instruction coverage
-      max_new_tokens: 384
-      temperature: 0.2
-      top_p: 0.9
-  experimental:
-    - id: mistralai/Mistral-7B-Instruct-v0.3
-      notes: Prompt/procedure experimentation
-    - id: meta-llama/Meta-Llama-3-8B-Instruct
-      notes: Baseline comparison against legacy deployment
 routing:
   task_model_map:
-    # Keep all task defaults aligned to Qwen3-32B.
-    # Hard prompts can still escalate via runtime policy in inference_client.
-    chat: Qwen/Qwen3-32B
-    verify_solution: Qwen/Qwen3-32B
-    lesson_generation: Qwen/Qwen3-32B
-    quiz_generation: Qwen/Qwen3-32B
-    learning_path: Qwen/Qwen3-32B
-    daily_insight: Qwen/Qwen3-32B
-    risk_classification: Qwen/Qwen3-32B
-    risk_narrative: Qwen/Qwen3-32B
   task_fallback_model_map:
-    chat: []                                       # Chat is strict-primary only (no fallback chain)
     verify_solution:
-      - meta-llama/Meta-Llama-3-70B-Instruct      # Higher-capacity fallback
-      - meta-llama/Llama-3.1-8B-Instruct          # Second fallback
   task_provider_map:
-    # All tasks use hf_inference router (Qwen/Qwen3-32B natively supported)
-    chat: hf_inference
-    verify_solution: hf_inference
-    lesson_generation: hf_inference
-    quiz_generation: hf_inference
-    learning_path: hf_inference
-    daily_insight: hf_inference
-    risk_narrative: hf_inference
-    risk_classification: hf_inference

 models:
   primary:
+    id: deepseek-chat
+    description: Default DeepSeek chat model — all chat tasks, quizzes, lessons, reasoning
+    max_new_tokens: 800
+    temperature: 0.7
     top_p: 0.9
+  rag_primary:
+    id: deepseek-reasoner
+    description: DeepSeek reasoner — extended reasoning for complex RAG tasks
+    max_new_tokens: 1800
+    temperature: 0.2
+    top_p: 0.9
+    enable_thinking_tasks:
+      - rag_lesson
+      - verify_solution
+      - risk_narrative
+    no_thinking_tasks:
+      - chat
+      - quiz_generation
+      - learning_path
+      - daily_insight
+  embedding:
+    id: BAAI/bge-small-en-v1.5
+    description: Embedding model for RAG retrieval — curriculum vectorstore ingestion and semantic search
+    note: Not part of the generation pipeline. Read from EMBEDDING_MODEL env var only. Not swappable via admin panel.
+  model_capabilities:
+    sequential_only:
+      - deepseek-reasoner
+    supports_thinking:
+      - deepseek-reasoner
 routing:
   task_model_map:
+    chat:                  deepseek-chat
+    verify_solution:       deepseek-reasoner
+    lesson_generation:     deepseek-chat
+    quiz_generation:       deepseek-chat
+    learning_path:         deepseek-chat
+    daily_insight:         deepseek-chat
+    risk_classification:   deepseek-chat
+    risk_narrative:        deepseek-reasoner
+    rag_lesson:            deepseek-reasoner
+    rag_problem:           deepseek-chat
+    rag_analysis_context:  deepseek-chat
   task_fallback_model_map:
+    # Ordered fallback model list per task.
+    # NOTE(review): for every task whose entry in task_model_map is already
+    # deepseek-chat, the fallback below names the same model, so a fallback
+    # attempt would presumably retry the identical model — confirm this is
+    # intended. Only verify_solution, risk_narrative and rag_lesson (primary:
+    # deepseek-reasoner) fall back to a different model.
+    chat:
+      - deepseek-chat
     verify_solution:
+      - deepseek-chat
+    lesson_generation:
+      - deepseek-chat
+    quiz_generation:
+      - deepseek-chat
+    learning_path:
+      - deepseek-chat
+    daily_insight:
+      - deepseek-chat
+    risk_classification:
+      - deepseek-chat
+    risk_narrative:
+      - deepseek-chat
+    rag_lesson:
+      - deepseek-chat
+    rag_problem:
+      - deepseek-chat
+    rag_analysis_context:
+      - deepseek-chat
   task_provider_map:
+    chat:                  deepseek
+    verify_solution:       deepseek
+    lesson_generation:     deepseek
+    quiz_generation:       deepseek
+    learning_path:         deepseek
+    daily_insight:         deepseek
+    risk_classification:   deepseek
+    risk_narrative:        deepseek
+    rag_lesson:            deepseek
+    rag_problem:           deepseek
+    rag_analysis_context:  deepseek

rag/firebase_storage_loader.py ADDED Viewed

	@@ -0,0 +1,184 @@

+"""
+Firebase Storage PDF loader for curriculum ingestion.
+Downloads PDFs from Firebase Storage and extracts text for ChromaDB indexing.
+"""
+from __future__ import annotations
+import logging
+import os
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+logger = logging.getLogger("mathpulse.fb_storage_loader")
+_FIREBASE_INITIALIZED = False
+def _init_firebase_storage() -> Tuple[any, any]:
+    global _FIREBASE_INITIALIZED
+    if _FIREBASE_INITIALIZED:
+        try:
+            from firebase_admin import storage as fb_storage
+            bucket = fb_storage.bucket()
+            return fb_storage, bucket
+        except Exception as e:
+            logger.warning("Firebase storage unavailable: %s", e)
+            _FIREBASE_INITIALIZED = False
+            return None, None
+    try:
+        import firebase_admin
+        from firebase_admin import credentials, storage
+    except ImportError:
+        logger.warning("firebase_admin not installed")
+        return None, None
+    if firebase_admin._apps:
+        _FIREBASE_INITIALIZED = True
+        try:
+            bucket = storage.bucket()
+            return storage, bucket
+        except Exception as e:
+            logger.warning("Firebase storage bucket unavailable: %s", e)
+            return None, None
+    sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
+    # Also check HF Spaces secret mount path
+    if not sa_json:
+        secret_path = "/secret/FIREBASE_SERVICE_ACCOUNT_JSON"
+        if Path(secret_path).exists():
+            try:
+                sa_json = Path(secret_path).read_text(encoding="utf-8").strip()
+            except Exception:
+                pass
+    sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
+    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
+    try:
+        if sa_json:
+            import json as _json
+            creds = credentials.Certificate(_json.loads(sa_json))
+        elif sa_file and Path(sa_file).exists():
+            creds = credentials.Certificate(sa_file)
+        else:
+            creds = credentials.ApplicationDefault()
+        firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
+        _FIREBASE_INITIALIZED = True
+        bucket = storage.bucket()
+        return storage, bucket
+    except Exception as e:
+        logger.warning("Firebase init failed: %s", e)
+        return None, None
+def download_pdf_from_storage(storage_path: str, dest_path: Optional[str] = None) -> Optional[bytes]:
+    """Download a PDF from Firebase Storage and return its bytes."""
+    _, bucket = _init_firebase_storage()
+    if bucket is None:
+        logger.warning("Firebase Storage not available, skipping download")
+        return None
+    try:
+        blob = bucket.blob(storage_path)
+        if not blob.exists():
+            logger.warning("Blob does not exist: %s", storage_path)
+            return None
+        bytes_data = blob.download_as_bytes()
+        logger.info("Downloaded %s (%d bytes)", storage_path, len(bytes_data))
+        if dest_path:
+            Path(dest_path).parent.mkdir(parents=True, exist_ok=True)
+            with open(dest_path, "wb") as f:
+                f.write(bytes_data)
+            logger.info("Saved to %s", dest_path)
+        return bytes_data
+    except Exception as e:
+        logger.error("Failed to download %s: %s", storage_path, e)
+        return None
+def list_curriculum_blobs(prefix: str = "curriculum/") -> List[Dict[str, str]]:
+    """List all blobs under a prefix in Firebase Storage."""
+    _, bucket = _init_firebase_storage()
+    if bucket is None:
+        return []
+    blobs = bucket.list_blobs(prefix=prefix)
+    result = []
+    for blob in blobs:
+        if blob.name.endswith(".pdf"):
+            result.append({
+                "name": blob.name,
+                "size": blob.size,
+                "updated": str(blob.updated) if blob.updated else None,
+                "download_url": f"https://storage.googleapis.com/{bucket.name}/{blob.name}",
+            })
+    return result
+# NOTE: Curriculum guide PDFs (shaping papers) are stored in Firebase Storage
+# for system reference but are NOT included in RAG ingestion because they
+# contain only learning objectives and course descriptions — insufficient
+# content for lesson generation (typically <10 chunks each).
+#
+# Only SDO teaching modules (full lesson content with examples and problems)
+# are included in the RAG pipeline.
+# Registry of curriculum PDFs, keyed by their Firebase Storage object path.
+# Each entry carries: "subject" (display name), "subjectId" (short slug),
+# "type", "content_domain", "quarter" (int) and "storage_path", which repeats
+# the dictionary key.
+PDF_METADATA: Dict[str, dict] = {
+    # General Mathematics Q1 — SDO Navotas teaching module (100 pages, ~117k chars)
+    "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": {
+        "subject": "General Mathematics",
+        "subjectId": "gen-math",
+        "type": "sdo_module",
+        "content_domain": "general",
+        "quarter": 1,
+        "storage_path": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
+    },
+    # General Mathematics Q2 — Interest & Annuities modules (~27-35 pages each)
+    "curriculum/general_math/genmath_q2_mod1_simpleandcompoundinterests_v2.pdf": {
+        "subject": "General Mathematics",
+        "subjectId": "gen-math",
+        "type": "sdo_module",
+        "content_domain": "general",
+        "quarter": 2,
+        "storage_path": "curriculum/general_math/genmath_q2_mod1_simpleandcompoundinterests_v2.pdf",
+    },
+    "curriculum/general_math/genmath_q2_mod2_interestmaturityfutureandpresentvaluesinsimpleandcompoundinterests_v2.pdf": {
+        "subject": "General Mathematics",
+        "subjectId": "gen-math",
+        "type": "sdo_module",
+        "content_domain": "general",
+        "quarter": 2,
+        "storage_path": "curriculum/general_math/genmath_q2_mod2_interestmaturityfutureandpresentvaluesinsimpleandcompoundinterests_v2.pdf",
+    },
+    "curriculum/general_math/genmath_q2_mod3_SolvingProblemsInvolvingSimpleandCompoundInterest_v2.pdf": {
+        "subject": "General Mathematics",
+        "subjectId": "gen-math",
+        "type": "sdo_module",
+        "content_domain": "general",
+        "quarter": 2,
+        "storage_path": "curriculum/general_math/genmath_q2_mod3_SolvingProblemsInvolvingSimpleandCompoundInterest_v2.pdf",
+    },
+    "curriculum/general_math/genmath_q2_mod4_simpleandgeneralannuities_v2.pdf": {
+        "subject": "General Mathematics",
+        "subjectId": "gen-math",
+        "type": "sdo_module",
+        "content_domain": "general",
+        "quarter": 2,
+        "storage_path": "curriculum/general_math/genmath_q2_mod4_simpleandgeneralannuities_v2.pdf",
+    },
+    # Statistics and Probability — Full textbook (331 pages, ~607k chars)
+    # NOTE(review): described as a full textbook but tagged "sdo_module" like
+    # the entries above — confirm the type value is intended.
+    "curriculum/stat_prob/Full.pdf": {
+        "subject": "Statistics and Probability",
+        "subjectId": "stats-prob",
+        "type": "sdo_module",
+        "content_domain": "statistics",
+        "quarter": 1,
+        "storage_path": "curriculum/stat_prob/Full.pdf",
+    },
+}

scripts/download_vectorstore_from_firebase.py CHANGED Viewed

@@ -14,7 +14,7 @@ logger = logging.getLogger("mathpulse.download_vectorstore")
 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
-from backend.rag.firebase_storage_loader import _init_firebase_storage
 REMOTE_PREFIX = "vectorstore/"
 LOCAL_DEST_DIR = Path("/app/datasets/vectorstore")

 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+from hf_space_test.rag.firebase_storage_loader import _init_firebase_storage
 REMOTE_PREFIX = "vectorstore/"
 LOCAL_DEST_DIR = Path("/app/datasets/vectorstore")

services/inference_client.py CHANGED Viewed

@@ -100,6 +100,11 @@ class InferenceClient:
         # Featherless AI for Qwen math models (used as fallback when HF router fails)
         self.featherless_api_key = os.getenv("FEATHERLESS_API_KEY", "")
         self.featherless_chat_url = os.getenv("FEATHERLESS_CHAT_URL", "https://api.featherless.ai/openai/v1/chat/completions")
         self.local_space_url = _normalize_local_space_url(
             os.getenv("INFERENCE_LOCAL_SPACE_URL", "http://127.0.0.1:7860")
@@ -591,8 +596,11 @@ class InferenceClient:
         route = self._resolve_route_label(provider, req.task_type)
         if provider == "local_space":
             return self._call_local_space(req, provider=provider, route=route, fallback_depth=fallback_depth)
-        # All models use HF inference router directly (including Qwen/Qwen3-32B)
         return self._call_hf_inference(req, provider=provider, route=route, fallback_depth=fallback_depth)
     def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
@@ -931,6 +939,79 @@ class InferenceClient:
         self._bump_metric("requests_ok", 1)
         return text
     def _call_local_space(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
         target_model = req.model or self.default_model
         url = f"{self.local_space_url.rstrip('/')}{self.local_generate_path}"

         # Featherless AI for Qwen math models (used as fallback when HF router fails)
         self.featherless_api_key = os.getenv("FEATHERLESS_API_KEY", "")
         self.featherless_chat_url = os.getenv("FEATHERLESS_CHAT_URL", "https://api.featherless.ai/openai/v1/chat/completions")
+        # DeepSeek API (primary inference provider)
+        self.deepseek_api_key = os.getenv("DEEPSEEK_API_KEY", "")
+        self.deepseek_base_url = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com").rstrip("/")
+        self.deepseek_chat_url = f"{self.deepseek_base_url}/v1/chat/completions"
         self.local_space_url = _normalize_local_space_url(
             os.getenv("INFERENCE_LOCAL_SPACE_URL", "http://127.0.0.1:7860")
         route = self._resolve_route_label(provider, req.task_type)
         if provider == "local_space":
             return self._call_local_space(req, provider=provider, route=route, fallback_depth=fallback_depth)
+        if provider == "deepseek":
+            return self._call_deepseek(req, provider=provider, route=route, fallback_depth=fallback_depth)
+        # All other providers use HF inference router
         return self._call_hf_inference(req, provider=provider, route=route, fallback_depth=fallback_depth)
     def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
         self._bump_metric("requests_ok", 1)
         return text
+    def _call_deepseek(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
+        """Call DeepSeek API (OpenAI-compatible endpoint)."""
+        if not self.deepseek_api_key:
+            raise RuntimeError("DEEPSEEK_API_KEY is not set")
+        target_model = req.model or self.default_model
+        url = self.deepseek_chat_url
+        model_base = target_model.split(":")[0] if ":" in target_model else target_model
+        LOGGER.debug(
+            f"📌 Calling DeepSeek: task={req.task_type} model={model_base} "
+            f"route={route} depth={fallback_depth}"
+        )
+        payload: Dict[str, object] = {
+            "model": target_model,
+            "messages": req.messages,
+            "stream": False,
+            "max_tokens": req.max_new_tokens or self.default_max_new_tokens,
+            "temperature": req.temperature,
+            "top_p": req.top_p,
+        }
+        headers = {
+            "Authorization": f"Bearer {self.deepseek_api_key}",
+            "Content-Type": "application/json",
+            "X-MathPulse-Task": (req.task_type or "default").strip().lower(),
+        }
+        timeout = self._timeout_for(req, provider)
+        resp, latency_ms, retry_attempt = self._post_with_retry(
+            url,
+            headers=headers,
+            payload=payload,
+            timeout=timeout,
+            provider=provider,
+            model=target_model,
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
+        self._bump_bucket("status_code_counts", str(resp.status_code), 1)
+        if resp.status_code != 200:
+            self._bump_metric("requests_error", 1)
+            raise RuntimeError(f"DeepSeek API error {resp.status_code}: {resp.text}")
+        data = resp.json()
+        text = self._extract_text(data)
+        LOGGER.info(
+            f"✅ DeepSeek success: task={req.task_type} model={model_base} "
+            f"latency={latency_ms:.0f}ms tokens_out={len(text.split())}"
+        )
+        log_model_call(
+            LOGGER,
+            provider=provider,
+            model=target_model,
+            endpoint=url,
+            latency_ms=latency_ms,
+            input_tokens=None,
+            output_tokens=None,
+            status="ok",
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            retry_attempt=retry_attempt,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
+        self._bump_metric("requests_ok", 1)
+        return text
     def _call_local_space(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
         target_model = req.model or self.default_model
         url = f"{self.local_space_url.rstrip('/')}{self.local_generate_path}"