github-actions[bot] committed on
Commit
e2968a4
·
1 Parent(s): de4b0cd

🚀 Auto-deploy backend from GitHub (454ffc8)

Browse files
services/inference_client.py CHANGED
@@ -113,10 +113,10 @@ class InferenceClient:
113
  self.pro_route_header_name = os.getenv("INFERENCE_PRO_ROUTE_HEADER_NAME", "")
114
  self.pro_route_header_value = os.getenv("INFERENCE_PRO_ROUTE_HEADER_VALUE", "true")
115
 
116
- self.enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
117
- self.qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen3-32B").strip() or "Qwen/Qwen3-32B"
118
 
119
- default_model_fallback = str(primary.get("id") or "Qwen/Qwen3-32B")
120
  env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
121
  self.default_model = env_model_id or default_model_fallback
122
 
@@ -189,16 +189,16 @@ class InferenceClient:
189
  )
190
 
191
  # Default task-to-model routing.
192
- # Keep all tasks pinned to Qwen3-32B when qwen-only lock is active.
193
  self.task_model_map: Dict[str, str] = {
194
- "chat": "Qwen/Qwen3-32B",
195
- "verify_solution": "Qwen/Qwen3-32B",
196
- "lesson_generation": "Qwen/Qwen3-32B",
197
- "quiz_generation": "Qwen/Qwen3-32B",
198
- "learning_path": "Qwen/Qwen3-32B",
199
- "daily_insight": "Qwen/Qwen3-32B",
200
- "risk_classification": "Qwen/Qwen3-32B",
201
- "risk_narrative": "Qwen/Qwen3-32B",
202
  }
203
  # Fallback chains (only to other HF-supported models, no featherless-ai)
204
  self.task_fallback_model_map: Dict[str, List[str]] = {
 
113
  self.pro_route_header_name = os.getenv("INFERENCE_PRO_ROUTE_HEADER_NAME", "")
114
  self.pro_route_header_value = os.getenv("INFERENCE_PRO_ROUTE_HEADER_VALUE", "true")
115
 
116
+ self.enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "false").strip().lower() in {"1", "true", "yes", "on"}
117
+ self.qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "deepseek-chat").strip() or "deepseek-chat"
118
 
119
+ default_model_fallback = str(primary.get("id") or "deepseek-chat")
120
  env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
121
  self.default_model = env_model_id or default_model_fallback
122
 
 
189
  )
190
 
191
  # Default task-to-model routing.
192
+ # Keep all tasks pinned to deepseek-chat when qwen-only lock is active.
193
  self.task_model_map: Dict[str, str] = {
194
+ "chat": "deepseek-chat",
195
+ "verify_solution": "deepseek-chat",
196
+ "lesson_generation": "deepseek-chat",
197
+ "quiz_generation": "deepseek-chat",
198
+ "learning_path": "deepseek-chat",
199
+ "daily_insight": "deepseek-chat",
200
+ "risk_classification": "deepseek-chat",
201
+ "risk_narrative": "deepseek-chat",
202
  }
203
  # Fallback chains (only to other HF-supported models, no featherless-ai)
204
  self.task_fallback_model_map: Dict[str, List[str]] = {
startup_validation.py CHANGED
@@ -92,13 +92,13 @@ def validate_environment() -> None:
92
  logger.info(f" โœ“ INFERENCE_PROVIDER: {inference_provider}")
93
 
94
  # Check model IDs
95
- chat_model = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or "Qwen/Qwen3-32B"
96
  logger.info(f" โœ“ Chat model configured: {chat_model}")
97
 
98
  chat_strict = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
99
  chat_hard_trigger = os.getenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
100
- enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
101
- qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen3-32B").strip() or "Qwen/Qwen3-32B"
102
  logger.info(f" โœ“ INFERENCE_CHAT_STRICT_MODEL_ONLY: {chat_strict}")
103
  logger.info(f" โœ“ INFERENCE_CHAT_HARD_TRIGGER_ENABLED: {chat_hard_trigger}")
104
  logger.info(f" โœ“ INFERENCE_ENFORCE_QWEN_ONLY: {enforce_qwen_only}")
 
92
  logger.info(f" โœ“ INFERENCE_PROVIDER: {inference_provider}")
93
 
94
  # Check model IDs
95
+ chat_model = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or "deepseek-chat"
96
  logger.info(f" โœ“ Chat model configured: {chat_model}")
97
 
98
  chat_strict = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
99
  chat_hard_trigger = os.getenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
100
+ enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "false").strip().lower() in {"1", "true", "yes", "on"}
101
+ qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "deepseek-chat").strip() or "deepseek-chat"
102
  logger.info(f" โœ“ INFERENCE_CHAT_STRICT_MODEL_ONLY: {chat_strict}")
103
  logger.info(f" โœ“ INFERENCE_CHAT_HARD_TRIGGER_ENABLED: {chat_hard_trigger}")
104
  logger.info(f" โœ“ INFERENCE_ENFORCE_QWEN_ONLY: {enforce_qwen_only}")