Spaces:
Running
Running
github-actions[bot] committed on
Commit ·
e2968a4
1
Parent(s): de4b0cd
🚀 Auto-deploy backend from GitHub (454ffc8)
Browse files
- services/inference_client.py +12 -12
- startup_validation.py +3 -3
services/inference_client.py
CHANGED
|
@@ -113,10 +113,10 @@ class InferenceClient:
|
|
| 113 |
self.pro_route_header_name = os.getenv("INFERENCE_PRO_ROUTE_HEADER_NAME", "")
|
| 114 |
self.pro_route_header_value = os.getenv("INFERENCE_PRO_ROUTE_HEADER_VALUE", "true")
|
| 115 |
|
| 116 |
-
self.enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "
|
| 117 |
-
self.qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "
|
| 118 |
|
| 119 |
-
default_model_fallback = str(primary.get("id") or "
|
| 120 |
env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
|
| 121 |
self.default_model = env_model_id or default_model_fallback
|
| 122 |
|
|
@@ -189,16 +189,16 @@ class InferenceClient:
|
|
| 189 |
)
|
| 190 |
|
| 191 |
# Default task-to-model routing.
|
| 192 |
-
# Keep all tasks pinned to
|
| 193 |
self.task_model_map: Dict[str, str] = {
|
| 194 |
-
"chat": "
|
| 195 |
-
"verify_solution": "
|
| 196 |
-
"lesson_generation": "
|
| 197 |
-
"quiz_generation": "
|
| 198 |
-
"learning_path": "
|
| 199 |
-
"daily_insight": "
|
| 200 |
-
"risk_classification": "
|
| 201 |
-
"risk_narrative": "
|
| 202 |
}
|
| 203 |
# Fallback chains (only to other HF-supported models, no featherless-ai)
|
| 204 |
self.task_fallback_model_map: Dict[str, List[str]] = {
|
|
|
|
| 113 |
self.pro_route_header_name = os.getenv("INFERENCE_PRO_ROUTE_HEADER_NAME", "")
|
| 114 |
self.pro_route_header_value = os.getenv("INFERENCE_PRO_ROUTE_HEADER_VALUE", "true")
|
| 115 |
|
| 116 |
+
self.enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "false").strip().lower() in {"1", "true", "yes", "on"}
|
| 117 |
+
self.qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "deepseek-chat").strip() or "deepseek-chat"
|
| 118 |
|
| 119 |
+
default_model_fallback = str(primary.get("id") or "deepseek-chat")
|
| 120 |
env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
|
| 121 |
self.default_model = env_model_id or default_model_fallback
|
| 122 |
|
|
|
|
| 189 |
)
|
| 190 |
|
| 191 |
# Default task-to-model routing.
|
| 192 |
+
# Keep all tasks pinned to deepseek-chat when qwen-only lock is active.
|
| 193 |
self.task_model_map: Dict[str, str] = {
|
| 194 |
+
"chat": "deepseek-chat",
|
| 195 |
+
"verify_solution": "deepseek-chat",
|
| 196 |
+
"lesson_generation": "deepseek-chat",
|
| 197 |
+
"quiz_generation": "deepseek-chat",
|
| 198 |
+
"learning_path": "deepseek-chat",
|
| 199 |
+
"daily_insight": "deepseek-chat",
|
| 200 |
+
"risk_classification": "deepseek-chat",
|
| 201 |
+
"risk_narrative": "deepseek-chat",
|
| 202 |
}
|
| 203 |
# Fallback chains (only to other HF-supported models, no featherless-ai)
|
| 204 |
self.task_fallback_model_map: Dict[str, List[str]] = {
|
startup_validation.py
CHANGED
|
@@ -92,13 +92,13 @@ def validate_environment() -> None:
|
|
| 92 |
logger.info(f" โ INFERENCE_PROVIDER: {inference_provider}")
|
| 93 |
|
| 94 |
# Check model IDs
|
| 95 |
-
chat_model = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or "
|
| 96 |
logger.info(f" โ Chat model configured: {chat_model}")
|
| 97 |
|
| 98 |
chat_strict = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
|
| 99 |
chat_hard_trigger = os.getenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
|
| 100 |
-
enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "
|
| 101 |
-
qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "
|
| 102 |
logger.info(f" โ INFERENCE_CHAT_STRICT_MODEL_ONLY: {chat_strict}")
|
| 103 |
logger.info(f" โ INFERENCE_CHAT_HARD_TRIGGER_ENABLED: {chat_hard_trigger}")
|
| 104 |
logger.info(f" โ INFERENCE_ENFORCE_QWEN_ONLY: {enforce_qwen_only}")
|
|
|
|
| 92 |
logger.info(f" โ INFERENCE_PROVIDER: {inference_provider}")
|
| 93 |
|
| 94 |
# Check model IDs
|
| 95 |
+
chat_model = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or "deepseek-chat"
|
| 96 |
logger.info(f" โ Chat model configured: {chat_model}")
|
| 97 |
|
| 98 |
chat_strict = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
|
| 99 |
chat_hard_trigger = os.getenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
|
| 100 |
+
enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "false").strip().lower() in {"1", "true", "yes", "on"}
|
| 101 |
+
qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "deepseek-chat").strip() or "deepseek-chat"
|
| 102 |
logger.info(f" โ INFERENCE_CHAT_STRICT_MODEL_ONLY: {chat_strict}")
|
| 103 |
logger.info(f" โ INFERENCE_CHAT_HARD_TRIGGER_ENABLED: {chat_hard_trigger}")
|
| 104 |
logger.info(f" โ INFERENCE_ENFORCE_QWEN_ONLY: {enforce_qwen_only}")
|