Spaces:
Sleeping
Sleeping
KAPO rollout fix: HF generation fallback
Browse files
- brain_server/api/main.py +7 -6
brain_server/api/main.py
CHANGED
|
@@ -296,16 +296,16 @@ def _load_default_model() -> None:
|
|
| 296 |
filename = str(os.getenv("MODEL_FILE", "") or "").strip()
|
| 297 |
provider = str(os.getenv("BRAIN_PROVIDER", "") or os.getenv("BRAIN_TEMPLATE", "") or "").strip().lower()
|
| 298 |
if _hf_transformers_runtime_enabled():
|
| 299 |
-
|
|
|
|
|
|
|
|
|
|
| 300 |
return
|
| 301 |
if not filename and ("huggingface" in provider or "hf-space" in provider):
|
| 302 |
-
ensure_model_loaded(repo_id, filename, hf_token=os.getenv("HF_TOKEN"))
|
| 303 |
-
return
|
| 304 |
-
if not filename and (_feature_enabled("KAPO_HF_INFERENCE_API", default=False) or "huggingface" in provider or "hf-space" in provider):
|
| 305 |
MODEL = None
|
| 306 |
MODEL_ERROR = None
|
| 307 |
MODEL_META = {"repo_id": repo_id, "filename": "", "path": None}
|
| 308 |
-
logger.info("Skipping local model load; using Hugging Face
|
| 309 |
return
|
| 310 |
ensure_model_loaded(repo_id, filename or DEFAULT_MODEL_FILE, hf_token=os.getenv("HF_TOKEN"))
|
| 311 |
|
|
@@ -1669,7 +1669,8 @@ def _generate_response(user_input: str, history: list[dict[str, str]], context_b
|
|
| 1669 |
if fast_reply:
|
| 1670 |
return fast_reply
|
| 1671 |
if MODEL is None:
|
| 1672 |
-
|
|
|
|
| 1673 |
try:
|
| 1674 |
from huggingface_hub import InferenceClient
|
| 1675 |
prompt = _build_chat_prompt(user_input, history, context_block)
|
|
|
|
| 296 |
filename = str(os.getenv("MODEL_FILE", "") or "").strip()
|
| 297 |
provider = str(os.getenv("BRAIN_PROVIDER", "") or os.getenv("BRAIN_TEMPLATE", "") or "").strip().lower()
|
| 298 |
if _hf_transformers_runtime_enabled():
|
| 299 |
+
MODEL = None
|
| 300 |
+
MODEL_ERROR = None
|
| 301 |
+
MODEL_META = {"repo_id": repo_id, "filename": "", "path": None}
|
| 302 |
+
logger.info("Skipping local model load; HF transformers runtime is enabled for %s", repo_id)
|
| 303 |
return
|
| 304 |
if not filename and ("huggingface" in provider or "hf-space" in provider):
|
|
|
|
|
|
|
|
|
|
| 305 |
MODEL = None
|
| 306 |
MODEL_ERROR = None
|
| 307 |
MODEL_META = {"repo_id": repo_id, "filename": "", "path": None}
|
| 308 |
+
logger.info("Skipping local model load; using Hugging Face remote generation fallback for %s", repo_id)
|
| 309 |
return
|
| 310 |
ensure_model_loaded(repo_id, filename or DEFAULT_MODEL_FILE, hf_token=os.getenv("HF_TOKEN"))
|
| 311 |
|
|
|
|
| 1669 |
if fast_reply:
|
| 1670 |
return fast_reply
|
| 1671 |
if MODEL is None:
|
| 1672 |
+
provider = str(os.getenv("BRAIN_PROVIDER", "") or os.getenv("BRAIN_TEMPLATE", "") or "").strip().lower()
|
| 1673 |
+
if _feature_enabled("KAPO_HF_INFERENCE_API", default=False) or "huggingface" in provider or "hf-space" in provider or _hf_transformers_runtime_enabled():
|
| 1674 |
try:
|
| 1675 |
from huggingface_hub import InferenceClient
|
| 1676 |
prompt = _build_chat_prompt(user_input, history, context_block)
|