MrA7A1 committed on
Commit
9a8a7a4
·
verified ·
1 Parent(s): 3b68212

KAPO rollout fix: HF generation fallback

Browse files
Files changed (1) hide show
  1. brain_server/api/main.py +7 -6
brain_server/api/main.py CHANGED
@@ -296,16 +296,16 @@ def _load_default_model() -> None:
296
  filename = str(os.getenv("MODEL_FILE", "") or "").strip()
297
  provider = str(os.getenv("BRAIN_PROVIDER", "") or os.getenv("BRAIN_TEMPLATE", "") or "").strip().lower()
298
  if _hf_transformers_runtime_enabled():
299
- ensure_model_loaded(repo_id, filename, hf_token=os.getenv("HF_TOKEN"))
 
 
 
300
  return
301
  if not filename and ("huggingface" in provider or "hf-space" in provider):
302
- ensure_model_loaded(repo_id, filename, hf_token=os.getenv("HF_TOKEN"))
303
- return
304
- if not filename and (_feature_enabled("KAPO_HF_INFERENCE_API", default=False) or "huggingface" in provider or "hf-space" in provider):
305
  MODEL = None
306
  MODEL_ERROR = None
307
  MODEL_META = {"repo_id": repo_id, "filename": "", "path": None}
308
- logger.info("Skipping local model load; using Hugging Face inference API for %s", repo_id)
309
  return
310
  ensure_model_loaded(repo_id, filename or DEFAULT_MODEL_FILE, hf_token=os.getenv("HF_TOKEN"))
311
 
@@ -1669,7 +1669,8 @@ def _generate_response(user_input: str, history: list[dict[str, str]], context_b
1669
  if fast_reply:
1670
  return fast_reply
1671
  if MODEL is None:
1672
- if _feature_enabled("KAPO_HF_INFERENCE_API", default=False):
 
1673
  try:
1674
  from huggingface_hub import InferenceClient
1675
  prompt = _build_chat_prompt(user_input, history, context_block)
 
296
  filename = str(os.getenv("MODEL_FILE", "") or "").strip()
297
  provider = str(os.getenv("BRAIN_PROVIDER", "") or os.getenv("BRAIN_TEMPLATE", "") or "").strip().lower()
298
  if _hf_transformers_runtime_enabled():
299
+ MODEL = None
300
+ MODEL_ERROR = None
301
+ MODEL_META = {"repo_id": repo_id, "filename": "", "path": None}
302
+ logger.info("Skipping local model load; HF transformers runtime is enabled for %s", repo_id)
303
  return
304
  if not filename and ("huggingface" in provider or "hf-space" in provider):
 
 
 
305
  MODEL = None
306
  MODEL_ERROR = None
307
  MODEL_META = {"repo_id": repo_id, "filename": "", "path": None}
308
+ logger.info("Skipping local model load; using Hugging Face remote generation fallback for %s", repo_id)
309
  return
310
  ensure_model_loaded(repo_id, filename or DEFAULT_MODEL_FILE, hf_token=os.getenv("HF_TOKEN"))
311
 
 
1669
  if fast_reply:
1670
  return fast_reply
1671
  if MODEL is None:
1672
+ provider = str(os.getenv("BRAIN_PROVIDER", "") or os.getenv("BRAIN_TEMPLATE", "") or "").strip().lower()
1673
+ if _feature_enabled("KAPO_HF_INFERENCE_API", default=False) or "huggingface" in provider or "hf-space" in provider or _hf_transformers_runtime_enabled():
1674
  try:
1675
  from huggingface_hub import InferenceClient
1676
  prompt = _build_chat_prompt(user_input, history, context_block)