MrA7A3 committed on
Commit
cf81ae1
·
verified ·
1 Parent(s): 5c46e9d

KAPO modernization rollout

Browse files
Files changed (4) hide show
  1. .env +2 -2
  2. bootstrap_space_runtime.py +2 -2
  3. brain_server/api/main.py +15 -8
  4. kapo.env +2 -2
.env CHANGED
@@ -20,8 +20,8 @@ KAPO_LAZY_EMBED_STARTUP=1
20
  KAPO_LAZY_MODEL_STARTUP=1
21
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
22
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
23
- KAPO_REMOTE_ENV_PASSWORD_B64=M2pPZ3FSYWlIcE80SHhTQmI2TTRyU2tVQzE0bVBYNzE
24
- KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xSjhXYzAwR190UEgwZDNubkN3X19VMkUyRUFPLXY0MGI
25
  KAPO_SHARED_STATE_BACKEND=google_drive
26
  MODEL_PROFILE_ID=hf-debugger-qwen25-7b-instruct
27
  MODEL_REPO=Qwen/Qwen2.5-1.5B-Instruct
 
20
  KAPO_LAZY_MODEL_STARTUP=1
21
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
22
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
23
+ KAPO_REMOTE_ENV_PASSWORD_B64=Wml4VURXdlRyU2FXQ011czNxODF5dEdVNmZ1Ujh3VmY
24
+ KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xYlhYeWJaQUpqb3lQeGIxNUJDUXhRYmpJb3VQRHh4TU4
25
  KAPO_SHARED_STATE_BACKEND=google_drive
26
  MODEL_PROFILE_ID=hf-debugger-qwen25-7b-instruct
27
  MODEL_REPO=Qwen/Qwen2.5-1.5B-Instruct
bootstrap_space_runtime.py CHANGED
@@ -28,8 +28,8 @@ DEFAULT_ENV = {
28
  "KAPO_LAZY_MODEL_STARTUP": "1",
29
  "KAPO_PATCH_BUNDLE_URL": "https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1",
30
  "KAPO_PATCH_MANIFEST_URL": "https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b",
31
- "KAPO_REMOTE_ENV_PASSWORD_B64": "M2pPZ3FSYWlIcE80SHhTQmI2TTRyU2tVQzE0bVBYNzE",
32
- "KAPO_REMOTE_ENV_URL_B64": "aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xSjhXYzAwR190UEgwZDNubkN3X19VMkUyRUFPLXY0MGI",
33
  "KAPO_SHARED_STATE_BACKEND": "google_drive",
34
  "MODEL_PROFILE_ID": "hf-debugger-qwen25-7b-instruct",
35
  "MODEL_REPO": "Qwen/Qwen2.5-1.5B-Instruct",
 
28
  "KAPO_LAZY_MODEL_STARTUP": "1",
29
  "KAPO_PATCH_BUNDLE_URL": "https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1",
30
  "KAPO_PATCH_MANIFEST_URL": "https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b",
31
+ "KAPO_REMOTE_ENV_PASSWORD_B64": "Wml4VURXdlRyU2FXQ011czNxODF5dEdVNmZ1Ujh3VmY",
32
+ "KAPO_REMOTE_ENV_URL_B64": "aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xYlhYeWJaQUpqb3lQeGIxNUJDUXhRYmpJb3VQRHh4TU4",
33
  "KAPO_SHARED_STATE_BACKEND": "google_drive",
34
  "MODEL_PROFILE_ID": "hf-debugger-qwen25-7b-instruct",
35
  "MODEL_REPO": "Qwen/Qwen2.5-1.5B-Instruct",
brain_server/api/main.py CHANGED
@@ -269,9 +269,17 @@ def _load_embed_model() -> None:
269
 
270
 
271
  def _load_default_model() -> None:
272
- repo_id = os.getenv("MODEL_REPO", DEFAULT_MODEL_REPO)
273
- filename = os.getenv("MODEL_FILE", DEFAULT_MODEL_FILE)
274
- ensure_model_loaded(repo_id, filename, hf_token=os.getenv("HF_TOKEN"))
 
 
 
 
 
 
 
 
275
 
276
 
277
  def _brain_headers() -> dict:
@@ -1609,19 +1617,18 @@ def _generate_response(user_input: str, history: list[dict[str, str]], context_b
1609
  from huggingface_hub import InferenceClient
1610
  prompt = _build_chat_prompt(user_input, history, context_block)
1611
  max_tokens = 80 if language == "ar" else 96
1612
- client = InferenceClient(api_key=(str(os.getenv("HF_TOKEN", "") or "").strip() or None))
 
1613
  generated = client.text_generation(
1614
  prompt,
1615
- model=str(os.getenv("MODEL_REPO", DEFAULT_MODEL_REPO) or DEFAULT_MODEL_REPO).strip(),
1616
  max_new_tokens=max_tokens,
1617
- temperature=0.1,
1618
  return_full_text=False,
1619
  )
1620
  generated_text = str(generated or "").strip()
1621
  if generated_text:
1622
  return generated_text
1623
- except Exception:
1624
- logger.warning("HF inference fallback failed", exc_info=True)
1625
  if language == "ar":
1626
  return "الخدمة تعمل لكن توليد الرد الحر غير متاح الآن لأن النموذج غير محمل."
1627
  return "The Brain is online, but natural chat generation is unavailable because the model is not loaded."
 
269
 
270
 
271
  def _load_default_model() -> None:
272
+ global MODEL, MODEL_ERROR, MODEL_META
273
+ repo_id = str(os.getenv("MODEL_REPO", DEFAULT_MODEL_REPO) or DEFAULT_MODEL_REPO).strip()
274
+ filename = str(os.getenv("MODEL_FILE", "") or "").strip()
275
+ provider = str(os.getenv("BRAIN_PROVIDER", "") or os.getenv("BRAIN_TEMPLATE", "") or "").strip().lower()
276
+ if not filename and (_feature_enabled("KAPO_HF_INFERENCE_API", default=False) or "huggingface" in provider or "hf-space" in provider):
277
+ MODEL = None
278
+ MODEL_ERROR = None
279
+ MODEL_META = {"repo_id": repo_id, "filename": "", "path": None}
280
+ logger.info("Skipping local model load; using Hugging Face inference API for %s", repo_id)
281
+ return
282
+ ensure_model_loaded(repo_id, filename or DEFAULT_MODEL_FILE, hf_token=os.getenv("HF_TOKEN"))
283
 
284
 
285
  def _brain_headers() -> dict:
 
1617
  from huggingface_hub import InferenceClient
1618
  prompt = _build_chat_prompt(user_input, history, context_block)
1619
  max_tokens = 80 if language == "ar" else 96
1620
+ model_repo = str(os.getenv("MODEL_REPO", DEFAULT_MODEL_REPO) or DEFAULT_MODEL_REPO).strip()
1621
+ client = InferenceClient(model=model_repo, api_key=(str(os.getenv("HF_TOKEN", "") or "").strip() or None))
1622
  generated = client.text_generation(
1623
  prompt,
 
1624
  max_new_tokens=max_tokens,
 
1625
  return_full_text=False,
1626
  )
1627
  generated_text = str(generated or "").strip()
1628
  if generated_text:
1629
  return generated_text
1630
+ except Exception as exc:
1631
+ logger.warning("HF inference fallback failed: %s", exc, exc_info=True)
1632
  if language == "ar":
1633
  return "الخدمة تعمل لكن توليد الرد الحر غير متاح الآن لأن النموذج غير محمل."
1634
  return "The Brain is online, but natural chat generation is unavailable because the model is not loaded."
kapo.env CHANGED
@@ -20,8 +20,8 @@ KAPO_LAZY_EMBED_STARTUP=1
20
  KAPO_LAZY_MODEL_STARTUP=1
21
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
22
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
23
- KAPO_REMOTE_ENV_PASSWORD_B64=M2pPZ3FSYWlIcE80SHhTQmI2TTRyU2tVQzE0bVBYNzE
24
- KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xSjhXYzAwR190UEgwZDNubkN3X19VMkUyRUFPLXY0MGI
25
  KAPO_SHARED_STATE_BACKEND=google_drive
26
  MODEL_PROFILE_ID=hf-debugger-qwen25-7b-instruct
27
  MODEL_REPO=Qwen/Qwen2.5-1.5B-Instruct
 
20
  KAPO_LAZY_MODEL_STARTUP=1
21
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
22
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
23
+ KAPO_REMOTE_ENV_PASSWORD_B64=Wml4VURXdlRyU2FXQ011czNxODF5dEdVNmZ1Ujh3VmY
24
+ KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xYlhYeWJaQUpqb3lQeGIxNUJDUXhRYmpJb3VQRHh4TU4
25
  KAPO_SHARED_STATE_BACKEND=google_drive
26
  MODEL_PROFILE_ID=hf-debugger-qwen25-7b-instruct
27
  MODEL_REPO=Qwen/Qwen2.5-1.5B-Instruct