MrA7A1 committed on
Commit
5e92c5a
·
verified ·
1 Parent(s): 8ef99ed

KAPO modernization rollout

Browse files
Files changed (4) hide show
  1. .env +2 -2
  2. bootstrap_space_runtime.py +2 -2
  3. brain_server/api/main.py +15 -8
  4. kapo.env +2 -2
.env CHANGED
@@ -22,8 +22,8 @@ KAPO_LAZY_EMBED_STARTUP=1
22
  KAPO_LAZY_MODEL_STARTUP=1
23
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
24
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
25
- KAPO_REMOTE_ENV_PASSWORD_B64=MUJYcHlrS05ldHZnU1FRYTAwaVoxUkd2blZOTlhTT2M
26
- KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xZExHdHBubmo3d2tmVi1VdGUxV2luZks1eS10Vk1McGI
27
  KAPO_SHARED_STATE_BACKEND=google_drive
28
  MODEL_PROFILE_ID=hf-coder-qwen25-coder-7b-instruct
29
  MODEL_REPO=Qwen/Qwen2.5-Coder-1.5B-Instruct
 
22
  KAPO_LAZY_MODEL_STARTUP=1
23
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
24
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
25
+ KAPO_REMOTE_ENV_PASSWORD_B64=ZjIzMm5hamNHYkFXOS1admF6UUJPOFFvaHJuUWwtMjQ
26
+ KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xak8zd3FuUVZQVzFwNXB3YVVmMmVqWERLZE1FbkxkYnU
27
  KAPO_SHARED_STATE_BACKEND=google_drive
28
  MODEL_PROFILE_ID=hf-coder-qwen25-coder-7b-instruct
29
  MODEL_REPO=Qwen/Qwen2.5-Coder-1.5B-Instruct
bootstrap_space_runtime.py CHANGED
@@ -30,8 +30,8 @@ DEFAULT_ENV = {
30
  "KAPO_LAZY_MODEL_STARTUP": "1",
31
  "KAPO_PATCH_BUNDLE_URL": "https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1",
32
  "KAPO_PATCH_MANIFEST_URL": "https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b",
33
- "KAPO_REMOTE_ENV_PASSWORD_B64": "MUJYcHlrS05ldHZnU1FRYTAwaVoxUkd2blZOTlhTT2M",
34
- "KAPO_REMOTE_ENV_URL_B64": "aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xZExHdHBubmo3d2tmVi1VdGUxV2luZks1eS10Vk1McGI",
35
  "KAPO_SHARED_STATE_BACKEND": "google_drive",
36
  "MODEL_PROFILE_ID": "hf-coder-qwen25-coder-7b-instruct",
37
  "MODEL_REPO": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
 
30
  "KAPO_LAZY_MODEL_STARTUP": "1",
31
  "KAPO_PATCH_BUNDLE_URL": "https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1",
32
  "KAPO_PATCH_MANIFEST_URL": "https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b",
33
+ "KAPO_REMOTE_ENV_PASSWORD_B64": "ZjIzMm5hamNHYkFXOS1admF6UUJPOFFvaHJuUWwtMjQ",
34
+ "KAPO_REMOTE_ENV_URL_B64": "aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xak8zd3FuUVZQVzFwNXB3YVVmMmVqWERLZE1FbkxkYnU",
35
  "KAPO_SHARED_STATE_BACKEND": "google_drive",
36
  "MODEL_PROFILE_ID": "hf-coder-qwen25-coder-7b-instruct",
37
  "MODEL_REPO": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
brain_server/api/main.py CHANGED
@@ -269,9 +269,17 @@ def _load_embed_model() -> None:
269
 
270
 
271
  def _load_default_model() -> None:
272
- repo_id = os.getenv("MODEL_REPO", DEFAULT_MODEL_REPO)
273
- filename = os.getenv("MODEL_FILE", DEFAULT_MODEL_FILE)
274
- ensure_model_loaded(repo_id, filename, hf_token=os.getenv("HF_TOKEN"))
 
 
 
 
 
 
 
 
275
 
276
 
277
  def _brain_headers() -> dict:
@@ -1609,19 +1617,18 @@ def _generate_response(user_input: str, history: list[dict[str, str]], context_b
1609
  from huggingface_hub import InferenceClient
1610
  prompt = _build_chat_prompt(user_input, history, context_block)
1611
  max_tokens = 80 if language == "ar" else 96
1612
- client = InferenceClient(api_key=(str(os.getenv("HF_TOKEN", "") or "").strip() or None))
 
1613
  generated = client.text_generation(
1614
  prompt,
1615
- model=str(os.getenv("MODEL_REPO", DEFAULT_MODEL_REPO) or DEFAULT_MODEL_REPO).strip(),
1616
  max_new_tokens=max_tokens,
1617
- temperature=0.1,
1618
  return_full_text=False,
1619
  )
1620
  generated_text = str(generated or "").strip()
1621
  if generated_text:
1622
  return generated_text
1623
- except Exception:
1624
- logger.warning("HF inference fallback failed", exc_info=True)
1625
  if language == "ar":
1626
  return "الخدمة تعمل لكن توليد الرد الحر غير متاح الآن لأن النموذج غير محمل."
1627
  return "The Brain is online, but natural chat generation is unavailable because the model is not loaded."
 
269
 
270
 
271
  def _load_default_model() -> None:
272
+ global MODEL, MODEL_ERROR, MODEL_META
273
+ repo_id = str(os.getenv("MODEL_REPO", DEFAULT_MODEL_REPO) or DEFAULT_MODEL_REPO).strip()
274
+ filename = str(os.getenv("MODEL_FILE", "") or "").strip()
275
+ provider = str(os.getenv("BRAIN_PROVIDER", "") or os.getenv("BRAIN_TEMPLATE", "") or "").strip().lower()
276
+ if not filename and (_feature_enabled("KAPO_HF_INFERENCE_API", default=False) or "huggingface" in provider or "hf-space" in provider):
277
+ MODEL = None
278
+ MODEL_ERROR = None
279
+ MODEL_META = {"repo_id": repo_id, "filename": "", "path": None}
280
+ logger.info("Skipping local model load; using Hugging Face inference API for %s", repo_id)
281
+ return
282
+ ensure_model_loaded(repo_id, filename or DEFAULT_MODEL_FILE, hf_token=os.getenv("HF_TOKEN"))
283
 
284
 
285
  def _brain_headers() -> dict:
 
1617
  from huggingface_hub import InferenceClient
1618
  prompt = _build_chat_prompt(user_input, history, context_block)
1619
  max_tokens = 80 if language == "ar" else 96
1620
+ model_repo = str(os.getenv("MODEL_REPO", DEFAULT_MODEL_REPO) or DEFAULT_MODEL_REPO).strip()
1621
+ client = InferenceClient(model=model_repo, api_key=(str(os.getenv("HF_TOKEN", "") or "").strip() or None))
1622
  generated = client.text_generation(
1623
  prompt,
 
1624
  max_new_tokens=max_tokens,
 
1625
  return_full_text=False,
1626
  )
1627
  generated_text = str(generated or "").strip()
1628
  if generated_text:
1629
  return generated_text
1630
+ except Exception as exc:
1631
+ logger.warning("HF inference fallback failed: %s", exc, exc_info=True)
1632
  if language == "ar":
1633
  return "الخدمة تعمل لكن توليد الرد الحر غير متاح الآن لأن النموذج غير محمل."
1634
  return "The Brain is online, but natural chat generation is unavailable because the model is not loaded."
kapo.env CHANGED
@@ -22,8 +22,8 @@ KAPO_LAZY_EMBED_STARTUP=1
22
  KAPO_LAZY_MODEL_STARTUP=1
23
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
24
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
25
- KAPO_REMOTE_ENV_PASSWORD_B64=MUJYcHlrS05ldHZnU1FRYTAwaVoxUkd2blZOTlhTT2M
26
- KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xZExHdHBubmo3d2tmVi1VdGUxV2luZks1eS10Vk1McGI
27
  KAPO_SHARED_STATE_BACKEND=google_drive
28
  MODEL_PROFILE_ID=hf-coder-qwen25-coder-7b-instruct
29
  MODEL_REPO=Qwen/Qwen2.5-Coder-1.5B-Instruct
 
22
  KAPO_LAZY_MODEL_STARTUP=1
23
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
24
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
25
+ KAPO_REMOTE_ENV_PASSWORD_B64=ZjIzMm5hamNHYkFXOS1admF6UUJPOFFvaHJuUWwtMjQ
26
+ KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xak8zd3FuUVZQVzFwNXB3YVVmMmVqWERLZE1FbkxkYnU
27
  KAPO_SHARED_STATE_BACKEND=google_drive
28
  MODEL_PROFILE_ID=hf-coder-qwen25-coder-7b-instruct
29
  MODEL_REPO=Qwen/Qwen2.5-Coder-1.5B-Instruct