MrA7A3 committed on
Commit
5c46e9d
·
verified ·
1 Parent(s): a87d13e

Enable HF inference fallback when local model is unavailable

Browse files
Files changed (4) hide show
  1. .env +3 -2
  2. bootstrap_space_runtime.py +4 -3
  3. brain_server/api/main.py +18 -0
  4. kapo.env +3 -2
.env CHANGED
@@ -20,9 +20,10 @@ KAPO_LAZY_EMBED_STARTUP=1
20
  KAPO_LAZY_MODEL_STARTUP=1
21
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
22
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
23
- KAPO_REMOTE_ENV_PASSWORD_B64=Rzg5TVFpRTBSdmF5YzZIM3R2WHBkTWd1TjhZVHlsd24
24
- KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xa1NOMFVkS3hKTXg5RmhTak14cW9hNW1rd0xhNlV0cWU
25
  KAPO_SHARED_STATE_BACKEND=google_drive
26
  MODEL_PROFILE_ID=hf-debugger-qwen25-7b-instruct
27
  MODEL_REPO=Qwen/Qwen2.5-1.5B-Instruct
28
  REMOTE_BRAIN_ONLY=1
 
 
20
  KAPO_LAZY_MODEL_STARTUP=1
21
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
22
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
23
+ KAPO_REMOTE_ENV_PASSWORD_B64=M2pPZ3FSYWlIcE80SHhTQmI2TTRyU2tVQzE0bVBYNzE
24
+ KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xSjhXYzAwR190UEgwZDNubkN3X19VMkUyRUFPLXY0MGI
25
  KAPO_SHARED_STATE_BACKEND=google_drive
26
  MODEL_PROFILE_ID=hf-debugger-qwen25-7b-instruct
27
  MODEL_REPO=Qwen/Qwen2.5-1.5B-Instruct
28
  REMOTE_BRAIN_ONLY=1
29
+ SPACE_PUBLIC_URL=https://MrA7A3-AiDebuggerClean.hf.space
bootstrap_space_runtime.py CHANGED
@@ -28,12 +28,13 @@ DEFAULT_ENV = {
28
  "KAPO_LAZY_MODEL_STARTUP": "1",
29
  "KAPO_PATCH_BUNDLE_URL": "https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1",
30
  "KAPO_PATCH_MANIFEST_URL": "https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b",
31
- "KAPO_REMOTE_ENV_PASSWORD_B64": "Rzg5TVFpRTBSdmF5YzZIM3R2WHBkTWd1TjhZVHlsd24",
32
- "KAPO_REMOTE_ENV_URL_B64": "aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xa1NOMFVkS3hKTXg5RmhTak14cW9hNW1rd0xhNlV0cWU",
33
  "KAPO_SHARED_STATE_BACKEND": "google_drive",
34
  "MODEL_PROFILE_ID": "hf-debugger-qwen25-7b-instruct",
35
  "MODEL_REPO": "Qwen/Qwen2.5-1.5B-Instruct",
36
- "REMOTE_BRAIN_ONLY": "1"
 
37
  }
38
 
39
 
 
28
  "KAPO_LAZY_MODEL_STARTUP": "1",
29
  "KAPO_PATCH_BUNDLE_URL": "https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1",
30
  "KAPO_PATCH_MANIFEST_URL": "https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b",
31
+ "KAPO_REMOTE_ENV_PASSWORD_B64": "M2pPZ3FSYWlIcE80SHhTQmI2TTRyU2tVQzE0bVBYNzE",
32
+ "KAPO_REMOTE_ENV_URL_B64": "aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xSjhXYzAwR190UEgwZDNubkN3X19VMkUyRUFPLXY0MGI",
33
  "KAPO_SHARED_STATE_BACKEND": "google_drive",
34
  "MODEL_PROFILE_ID": "hf-debugger-qwen25-7b-instruct",
35
  "MODEL_REPO": "Qwen/Qwen2.5-1.5B-Instruct",
36
+ "REMOTE_BRAIN_ONLY": "1",
37
+ "SPACE_PUBLIC_URL": "https://MrA7A3-AiDebuggerClean.hf.space"
38
  }
39
 
40
 
brain_server/api/main.py CHANGED
@@ -1604,6 +1604,24 @@ def _generate_response(user_input: str, history: list[dict[str, str]], context_b
1604
  if fast_reply:
1605
  return fast_reply
1606
  if MODEL is None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1607
  if language == "ar":
1608
  return "الخدمة تعمل لكن توليد الرد الحر غير متاح الآن لأن النموذج غير محمل."
1609
  return "The Brain is online, but natural chat generation is unavailable because the model is not loaded."
 
1604
  if fast_reply:
1605
  return fast_reply
1606
  if MODEL is None:
1607
+ if _feature_enabled("KAPO_HF_INFERENCE_API", default=False):
1608
+ try:
1609
+ from huggingface_hub import InferenceClient
1610
+ prompt = _build_chat_prompt(user_input, history, context_block)
1611
+ max_tokens = 80 if language == "ar" else 96
1612
+ client = InferenceClient(api_key=(str(os.getenv("HF_TOKEN", "") or "").strip() or None))
1613
+ generated = client.text_generation(
1614
+ prompt,
1615
+ model=str(os.getenv("MODEL_REPO", DEFAULT_MODEL_REPO) or DEFAULT_MODEL_REPO).strip(),
1616
+ max_new_tokens=max_tokens,
1617
+ temperature=0.1,
1618
+ return_full_text=False,
1619
+ )
1620
+ generated_text = str(generated or "").strip()
1621
+ if generated_text:
1622
+ return generated_text
1623
+ except Exception:
1624
+ logger.warning("HF inference fallback failed", exc_info=True)
1625
  if language == "ar":
1626
  return "الخدمة تعمل لكن توليد الرد الحر غير متاح الآن لأن النموذج غير محمل."
1627
  return "The Brain is online, but natural chat generation is unavailable because the model is not loaded."
kapo.env CHANGED
@@ -20,9 +20,10 @@ KAPO_LAZY_EMBED_STARTUP=1
20
  KAPO_LAZY_MODEL_STARTUP=1
21
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
22
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
23
- KAPO_REMOTE_ENV_PASSWORD_B64=Rzg5TVFpRTBSdmF5YzZIM3R2WHBkTWd1TjhZVHlsd24
24
- KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xa1NOMFVkS3hKTXg5RmhTak14cW9hNW1rd0xhNlV0cWU
25
  KAPO_SHARED_STATE_BACKEND=google_drive
26
  MODEL_PROFILE_ID=hf-debugger-qwen25-7b-instruct
27
  MODEL_REPO=Qwen/Qwen2.5-1.5B-Instruct
28
  REMOTE_BRAIN_ONLY=1
 
 
20
  KAPO_LAZY_MODEL_STARTUP=1
21
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
22
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
23
+ KAPO_REMOTE_ENV_PASSWORD_B64=M2pPZ3FSYWlIcE80SHhTQmI2TTRyU2tVQzE0bVBYNzE
24
+ KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xSjhXYzAwR190UEgwZDNubkN3X19VMkUyRUFPLXY0MGI
25
  KAPO_SHARED_STATE_BACKEND=google_drive
26
  MODEL_PROFILE_ID=hf-debugger-qwen25-7b-instruct
27
  MODEL_REPO=Qwen/Qwen2.5-1.5B-Instruct
28
  REMOTE_BRAIN_ONLY=1
29
+ SPACE_PUBLIC_URL=https://MrA7A3-AiDebuggerClean.hf.space