MrA7A1 committed on
Commit
8ef99ed
·
verified ·
1 Parent(s): 69e29da

Enable HF inference fallback when local model is unavailable

Browse files
Files changed (4) hide show
  1. .env +3 -3
  2. bootstrap_space_runtime.py +3 -3
  3. brain_server/api/main.py +18 -0
  4. kapo.env +3 -3
.env CHANGED
@@ -22,10 +22,10 @@ KAPO_LAZY_EMBED_STARTUP=1
22
  KAPO_LAZY_MODEL_STARTUP=1
23
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
24
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
25
- KAPO_REMOTE_ENV_PASSWORD_B64=a053SmZIcXkwMm1VQ0F6cUFIWWJ6SXBmRmtRN2pXdG0
26
- KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xeGQ0b1BqY09sT2pmYXNmMHlPTGg2WjdFSXFxbF96WVo
27
  KAPO_SHARED_STATE_BACKEND=google_drive
28
  MODEL_PROFILE_ID=hf-coder-qwen25-coder-7b-instruct
29
  MODEL_REPO=Qwen/Qwen2.5-Coder-1.5B-Instruct
30
  REMOTE_BRAIN_ONLY=1
31
- SPACE_PUBLIC_URL=https://MrA7A1-AiCoder.hf.space
 
22
  KAPO_LAZY_MODEL_STARTUP=1
23
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
24
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
25
+ KAPO_REMOTE_ENV_PASSWORD_B64=MUJYcHlrS05ldHZnU1FRYTAwaVoxUkd2blZOTlhTT2M
26
+ KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xZExHdHBubmo3d2tmVi1VdGUxV2luZks1eS10Vk1McGI
27
  KAPO_SHARED_STATE_BACKEND=google_drive
28
  MODEL_PROFILE_ID=hf-coder-qwen25-coder-7b-instruct
29
  MODEL_REPO=Qwen/Qwen2.5-Coder-1.5B-Instruct
30
  REMOTE_BRAIN_ONLY=1
31
+ SPACE_PUBLIC_URL=https://MrA7A1-AiCoderClean.hf.space
bootstrap_space_runtime.py CHANGED
@@ -30,13 +30,13 @@ DEFAULT_ENV = {
30
  "KAPO_LAZY_MODEL_STARTUP": "1",
31
  "KAPO_PATCH_BUNDLE_URL": "https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1",
32
  "KAPO_PATCH_MANIFEST_URL": "https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b",
33
- "KAPO_REMOTE_ENV_PASSWORD_B64": "a053SmZIcXkwMm1VQ0F6cUFIWWJ6SXBmRmtRN2pXdG0",
34
- "KAPO_REMOTE_ENV_URL_B64": "aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xeGQ0b1BqY09sT2pmYXNmMHlPTGg2WjdFSXFxbF96WVo",
35
  "KAPO_SHARED_STATE_BACKEND": "google_drive",
36
  "MODEL_PROFILE_ID": "hf-coder-qwen25-coder-7b-instruct",
37
  "MODEL_REPO": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
38
  "REMOTE_BRAIN_ONLY": "1",
39
- "SPACE_PUBLIC_URL": "https://MrA7A1-AiCoder.hf.space"
40
  }
41
 
42
 
 
30
  "KAPO_LAZY_MODEL_STARTUP": "1",
31
  "KAPO_PATCH_BUNDLE_URL": "https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1",
32
  "KAPO_PATCH_MANIFEST_URL": "https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b",
33
+ "KAPO_REMOTE_ENV_PASSWORD_B64": "MUJYcHlrS05ldHZnU1FRYTAwaVoxUkd2blZOTlhTT2M",
34
+ "KAPO_REMOTE_ENV_URL_B64": "aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xZExHdHBubmo3d2tmVi1VdGUxV2luZks1eS10Vk1McGI",
35
  "KAPO_SHARED_STATE_BACKEND": "google_drive",
36
  "MODEL_PROFILE_ID": "hf-coder-qwen25-coder-7b-instruct",
37
  "MODEL_REPO": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
38
  "REMOTE_BRAIN_ONLY": "1",
39
+ "SPACE_PUBLIC_URL": "https://MrA7A1-AiCoderClean.hf.space"
40
  }
41
 
42
 
brain_server/api/main.py CHANGED
@@ -1604,6 +1604,24 @@ def _generate_response(user_input: str, history: list[dict[str, str]], context_b
1604
  if fast_reply:
1605
  return fast_reply
1606
  if MODEL is None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1607
  if language == "ar":
1608
  return "الخدمة تعمل لكن توليد الرد الحر غير متاح الآن لأن النموذج غير محمل."
1609
  return "The Brain is online, but natural chat generation is unavailable because the model is not loaded."
 
1604
  if fast_reply:
1605
  return fast_reply
1606
  if MODEL is None:
1607
+ if _feature_enabled("KAPO_HF_INFERENCE_API", default=False):
1608
+ try:
1609
+ from huggingface_hub import InferenceClient
1610
+ prompt = _build_chat_prompt(user_input, history, context_block)
1611
+ max_tokens = 80 if language == "ar" else 96
1612
+ client = InferenceClient(api_key=(str(os.getenv("HF_TOKEN", "") or "").strip() or None))
1613
+ generated = client.text_generation(
1614
+ prompt,
1615
+ model=str(os.getenv("MODEL_REPO", DEFAULT_MODEL_REPO) or DEFAULT_MODEL_REPO).strip(),
1616
+ max_new_tokens=max_tokens,
1617
+ temperature=0.1,
1618
+ return_full_text=False,
1619
+ )
1620
+ generated_text = str(generated or "").strip()
1621
+ if generated_text:
1622
+ return generated_text
1623
+ except Exception:
1624
+ logger.warning("HF inference fallback failed", exc_info=True)
1625
  if language == "ar":
1626
  return "الخدمة تعمل لكن توليد الرد الحر غير متاح الآن لأن النموذج غير محمل."
1627
  return "The Brain is online, but natural chat generation is unavailable because the model is not loaded."
kapo.env CHANGED
@@ -22,10 +22,10 @@ KAPO_LAZY_EMBED_STARTUP=1
22
  KAPO_LAZY_MODEL_STARTUP=1
23
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
24
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
25
- KAPO_REMOTE_ENV_PASSWORD_B64=a053SmZIcXkwMm1VQ0F6cUFIWWJ6SXBmRmtRN2pXdG0
26
- KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xeGQ0b1BqY09sT2pmYXNmMHlPTGg2WjdFSXFxbF96WVo
27
  KAPO_SHARED_STATE_BACKEND=google_drive
28
  MODEL_PROFILE_ID=hf-coder-qwen25-coder-7b-instruct
29
  MODEL_REPO=Qwen/Qwen2.5-Coder-1.5B-Instruct
30
  REMOTE_BRAIN_ONLY=1
31
- SPACE_PUBLIC_URL=https://MrA7A1-AiCoder.hf.space
 
22
  KAPO_LAZY_MODEL_STARTUP=1
23
  KAPO_PATCH_BUNDLE_URL=https://drive.google.com/uc?export=download&id=16rIe05GZihhAz7ba8E-WibKaJKbh9eu1
24
  KAPO_PATCH_MANIFEST_URL=https://drive.google.com/uc?export=download&id=1jLuPMCA3hp9qstZZtpBNzTK0XOmLrV8b
25
+ KAPO_REMOTE_ENV_PASSWORD_B64=MUJYcHlrS05ldHZnU1FRYTAwaVoxUkd2blZOTlhTT2M
26
+ KAPO_REMOTE_ENV_URL_B64=aHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL3VjP2V4cG9ydD1kb3dubG9hZCZpZD0xZExHdHBubmo3d2tmVi1VdGUxV2luZks1eS10Vk1McGI
27
  KAPO_SHARED_STATE_BACKEND=google_drive
28
  MODEL_PROFILE_ID=hf-coder-qwen25-coder-7b-instruct
29
  MODEL_REPO=Qwen/Qwen2.5-Coder-1.5B-Instruct
30
  REMOTE_BRAIN_ONLY=1
31
+ SPACE_PUBLIC_URL=https://MrA7A1-AiCoderClean.hf.space