KAPO self-heal fix: conversational HF fallback
brain_server/api/main.py (+48 -12)
```diff
@@ -1664,6 +1664,48 @@ def _project_specific_fast_reply(user_input: str) -> str:
     return ""
 
 
+def _hf_chat_messages(user_input: str, history: list[dict[str, str]], context_block: str) -> list[dict[str, str]]:
+    messages: list[dict[str, str]] = []
+    context_text = str(context_block or "").strip()
+    if context_text:
+        messages.append({"role": "system", "content": context_text})
+    for item in history or []:
+        role = str((item or {}).get("role") or "").strip().lower()
+        content = str((item or {}).get("content") or "").strip()
+        if role in {"system", "user", "assistant"} and content:
+            messages.append({"role": role, "content": content})
+    messages.append({"role": "user", "content": str(user_input or "").strip()})
+    return messages
+
+
+def _hf_chat_completion_text(client: Any, messages: list[dict[str, str]], max_tokens: int) -> str:
+    result = client.chat_completion(messages=messages, max_tokens=max_tokens)
+    choices = getattr(result, "choices", None)
+    if choices is None and isinstance(result, dict):
+        choices = result.get("choices")
+    choices = choices or []
+    if not choices:
+        return ""
+    first = choices[0]
+    message = getattr(first, "message", None)
+    if message is None and isinstance(first, dict):
+        message = first.get("message")
+    content = getattr(message, "content", None)
+    if content is None and isinstance(message, dict):
+        content = message.get("content")
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, dict):
+                text = item.get("text")
+                if text:
+                    parts.append(str(text))
+            elif item:
+                parts.append(str(item))
+        content = "\n".join(part for part in parts if part.strip())
+    return str(content or "").strip()
+
+
 def _generate_response(user_input: str, history: list[dict[str, str]], context_block: str) -> str:
     language = _detect_language(user_input)
     exact_reply = _extract_exact_reply_instruction_safe(user_input)
```
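`_hf_chat_messages` turns the stored history into the OpenAI-style message list that `InferenceClient.chat_completion` expects: the retrieval context block rides along as a `system` message, entries with unknown roles or empty content are dropped, and the current user turn is always appended last. A standalone copy of that logic with a made-up history (the sample data and the unprefixed function name are illustrative, not from the repo):

```python
def hf_chat_messages(user_input, history, context_block):
    # Standalone copy of _hf_chat_messages, for illustration only.
    messages = []
    context_text = str(context_block or "").strip()
    if context_text:
        messages.append({"role": "system", "content": context_text})
    for item in history or []:
        role = str((item or {}).get("role") or "").strip().lower()
        content = str((item or {}).get("content") or "").strip()
        if role in {"system", "user", "assistant"} and content:
            messages.append({"role": role, "content": content})
    messages.append({"role": "user", "content": str(user_input or "").strip()})
    return messages

history = [
    {"role": "user", "content": "ping"},
    {"role": "assistant", "content": "pong"},
    {"role": "tool", "content": "dropped: role not in the allowed set"},
    {"role": "user", "content": ""},  # dropped: empty content
]
print(hf_chat_messages("hello", history, "You are KAPO."))
# [{'role': 'system', 'content': 'You are KAPO.'},
#  {'role': 'user', 'content': 'ping'},
#  {'role': 'assistant', 'content': 'pong'},
#  {'role': 'user', 'content': 'hello'}]
```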
```diff
@@ -1678,22 +1720,16 @@ def _generate_response(user_input: str, history: list[dict[str, str]], context_b
     try:
         from huggingface_hub import InferenceClient
         prompt = _build_chat_prompt(user_input, history, context_block)
+        messages = _hf_chat_messages(user_input, history, context_block)
         max_tokens = 80 if language == "ar" else 96
         model_repo = str(os.getenv("MODEL_REPO", DEFAULT_MODEL_REPO) or DEFAULT_MODEL_REPO).strip()
         client = InferenceClient(model=model_repo, api_key=(str(os.getenv("HF_TOKEN", "") or "").strip() or None))
         try:
-            …
-            …
-            …
-            …
-            …
-            if choices:
-                message = getattr(choices[0], "message", None)
-                generated_text = str(getattr(message, "content", "") or "").strip()
-                if generated_text:
-                    return generated_text
-        except Exception:
-            pass
+            generated_text = _hf_chat_completion_text(client, messages, max_tokens)
+            if generated_text:
+                return generated_text
+        except Exception as exc:
+            logger.info("HF chat-completion path failed; falling back to text-generation (%s)", exc)
         generated = client.text_generation(
             prompt,
             max_new_tokens=max_tokens,
```
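The call-site change keeps both representations alive: the flat `prompt` for the legacy path and the `messages` list for the conversational one. The chat attempt runs inside its own `try` block, and any failure is logged at INFO and swallowed so the original `text_generation` call still executes, which is what makes this a self-heal rather than a behavior change. A runnable sketch of that control flow, with hypothetical stubs standing in for the two `InferenceClient` calls:

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("fallback_demo")

def chat_completion_path(messages: list[dict[str, str]]) -> str:
    # Stub for client.chat_completion + _hf_chat_completion_text; raises the
    # way a backend without the conversational task would.
    raise RuntimeError("model does not support the conversational task")

def text_generation_path(prompt: str) -> str:
    # Stub for client.text_generation on the flat prompt.
    return "fallback reply"

def generate(prompt: str, messages: list[dict[str, str]]) -> str:
    # Mirrors the patched _generate_response: the chat path may fail for any
    # reason, but the legacy prompt path must still get its chance to answer.
    try:
        generated_text = chat_completion_path(messages)
        if generated_text:
            return generated_text
    except Exception as exc:
        logger.info("HF chat-completion path failed; falling back to text-generation (%s)", exc)
    return text_generation_path(prompt)

print(generate("User: hi\nAssistant:", [{"role": "user", "content": "hi"}]))  # fallback reply
```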
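`_hf_chat_completion_text` deliberately avoids committing to one response shape. Depending on the `huggingface_hub` version and the backing provider, `chat_completion` may hand back a dataclass-like object or a plain dict, and `message.content` may itself be a list of content parts rather than a string. A condensed, stubbed restatement of the probing logic (no network; `extract_text` is an illustrative name) showing that all three shapes normalize to the same text:

```python
from types import SimpleNamespace

def extract_text(result):
    # Condensed restatement of _hf_chat_completion_text's attribute-then-key probing.
    choices = getattr(result, "choices", None)
    if choices is None and isinstance(result, dict):
        choices = result.get("choices")
    if not choices:
        return ""
    first = choices[0]
    message = getattr(first, "message", None)
    if message is None and isinstance(first, dict):
        message = first.get("message")
    content = getattr(message, "content", None)
    if content is None and isinstance(message, dict):
        content = message.get("content")
    if isinstance(content, list):
        # Content-parts shape: keep the text fragments, drop empties.
        content = "\n".join(
            str(item.get("text")) if isinstance(item, dict) else str(item)
            for item in content
            if (item.get("text") if isinstance(item, dict) else item)
        )
    return str(content or "").strip()

obj_style = SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content="hi"))])
dict_style = {"choices": [{"message": {"content": "hi"}}]}
parts_style = {"choices": [{"message": {"content": [{"type": "text", "text": "hi"}]}}]}

assert extract_text(obj_style) == extract_text(dict_style) == extract_text(parts_style) == "hi"
```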