Spaces:

kaurm43
/

PolyFusionAgent

Running

App Files Files Community

kaurm43 committed 10 days ago

Commit

7b735c8

verified ·

1 Parent(s): e6dcf3c

Update PolyAgent/gradio_interface.py

Browse files

Files changed (1) hide show

PolyAgent/gradio_interface.py +38 -24

PolyAgent/gradio_interface.py CHANGED Viewed

@@ -1201,7 +1201,9 @@ def gpt_only_answer(state: Dict[str, Any], prompt: str) -> str:
 # ----------------------------- Other LLMs (Hugging Face Inference) ----------------------------- #
 def llm_only_answer(state: Dict[str, Any], model_name: str, prompt: str) -> str:
     """
-    LLM-only responses using Hugging Face Inference API for non-GPT models.
     """
     ensure_orch(state)
@@ -1210,7 +1212,7 @@ def llm_only_answer(state: Dict[str, Any], model_name: str, prompt: str) -> str:
     HF_TOKEN = (os.getenv("HF_TOKEN") or "").strip()
     if not HF_TOKEN:
-        return pretty_json({"ok": False, "error": "HF_TOKEN is not set. Add HF_TOKEN=hf_... to your .env or env vars."})
     HF_MODEL_MAP = {
         "mixtral-8x22b-instruct": "mistralai/Mixtral-8x22B-Instruct-v0.1",
@@ -1228,18 +1230,22 @@ def llm_only_answer(state: Dict[str, Any], model_name: str, prompt: str) -> str:
     if not model_id:
         return pretty_json({"ok": False, "error": f"Unsupported model selection: {m}", "supported": list(HF_MODEL_MAP.keys())})
-    client = InferenceClient(model=model_id, token=HF_TOKEN)
     system = (
         "You are a polymer R&D assistant. Answer directly and clearly. "
         "Do not call tools or run web searches. If you are uncertain, state uncertainty."
     )
-    # A simple instruct-style prompt that works for text-generation endpoints
-    flat_prompt = f"{system}\n\nUser:\n{p}\n\nAssistant:\n"
     try:
-        # Try chat endpoint first (works only if the provider exposes the model as chat)
         resp = client.chat_completion(
             messages=[
                 {"role": "system", "content": system},
@@ -1249,25 +1255,33 @@ def llm_only_answer(state: Dict[str, Any], model_name: str, prompt: str) -> str:
             temperature=0.7,
         )
         return resp.choices[0].message.content or ""
-    except Exception as e:
-        msg = str(e)
-        # If provider says it's not a chat model, fall back to text generation.
-        if ("not a chat model" in msg.lower()) or ("model_not_supported" in msg.lower()):
-            try:
-                out = client.text_generation(
-                    flat_prompt,
-                    max_new_tokens=900,
-                    temperature=0.7,
-                    do_sample=True,
-                    return_full_text=False,
                 )
-                return out if isinstance(out, str) else str(out)
-            except Exception as e2:
-                return pretty_json({"ok": False, "error": str(e2), "model_id": model_id, "mode": "text_generation"})
-        return pretty_json({"ok": False, "error": msg, "model_id": model_id, "mode": "chat_completion"})
 def build_ui() -> gr.Blocks:

 # ----------------------------- Other LLMs (Hugging Face Inference) ----------------------------- #
 def llm_only_answer(state: Dict[str, Any], model_name: str, prompt: str) -> str:
     """
+    LLM-only responses via huggingface_hub.InferenceClient.
+    - Forces provider to avoid unwanted auto-routing (e.g., fireworks-ai).
+    - Tries chat_completion first; if model/provider doesn't support chat, falls back to text_generation.
     """
     ensure_orch(state)
     HF_TOKEN = (os.getenv("HF_TOKEN") or "").strip()
     if not HF_TOKEN:
+        return pretty_json({"ok": False, "error": "HF_TOKEN is not set. Add HF_TOKEN=hf_... to Space Secrets."})
     HF_MODEL_MAP = {
         "mixtral-8x22b-instruct": "mistralai/Mixtral-8x22B-Instruct-v0.1",
     if not model_id:
         return pretty_json({"ok": False, "error": f"Unsupported model selection: {m}", "supported": list(HF_MODEL_MAP.keys())})
+    # IMPORTANT: force provider so HF doesn't auto-route you to a provider that lacks the needed task
+    provider = (os.getenv("HF_PROVIDER") or "hf-inference").strip()
+    client = InferenceClient(
+        provider=provider,
+        model=model_id,
+        api_key=HF_TOKEN,  # api_key works for both HF token + provider keys
+    )
     system = (
         "You are a polymer R&D assistant. Answer directly and clearly. "
         "Do not call tools or run web searches. If you are uncertain, state uncertainty."
     )
+    # 1) Try chat (conversational)
     try:
         resp = client.chat_completion(
             messages=[
                 {"role": "system", "content": system},
             temperature=0.7,
         )
         return resp.choices[0].message.content or ""
+    except Exception as e_chat:
+        # 2) Fallback to plain text-generation (works on hf-inference; many providers don't support it)
+        try:
+            if provider != "hf-inference":
+                # text_generation is not universally supported across providers
+                raise RuntimeError(
+                    f"Chat failed and provider='{provider}' may not support text_generation. "
+                    f"Set HF_PROVIDER=hf-inference (recommended) or choose a compatible model/provider."
                 )
+            # A simple prompt wrapper for non-chat models / non-chat endpoints
+            wrapped = f"{system}\n\nUser: {p}\nAssistant:"
+            out = client.text_generation(
+                wrapped,
+                max_new_tokens=900,
+                temperature=0.7,
+                do_sample=True,
+                return_full_text=False,
+            )
+            return out if isinstance(out, str) else str(out)
+        except Exception as e_gen:
+            return pretty_json({
+                "ok": False,
+                "error": f"chat_completion failed: {e_chat}; text_generation failed: {e_gen}",
+                "model_id": model_id,
+                "provider": provider,
+            })
 def build_ui() -> gr.Blocks: