Spaces:

soupstick
/

marketplace-intelligence

Sleeping

App Files Community

soupstick committed on Nov 1, 2025

Commit

0c36357

1 Parent(s): 32115de

Fix HF API ERROR

Browse files

Files changed (4) hide show

.env.example +3 -2
api/inference.py +27 -17
api/providers.py +3 -4
app.py +5 -5

.env.example CHANGED Viewed

@@ -5,7 +5,8 @@ AGENT_API_URL=http://localhost:7861
 DBT_PROFILES_DIR=./dbt_project/profiles
 # LLM Provider Configuration
-LLM_PROVIDER=hf
 HF_TOKEN=YOUR_TOKEN
 HF_ROUTER_MODEL=Qwen/Qwen3-Coder-30B-A3B-Instruct:fireworks-ai
@@ -14,7 +15,7 @@ LLM_MODEL_GEN=Qwen/Qwen3-Coder-30B-A3B-Instruct:fireworks-ai
 LLM_MODEL_REV=Qwen/Qwen3-Coder-30B-A3B-Instruct:fireworks-ai
 # Optional Backend API URL (for existing functionality)
-API_URL=
 # Cache Configuration
 TRANSFORMERS_CACHE=/tmp/cache/transformers

 DBT_PROFILES_DIR=./dbt_project/profiles
 # LLM Provider Configuration
+LLM_PROVIDER=hf_router
+API_URL=https://router.huggingface.co/v1/chat/completions
 HF_TOKEN=YOUR_TOKEN
 HF_ROUTER_MODEL=Qwen/Qwen3-Coder-30B-A3B-Instruct:fireworks-ai
 LLM_MODEL_REV=Qwen/Qwen3-Coder-30B-A3B-Instruct:fireworks-ai
 # Optional Backend API URL (for existing functionality)
+# API_URL can point to any compatible backend; it is also the endpoint used by the HF Router client configured above.
 # Cache Configuration
 TRANSFORMERS_CACHE=/tmp/cache/transformers

api/inference.py CHANGED Viewed

@@ -1,25 +1,35 @@
-import os
-import requests
-from typing import Optional
-API_URL = "https://router.huggingface.co/v1/chat/completions"
-def _call_llm(prompt: str, max_tokens: int = 512, temperature: float = 0.2, model: Optional[str] = None) -> str:
-    hf_token = os.getenv("HF_TOKEN")
-    if not hf_token:
-        raise RuntimeError("Set HF_TOKEN in env")
-    headers = {"Authorization": f"Bearer {hf_token}"}
-    payload = {
-        "model": model or os.getenv("HF_ROUTER_MODEL", "Qwen/Qwen3-Coder-30B-A3B-Instruct:fireworks-ai"),
         "messages": [{"role": "user", "content": prompt}],
         "max_tokens": max_tokens,
-        "temperature": temperature,
-        "stream": False,
     }
-    resp = requests.post(API_URL, headers=headers, json=payload, timeout=60)
     if resp.status_code != 200:
-        print("HF Router error:", resp.text)
-        resp.raise_for_status()
-    return resp.json()["choices"][0]["message"]["content"]

+import os, requests, json
+ROUTER_URL = os.getenv("API_URL", "https://router.huggingface.co/v1/chat/completions")
+DEFAULT_MODEL = os.getenv("HF_ROUTER_MODEL", "Qwen/Qwen3-Coder-30B-A3B-Instruct:fireworks-ai")
+def _call_llm(prompt: str, max_tokens: int = 512, temperature: float = 0.2, model: str | None = None) -> str:
+    token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    if not token:
+        raise RuntimeError("Set HF_TOKEN (or HUGGINGFACEHUB_API_TOKEN) in env")
+    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+    body = {
+        "model": model or DEFAULT_MODEL,
         "messages": [{"role": "user", "content": prompt}],
         "max_tokens": max_tokens,
+        "temperature": float(temperature),
+        "stream": False
     }
+    resp = requests.post(ROUTER_URL, headers=headers, json=body, timeout=60)
     if resp.status_code != 200:
+        # Surface router/provider error clearly
+        try:
+            failing_model = body.get("model")
+        except Exception:
+            failing_model = None
+        print(f"HF Router error {resp.status_code} for model={failing_model}: {resp.text}")
+        raise RuntimeError(f"HF Router error {resp.status_code}: {resp.text}")
+    data = resp.json()
+    try:
+        return data["choices"][0]["message"]["content"]
+    except Exception:
+        raise RuntimeError(f"Unexpected HF Router response: {json.dumps(data)[:800]}")

api/providers.py CHANGED Viewed

@@ -42,9 +42,8 @@ def extract_json(text: str):
 # ---------- Unified provider call via HF Router ----------
 def llm_call(kind: str, prompt: str) -> str:
-    """Unified HF Router call. kind: 'gen' or 'rev' chooses env model."""
-    model = os.getenv("LLM_MODEL_GEN") if kind == "gen" else os.getenv("LLM_MODEL_REV")
     if not model:
-        raise RuntimeError(f"Missing model id for kind={kind}. Set LLM_MODEL_GEN/REV.")
     return _router_call(prompt, max_tokens=256, temperature=0.0, model=model)

 # ---------- Unified provider call via HF Router ----------
 def llm_call(kind: str, prompt: str) -> str:
+    """Unified HF Router call. Model is taken from HF_ROUTER_MODEL env."""
+    model = os.getenv("HF_ROUTER_MODEL")
     if not model:
+        raise RuntimeError("Set HF_ROUTER_MODEL in env for HF Router.")
     return _router_call(prompt, max_tokens=256, temperature=0.0, model=model)

app.py CHANGED Viewed

@@ -119,8 +119,8 @@ def _provider_has_creds(provider: str) -> bool:
 def _call_llm(provider: str, model: str, prompt: str) -> str:
-    # Delegate to unified HF Router call; keep signature for compatibility
-    return _router_call(prompt, max_tokens=400, temperature=0.0, model=model)
 def _extract_sql_from_text(text: str) -> str:
@@ -171,7 +171,7 @@ def _gen_sql(question: str, schema: str, provider: str, model: str, api_url: str
             return _enforce_limits(candidate)
         except Exception:
             st.warning(REMOTE_ERROR_HINT)
-    llm_output = _router_call(prompt, max_tokens=400, temperature=0.0, model=model)
     return _enforce_limits(_extract_sql_from_text(llm_output))
@@ -227,7 +227,7 @@ def _review_sql(question: str, sql: str, schema: str, provider: str, model: str)
         "Return JSON with keys reasoning, ok (true/false), fixed_sql."
     )
     try:
-        llm_response = _router_call(prompt, max_tokens=400, temperature=0.0, model=model)
         parsed = _extract_json(llm_response)
         if parsed:
             return parsed
@@ -268,7 +268,7 @@ def _suggest_chart(df: pd.DataFrame, provider: str, model: str) -> Optional[Dict
     column_info = ", ".join(f"{col} ({df[col].dtype})" for col in df.columns)
     prompt = PROMPT_DASHBOARD.format(column_info=column_info)
     try:
-        raw = _router_call(prompt, max_tokens=400, temperature=0.0, model=model)
         parsed = _extract_json(raw)
         if isinstance(parsed, dict):
             chart_type = parsed.get("chart_type")

 def _call_llm(provider: str, model: str, prompt: str) -> str:
+    # Delegate to unified HF Router call; always use HF_ROUTER_MODEL with the router
+    return _router_call(prompt, max_tokens=400, temperature=0.0, model=os.getenv('HF_ROUTER_MODEL'))
 def _extract_sql_from_text(text: str) -> str:
             return _enforce_limits(candidate)
         except Exception:
             st.warning(REMOTE_ERROR_HINT)
+    llm_output = _router_call(prompt, max_tokens=400, temperature=0.0, model=os.getenv('HF_ROUTER_MODEL'))
     return _enforce_limits(_extract_sql_from_text(llm_output))
         "Return JSON with keys reasoning, ok (true/false), fixed_sql."
     )
     try:
+        llm_response = _router_call(prompt, max_tokens=400, temperature=0.0, model=os.getenv('HF_ROUTER_MODEL'))
         parsed = _extract_json(llm_response)
         if parsed:
             return parsed
     column_info = ", ".join(f"{col} ({df[col].dtype})" for col in df.columns)
     prompt = PROMPT_DASHBOARD.format(column_info=column_info)
     try:
+        raw = _router_call(prompt, max_tokens=400, temperature=0.0, model=os.getenv('HF_ROUTER_MODEL'))
         parsed = _extract_json(raw)
         if isinstance(parsed, dict):
             chart_type = parsed.get("chart_type")