TurkishCodeMan committed on
Commit
bfe842a
·
verified ·
1 Parent(s): ce7981f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_model.py +29 -79
hf_model.py CHANGED
@@ -1,63 +1,47 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- HF Inference wrapper for Hugging Face Spaces.
4
 
5
- Fixes StopIteration (empty provider list) by:
6
- 1) Forcing provider="hf-inference" in InferenceClient
7
- 2) Fallback to HF Router OpenAI-compatible endpoint if needed
 
8
 
9
- Notes:
10
- - Make sure you ACCEPT Gemma license on Hugging Face with the same account as HF_TOKEN.
11
- - Add HF_TOKEN in Space Settings -> Secrets.
12
  """
13
 
14
  import os
 
15
  import traceback
16
- from typing import List, Dict, Optional
17
 
18
  import httpx
19
- from huggingface_hub import InferenceClient
20
-
21
 
22
  HF_TOKEN = os.getenv("HF_TOKEN")
23
  MODEL_ID = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
24
 
25
- # Force HF provider (instead of provider="auto")
26
- client = InferenceClient(model=MODEL_ID, token=HF_TOKEN, provider="hf-inference")
27
-
28
-
29
- def _messages_to_prompt(messages: List[Dict]) -> str:
30
- """Convert role/content messages into a simple prompt."""
31
- parts = []
32
- for m in messages:
33
- role = (m.get("role") or "user").lower()
34
- content = m.get("content") or ""
35
- if role == "system":
36
- parts.append(f"System: {content}")
37
- elif role == "assistant":
38
- parts.append(f"Assistant: {content}")
39
- else:
40
- parts.append(f"User: {content}")
41
- parts.append("Assistant:")
42
- return "\n".join(parts)
43
-
44
 
45
def _router_chat_completion(
    messages: List[Dict],
    max_tokens: int,
    temperature: float,
) -> str:
    """Fallback: call the HF Router (OpenAI-compatible) chat-completions endpoint.

    Endpoint format (hf-inference route):
        https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions

    Args:
        messages: Chat history as OpenAI-style role/content dicts.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Returns:
        The assistant reply text (stripped), or a human-readable error
        string when HF_TOKEN is missing.

    Raises:
        httpx.HTTPStatusError: If the router answers with a 4xx/5xx status.
    """
    if not HF_TOKEN:
        return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."

    url = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions"

    payload = {
        "model": MODEL_ID,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }

    with httpx.Client(timeout=60) as http:
        r = http.post(url, headers=headers, json=payload)
        r.raise_for_status()
        data = r.json()

    # Some OpenAI-compatible backends return null "content"; guard with ""
    # so we never call .strip() on None.
    content = data["choices"][0]["message"].get("content") or ""
    return content.strip()
 
 
 
79
 
 
80
 
81
def generate_response(
    messages: List[Dict],
    max_tokens: int = 512,
    temperature: float = 0.7,
) -> str:
    """Main generation function.

    1) Try HF InferenceClient.text_generation with provider="hf-inference".
    2) On ANY failure, fall back to HF Router chat completions — not only on
       StopIteration (empty provider list): conversational-only models (e.g.
       some Gemma 3 variants) make text_generation raise ValueError / HTTP
       errors instead, and those deserve the same fallback.

    Args:
        messages: Chat history as role/content dicts.
        max_tokens: Max new tokens to generate.
        temperature: Sampling temperature.

    Returns:
        The generated text, or an "Error: ..." string describing the failure.
    """
    if not HF_TOKEN:
        return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."

    try:
        # Try text-generation first (broadly supported on hf-inference).
        prompt = _messages_to_prompt(messages)
        out = client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=True,
            return_full_text=False,
        )
        return out.strip()

    except Exception as e:
        # Primary path failed (StopIteration, unsupported task, HTTP error,
        # ...): try the router chat-completions fallback before giving up.
        try:
            return _router_chat_completion(
                messages, max_tokens=max_tokens, temperature=temperature
            )
        except Exception as e2:
            return (
                f"Error: {repr(e)} and router fallback failed.\n\n"
                f"Fallback error: {repr(e2)}\n\n{traceback.format_exc()}"
            )
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ HF Router (OpenAI-compatible) chat-completions wrapper for Hugging Face Spaces.
4
 
5
+ Why:
6
+ - Some models (incl. some Gemma 3 variants) are served as conversational / image-text-to-text.
7
+ - In that case, non-conversational text_generation is NOT supported.
8
+ - So we call the HF Router chat completions endpoint directly.
9
 
10
+ Requirements:
11
+ - Set HF_TOKEN in Space Settings -> Secrets
12
+ - Ensure your HF account accepted the model's license if gated.
13
  """
14
 
15
  import os
16
+ import json
17
  import traceback
18
+ from typing import List, Dict
19
 
20
  import httpx
 
 
# Hugging Face access token; must be set in Space Settings -> Secrets.
HF_TOKEN = os.getenv("HF_TOKEN")
# Model served via the HF Router; overridable through the MODEL_ID env var.
MODEL_ID = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
def generate_response(
    messages: List[Dict],
    max_tokens: int = 512,
    temperature: float = 0.7,
) -> str:
    """Generate a reply using HF Router chat completions (OpenAI-compatible).

    Endpoint:
        https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions

    Args:
        messages: Chat history as OpenAI-style role/content dicts.
        max_tokens: Max tokens to generate.
        temperature: Sampling temperature.

    Returns:
        The assistant reply text, or an "Error: ..." string (missing token,
        HTTP failure, or unexpected exception) — this function never raises.
    """
    if not HF_TOKEN:
        return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."

    url = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL_ID,
        "messages": messages,
        # Was dropped in a previous edit: without it the server default caps
        # the reply length and the max_tokens parameter is silently ignored.
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

    try:
        with httpx.Client(timeout=90) as http:
            r = http.post(url, headers=headers, json=payload)

        # Surface status + body so gated-model / bad-token / unsupported-task
        # problems are easy to debug straight from the Space UI.
        if r.status_code >= 400:
            return f"Error: HTTP {r.status_code}\n\n{r.text}"

        data = r.json()

        # OpenAI-style response; some backends return null "content", so
        # guard with "" instead of crashing on None.strip().
        content = data["choices"][0]["message"].get("content") or ""
        return content.strip()

    except Exception as e:
        return f"Error: {repr(e)}\n\n{traceback.format_exc()}"