TurkishCodeMan committed on
Commit
26d513b
·
verified ·
1 Parent(s): bfe842a

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_model.py +10 -22
hf_model.py CHANGED
@@ -2,24 +2,22 @@
2
  """
3
  HF Router (OpenAI-compatible) chat-completions wrapper for Hugging Face Spaces.
4
 
5
- Why:
6
- - Some models (incl. some Gemma 3 variants) are served as conversational / image-text-to-text.
7
- - In that case, non-conversational text_generation is NOT supported.
8
- - So we call the HF Router chat completions endpoint directly.
9
 
10
  Requirements:
11
- - Set HF_TOKEN in Space Settings -> Secrets
12
- - Ensure your HF account accepted the model's license if gated.
13
  """
14
 
15
  import os
16
- import json
17
  import traceback
18
  from typing import List, Dict
19
 
20
  import httpx
21
 
22
  HF_TOKEN = os.getenv("HF_TOKEN")
 
23
  MODEL_ID = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
24
 
25
 
@@ -28,16 +26,10 @@ def generate_response(
28
  max_tokens: int = 512,
29
  temperature: float = 0.7,
30
  ) -> str:
31
- """
32
- Generate response using HF Router chat completions (OpenAI-compatible).
33
-
34
- Endpoint:
35
- https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions
36
- """
37
  if not HF_TOKEN:
38
  return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."
39
 
40
- url = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions"
41
  headers = {
42
  "Authorization": f"Bearer {HF_TOKEN}",
43
  "Content-Type": "application/json",
@@ -53,14 +45,11 @@ def generate_response(
53
  with httpx.Client(timeout=90) as http:
54
  r = http.post(url, headers=headers, json=payload)
55
 
56
- # If error, show status + body to debug quickly
57
- if r.status_code >= 400:
58
- body = r.text
59
- return f"Error: HTTP {r.status_code}\n\n{body}"
60
-
61
- data = r.json()
62
 
63
- # OpenAI-style response
64
  return data["choices"][0]["message"]["content"].strip()
65
 
66
  except Exception as e:
@@ -92,6 +81,5 @@ def calculate_expression(expression: str) -> str:
92
 
93
  result = eval(expr, {"__builtins__": {}}, allowed_names)
94
  return f"{result:,.2f}"
95
-
96
  except Exception as e:
97
  return f"Calculation error: {str(e)}"
 
2
  """
3
  HF Router (OpenAI-compatible) chat-completions wrapper for Hugging Face Spaces.
4
 
5
+ Uses:
6
+ POST https://router.huggingface.co/v1/chat/completions
 
 
7
 
8
  Requirements:
9
+ - HF_TOKEN must have "Inference Providers" permission
10
+ - If model is gated, accept license with the same HF account
11
  """
12
 
13
  import os
 
14
  import traceback
15
  from typing import List, Dict
16
 
17
  import httpx
18
 
19
  HF_TOKEN = os.getenv("HF_TOKEN")
20
+ # İstersen provider'ı zorlamak için: "google/gemma-3-4b-it:hf-inference"
21
  MODEL_ID = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
22
 
23
 
 
26
  max_tokens: int = 512,
27
  temperature: float = 0.7,
28
  ) -> str:
 
 
 
 
 
 
29
  if not HF_TOKEN:
30
  return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."
31
 
32
+ url = "https://router.huggingface.co/v1/chat/completions"
33
  headers = {
34
  "Authorization": f"Bearer {HF_TOKEN}",
35
  "Content-Type": "application/json",
 
45
  with httpx.Client(timeout=90) as http:
46
  r = http.post(url, headers=headers, json=payload)
47
 
48
+ if r.status_code >= 400:
49
+ # Body'yi bas: 401/403/404/429 vs hemen anlaşılır
50
+ return f"Error: HTTP {r.status_code}\n\n{r.text}"
 
 
 
51
 
52
+ data = r.json()
53
  return data["choices"][0]["message"]["content"].strip()
54
 
55
  except Exception as e:
 
81
 
82
  result = eval(expr, {"__builtins__": {}}, allowed_names)
83
  return f"{result:,.2f}"
 
84
  except Exception as e:
85
  return f"Calculation error: {str(e)}"