TurkishCodeMan committed on
Commit
26d513b
·
verified ·
1 Parent(s): bfe842a

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_model.py +10 -22
hf_model.py CHANGED
@@ -2,24 +2,22 @@
2
  """
3
  HF Router (OpenAI-compatible) chat-completions wrapper for Hugging Face Spaces.
4
 
5
- Why:
6
- - Some models (incl. some Gemma 3 variants) are served as conversational / image-text-to-text.
7
- - In that case, non-conversational text_generation is NOT supported.
8
- - So we call the HF Router chat completions endpoint directly.
9
 
10
  Requirements:
11
- - Set HF_TOKEN in Space Settings -> Secrets
12
- - Ensure your HF account accepted the model's license if gated.
13
  """
14
 
15
  import os
16
- import json
17
  import traceback
18
  from typing import List, Dict
19
 
20
  import httpx
21
 
22
  HF_TOKEN = os.getenv("HF_TOKEN")
 
23
  MODEL_ID = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
24
 
25
 
@@ -28,16 +26,10 @@ def generate_response(
28
  max_tokens: int = 512,
29
  temperature: float = 0.7,
30
  ) -> str:
31
- """
32
- Generate response using HF Router chat completions (OpenAI-compatible).
33
-
34
- Endpoint:
35
- https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions
36
- """
37
  if not HF_TOKEN:
38
  return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."
39
 
40
- url = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions"
41
  headers = {
42
  "Authorization": f"Bearer {HF_TOKEN}",
43
  "Content-Type": "application/json",
@@ -53,14 +45,11 @@ def generate_response(
53
  with httpx.Client(timeout=90) as http:
54
  r = http.post(url, headers=headers, json=payload)
55
 
56
- # If error, show status + body to debug quickly
57
- if r.status_code >= 400:
58
- body = r.text
59
- return f"Error: HTTP {r.status_code}\n\n{body}"
60
-
61
- data = r.json()
62
 
63
- # OpenAI-style response
64
  return data["choices"][0]["message"]["content"].strip()
65
 
66
  except Exception as e:
@@ -92,6 +81,5 @@ def calculate_expression(expression: str) -> str:
92
 
93
  result = eval(expr, {"__builtins__": {}}, allowed_names)
94
  return f"{result:,.2f}"
95
-
96
  except Exception as e:
97
  return f"Calculation error: {str(e)}"
 
2
  """
3
  HF Router (OpenAI-compatible) chat-completions wrapper for Hugging Face Spaces.
4
 
5
+ Uses:
6
+ POST https://router.huggingface.co/v1/chat/completions
 
 
7
 
8
  Requirements:
9
+ - HF_TOKEN must have "Inference Providers" permission
10
+ - If model is gated, accept license with the same HF account
11
  """
12
 
13
  import os
 
14
  import traceback
15
  from typing import List, Dict
16
 
17
  import httpx
18
 
19
  HF_TOKEN = os.getenv("HF_TOKEN")
20
+ # İstersen provider'ı zorlamak için: "google/gemma-3-4b-it:hf-inference"
21
  MODEL_ID = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
22
 
23
 
 
26
  max_tokens: int = 512,
27
  temperature: float = 0.7,
28
  ) -> str:
 
 
 
 
 
 
29
  if not HF_TOKEN:
30
  return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."
31
 
32
+ url = "https://router.huggingface.co/v1/chat/completions"
33
  headers = {
34
  "Authorization": f"Bearer {HF_TOKEN}",
35
  "Content-Type": "application/json",
 
45
  with httpx.Client(timeout=90) as http:
46
  r = http.post(url, headers=headers, json=payload)
47
 
48
+ if r.status_code >= 400:
49
+ # Body'yi bas: 401/403/404/429 vs hemen anlaşılır
50
+ return f"Error: HTTP {r.status_code}\n\n{r.text}"
 
 
 
51
 
52
+ data = r.json()
53
  return data["choices"][0]["message"]["content"].strip()
54
 
55
  except Exception as e:
 
81
 
82
  result = eval(expr, {"__builtins__": {}}, allowed_names)
83
  return f"{result:,.2f}"
 
84
  except Exception as e:
85
  return f"Calculation error: {str(e)}"