TurkishCodeMan committed on
Commit
ce7981f
·
verified ·
1 Parent(s): 28a0f3c

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_model.py +65 -27
hf_model.py CHANGED
@@ -1,66 +1,99 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- HuggingFace Inference API Model Wrapper
4
- Uses HuggingFace InferenceClient with text_generation (more compatible than chat.completions).
 
 
 
 
 
 
 
5
  """
6
 
7
  import os
8
  import traceback
9
- from typing import List, Dict
10
 
 
11
  from huggingface_hub import InferenceClient
12
 
13
- # ---- Config ----
14
  HF_TOKEN = os.getenv("HF_TOKEN")
15
  MODEL_ID = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
16
 
17
- # Initialize client (bind model here so calls don't need model=...)
18
- client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
19
 
20
 
21
  def _messages_to_prompt(messages: List[Dict]) -> str:
22
- """
23
- Convert OpenAI-style messages (role/content) to a simple prompt.
24
- This is a generic format that works with text-generation endpoints.
25
- """
26
  parts = []
27
  for m in messages:
28
  role = (m.get("role") or "user").lower()
29
  content = m.get("content") or ""
30
-
31
  if role == "system":
32
  parts.append(f"System: {content}")
33
  elif role == "assistant":
34
  parts.append(f"Assistant: {content}")
35
  else:
36
  parts.append(f"User: {content}")
37
-
38
  parts.append("Assistant:")
39
  return "\n".join(parts)
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def generate_response(
43
  messages: List[Dict],
44
  max_tokens: int = 512,
45
  temperature: float = 0.7,
46
  ) -> str:
47
  """
48
- Generate response using HF Inference API via text_generation.
49
-
50
- Args:
51
- messages: List of message dicts with 'role' and 'content'
52
- max_tokens: Maximum new tokens to generate
53
- temperature: Sampling temperature
54
-
55
- Returns:
56
- Generated text response (or detailed error)
57
  """
58
  try:
59
  if not HF_TOKEN:
60
  return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."
61
 
 
62
  prompt = _messages_to_prompt(messages)
63
-
64
  out = client.text_generation(
65
  prompt,
66
  max_new_tokens=max_tokens,
@@ -68,10 +101,18 @@ def generate_response(
68
  do_sample=True,
69
  return_full_text=False,
70
  )
71
-
72
- # InferenceClient.text_generation returns a string
73
  return out.strip()
74
 
 
 
 
 
 
 
 
 
 
 
75
  except Exception as e:
76
  return f"Error: {repr(e)}\n\n{traceback.format_exc()}"
77
 
@@ -96,10 +137,7 @@ def calculate_expression(expression: str) -> str:
96
 
97
  try:
98
  expr = expression.strip()
99
-
100
- # Allow only digits/operators/parentheses/spaces/dots and ** for power
101
  if not re.match(r"^[\d\s\+\-\*\/\.\(\)\^]+$", expr.replace("**", "^")):
102
- # If it's not a pure math string, bail out gracefully
103
  return "Calculation error: invalid characters in expression."
104
 
105
  result = eval(expr, {"__builtins__": {}}, allowed_names)
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ HF Inference wrapper for Hugging Face Spaces.
4
+
5
+ Fixes StopIteration (empty provider list) by:
6
+ 1) Forcing provider="hf-inference" in InferenceClient
7
+ 2) Fallback to HF Router OpenAI-compatible endpoint if needed
8
+
9
+ Notes:
10
+ - Make sure you ACCEPT Gemma license on Hugging Face with the same account as HF_TOKEN.
11
+ - Add HF_TOKEN in Space Settings -> Secrets.
12
  """
13
 
14
  import os
15
  import traceback
16
+ from typing import List, Dict, Optional
17
 
18
+ import httpx
19
  from huggingface_hub import InferenceClient
20
 
21
+
22
  HF_TOKEN = os.getenv("HF_TOKEN")
23
  MODEL_ID = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
24
 
25
+ # Force HF provider (instead of provider="auto")
26
+ client = InferenceClient(model=MODEL_ID, token=HF_TOKEN, provider="hf-inference")
27
 
28
 
29
  def _messages_to_prompt(messages: List[Dict]) -> str:
30
+ """Convert role/content messages into a simple prompt."""
 
 
 
31
  parts = []
32
  for m in messages:
33
  role = (m.get("role") or "user").lower()
34
  content = m.get("content") or ""
 
35
  if role == "system":
36
  parts.append(f"System: {content}")
37
  elif role == "assistant":
38
  parts.append(f"Assistant: {content}")
39
  else:
40
  parts.append(f"User: {content}")
 
41
  parts.append("Assistant:")
42
  return "\n".join(parts)
43
 
44
 
45
+ def _router_chat_completion(
46
+ messages: List[Dict],
47
+ max_tokens: int,
48
+ temperature: float,
49
+ ) -> str:
50
+ """
51
+ Fallback: call HF Router (OpenAI-compatible) endpoint.
52
+
53
+ Endpoint format (hf-inference route):
54
+ https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions
55
+ """
56
+ if not HF_TOKEN:
57
+ return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."
58
+
59
+ url = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions"
60
+
61
+ payload = {
62
+ "model": MODEL_ID,
63
+ "messages": messages,
64
+ "max_tokens": max_tokens,
65
+ "temperature": temperature,
66
+ }
67
+
68
+ headers = {
69
+ "Authorization": f"Bearer {HF_TOKEN}",
70
+ "Content-Type": "application/json",
71
+ }
72
+
73
+ with httpx.Client(timeout=60) as http:
74
+ r = http.post(url, headers=headers, json=payload)
75
+ r.raise_for_status()
76
+ data = r.json()
77
+
78
+ return data["choices"][0]["message"]["content"].strip()
79
+
80
+
81
  def generate_response(
82
  messages: List[Dict],
83
  max_tokens: int = 512,
84
  temperature: float = 0.7,
85
  ) -> str:
86
  """
87
+ Main generation function.
88
+ 1) Try HF InferenceClient.text_generation with provider="hf-inference"
89
+ 2) If StopIteration / provider issues happen, fallback to HF Router chat completions
 
 
 
 
 
 
90
  """
91
  try:
92
  if not HF_TOKEN:
93
  return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."
94
 
95
+ # Try text-generation (broadly supported)
96
  prompt = _messages_to_prompt(messages)
 
97
  out = client.text_generation(
98
  prompt,
99
  max_new_tokens=max_tokens,
 
101
  do_sample=True,
102
  return_full_text=False,
103
  )
 
 
104
  return out.strip()
105
 
106
+ except StopIteration:
107
+ # Provider list empty: try router fallback
108
+ try:
109
+ return _router_chat_completion(messages, max_tokens=max_tokens, temperature=temperature)
110
+ except Exception as e2:
111
+ return (
112
+ "Error: StopIteration() and router fallback failed.\n\n"
113
+ f"Fallback error: {repr(e2)}\n\n{traceback.format_exc()}"
114
+ )
115
+
116
  except Exception as e:
117
  return f"Error: {repr(e)}\n\n{traceback.format_exc()}"
118
 
 
137
 
138
  try:
139
  expr = expression.strip()
 
 
140
  if not re.match(r"^[\d\s\+\-\*\/\.\(\)\^]+$", expr.replace("**", "^")):
 
141
  return "Calculation error: invalid characters in expression."
142
 
143
  result = eval(expr, {"__builtins__": {}}, allowed_names)