TurkishCodeMan committed on
Commit
bfe842a
·
verified ·
1 Parent(s): ce7981f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_model.py +29 -79
hf_model.py CHANGED
@@ -1,63 +1,47 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- HF Inference wrapper for Hugging Face Spaces.
4
 
5
- Fixes StopIteration (empty provider list) by:
6
- 1) Forcing provider="hf-inference" in InferenceClient
7
- 2) Fallback to HF Router OpenAI-compatible endpoint if needed
 
8
 
9
- Notes:
10
- - Make sure you ACCEPT Gemma license on Hugging Face with the same account as HF_TOKEN.
11
- - Add HF_TOKEN in Space Settings -> Secrets.
12
  """
13
 
14
  import os
 
15
  import traceback
16
- from typing import List, Dict, Optional
17
 
18
  import httpx
19
- from huggingface_hub import InferenceClient
20
-
21
 
22
  HF_TOKEN = os.getenv("HF_TOKEN")
23
  MODEL_ID = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
24
 
25
- # Force HF provider (instead of provider="auto")
26
- client = InferenceClient(model=MODEL_ID, token=HF_TOKEN, provider="hf-inference")
27
-
28
-
29
- def _messages_to_prompt(messages: List[Dict]) -> str:
30
- """Convert role/content messages into a simple prompt."""
31
- parts = []
32
- for m in messages:
33
- role = (m.get("role") or "user").lower()
34
- content = m.get("content") or ""
35
- if role == "system":
36
- parts.append(f"System: {content}")
37
- elif role == "assistant":
38
- parts.append(f"Assistant: {content}")
39
- else:
40
- parts.append(f"User: {content}")
41
- parts.append("Assistant:")
42
- return "\n".join(parts)
43
-
44
 
45
def _router_chat_completion(
    messages: List[Dict],
    max_tokens: int,
    temperature: float,
) -> str:
    """Fallback: call the HF Router (OpenAI-compatible) chat-completions endpoint.

    Endpoint format (hf-inference route):
        https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions

    Args:
        messages: Chat history as OpenAI-style role/content dicts.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Returns:
        The assistant reply text (stripped), or a human-readable error
        string when HF_TOKEN is missing.

    Raises:
        httpx.HTTPStatusError: If the router answers with a 4xx/5xx status.
    """
    if not HF_TOKEN:
        return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."

    url = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions"

    payload = {
        "model": MODEL_ID,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }

    with httpx.Client(timeout=60) as http:
        r = http.post(url, headers=headers, json=payload)
        r.raise_for_status()
        data = r.json()

    # Some OpenAI-compatible backends return null "content"; guard with ""
    # so we never call .strip() on None.
    content = data["choices"][0]["message"].get("content") or ""
    return content.strip()
 
 
 
79
 
 
80
 
81
def generate_response(
    messages: List[Dict],
    max_tokens: int = 512,
    temperature: float = 0.7,
) -> str:
    """Main generation function.

    1) Try HF InferenceClient.text_generation with provider="hf-inference".
    2) On ANY failure, fall back to HF Router chat completions — not only on
       StopIteration (empty provider list): conversational-only models (e.g.
       some Gemma 3 variants) make text_generation raise ValueError / HTTP
       errors instead, and those deserve the same fallback.

    Args:
        messages: Chat history as role/content dicts.
        max_tokens: Max new tokens to generate.
        temperature: Sampling temperature.

    Returns:
        The generated text, or an "Error: ..." string describing the failure.
    """
    if not HF_TOKEN:
        return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."

    try:
        # Try text-generation first (broadly supported on hf-inference).
        prompt = _messages_to_prompt(messages)
        out = client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=True,
            return_full_text=False,
        )
        return out.strip()

    except Exception as e:
        # Primary path failed (StopIteration, unsupported task, HTTP error,
        # ...): try the router chat-completions fallback before giving up.
        try:
            return _router_chat_completion(
                messages, max_tokens=max_tokens, temperature=temperature
            )
        except Exception as e2:
            return (
                f"Error: {repr(e)} and router fallback failed.\n\n"
                f"Fallback error: {repr(e2)}\n\n{traceback.format_exc()}"
            )
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ HF Router (OpenAI-compatible) chat-completions wrapper for Hugging Face Spaces.
4
 
5
+ Why:
6
+ - Some models (incl. some Gemma 3 variants) are served as conversational / image-text-to-text.
7
+ - In that case, non-conversational text_generation is NOT supported.
8
+ - So we call the HF Router chat completions endpoint directly.
9
 
10
+ Requirements:
11
+ - Set HF_TOKEN in Space Settings -> Secrets
12
+ - Ensure your HF account accepted the model's license if gated.
13
  """
14
 
15
  import os
16
+ import json
17
  import traceback
18
+ from typing import List, Dict
19
 
20
  import httpx
 
 
# Hugging Face access token; must be set in Space Settings -> Secrets.
HF_TOKEN = os.getenv("HF_TOKEN")
# Model served via the HF Router; overridable through the MODEL_ID env var.
MODEL_ID = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
def generate_response(
    messages: List[Dict],
    max_tokens: int = 512,
    temperature: float = 0.7,
) -> str:
    """Generate a reply using HF Router chat completions (OpenAI-compatible).

    Endpoint:
        https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions

    Args:
        messages: Chat history as OpenAI-style role/content dicts.
        max_tokens: Max tokens to generate.
        temperature: Sampling temperature.

    Returns:
        The assistant reply text, or an "Error: ..." string (missing token,
        HTTP failure, or unexpected exception) — this function never raises.
    """
    if not HF_TOKEN:
        return "Error: HF_TOKEN is not set. Add it in Space Settings -> Secrets."

    url = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL_ID,
        "messages": messages,
        # Was dropped in a previous edit: without it the server default caps
        # the reply length and the max_tokens parameter is silently ignored.
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

    try:
        with httpx.Client(timeout=90) as http:
            r = http.post(url, headers=headers, json=payload)

        # Surface status + body so gated-model / bad-token / unsupported-task
        # problems are easy to debug straight from the Space UI.
        if r.status_code >= 400:
            return f"Error: HTTP {r.status_code}\n\n{r.text}"

        data = r.json()

        # OpenAI-style response; some backends return null "content", so
        # guard with "" instead of crashing on None.strip().
        content = data["choices"][0]["message"].get("content") or ""
        return content.strip()

    except Exception as e:
        return f"Error: {repr(e)}\n\n{traceback.format_exc()}"