Pikeras committed on
Commit
b7fdad8
·
verified ·
1 Parent(s): 2f2fc9b

Update src/web/hf_client.py

Browse files
Files changed (1) hide show
  1. src/web/hf_client.py +32 -4
src/web/hf_client.py CHANGED
@@ -10,12 +10,13 @@ def generar_respuesta_hf(
10
  timeout_segundos: int,
11
  numero_reintentos: int,
12
  ) -> str:
13
- url = f"https://router.huggingface.co/hf-inference/models/{model_id}"
 
14
  headers = {
15
  "Authorization": f"Bearer {api_key}",
16
  "Content-Type": "application/json",
17
  }
18
- payload = {
19
  "inputs": prompt,
20
  "parameters": {
21
  "return_full_text": False,
@@ -23,14 +24,41 @@ def generar_respuesta_hf(
23
  "temperature": 0.0,
24
  },
25
  }
 
 
 
 
 
 
26
 
27
  ultimo_error = None
28
  for _ in range(numero_reintentos):
29
  try:
30
- response = requests.post(url, headers=headers, json=payload, timeout=timeout_segundos)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  if response.status_code >= 400:
32
- ultimo_error = f"HTTP {response.status_code}: {response.text[:200]}"
 
 
 
 
 
 
33
  continue
 
34
  body = response.json()
35
 
36
  if isinstance(body, list) and body:
 
10
  timeout_segundos: int,
11
  numero_reintentos: int,
12
  ) -> str:
13
+ url_hf_inference = f"https://router.huggingface.co/hf-inference/models/{model_id}"
14
+ url_chat = "https://router.huggingface.co/v1/chat/completions"
15
  headers = {
16
  "Authorization": f"Bearer {api_key}",
17
  "Content-Type": "application/json",
18
  }
19
+ payload_hf_inference = {
20
  "inputs": prompt,
21
  "parameters": {
22
  "return_full_text": False,
 
24
  "temperature": 0.0,
25
  },
26
  }
27
+ payload_chat = {
28
+ "model": model_id,
29
+ "messages": [{"role": "user", "content": prompt}],
30
+ "max_tokens": 64,
31
+ "temperature": 0.0,
32
+ }
33
 
34
  ultimo_error = None
35
  for _ in range(numero_reintentos):
36
  try:
37
+ response = requests.post(
38
+ url_hf_inference,
39
+ headers=headers,
40
+ json=payload_hf_inference,
41
+ timeout=timeout_segundos,
42
+ )
43
+
44
+ if response.status_code == 404:
45
+ response = requests.post(
46
+ url_chat,
47
+ headers=headers,
48
+ json=payload_chat,
49
+ timeout=timeout_segundos,
50
+ )
51
+
52
  if response.status_code >= 400:
53
+ if response.status_code == 404:
54
+ ultimo_error = (
55
+ "HTTP 404: modelo no disponible en Hugging Face Inference Router "
56
+ "o sin permisos para tu token. Prueba otro model_id."
57
+ )
58
+ else:
59
+ ultimo_error = f"HTTP {response.status_code}: {response.text[:250]}"
60
  continue
61
+
62
  body = response.json()
63
 
64
  if isinstance(body, list) and body: