mlbench123 committed on
Commit
f5c4e2c
·
verified ·
1 Parent(s): a6e7e36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -64
app.py CHANGED
@@ -19,20 +19,20 @@ import os
19
  import re
20
  import io
21
 
22
- import requests
23
  from PIL import Image
 
24
 
25
  # ──────────────────────────────────────────────────────────────────────────────
26
  # MODELS — ordered by reliability on HF free tier (most reliable first)
27
  # ──────────────────────────────────────────────────────────────────────────────
 
28
  MODELS = [
29
- "meta-llama/Llama-3.2-11B-Vision-Instruct", # Best free vision model on HF
30
- "Qwen/Qwen2.5-VL-7B-Instruct", # Good fallback
31
- "google/gemma-3-4b-it", # Smaller, faster fallback
32
  ]
33
 
34
  # HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
35
- HF_CHAT_URL = "https://router.huggingface.co/hf-inference/models/{model}/v1/chat/completions"
36
 
37
  # ──────────────────────────────────────────────────────────────────────────────
38
  # DETECTION PROMPT
@@ -165,72 +165,41 @@ def validate_result(data: dict) -> dict | None:
165
 
166
  def call_model(img: Image.Image, model: str, token: str) -> dict:
167
  """
168
- Call one HF vision model via the chat-completions endpoint.
 
169
  Returns validated result dict on success.
170
  Raises RuntimeError with a clear message on failure.
171
  """
172
  b64 = pil_to_b64(img)
 
173
 
174
- headers = {
175
- "Content-Type": "application/json",
176
- "Authorization": f"Bearer {token}",
177
- }
178
-
179
- payload = {
180
- "model": model,
181
- "messages": [
182
- {
183
  "role": "user",
184
  "content": [
185
- {
186
- "type": "image_url",
187
- "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
188
- },
189
- {
190
- "type": "text",
191
- "text": DETECTION_PROMPT,
192
- },
193
  ],
194
- }
195
- ],
196
- "max_tokens": 512,
197
- "temperature": 0.05,
198
- "stream": False,
199
- }
200
-
201
- url = HF_CHAT_URL.format(model=model)
202
- short = model.split("/")[-1]
203
-
204
- try:
205
- resp = requests.post(url, headers=headers, json=payload, timeout=90)
206
- except requests.exceptions.Timeout:
207
- raise RuntimeError(f"{short}: request timed out (90s)")
208
- except requests.exceptions.ConnectionError as e:
209
- raise RuntimeError(f"{short}: connection error — {e}")
210
-
211
- # ── HTTP-level error handling ────────────────────────────────────────────
212
- if resp.status_code == 401:
213
- raise RuntimeError(f"{short}: 401 Unauthorized — HF_TOKEN is missing or invalid")
214
- if resp.status_code == 403:
215
- raise RuntimeError(f"{short}: 403 Forbidden — token may not have access to this model")
216
- if resp.status_code == 404:
217
- raise RuntimeError(f"{short}: 404 Not Found — model not available on serverless endpoint")
218
- if resp.status_code == 422:
219
- raise RuntimeError(f"{short}: 422 Unprocessable — model may not support vision input")
220
- if resp.status_code == 429:
221
- raise RuntimeError(f"{short}: 429 Rate Limited — try again in ~60 seconds")
222
- if resp.status_code in (502, 503):
223
- raise RuntimeError(f"{short}: {resp.status_code} Service Unavailable — model is loading")
224
- if resp.status_code != 200:
225
- body_preview = resp.text[:200].replace("\n", " ")
226
- raise RuntimeError(f"{short}: HTTP {resp.status_code} — {body_preview}")
227
-
228
- # ── Parse response ───────────────────────────────────────────────────────
229
- try:
230
- body = resp.json()
231
- content = body["choices"][0]["message"]["content"]
232
- except (KeyError, IndexError, json.JSONDecodeError) as e:
233
- raise RuntimeError(f"{short}: unexpected response shape — {e} | body: {resp.text[:200]}")
234
 
235
  print(f"[{short}] raw LLM output: {content[:300]}") # visible in Space logs
236
 
@@ -487,7 +456,7 @@ print("=" * 60)
487
  print(" Amazon Trailer Inspector — startup")
488
  print(f" HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET ← add to Space Secrets!'}")
489
  print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
490
- print(f" Endpoint : {HF_CHAT_URL[:60]}...")
491
  print("=" * 60)
492
 
493
  # ──────────────────────────────────────────────────────────────────────────────
 
19
  import re
20
  import io
21
 
 
22
  from PIL import Image
23
+ from huggingface_hub import InferenceClient
24
 
25
  # ──────────────────────────────────────────────────────────────────────────────
26
  # MODELS — ordered by reliability on HF free tier (most reliable first)
27
  # ──────────────────────────────────────────────────────────────────────────────
28
+ # Verify live status: huggingface.co/models?pipeline_tag=image-text-to-text&inference=warm
29
  MODELS = [
30
+ "meta-llama/Llama-3.2-11B-Vision-Instruct", # Primary
31
+ "Qwen/Qwen2.5-VL-3B-Instruct", # Smaller Qwen — more likely warm
32
+ "microsoft/Phi-3.5-vision-instruct", # Fallback
33
  ]
34
 
35
  # HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
 
36
 
37
  # ──────────────────────────────────────────────────────────────────────────────
38
  # DETECTION PROMPT
 
165
 
166
  def call_model(img: Image.Image, model: str, token: str) -> dict:
167
  """
168
+ Call one HF vision model via InferenceClient with provider='hf-inference'.
169
+ This is the official HF-recommended approach after api-inference deprecation.
170
  Returns validated result dict on success.
171
  Raises RuntimeError with a clear message on failure.
172
  """
173
  b64 = pil_to_b64(img)
174
+ short = model.split("/")[-1]
175
 
176
+ try:
177
+ client = InferenceClient(provider="hf-inference", api_key=token)
178
+ resp = client.chat_completion(
179
+ model=model,
180
+ messages=[{
 
 
 
 
181
  "role": "user",
182
  "content": [
183
+ {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
184
+ {"type": "text", "text": DETECTION_PROMPT},
 
 
 
 
 
 
185
  ],
186
+ }],
187
+ max_tokens=512,
188
+ temperature=0.05,
189
+ )
190
+ raw_content = resp.choices[0].message.content
191
+ except Exception as e:
192
+ err = str(e)
193
+ if "401" in err or "403" in err:
194
+ raise RuntimeError(f"{short}: auth error — check HF_TOKEN ({err[:120]})")
195
+ elif "404" in err:
196
+ raise RuntimeError(f"{short}: 404 — model not on free serverless tier ({err[:120]})")
197
+ elif "429" in err:
198
+ raise RuntimeError(f"{short}: rate limited — retry in ~60s")
199
+ elif "503" in err or "502" in err:
200
+ raise RuntimeError(f"{short}: model loading/unavailable — retry shortly")
201
+ else:
202
+ raise RuntimeError(f"{short}: {err[:200]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
  print(f"[{short}] raw LLM output: {content[:300]}") # visible in Space logs
205
 
 
456
  print(" Amazon Trailer Inspector — startup")
457
  print(f" HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET ← add to Space Secrets!'}")
458
  print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
459
+ print(f" Method : InferenceClient(provider='hf-inference')")
460
  print("=" * 60)
461
 
462
  # ──────────────────────────────────────────────────────────────────────────────