Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,20 +19,20 @@ import os
|
|
| 19 |
import re
|
| 20 |
import io
|
| 21 |
|
| 22 |
-
import requests
|
| 23 |
from PIL import Image
|
|
|
|
| 24 |
|
| 25 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 26 |
# MODELS — ordered by reliability on HF free tier (most reliable first)
|
| 27 |
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
| 28 |
MODELS = [
|
| 29 |
-
"meta-llama/Llama-3.2-11B-Vision-Instruct", #
|
| 30 |
-
"Qwen/Qwen2.5-VL-
|
| 31 |
-
"
|
| 32 |
]
|
| 33 |
|
| 34 |
# HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
|
| 35 |
-
HF_CHAT_URL = "https://router.huggingface.co/hf-inference/models/{model}/v1/chat/completions"
|
| 36 |
|
| 37 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 38 |
# DETECTION PROMPT
|
|
@@ -165,72 +165,41 @@ def validate_result(data: dict) -> dict | None:
|
|
| 165 |
|
| 166 |
def call_model(img: Image.Image, model: str, token: str) -> dict:
|
| 167 |
"""
|
| 168 |
-
Call one HF vision model via
|
|
|
|
| 169 |
Returns validated result dict on success.
|
| 170 |
Raises RuntimeError with a clear message on failure.
|
| 171 |
"""
|
| 172 |
b64 = pil_to_b64(img)
|
|
|
|
| 173 |
|
| 174 |
-
|
| 175 |
-
"
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
payload = {
|
| 180 |
-
"model": model,
|
| 181 |
-
"messages": [
|
| 182 |
-
{
|
| 183 |
"role": "user",
|
| 184 |
"content": [
|
| 185 |
-
{
|
| 186 |
-
|
| 187 |
-
"image_url": {"url": f"data:image/jpeg;base64,{b64}"},
|
| 188 |
-
},
|
| 189 |
-
{
|
| 190 |
-
"type": "text",
|
| 191 |
-
"text": DETECTION_PROMPT,
|
| 192 |
-
},
|
| 193 |
],
|
| 194 |
-
}
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
# ββ HTTP-level error handling ββββββββββββββββββββββββββββββββββββββββββββ
|
| 212 |
-
if resp.status_code == 401:
|
| 213 |
-
raise RuntimeError(f"{short}: 401 Unauthorized β HF_TOKEN is missing or invalid")
|
| 214 |
-
if resp.status_code == 403:
|
| 215 |
-
raise RuntimeError(f"{short}: 403 Forbidden β token may not have access to this model")
|
| 216 |
-
if resp.status_code == 404:
|
| 217 |
-
raise RuntimeError(f"{short}: 404 Not Found β model not available on serverless endpoint")
|
| 218 |
-
if resp.status_code == 422:
|
| 219 |
-
raise RuntimeError(f"{short}: 422 Unprocessable β model may not support vision input")
|
| 220 |
-
if resp.status_code == 429:
|
| 221 |
-
raise RuntimeError(f"{short}: 429 Rate Limited β try again in ~60 seconds")
|
| 222 |
-
if resp.status_code in (502, 503):
|
| 223 |
-
raise RuntimeError(f"{short}: {resp.status_code} Service Unavailable β model is loading")
|
| 224 |
-
if resp.status_code != 200:
|
| 225 |
-
body_preview = resp.text[:200].replace("\n", " ")
|
| 226 |
-
raise RuntimeError(f"{short}: HTTP {resp.status_code} β {body_preview}")
|
| 227 |
-
|
| 228 |
-
# ββ Parse response βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 229 |
-
try:
|
| 230 |
-
body = resp.json()
|
| 231 |
-
content = body["choices"][0]["message"]["content"]
|
| 232 |
-
except (KeyError, IndexError, json.JSONDecodeError) as e:
|
| 233 |
-
raise RuntimeError(f"{short}: unexpected response shape β {e} | body: {resp.text[:200]}")
|
| 234 |
|
| 235 |
print(f"[{short}] raw LLM output: {content[:300]}") # visible in Space logs
|
| 236 |
|
|
@@ -487,7 +456,7 @@ print("=" * 60)
|
|
| 487 |
print(" Amazon Trailer Inspector β startup")
|
| 488 |
print(f" HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET β add to Space Secrets!'}")
|
| 489 |
print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
|
| 490 |
-
print(f"
|
| 491 |
print("=" * 60)
|
| 492 |
|
| 493 |
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
| 19 |
import re
|
| 20 |
import io
|
| 21 |
|
|
|
|
| 22 |
from PIL import Image
|
| 23 |
+
from huggingface_hub import InferenceClient
|
| 24 |
|
| 25 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 26 |
# MODELS — ordered by reliability on HF free tier (most reliable first)
|
| 27 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 28 |
+
# Verify live status: huggingface.co/models?pipeline_tag=image-text-to-text&inference=warm
|
| 29 |
MODELS = [
|
| 30 |
+
"meta-llama/Llama-3.2-11B-Vision-Instruct", # Primary
|
| 31 |
+
"Qwen/Qwen2.5-VL-3B-Instruct", # Smaller Qwen β more likely warm
|
| 32 |
+
"microsoft/Phi-3.5-vision-instruct", # Fallback
|
| 33 |
]
|
| 34 |
|
| 35 |
# HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
|
|
|
|
| 36 |
|
| 37 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 38 |
# DETECTION PROMPT
|
|
|
|
| 165 |
|
| 166 |
def call_model(img: Image.Image, model: str, token: str) -> dict:
|
| 167 |
"""
|
| 168 |
+
Call one HF vision model via InferenceClient with provider='hf-inference'.
|
| 169 |
+
This is the official HF-recommended approach after api-inference deprecation.
|
| 170 |
Returns validated result dict on success.
|
| 171 |
Raises RuntimeError with a clear message on failure.
|
| 172 |
"""
|
| 173 |
b64 = pil_to_b64(img)
|
| 174 |
+
short = model.split("/")[-1]
|
| 175 |
|
| 176 |
+
try:
|
| 177 |
+
client = InferenceClient(provider="hf-inference", api_key=token)
|
| 178 |
+
resp = client.chat_completion(
|
| 179 |
+
model=model,
|
| 180 |
+
messages=[{
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
"role": "user",
|
| 182 |
"content": [
|
| 183 |
+
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
|
| 184 |
+
{"type": "text", "text": DETECTION_PROMPT},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
],
|
| 186 |
+
}],
|
| 187 |
+
max_tokens=512,
|
| 188 |
+
temperature=0.05,
|
| 189 |
+
)
|
| 190 |
+
raw_content = resp.choices[0].message.content
|
| 191 |
+
except Exception as e:
|
| 192 |
+
err = str(e)
|
| 193 |
+
if "401" in err or "403" in err:
|
| 194 |
+
raise RuntimeError(f"{short}: auth error β check HF_TOKEN ({err[:120]})")
|
| 195 |
+
elif "404" in err:
|
| 196 |
+
raise RuntimeError(f"{short}: 404 β model not on free serverless tier ({err[:120]})")
|
| 197 |
+
elif "429" in err:
|
| 198 |
+
raise RuntimeError(f"{short}: rate limited β retry in ~60s")
|
| 199 |
+
elif "503" in err or "502" in err:
|
| 200 |
+
raise RuntimeError(f"{short}: model loading/unavailable β retry shortly")
|
| 201 |
+
else:
|
| 202 |
+
raise RuntimeError(f"{short}: {err[:200]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
print(f"[{short}] raw LLM output: {content[:300]}") # visible in Space logs
|
| 205 |
|
|
|
|
| 456 |
print(" Amazon Trailer Inspector β startup")
|
| 457 |
print(f" HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET β add to Space Secrets!'}")
|
| 458 |
print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
|
| 459 |
+
print(f" Method : InferenceClient(provider='hf-inference')")
|
| 460 |
print("=" * 60)
|
| 461 |
|
| 462 |
# ──────────────────────────────────────────────────────────────────────────────
|