Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,11 +25,12 @@ from huggingface_hub import InferenceClient
|
|
| 25 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 26 |
# MODELS — ordered by reliability on HF free tier (most reliable first)
|
| 27 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 28 |
-
#
|
|
|
|
| 29 |
MODELS = [
|
| 30 |
-
"meta-llama/Llama-3.2-11B-Vision-Instruct", # Primary
|
| 31 |
-
"Qwen/Qwen2.5-VL-
|
| 32 |
-
"
|
| 33 |
]
|
| 34 |
|
| 35 |
# HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
|
|
@@ -165,7 +166,7 @@ def validate_result(data: dict) -> dict | None:
|
|
| 165 |
|
| 166 |
def call_model(img: Image.Image, model: str, token: str) -> dict:
|
| 167 |
"""
|
| 168 |
-
Call one HF vision model via InferenceClient with provider='
|
| 169 |
This is the official HF-recommended approach after api-inference deprecation.
|
| 170 |
Returns validated result dict on success.
|
| 171 |
Raises RuntimeError with a clear message on failure.
|
|
@@ -174,7 +175,9 @@ def call_model(img: Image.Image, model: str, token: str) -> dict:
|
|
| 174 |
short = model.split("/")[-1]
|
| 175 |
|
| 176 |
try:
|
| 177 |
-
|
|
|
|
|
|
|
| 178 |
resp = client.chat_completion(
|
| 179 |
model=model,
|
| 180 |
messages=[{
|
|
@@ -456,7 +459,7 @@ print("=" * 60)
|
|
| 456 |
print(" Amazon Trailer Inspector — startup")
|
| 457 |
print(f" HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET — add to Space Secrets!'}")
|
| 458 |
print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
|
| 459 |
-
print(f" Method : InferenceClient(provider='
|
| 460 |
print("=" * 60)
|
| 461 |
|
| 462 |
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
| 25 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 26 |
# MODELS — ordered by reliability on HF free tier (most reliable first)
|
| 27 |
# ──────────────────────────────────────────────────────────────────────────────
|
| 28 |
+
# provider="auto" lets HF router pick the best available provider (Nebius, Together, Fireworks, etc.)
|
| 29 |
+
# hf-inference does NOT serve large vision LLMs — it's CPU-only for small models since July 2025
|
| 30 |
MODELS = [
|
| 31 |
+
"meta-llama/Llama-3.2-11B-Vision-Instruct", # Primary — available on Nebius/Fireworks
|
| 32 |
+
"Qwen/Qwen2.5-VL-7B-Instruct", # Fallback 1 — available on Nebius
|
| 33 |
+
"mistralai/Pixtral-12B-2409", # Fallback 2 — available on Fireworks
|
| 34 |
]
|
| 35 |
|
| 36 |
# HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
|
|
|
|
| 166 |
|
| 167 |
def call_model(img: Image.Image, model: str, token: str) -> dict:
|
| 168 |
"""
|
| 169 |
+
Call one HF vision model via InferenceClient with provider='auto'.
|
| 170 |
This is the official HF-recommended approach after api-inference deprecation.
|
| 171 |
Returns validated result dict on success.
|
| 172 |
Raises RuntimeError with a clear message on failure.
|
|
|
|
| 175 |
short = model.split("/")[-1]
|
| 176 |
|
| 177 |
try:
|
| 178 |
+
# provider="auto" = HF router picks best available provider for this model
|
| 179 |
+
# This works for vision LLMs unlike hf-inference which is CPU-only
|
| 180 |
+
client = InferenceClient(provider="auto", api_key=token)
|
| 181 |
resp = client.chat_completion(
|
| 182 |
model=model,
|
| 183 |
messages=[{
|
|
|
|
| 459 |
print(" Amazon Trailer Inspector — startup")
|
| 460 |
print(f" HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET — add to Space Secrets!'}")
|
| 461 |
print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
|
| 462 |
+
print(f" Method : InferenceClient(provider='auto') — router selects best provider")
|
| 463 |
print("=" * 60)
|
| 464 |
|
| 465 |
# ──────────────────────────────────────────────────────────────────────────────
|