mlbench123 committed on
Commit
7b5bc79
·
verified ·
1 Parent(s): f5c4e2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -25,11 +25,12 @@ from huggingface_hub import InferenceClient
25
  # ──────────────────────────────────────────────────────────────────────────────
26
  # MODELS β€” ordered by reliability on HF free tier (most reliable first)
27
  # ──────────────────────────────────────────────────────────────────────────────
28
- # Verify live status: huggingface.co/models?pipeline_tag=image-text-to-text&inference=warm
 
29
  MODELS = [
30
- "meta-llama/Llama-3.2-11B-Vision-Instruct", # Primary
31
- "Qwen/Qwen2.5-VL-3B-Instruct", # Smaller Qwen — more likely warm
32
- "microsoft/Phi-3.5-vision-instruct", # Fallback
33
  ]
34
 
35
  # HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
@@ -165,7 +166,7 @@ def validate_result(data: dict) -> dict | None:
165
 
166
  def call_model(img: Image.Image, model: str, token: str) -> dict:
167
  """
168
- Call one HF vision model via InferenceClient with provider='hf-inference'.
169
  This is the official HF-recommended approach after api-inference deprecation.
170
  Returns validated result dict on success.
171
  Raises RuntimeError with a clear message on failure.
@@ -174,7 +175,9 @@ def call_model(img: Image.Image, model: str, token: str) -> dict:
174
  short = model.split("/")[-1]
175
 
176
  try:
177
- client = InferenceClient(provider="hf-inference", api_key=token)
 
 
178
  resp = client.chat_completion(
179
  model=model,
180
  messages=[{
@@ -456,7 +459,7 @@ print("=" * 60)
456
  print(" Amazon Trailer Inspector β€” startup")
457
  print(f" HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET ← add to Space Secrets!'}")
458
  print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
459
- print(f" Method : InferenceClient(provider='hf-inference')")
460
  print("=" * 60)
461
 
462
  # ──────────────────────────────────────────────────────────────────────────────
 
25
  # ──────────────────────────────────────────────────────────────────────────────
26
  # MODELS β€” ordered by reliability on HF free tier (most reliable first)
27
  # ──────────────────────────────────────────────────────────────────────────────
28
+ # provider="auto" lets HF router pick the best available provider (Nebius, Together, Fireworks, etc.)
29
+ # hf-inference does NOT serve large vision LLMs — it has been CPU-only, for small models, since July 2025
30
  MODELS = [
31
+ "meta-llama/Llama-3.2-11B-Vision-Instruct", # Primary — available on Nebius/Fireworks
32
+ "Qwen/Qwen2.5-VL-7B-Instruct", # Fallback 1 — available on Nebius
33
+ "mistralai/Pixtral-12B-2409", # Fallback 2 — available on Fireworks
34
  ]
35
 
36
  # HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
 
166
 
167
  def call_model(img: Image.Image, model: str, token: str) -> dict:
168
  """
169
+ Call one HF vision model via InferenceClient with provider='auto'.
170
  This is the official HF-recommended approach after api-inference deprecation.
171
  Returns validated result dict on success.
172
  Raises RuntimeError with a clear message on failure.
 
175
  short = model.split("/")[-1]
176
 
177
  try:
178
+ # provider="auto" = HF router picks best available provider for this model
179
+ # This works for vision LLMs unlike hf-inference which is CPU-only
180
+ client = InferenceClient(provider="auto", api_key=token)
181
  resp = client.chat_completion(
182
  model=model,
183
  messages=[{
 
459
  print(" Amazon Trailer Inspector β€” startup")
460
  print(f" HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET ← add to Space Secrets!'}")
461
  print(f" Models : {[m.split('/')[-1] for m in MODELS]}")
462
+ print(f" Method : InferenceClient(provider='auto') β€” router selects best provider")
463
  print("=" * 60)
464
 
465
  # ──────────────────────────────────────────────────────────────────────────────