Spaces:

mlbench123
/

mudflap_LLM

Sleeping

App Files Files Community

mlbench123 committed on Apr 9

Commit

7b5bc79

verified ·

1 Parent(s): f5c4e2c

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -7

app.py CHANGED Viewed

@@ -25,11 +25,12 @@ from huggingface_hub import InferenceClient
 # ──────────────────────────────────────────────────────────────────────────────
 # MODELS  — ordered by reliability on HF free tier (most reliable first)
 # ──────────────────────────────────────────────────────────────────────────────
-# Verify live status: huggingface.co/models?pipeline_tag=image-text-to-text&inference=warm
 MODELS = [
-    "meta-llama/Llama-3.2-11B-Vision-Instruct",   # Primary
-    "Qwen/Qwen2.5-VL-3B-Instruct",                # Smaller Qwen — more likely warm
-    "microsoft/Phi-3.5-vision-instruct",           # Fallback
 ]
 # HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
@@ -165,7 +166,7 @@ def validate_result(data: dict) -> dict | None:
 def call_model(img: Image.Image, model: str, token: str) -> dict:
     """
-    Call one HF vision model via InferenceClient with provider='hf-inference'.
     This is the official HF-recommended approach after api-inference deprecation.
     Returns validated result dict on success.
     Raises RuntimeError with a clear message on failure.
@@ -174,7 +175,9 @@ def call_model(img: Image.Image, model: str, token: str) -> dict:
     short = model.split("/")[-1]
     try:
-        client = InferenceClient(provider="hf-inference", api_key=token)
         resp = client.chat_completion(
             model=model,
             messages=[{
@@ -456,7 +459,7 @@ print("=" * 60)
 print("  Amazon Trailer Inspector — startup")
 print(f"  HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET ← add to Space Secrets!'}")
 print(f"  Models   : {[m.split('/')[-1] for m in MODELS]}")
-print(f"  Method   : InferenceClient(provider='hf-inference')")
 print("=" * 60)
 # ──────────────────────────────────────────────────────────────────────────────

 # ──────────────────────────────────────────────────────────────────────────────
 # MODELS  — ordered by reliability on HF free tier (most reliable first)
 # ──────────────────────────────────────────────────────────────────────────────
+# provider="auto" lets HF router pick the best available provider (Nebius, Together, Fireworks, etc.)
+# hf-inference does NOT serve large vision LLMs — it's CPU-only for small models since July 2025
 MODELS = [
+    "meta-llama/Llama-3.2-11B-Vision-Instruct",   # Primary   — available on Nebius/Fireworks
+    "Qwen/Qwen2.5-VL-7B-Instruct",                # Fallback 1 — available on Nebius
+    "mistralai/Pixtral-12B-2409",                  # Fallback 2 — available on Fireworks
 ]
 # HF Serverless Inference — new router endpoint (api-inference.huggingface.co is deprecated as of 2026)
 def call_model(img: Image.Image, model: str, token: str) -> dict:
     """
+    Call one HF vision model via InferenceClient with provider='auto'.
     This is the official HF-recommended approach after api-inference deprecation.
     Returns validated result dict on success.
     Raises RuntimeError with a clear message on failure.
     short = model.split("/")[-1]
     try:
+        # provider="auto" = HF router picks best available provider for this model
+        # This works for vision LLMs, unlike hf-inference, which is CPU-only
+        client = InferenceClient(provider="auto", api_key=token)
         resp = client.chat_completion(
             model=model,
             messages=[{
 print("  Amazon Trailer Inspector — startup")
 print(f"  HF_TOKEN : {'SET (' + str(len(_tok)) + ' chars)' if _tok else 'NOT SET ← add to Space Secrets!'}")
 print(f"  Models   : {[m.split('/')[-1] for m in MODELS]}")
+print(f"  Method   : InferenceClient(provider='auto') — router selects best provider")
 print("=" * 60)
 # ──────────────────────────────────────────────────────────────────────────────