Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- fusion-app/app_api.py +42 -19
fusion-app/app_api.py
CHANGED
|
@@ -6,7 +6,6 @@ import numpy as np
|
|
| 6 |
from PIL import Image
|
| 7 |
import gradio as gr
|
| 8 |
import requests
|
| 9 |
-
from huggingface_hub import InferenceClient
|
| 10 |
from pydub import AudioSegment
|
| 11 |
from utils_media import video_to_frame_audio, load_audio_16k, log_inference
|
| 12 |
|
|
@@ -20,12 +19,9 @@ CLIP_MODEL = "openai/clip-vit-base-patch32"
|
|
| 20 |
W2V2_MODEL = "facebook/wav2vec2-base"
|
| 21 |
|
| 22 |
|
| 23 |
-
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 24 |
if not HF_TOKEN:
|
| 25 |
print("Warning: HuggingFace token not found. API functions will not work.")
|
| 26 |
-
client = None
|
| 27 |
-
else:
|
| 28 |
-
client = InferenceClient(token=HF_TOKEN)
|
| 29 |
|
| 30 |
|
| 31 |
|
|
@@ -35,20 +31,47 @@ def _img_to_jpeg_bytes(pil: Image.Image) -> bytes:
|
|
| 35 |
return buf.getvalue()
|
| 36 |
|
| 37 |
def clip_api_probs(pil: Image.Image, prompts: List[str] = PROMPTS) -> np.ndarray:
|
| 38 |
-
if
|
| 39 |
-
raise RuntimeError("HuggingFace
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
|
| 54 |
|
|
|
|
| 6 |
from PIL import Image
|
| 7 |
import gradio as gr
|
| 8 |
import requests
|
|
|
|
| 9 |
from pydub import AudioSegment
|
| 10 |
from utils_media import video_to_frame_audio, load_audio_16k, log_inference
|
| 11 |
|
|
|
|
| 19 |
W2V2_MODEL = "facebook/wav2vec2-base"
|
| 20 |
|
| 21 |
|
| 22 |
+
# Read the HuggingFace API token from the environment; the API-backed
# helpers below cannot work without it, so warn loudly at import time.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("Warning: HuggingFace token not found. API functions will not work.")
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
|
| 27 |
|
|
|
|
| 31 |
return buf.getvalue()
|
| 32 |
|
| 33 |
def clip_api_probs(pil: Image.Image, prompts: List[str] = PROMPTS) -> np.ndarray:
    """Score `pil` against `prompts` via the HF zero-shot image-classification API.

    Args:
        pil: Input image; JPEG-encoded before upload.
        prompts: Candidate labels, defaults to the module-level PROMPTS.

    Returns:
        A float32 probability vector aligned with `prompts` (re-normalized to
        sum to 1). On any API failure a uniform distribution is returned
        instead of raising, so the calling UI keeps working.

    Raises:
        RuntimeError: if no HF_TOKEN was configured at import time.
    """
    if HF_TOKEN is None:
        raise RuntimeError("HuggingFace token not available. Please set HF_TOKEN environment variable.")

    try:
        # Use direct requests API call instead of InferenceClient
        img_bytes = _img_to_jpeg_bytes(pil)

        url = f"https://api-inference.huggingface.co/models/{CLIP_MODEL}"
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}

        # Build the zero-shot parameters directly. (The original wrapped them
        # in a throwaway `payload` dict only to extract them again one line
        # later via payload["parameters"].)
        parameters = {
            "candidate_labels": prompts,
            "hypothesis_template": "{}",
        }

        # NOTE(review): mixing a multipart `files` upload with an empty
        # "inputs" field is unusual for this endpoint — confirm against the
        # HF Inference API docs for zero-shot-image-classification.
        files = {"file": ("image.jpg", img_bytes, "image/jpeg")}
        data = {"inputs": "", "parameters": json.dumps(parameters)}

        response = requests.post(url, headers=headers, files=files, data=data, timeout=60)
        response.raise_for_status()

        result = response.json()

        # Expected shape: a list of {"label": ..., "score": ...} dicts.
        if isinstance(result, list) and len(result) > 0:
            scores = {item["label"]: item["score"] for item in result}
        else:
            # Fallback: equal probabilities
            scores = {p: 1.0 / len(prompts) for p in prompts}

        # Order scores by `prompts`; re-normalize because the API may return
        # labels that do not cover every prompt.
        arr = np.array([scores.get(p, 0.0) for p in prompts], dtype=np.float32)
        s = arr.sum()
        arr = arr / s if s > 0 else np.ones_like(arr) / len(arr)
        return arr

    except Exception as e:
        # Deliberate best-effort: log and degrade to a uniform distribution
        # rather than crashing the app on a transient API error.
        print(f"CLIP API error: {e}")
        return np.ones(len(prompts), dtype=np.float32) / len(prompts)
|
| 75 |
|
| 76 |
|
| 77 |
|