Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- fusion-app/app_api.py +14 -7
fusion-app/app_api.py
CHANGED
|
@@ -18,11 +18,13 @@ PROMPTS = [x["prompt"] for x in LABEL_ITEMS]
|
|
| 18 |
CLIP_MODEL = "openai/clip-vit-base-patch32"
|
| 19 |
W2V2_MODEL = "facebook/wav2vec2-base"
|
| 20 |
|
| 21 |
-
HF_TOKEN = os.getenv("HF_Token")
|
| 22 |
if not HF_TOKEN:
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
client =
|
|
|
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
|
|
@@ -32,16 +34,18 @@ def _img_to_jpeg_bytes(pil: Image.Image) -> bytes:
|
|
| 32 |
return buf.getvalue()
|
| 33 |
|
| 34 |
def clip_api_probs(pil: Image.Image, prompts: List[str] = PROMPTS) -> np.ndarray:
|
|
|
|
|
|
|
| 35 |
|
| 36 |
result = client.zero_shot_image_classification(
|
| 37 |
image=pil, candidate_labels=prompts,
|
| 38 |
-
hypothesis_template="{}",
|
| 39 |
model=CLIP_MODEL,
|
| 40 |
)
|
| 41 |
-
|
| 42 |
scores = {d["label"]: float(d["score"]) for d in result}
|
| 43 |
arr = np.array([scores.get(p, 0.0) for p in prompts], dtype=np.float32)
|
| 44 |
-
|
| 45 |
s = arr.sum(); arr = arr / s if s > 0 else np.ones_like(arr)/len(arr)
|
| 46 |
return arr
|
| 47 |
|
|
@@ -58,6 +62,9 @@ def _wave_float32_to_wav_bytes(wave_16k: np.ndarray, sr=16000) -> bytes:
|
|
| 58 |
return out.getvalue()
|
| 59 |
|
| 60 |
def w2v2_api_embed(wave_16k: np.ndarray) -> np.ndarray:
|
|
|
|
|
|
|
|
|
|
| 61 |
wav_bytes = _wave_float32_to_wav_bytes(wave_16k)
|
| 62 |
|
| 63 |
url = f"https://api-inference.huggingface.co/models/{W2V2_MODEL}"
|
|
|
|
| 18 |
CLIP_MODEL = "openai/clip-vit-base-patch32"
|
| 19 |
W2V2_MODEL = "facebook/wav2vec2-base"
|
| 20 |
|
| 21 |
+
HF_TOKEN = os.getenv("HF_Token")
|
| 22 |
if not HF_TOKEN:
|
| 23 |
+
print("Warning: HF_Token not found in environment. API functions will not work.")
|
| 24 |
+
print("To use the API version, set the HF_Token environment variable with your HuggingFace token.")
|
| 25 |
+
client = None
|
| 26 |
+
else:
|
| 27 |
+
client = InferenceClient(token=HF_TOKEN)
|
| 28 |
|
| 29 |
|
| 30 |
|
|
|
|
| 34 |
return buf.getvalue()
|
| 35 |
|
| 36 |
def clip_api_probs(pil: Image.Image, prompts: List[str] = PROMPTS) -> np.ndarray:
|
| 37 |
+
if client is None:
|
| 38 |
+
raise RuntimeError("HuggingFace client not initialized. Please set HF_Token environment variable.")
|
| 39 |
|
| 40 |
result = client.zero_shot_image_classification(
|
| 41 |
image=pil, candidate_labels=prompts,
|
| 42 |
+
hypothesis_template="{}",
|
| 43 |
model=CLIP_MODEL,
|
| 44 |
)
|
| 45 |
+
|
| 46 |
scores = {d["label"]: float(d["score"]) for d in result}
|
| 47 |
arr = np.array([scores.get(p, 0.0) for p in prompts], dtype=np.float32)
|
| 48 |
+
|
| 49 |
s = arr.sum(); arr = arr / s if s > 0 else np.ones_like(arr)/len(arr)
|
| 50 |
return arr
|
| 51 |
|
|
|
|
| 62 |
return out.getvalue()
|
| 63 |
|
| 64 |
def w2v2_api_embed(wave_16k: np.ndarray) -> np.ndarray:
|
| 65 |
+
if HF_TOKEN is None:
|
| 66 |
+
raise RuntimeError("HuggingFace token not available. Please set HF_Token environment variable.")
|
| 67 |
+
|
| 68 |
wav_bytes = _wave_float32_to_wav_bytes(wave_16k)
|
| 69 |
|
| 70 |
url = f"https://api-inference.huggingface.co/models/{W2V2_MODEL}"
|