SonicaB committed on
Commit
aedd0b6
·
verified ·
1 Parent(s): 383d100

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. fusion-app/app_api.py +42 -19
fusion-app/app_api.py CHANGED
@@ -6,7 +6,6 @@ import numpy as np
6
  from PIL import Image
7
  import gradio as gr
8
  import requests
9
- from huggingface_hub import InferenceClient
10
  from pydub import AudioSegment
11
  from utils_media import video_to_frame_audio, load_audio_16k, log_inference
12
 
@@ -20,12 +19,9 @@ CLIP_MODEL = "openai/clip-vit-base-patch32"
20
  W2V2_MODEL = "facebook/wav2vec2-base"
21
 
22
 
23
- HF_TOKEN = os.getenv("HF_TOKEN")
24
  if not HF_TOKEN:
25
  print("Warning: HuggingFace token not found. API functions will not work.")
26
- client = None
27
- else:
28
- client = InferenceClient(token=HF_TOKEN)
29
 
30
 
31
 
@@ -35,20 +31,47 @@ def _img_to_jpeg_bytes(pil: Image.Image) -> bytes:
35
  return buf.getvalue()
36
 
37
  def clip_api_probs(pil: Image.Image, prompts: List[str] = PROMPTS) -> np.ndarray:
38
- if client is None:
39
- raise RuntimeError("HuggingFace client not initialized. Please set HF_Token environment variable.")
40
-
41
- result = client.zero_shot_image_classification(
42
- image=pil, candidate_labels=prompts,
43
- hypothesis_template="{}",
44
- model=CLIP_MODEL,
45
- )
46
-
47
- scores = {d["label"]: float(d["score"]) for d in result}
48
- arr = np.array([scores.get(p, 0.0) for p in prompts], dtype=np.float32)
49
-
50
- s = arr.sum(); arr = arr / s if s > 0 else np.ones_like(arr)/len(arr)
51
- return arr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
 
54
 
 
6
  from PIL import Image
7
  import gradio as gr
8
  import requests
 
9
  from pydub import AudioSegment
10
  from utils_media import video_to_frame_audio, load_audio_16k, log_inference
11
 
 
19
  W2V2_MODEL = "facebook/wav2vec2-base"
20
 
21
 
22
+ HF_TOKEN = os.getenv("HF_TOKEN")
23
  if not HF_TOKEN:
24
  print("Warning: HuggingFace token not found. API functions will not work.")
 
 
 
25
 
26
 
27
 
 
31
  return buf.getvalue()
32
 
33
def clip_api_probs(pil: Image.Image, prompts: List[str] = PROMPTS) -> np.ndarray:
    """Zero-shot classify *pil* against *prompts* via the HF Inference API.

    Sends the image to the hosted CLIP model (``CLIP_MODEL``) with a raw
    hypothesis template (``"{}"``) and returns a probability vector aligned
    with *prompts* (normalized to sum to 1). On any API failure a uniform
    distribution over *prompts* is returned as a best-effort fallback.

    Args:
        pil: input image to classify.
        prompts: candidate labels, in the order the output array follows.

    Returns:
        float32 array of shape ``(len(prompts),)`` summing to 1.

    Raises:
        RuntimeError: if the HF_TOKEN environment variable is not set.
    """
    # Local import: no module-level `import json` is visible in this file's
    # import block, so bring it into scope here to avoid a NameError.
    import json

    if HF_TOKEN is None:
        raise RuntimeError("HuggingFace token not available. Please set HF_TOKEN environment variable.")

    try:
        # Use a direct requests API call instead of InferenceClient.
        img_bytes = _img_to_jpeg_bytes(pil)

        url = f"https://api-inference.huggingface.co/models/{CLIP_MODEL}"
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}

        # Parameters travel as a JSON-encoded string in the multipart form;
        # build them once (no intermediate wrapper dict needed).
        parameters = {
            "candidate_labels": prompts,
            "hypothesis_template": "{}",
        }

        files = {"file": ("image.jpg", img_bytes, "image/jpeg")}
        data = {"inputs": "", "parameters": json.dumps(parameters)}

        response = requests.post(url, headers=headers, files=files, data=data, timeout=60)
        response.raise_for_status()

        result = response.json()

        # Expected response shape: list of {"label": ..., "score": ...} dicts.
        if isinstance(result, list) and len(result) > 0:
            scores = {item["label"]: float(item["score"]) for item in result}
        else:
            # Unexpected payload shape: fall back to equal probabilities.
            scores = {p: 1.0 / len(prompts) for p in prompts}

        arr = np.array([scores.get(p, 0.0) for p in prompts], dtype=np.float32)
        s = arr.sum()
        # Renormalize; guard against an all-zero score vector.
        arr = arr / s if s > 0 else np.ones_like(arr) / len(arr)
        return arr

    except Exception as e:
        # Deliberate broad catch: this endpoint is best-effort; callers get a
        # uniform distribution rather than a crash on network/API errors.
        print(f"CLIP API error: {e}")
        return np.ones(len(prompts), dtype=np.float32) / len(prompts)
75
 
76
 
77