Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- fusion-app/app_api.py +4 -27
- fusion-app/app_local.py +2 -2
fusion-app/app_api.py
CHANGED
|
@@ -19,31 +19,8 @@ CSV_API = HERE / "runs_api.csv"
|
|
| 19 |
CLIP_MODEL = "openai/clip-vit-base-patch32"
|
| 20 |
W2V2_MODEL = "facebook/wav2vec2-base"
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
if token:
|
| 25 |
-
return token
|
| 26 |
-
|
| 27 |
-
key_paths = [
|
| 28 |
-
HERE / "key.txt",
|
| 29 |
-
HERE.parent / "key.txt",
|
| 30 |
-
Path("key.txt"),
|
| 31 |
-
Path("../key.txt")
|
| 32 |
-
]
|
| 33 |
-
|
| 34 |
-
for key_path in key_paths:
|
| 35 |
-
try:
|
| 36 |
-
if key_path.exists():
|
| 37 |
-
token = key_path.read_text().strip()
|
| 38 |
-
if token:
|
| 39 |
-
print(f"HuggingFace token loaded from {key_path}")
|
| 40 |
-
return token
|
| 41 |
-
except Exception as e:
|
| 42 |
-
print(f"Error reading {key_path}: {e}")
|
| 43 |
-
|
| 44 |
-
return None
|
| 45 |
-
|
| 46 |
-
HF_TOKEN = get_hf_token()
|
| 47 |
if not HF_TOKEN:
|
| 48 |
print("Warning: HuggingFace token not found. API functions will not work.")
|
| 49 |
client = None
|
|
@@ -201,7 +178,7 @@ def predict_video(video, alpha=0.7):
|
|
| 201 |
"fps_used": meta.get("fps_used"),
|
| 202 |
"duration_s": meta.get("duration_s"),
|
| 203 |
}
|
| 204 |
-
log_inference(engine="api", mode="video", alpha=float(alpha), lat=lat, pred=pred, probs=probs)
|
| 205 |
return pred, probs, lat
|
| 206 |
|
| 207 |
def predict_image_audio(image: Image.Image, audio_path: str, alpha=0.7):
|
|
@@ -234,7 +211,7 @@ def predict_image_audio(image: Image.Image, audio_path: str, alpha=0.7):
|
|
| 234 |
"t_fuse_ms": int(t_fus*1000),
|
| 235 |
"t_total_ms": int((time.time()-t0)*1000),
|
| 236 |
}
|
| 237 |
-
log_inference(engine="api", mode="image_audio", alpha=float(alpha), lat=lat, pred=pred, probs=probs)
|
| 238 |
return pred, probs, lat
|
| 239 |
|
| 240 |
'''
|
|
|
|
| 19 |
CLIP_MODEL = "openai/clip-vit-base-patch32"
|
| 20 |
W2V2_MODEL = "facebook/wav2vec2-base"
|
| 21 |
|
| 22 |
+
|
| 23 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
if not HF_TOKEN:
|
| 25 |
print("Warning: HuggingFace token not found. API functions will not work.")
|
| 26 |
client = None
|
|
|
|
| 178 |
"fps_used": meta.get("fps_used"),
|
| 179 |
"duration_s": meta.get("duration_s"),
|
| 180 |
}
|
| 181 |
+
log_inference(engine="api", mode="video", alpha=float(alpha), lat=lat, pred=pred, probs=probs, csv_path=CSV_API )
|
| 182 |
return pred, probs, lat
|
| 183 |
|
| 184 |
def predict_image_audio(image: Image.Image, audio_path: str, alpha=0.7):
|
|
|
|
| 211 |
"t_fuse_ms": int(t_fus*1000),
|
| 212 |
"t_total_ms": int((time.time()-t0)*1000),
|
| 213 |
}
|
| 214 |
+
log_inference(engine="api", mode="image_audio", alpha=float(alpha), lat=lat, pred=pred, probs=probs, csv_path=CSV_API)
|
| 215 |
return pred, probs, lat
|
| 216 |
|
| 217 |
'''
|
fusion-app/app_local.py
CHANGED
|
@@ -45,7 +45,7 @@ def predict_vid(video, alpha=0.7):
|
|
| 45 |
"duration_s": round(float(meta.get("duration_s") or 0.0), 2),
|
| 46 |
}
|
| 47 |
print("[DEBUG] p_img:", p_img, "p_aud:", p_aud, "fused:", p, "rms:", rms, flush=True)
|
| 48 |
-
log_inference(engine="local", mode="video", alpha=float(alpha), lat=lat, pred=pred, probs=probs)
|
| 49 |
return pred, probs, lat
|
| 50 |
|
| 51 |
def predict_image_audio(image, audio_path, alpha=0.7):
|
|
@@ -78,7 +78,7 @@ def predict_image_audio(image, audio_path, alpha=0.7):
|
|
| 78 |
"rms": round(float(rms), 4),
|
| 79 |
}
|
| 80 |
print("[DEBUG] p_img:", p_img, "p_aud:", p_aud, "fused:", p, "rms:", rms, flush=True)
|
| 81 |
-
log_inference(engine="local", mode="image_audio", alpha=float(alpha), lat=lat, pred=pred, probs=probs)
|
| 82 |
return pred, probs, lat
|
| 83 |
|
| 84 |
|
|
|
|
| 45 |
"duration_s": round(float(meta.get("duration_s") or 0.0), 2),
|
| 46 |
}
|
| 47 |
print("[DEBUG] p_img:", p_img, "p_aud:", p_aud, "fused:", p, "rms:", rms, flush=True)
|
| 48 |
+
log_inference(engine="local", mode="video", alpha=float(alpha), lat=lat, pred=pred, probs=probs, csv_path="runs_local.csv")
|
| 49 |
return pred, probs, lat
|
| 50 |
|
| 51 |
def predict_image_audio(image, audio_path, alpha=0.7):
|
|
|
|
| 78 |
"rms": round(float(rms), 4),
|
| 79 |
}
|
| 80 |
print("[DEBUG] p_img:", p_img, "p_aud:", p_aud, "fused:", p, "rms:", rms, flush=True)
|
| 81 |
+
log_inference(engine="local", mode="image_audio", alpha=float(alpha), lat=lat, pred=pred, probs=probs, csv_path="runs_local.csv")
|
| 82 |
return pred, probs, lat
|
| 83 |
|
| 84 |
|