Spaces:

FunAudioLLM
/

PrismAudio

Running on Zero

prismaudio-project committed 3 days ago

Commit

3277c58

1 Parent(s): 8be4220

fix

Files changed (1) hide show

app.py CHANGED Viewed

@@ -245,6 +245,22 @@ def extract_video_frames(video_path: str):
     return clip_chunk, sync_chunk, duration_sec
 # ==================== Feature Extraction ====================
 @spaces.GPU
 def extract_features_gpu(clip_chunk, sync_chunk, caption):

     return clip_chunk, sync_chunk, duration_sec
+def extract_features_cpu(clip_chunk, sync_chunk, caption):
+    """Encode the clip frames and the caption with the shared feature extractor.
+
+    Looks up ``_MODELS["feature_extractor"]`` and calls its
+    ``encode_video_and_text_with_videoprism`` method on the clip and caption
+    with gradient tracking disabled.
+
+    Args:
+        clip_chunk: Array handed to ``torch.from_numpy``; a leading axis of
+            size 1 is added before encoding (presumably the batch axis --
+            TODO confirm the expected frame layout).
+        sync_chunk: Not used by this function.
+        caption: Text passed to the encoder wrapped in a one-element list.
+
+    Returns:
+        dict with the keys ``global_video_features``, ``video_features`` and
+        ``global_text_features``; each value is the encoder output converted
+        through ``np.array`` into a tensor, with ``.squeeze(0)`` and
+        ``.cpu()`` applied.
+    """
+    model = _MODELS["feature_extractor"]
+    info = {}
+    with torch.no_grad():
+        # videoprism is on CPU
+        # unsqueeze(0) adds a leading axis of size 1 in front of the clip
+        clip_input = torch.from_numpy(clip_chunk).unsqueeze(0)
+        # The encoder returns four values; the third one is discarded here.
+        video_feat, frame_embed, _, text_feat = \
+            model.encode_video_and_text_with_videoprism(clip_input, [caption])
+        # Each output goes through np.array before becoming a torch tensor
+        # (presumably the encoder returns non-torch arrays -- TODO confirm).
+        info['global_video_features'] = torch.tensor(np.array(video_feat)).squeeze(0).cpu()
+        info['video_features']        = torch.tensor(np.array(frame_embed)).squeeze(0).cpu()
+        info['global_text_features']  = torch.tensor(np.array(text_feat)).squeeze(0).cpu()
+    return info
 # ==================== Feature Extraction ====================
 @spaces.GPU
 def extract_features_gpu(clip_chunk, sync_chunk, caption):