Spaces:

anas31
/

sasasa

Runtime error

App Files Files Community

anas31 committed on Jul 19, 2025

Commit

5752234

verified ·

1 Parent(s): 86bb3a6

Create app.py

Browse files

Files changed (1) hide show

app.py +73 -0

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import gradio as gr
+import torch
+import numpy as np
+import cv2
+from transformers import AutoTokenizer, AutoModel
+# Load the video encoder once at import time so every request reuses it.
+MODEL_ID = "facebook/vjepa2-vitl-fpc64-256"
+# No tokenizer is loaded: nothing in this app consumes one, and a failed
+# AutoTokenizer lookup would abort startup before the UI is built.
+# NOTE(review): this checkpoint is a video encoder and presumably ships no
+# tokenizer files - confirm against the model repository.
+model = AutoModel.from_pretrained(MODEL_ID)
+def extract_frames(video_path, num_frames=8):
+    """Sample evenly spaced RGB frames from a video file.
+
+    Args:
+        video_path: Path of the video to open with OpenCV.
+        num_frames: Number of sample positions spread between the first and
+            last frame. Positions repeat when the video has fewer frames.
+
+    Returns:
+        A list of RGB frames in increasing frame-index order. The list is
+        shorter than ``num_frames`` when some reads fail, and empty when
+        ``video_path`` is missing, the video cannot be opened, or it reports
+        no frames.
+    """
+    if not video_path:
+        return []
+    cap = cv2.VideoCapture(video_path)
+    # try/finally guarantees the capture handle is released even if a seek,
+    # read or colour conversion raises.
+    try:
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        # Without this guard linspace(0, -1, ...) would yield meaningless
+        # seek positions for an unreadable or empty video.
+        if not cap.isOpened() or total_frames <= 0:
+            return []
+        frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
+        frames = []
+        for idx in frame_indices:
+            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
+            ret, frame = cap.read()
+            if ret:
+                # OpenCV decodes to BGR; callers expect RGB.
+                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        return frames
+    finally:
+        cap.release()
+def process_video(video_file):
+    """Extract frames from a video and embed them with the module-level model.
+
+    Args:
+        video_file: Path of the video to read.
+
+    Returns:
+        A dict with two keys: ``"embeddings"`` holds the model's
+        ``last_hidden_state`` averaged over dim 1 and squeezed, as a NumPy
+        array; ``"frames"`` holds the sampled RGB frames at their original
+        resolution.
+
+    Raises:
+        ValueError: If no frame could be read from ``video_file``.
+    """
+    frames = extract_frames(video_file)
+    if not frames:
+        # np.stack([]) would raise the same exception type with an opaque
+        # "need at least one array" message; fail early and say why.
+        raise ValueError("No frames could be read from the video.")
+    # Resize every frame to 256x256 and scale pixel values to [0, 1].
+    # NOTE(review): the checkpoint's own preprocessing may also apply
+    # mean/std normalization - confirm against its video processor config.
+    resized = [cv2.resize(frame, (256, 256)) for frame in frames]
+    scaled = np.stack(resized) / 255.0
+    # (num_frames, H, W, C) -> (1, num_frames, C, H, W)
+    video_tensor = torch.from_numpy(scaled).permute(0, 3, 1, 2).unsqueeze(0).float()
+    # Inference only, so skip gradient tracking.
+    with torch.no_grad():
+        outputs = model(video_tensor)
+    embeddings = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
+    return {
+        "embeddings": embeddings,
+        "frames": frames,  # Original-resolution frames for display
+    }
+# Gradio Interface
+def _run_for_ui(video_file):
+    """Adapt ``process_video`` to the two output components wired below.
+
+    ``process_video`` returns a dict keyed by strings, which Gradio cannot map
+    onto a list of output components, so the values are unpacked here in the
+    same order as ``outputs``: gallery frames first, then embeddings.
+    """
+    if not video_file:
+        raise gr.Error("Please upload a video first.")
+    try:
+        result = process_video(video_file)
+    except ValueError as err:
+        # Show the reason in the UI instead of a generic failure.
+        raise gr.Error(str(err)) from err
+    # Convert the array to nested lists so gr.JSON receives plain values.
+    return result["frames"], np.asarray(result["embeddings"]).tolist()
+with gr.Blocks() as demo:
+    gr.Markdown("# V-JEPA Video Embedding Extractor")
+    gr.Markdown("Upload a video to extract embeddings using `facebook/vjepa2-vitl-fpc64-256`.")
+    with gr.Row():
+        video_input = gr.Video(label="Upload Video")
+        submit_btn = gr.Button("Process")
+    with gr.Row():
+        frame_gallery = gr.Gallery(label="Extracted Frames")
+        embeddings_output = gr.JSON(label="Embeddings")
+    submit_btn.click(
+        fn=_run_for_ui,
+        inputs=video_input,
+        outputs=[frame_gallery, embeddings_output],
+    )
+# Launch only when run as a script so importing this module has no side effect.
+if __name__ == "__main__":
+    demo.launch()