# video-process / app.py
# JakeFake222's picture
# Upload app.py with huggingface_hub
# 460258b verified
"""Video Processing Space - Gradio Interface for AI Video Analysis."""
import gradio as gr
import cv2
import numpy as np
import json
import tempfile
import os
def get_metadata(video_file):
    """Extract basic properties of a video file.

    Args:
        video_file: Path to the video, or ``None`` when nothing was uploaded.

    Returns:
        A JSON string (2-space indent) with the keys ``frame_count``,
        ``fps``, ``width``, ``height``, ``duration_sec`` and ``codec``, or a
        plain message string when no video was given or it cannot be opened.
    """
    if video_file is None:
        return "No video uploaded"

    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            return "Error: Could not open video"

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        codec_int = int(cap.get(cv2.CAP_PROP_FOURCC))

        # FOURCC packs four ASCII characters into one integer, lowest byte
        # first. Zero bytes are dropped so an unreported codec (value 0)
        # yields an empty string instead of four NUL characters.
        fourcc_bytes = [(codec_int >> 8 * i) & 0xFF for i in range(4)]

        metadata = {
            "frame_count": frame_count,
            "fps": round(fps, 2),
            "width": int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            "height": int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            # Divide by the unrounded fps so the duration does not inherit
            # the rounding error of the displayed fps value.
            "duration_sec": round(frame_count / fps, 2) if fps > 0 else 0,
            "codec": "".join(chr(b) for b in fourcc_bytes if b),
        }
    finally:
        # Release the capture on every path, including the early return and
        # any exception raised while reading properties.
        cap.release()

    return json.dumps(metadata, indent=2)
def create_contact_sheet(video_file, grid_size):
    """Generate a square grid of evenly spaced thumbnails from a video.

    Args:
        video_file: Path to the video, or ``None`` when nothing was uploaded.
        grid_size: Number of rows and columns in the grid; converted with
            ``int()`` and required to be at least 1.

    Returns:
        A ``(image, info)`` tuple. On success ``image`` is an RGB numpy array
        holding the grid and ``info`` is a JSON string with ``grid_size``,
        ``total_cells``, ``video_duration_sec`` and ``seconds_per_cell``.
        On failure ``image`` is ``None`` and ``info`` is an error message.
    """
    if video_file is None:
        return None, "No video uploaded"

    grid_size = int(grid_size)
    if grid_size < 1:
        # A zero-cell grid would otherwise divide by zero when computing the
        # sampling step.
        return None, "Error: Grid size must be at least 1"

    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            return None, "Error: Could not open video"

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if width <= 0 or height <= 0:
            # Without valid dimensions the thumbnail aspect ratio cannot be
            # computed (division by zero on width).
            return None, "Error: Could not read video dimensions"

        duration = total_frames / fps if fps > 0 else 0
        total_cells = grid_size * grid_size
        step = max(1, total_frames // total_cells)

        thumb_width = 200
        # Keep the aspect ratio; clamp to 1 px so extremely wide videos do
        # not produce a zero-height resize target.
        thumb_height = max(1, int(height * (thumb_width / width)))

        frames = []
        for i in range(total_cells):
            cap.set(cv2.CAP_PROP_POS_FRAMES, i * step)
            ret, frame = cap.read()
            if ret:
                thumb = cv2.resize(frame, (thumb_width, thumb_height))
                # Stamp the cell index in the top-left corner.
                cv2.putText(thumb, str(i), (5, 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            else:
                # Unreadable position (e.g. past the end): fill with black so
                # the grid stays rectangular.
                thumb = np.zeros((thumb_height, thumb_width, 3), dtype=np.uint8)
            frames.append(thumb)
    finally:
        # Release the capture on every path, including errors.
        cap.release()

    # frames holds exactly grid_size * grid_size thumbnails, so every row
    # slice is full and the grid is never empty.
    rows = [
        np.hstack(frames[r * grid_size:(r + 1) * grid_size])
        for r in range(grid_size)
    ]
    grid_image = np.vstack(rows)
    # OpenCV frames are BGR; convert to RGB for display.
    grid_image_rgb = cv2.cvtColor(grid_image, cv2.COLOR_BGR2RGB)

    info = {
        "grid_size": grid_size,
        "total_cells": total_cells,
        "video_duration_sec": round(duration, 2),
        "seconds_per_cell": round(duration / total_cells, 2),
    }
    return grid_image_rgb, json.dumps(info, indent=2)
def extract_clip(video_file, start_sec, end_sec):
    """Extract the segment between two timestamps into a new MP4 file.

    Args:
        video_file: Path to the video, or ``None`` when nothing was uploaded.
        start_sec: Clip start in seconds; negative values are clamped to 0.
        end_sec: Clip end in seconds; values past the video's duration are
            clamped to the duration.

    Returns:
        A ``(path, info)`` tuple. On success ``path`` is the temporary
        ``.mp4`` file that was written and ``info`` is a JSON string with
        ``start_sec``, ``end_sec``, ``duration_sec`` and ``frames_written``.
        On failure ``path`` is ``None`` and ``info`` is an error message.
    """
    if video_file is None:
        return None, "No video uploaded"
    if start_sec is None or end_sec is None:
        # A cleared number field arrives as None; comparing it with a number
        # would raise TypeError further down.
        return None, "Error: Start and end times are required"

    cap = cv2.VideoCapture(video_file)
    out = None
    output_path = None
    succeeded = False
    try:
        if not cap.isOpened():
            return None, "Error: Could not open video"

        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps if fps > 0 else 0

        if start_sec < 0:
            start_sec = 0
        if end_sec > duration:
            end_sec = duration
        if start_sec >= end_sec:
            return None, f"Invalid time range: {start_sec} to {end_sec}"

        start_frame = int(start_sec * fps)
        end_frame = int(end_sec * fps)

        # mkstemp creates the file atomically, unlike the deprecated and
        # race-prone mktemp. Close the descriptor so the writer can reopen
        # the path itself.
        fd, output_path = tempfile.mkstemp(suffix=".mp4")
        os.close(fd)

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            return None, "Error: Could not create output video"

        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        frames_written = 0
        for _ in range(end_frame - start_frame):
            ret, frame = cap.read()
            if not ret:
                # The source ended earlier than its metadata claimed.
                break
            out.write(frame)
            frames_written += 1
        succeeded = True
    finally:
        # Release both handles on every path, then remove the temp file if
        # no clip is being returned.
        cap.release()
        if out is not None:
            out.release()
        if not succeeded and output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is harmless.
                pass

    info = {
        "start_sec": start_sec,
        "end_sec": end_sec,
        "duration_sec": round(end_sec - start_sec, 2),
        "frames_written": frames_written,
    }
    return output_path, json.dumps(info, indent=2)
# Gradio Interface
# The UI is built at import time; `demo` is the module-level Blocks object
# that the __main__ guard at the end of this section launches.
with gr.Blocks(title="Video Process - AI Video Analysis", theme=gr.themes.Soft()) as demo:
    # The Markdown text is flush-left on purpose: any leading spaces would
    # become part of the string literal.
    gr.Markdown("""
# 🎬 Video Process
**AI-powered video analysis and editing**
Upload a video to inspect, analyze, or extract clips.
""")
    # One shared upload component; each tab's handler receives it as its
    # first input.
    video_input = gr.Video(label="Upload Video")
    with gr.Tabs():
        # Tab 1: Metadata
        with gr.TabItem("📊 Metadata"):
            meta_btn = gr.Button("Get Metadata", variant="primary")
            meta_output = gr.Code(language="json", label="Video Properties")
            # get_metadata returns a single string shown in the code box.
            meta_btn.click(get_metadata, inputs=video_input, outputs=meta_output)
        # Tab 2: Contact Sheet
        with gr.TabItem("👁️ Inspect (Contact Sheet)"):
            gr.Markdown("Generate a visual grid to 'see' the entire video at once")
            # The slider value is passed to create_contact_sheet as grid_size.
            grid_slider = gr.Slider(minimum=2, maximum=10, value=6, step=1, label="Grid Size")
            inspect_btn = gr.Button("Generate Contact Sheet", variant="primary")
            contact_sheet_output = gr.Image(label="Contact Sheet")
            inspect_info = gr.Code(language="json", label="Info")
            # create_contact_sheet returns (image, info) in the same order as
            # the outputs list.
            inspect_btn.click(create_contact_sheet,
                              inputs=[video_input, grid_slider],
                              outputs=[contact_sheet_output, inspect_info])
        # Tab 3: Extract Clip
        with gr.TabItem("✂️ Extract Clip"):
            gr.Markdown("Cut a segment from the video by timestamps")
            with gr.Row():
                start_input = gr.Number(value=0, label="Start (seconds)")
                end_input = gr.Number(value=5, label="End (seconds)")
            extract_btn = gr.Button("Extract Clip", variant="primary")
            clip_output = gr.Video(label="Extracted Clip")
            extract_info = gr.Code(language="json", label="Info")
            # extract_clip returns (clip path, info) in the same order as the
            # outputs list.
            extract_btn.click(extract_clip,
                              inputs=[video_input, start_input, end_input],
                              outputs=[clip_output, extract_info])
    # Footer shown below the tabs.
    gr.Markdown("""
---
*Built with OpenCV + Gradio | VAM-Seek inspired contact sheet approach*
""")
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()