# MySafeCode - Update app.py (commit a6370b0, verified)
import gradio as gr
import cv2
import numpy as np
import trimesh
import tempfile
import os
import logging
# Set up logging: configure the root logger at INFO level and create the
# module-level logger used by every function in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Fix OpenGL issues for headless environments by selecting the 'osmesa'
# PyOpenGL platform.
# NOTE(review): this assignment runs after the imports above; presumably
# PyOpenGL is only loaded lazily by the mesh/viewer code - confirm.
os.environ['PYOPENGL_PLATFORM'] = 'osmesa'
# -------------------------
# GLOBAL (checkerboard persistence)
# -------------------------
# Random colours for the default checkerboard. Starts as None and is filled
# in once by default_checkerboard(), so later calls reuse the same colours.
_checkerboard_colors = None
# -------------------------
# VIDEO LOADING (BGR → RGB FIXED ✅)
# -------------------------
def read_video_frames(video_path, start=0, end=None, frame_step=1):
    """Read a range of frames from a video file and return them as RGB.

    Frames are decoded sequentially from the start of the file. A frame is
    kept when its index is in ``[start, end)`` and its distance from
    ``start`` is a multiple of ``frame_step``.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.
        start: Index of the first frame to keep.
        end: Index one past the last frame to decode. ``None``, or a value
            larger than the frame count reported by OpenCV, means "up to
            the reported frame count".
        frame_step: Keep every ``frame_step``-th frame; must be >= 1.

    Returns:
        ``np.array`` of the kept frames (stacked along a new leading axis),
        each converted from OpenCV's BGR channel order to RGB.

    Raises:
        ValueError: If ``frame_step`` is below 1, the file cannot be opened,
            OpenCV reports zero frames, or no frame ends up selected.
    """
    try:
        if frame_step < 1:
            raise ValueError("frame_step must be at least 1")

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                raise ValueError(f"Cannot open video file: {video_path}")

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if total_frames == 0:
                raise ValueError("Video file appears to be empty or corrupted")
            if end is None or end > total_frames:
                end = total_frames

            frames = []
            count = 0
            # Stop *before* decoding frame ``end`` so no work is wasted on a
            # frame that would be discarded anyway.
            while count < end:
                ret, frame = cap.read()
                if not ret:
                    break
                if count >= start and (count - start) % frame_step == 0:
                    # OpenCV decodes to BGR; downstream colouring expects RGB.
                    frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                count += 1
        finally:
            # Release the capture on every path, including the error raises
            # above, so the underlying file handle/decoder is not leaked.
            cap.release()

        if not frames:
            raise ValueError("No frames could be read from the video")
        logger.info(f"Successfully read {len(frames)} frames")
        return np.array(frames)
    except Exception as e:
        logger.error(f"Error reading video: {str(e)}")
        raise
# -------------------------
# DOWNSAMPLING
# -------------------------
def downsample_frames(frames, block_size=1, method='stride'):
    """Reduce the spatial resolution of every frame.

    Args:
        frames: 4-D array indexed as (frame, row, column, channel).
        block_size: Side length, in pixels, of the square block that is
            collapsed into one output pixel. Float values (as UI sliders
            may deliver) are truncated to ``int``.
        method: ``'stride'`` keeps the top-left pixel of every block;
            ``'mean'`` averages each complete block (incomplete blocks at
            the bottom/right edge are dropped) and returns ``uint8``.
            Any other value returns ``frames`` unchanged.

    Returns:
        The downsampled array, or ``frames`` itself when ``block_size`` is 1,
        ``frames`` is empty, or ``method`` is not recognised.

    Raises:
        ValueError: If ``block_size`` is below 1 or ``frames`` is not 4-D.
    """
    if block_size == 1 or frames.size == 0:
        return frames

    # Slicing steps and array dimensions must be true integers.
    block_size = int(block_size)
    if block_size < 1:
        raise ValueError("block_size must be at least 1")
    if block_size == 1:
        return frames

    z, h, w, c = frames.shape
    if method == 'stride':
        return frames[:, ::block_size, ::block_size]
    if method == 'mean':
        new_h = h // block_size
        new_w = w // block_size
        # Crop to a whole number of blocks, then expose each block as its
        # own pair of axes so one vectorised mean replaces the per-pixel
        # Python loops.
        cropped = frames[:, :new_h * block_size, :new_w * block_size]
        blocks = cropped.reshape(z, new_h, block_size, new_w, block_size, c)
        return blocks.mean(axis=(2, 4)).astype(np.uint8)
    return frames
# -------------------------
# VOXEL MASK
# -------------------------
def frames_to_voxels(frames, threshold=10):
    """Turn a stack of frames into a boolean occupancy mask.

    A voxel is marked as occupied where the sum of a pixel's channel values
    is strictly greater than ``threshold``.

    Args:
        frames: 4-D array indexed as (z, h, w, c).
        threshold: Minimum channel sum (exclusive) for a voxel to be set.

    Returns:
        Boolean array indexed as (z, h, w), or an empty array when
        ``frames`` has no elements.

    Raises:
        ValueError: If ``frames`` is non-empty and not 4-D.
    """
    if frames.size == 0:
        return np.array([])
    if frames.ndim != 4:
        raise ValueError("Frames must be 4D array (z, h, w, c)")
    # Collapse the channel axis, then compare against the threshold.
    channel_total = frames.sum(axis=3)
    return channel_total > threshold
# -------------------------
# VOXEL → MESH (FIXED COLORS ✅)
# -------------------------
def voxels_to_mesh(frames, voxels, voxel_size=1.0):
    """Build a coloured cube mesh from an occupancy mask.

    One box is created per occupied voxel and coloured with the pixel that
    produced that voxel. Image rows are flipped vertically when mapped to
    mesh ``y`` (image row 0 ends up at the highest ``y``); the same flipped
    row is used for both the occupancy test and the colour lookup, so every
    cube carries the colour of its own source pixel.

    Args:
        frames: 4-D array indexed as (z, row, column, channel) supplying
            the colours.
        voxels: 3-D boolean array indexed as (z, row, column).
        voxel_size: Edge length of each cube. Cubes are placed on an integer
            grid regardless of this value, so sizes above 1 overlap.

    Returns:
        The concatenated mesh, the first cube if concatenation fails, or an
        empty ``trimesh.Scene`` when there is nothing to build.
    """
    if voxels.size == 0 or frames.size == 0:
        return trimesh.Scene()

    _, h, _ = voxels.shape  # also enforces a 3-D mask
    meshes = []
    # Visit only the occupied voxels instead of scanning the full grid.
    for z, row, x in np.argwhere(voxels):
        # Skip voxels that have no matching pixel in ``frames``.
        if z >= frames.shape[0] or row >= frames.shape[1] or x >= frames.shape[2]:
            continue
        # Flip vertically so the image appears upright with +y pointing up.
        y = h - 1 - row
        color = frames[z, row, x].astype(np.uint8)
        try:
            cube = trimesh.creation.box(extents=[voxel_size] * 3)
            cube.apply_translation([x, y, z])
            # Apply colors as RGBA, one row per triangular face of the box.
            rgba = np.append(color, 255)
            cube.visual.face_colors = np.tile(rgba, (12, 1))
            meshes.append(cube)
        except Exception as e:
            # Best effort: one bad cube should not abort the whole mesh.
            logger.warning(f"Could not create cube at position ({x}, {y}, {z}): {str(e)}")

    if meshes:
        try:
            return trimesh.util.concatenate(meshes)
        except Exception as e:
            logger.warning(f"Could not concatenate meshes: {str(e)}")
            return meshes[0] if meshes else trimesh.Scene()
    return trimesh.Scene()
# -------------------------
# RANDOM CHECKERBOARD (ONE-TIME COLORS ✅)
# -------------------------
def default_checkerboard():
    """Build and export the fallback 10x10x2 checkerboard model.

    Cells where ``(x + y + z)`` is even are black; the remaining cells take
    their colour from ``_checkerboard_colors``, which is drawn randomly on
    the first call and reused afterwards.

    Returns:
        Tuple ``(obj_path, glb_path, glb_path)`` of files written to the
        system temp directory.

    Raises:
        Exception: Any error from voxelisation, meshing or export is logged
            and re-raised.
    """
    global _checkerboard_colors

    rows, cols, depth = 10, 10, 2
    grid_shape = (depth, rows, cols, 3)

    # Draw the random palette only once so repeated calls look the same.
    if _checkerboard_colors is None:
        _checkerboard_colors = np.random.randint(
            0, 256, size=grid_shape, dtype=np.uint8
        )

    # Odd-parity cells are coloured; even-parity cells stay black (zero).
    zz, yy, xx = np.indices((depth, rows, cols))
    coloured = (xx + yy + zz) % 2 != 0
    frames = np.zeros(grid_shape, dtype=np.uint8)
    frames[coloured] = _checkerboard_colors[coloured]

    try:
        occupancy = frames_to_voxels(frames, threshold=1)
        mesh = voxels_to_mesh(frames, occupancy, voxel_size=2)

        out_dir = tempfile.gettempdir()
        obj = os.path.join(out_dir, "checkerboard.obj")
        glb = os.path.join(out_dir, "checkerboard.glb")
        for target in (obj, glb):
            mesh.export(target)
        return obj, glb, glb
    except Exception as e:
        logger.error(f"Error creating checkerboard: {str(e)}")
        raise
# -------------------------
# MAIN GENERATOR
# -------------------------
def generate_voxel_files(
    video_file,
    start_frame,
    end_frame,
    frame_step,
    block_size,
    downsample_method
):
    """Run the video -> voxels -> mesh pipeline and export the result.

    Args:
        video_file: ``None``, a path, or an object whose ``name`` attribute
            is the path of the video.
        start_frame: First frame index passed to ``read_video_frames``.
        end_frame: End frame index passed to ``read_video_frames``.
        frame_step: Frame stride passed to ``read_video_frames``.
        block_size: Block size passed to ``downsample_frames``.
        downsample_method: Method name passed to ``downsample_frames``.

    Returns:
        Tuple ``(obj_path, glb_path, glb_path)``. When no video is given, or
        when any step fails, the paths of the default checkerboard model are
        returned instead.
    """
    try:
        if video_file is None:
            logger.info("No video file provided, generating checkerboard")
            return default_checkerboard()

        # Accept either an object exposing ``.name`` or the path itself.
        source_path = getattr(video_file, 'name', video_file)
        if not (source_path and os.path.exists(source_path)):
            raise ValueError("Invalid video file path")
        logger.info(f"Processing video: {source_path}")

        clip = read_video_frames(
            source_path,
            start=start_frame,
            end=end_frame,
            frame_step=frame_step
        )
        if clip.size == 0:
            raise ValueError("No frames could be processed")

        clip = downsample_frames(
            clip,
            block_size=block_size,
            method=downsample_method
        )
        occupancy = frames_to_voxels(clip)
        mesh = voxels_to_mesh(clip, occupancy)

        out_dir = tempfile.gettempdir()
        obj_path = os.path.join(out_dir, "output.obj")
        glb_path = os.path.join(out_dir, "output.glb")
        for target in (obj_path, glb_path):
            mesh.export(target)

        logger.info("Successfully generated voxel files")
        return obj_path, glb_path, glb_path
    except Exception as e:
        logger.error(f"Error in generate_voxel_files: {str(e)}")
        # Return checkerboard as fallback
        return default_checkerboard()
# -------------------------
# GRADIO 6.0+ UI - MINIMAL VERSION
# -------------------------
def create_interface():
    """Build the Gradio Blocks UI for the video-to-voxel converter.

    The left column holds the upload widget, frame/processing sliders and
    the convert button; the right column holds the OBJ/GLB downloads, the
    3D preview and a status box. Both the button click and a finished
    upload run the same handler.

    Returns:
        The constructed ``gr.Blocks`` instance (not launched).
    """
    # Use gr.Blocks without any styling parameters
    with gr.Blocks(title="MP4 → Voxels → 3D") as interface:
        gr.Markdown("# 📹 MP4 → Voxels → 3D")
        gr.Markdown("Convert video files into voxelized 3D meshes. If no file is uploaded, we have some messed up error handling.")
        gr.Markdown("There is no AI involved here so dont expect magic. If the mp4 file is crafted for this purpose it could look ok.")

        with gr.Row():
            with gr.Column(scale=1):
                video_input = gr.File(
                    label="Upload MP4 Video",
                    file_types=["video"],
                    file_count="single"
                )

                gr.Markdown("### Frame Settings")
                start_frame = gr.Slider(
                    minimum=0,
                    maximum=500,
                    value=0,
                    step=1,
                    label="Start Frame"
                )
                end_frame = gr.Slider(
                    minimum=0,
                    maximum=500,
                    value=50,
                    step=1,
                    label="End Frame"
                )
                frame_step = gr.Slider(
                    minimum=1,
                    maximum=10,
                    value=1,
                    step=1,
                    label="Frame Step"
                )

                gr.Markdown("### Processing Settings")
                block_size = gr.Slider(
                    minimum=1,
                    maximum=64,
                    value=32,
                    step=1,
                    label="Pixel Block Size"
                )
                downsample_method = gr.Radio(
                    choices=["stride", "mean"],
                    value="stride",
                    label="Downsample Method"
                )
                process_btn = gr.Button("🔄 Convert to Voxels", variant="primary")

            with gr.Column(scale=2):
                with gr.Row():
                    obj_output = gr.File(label="OBJ File", file_types=[".obj"])
                    glb_output = gr.File(label="GLB File", file_types=[".glb"])
                model_3d = gr.Model3D(
                    label="3D Preview",
                    height=600
                )
                status = gr.Textbox(
                    label="Status",
                    value="Ready to process...",
                    interactive=False
                )

        # Event handlers
        def process_with_status(video_file, start, end, step, block, method):
            """Run the pipeline and return (status, obj, glb, preview) updates."""
            try:
                # Sliders may deliver floats; the pipeline needs real ints
                # for frame indices, modulo steps and slice strides.
                result = generate_voxel_files(
                    video_file, int(start), int(end), int(step), int(block), method
                )
                if result and len(result) == 3:
                    obj_path, glb_path, glb_preview = result
                    return (
                        gr.update(value="✅ Processing complete!"),
                        obj_path,
                        glb_path,
                        glb_preview
                    )
                return (
                    gr.update(value="❌ Processing failed"),
                    None,
                    None,
                    None
                )
            except Exception as e:
                logger.error(f"Processing error: {str(e)}")
                return (
                    gr.update(value=f"❌ Error: {str(e)}"),
                    None,
                    None,
                    None
                )

        handler_inputs = [
            video_input,
            start_frame,
            end_frame,
            frame_step,
            block_size,
            downsample_method
        ]
        handler_outputs = [
            status,
            obj_output,
            glb_output,
            model_3d
        ]

        # Same handler for the button click and for auto-processing when a
        # video has been uploaded.
        for register in (process_btn.click, video_input.upload):
            register(
                fn=process_with_status,
                inputs=handler_inputs,
                outputs=handler_outputs
            )

    return interface
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces,
    # port 7860. Launch failures are logged and then re-raised.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "debug": True,
        "share": False,
        "show_error": True,
    }
    try:
        interface = create_interface()
        interface.launch(**launch_options)
    except Exception as e:
        logger.error(f"Failed to launch Gradio interface: {str(e)}")
        raise