File size: 4,285 Bytes
3d64abf
 
 
 
 
9d9103f
3d64abf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import gradio as gr
import cv2
import os
import time
from utils import is_video, process_frame_mock

def sam3d_prediction_fn(file_paths, progress=gr.Progress()):
    """Stream mock SAM3D results for a batch of uploaded images and videos.

    This is a generator: every ``yield`` pushes a fresh snapshot to the UI so
    the gallery and the log fill in while files are still being processed.

    Args:
        file_paths: Paths of the uploaded files. Each entry is classified
            with ``is_video``; anything that is not a video is read as an
            image with ``cv2.imread``.
        progress: Unused by this function; kept so the signature stays
            compatible with existing callers.

    Yields:
        A ``(processed_results, status_log)`` tuple. ``processed_results`` is
        the growing list of ``(frame, caption)`` pairs and ``status_log`` is
        the accumulated newline-separated log text.
    """
    if not file_paths:
        # Inside a generator a plain ``return value`` is swallowed into
        # StopIteration and never reaches the outputs, so the message has to
        # be yielded explicitly.
        yield [], "No files uploaded."
        return

    processed_results = []
    total_files = len(file_paths)
    status_log = ""

    # Only the first few frames of each video are processed to keep the demo quick.
    max_frames_to_preview = 5

    # Running index across all files, used to label frames for the mock processor.
    global_frame_idx = 0

    for i, file_path in enumerate(file_paths):
        file_name = os.path.basename(file_path)
        status_log += f"Processing file {i+1}/{total_files}: {file_name}...\n"

        # Show the "Processing ..." line before any heavy work starts.
        yield processed_results, status_log

        if is_video(file_path):
            cap = cv2.VideoCapture(file_path)
            current_frame = 0
            try:
                while cap.isOpened() and current_frame < max_frames_to_preview:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    processed_frame = process_frame_mock(
                        frame, f"Video Frame {global_frame_idx}"
                    )
                    processed_results.append(
                        (processed_frame, f"{file_name} - Frame {current_frame}")
                    )

                    global_frame_idx += 1
                    current_frame += 1

                    # Stream an update after every frame.
                    yield processed_results, status_log
                    time.sleep(0.1)  # Simulate processing time
            finally:
                # Runs on normal completion, on an exception, and when the
                # generator is closed while suspended at the yield above, so
                # the capture handle is never leaked.
                cap.release()

            if current_frame == 0:
                # The file could not be opened or contained no decodable frame.
                status_log += f"Could not read any frames from video: {file_name}\n"
            else:
                status_log += f"Finished video: {file_name}\n"

        else:
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread signals failure by returning None; report it
                # instead of claiming the image was processed.
                status_log += f"Could not read image: {file_name}\n"
            else:
                processed_image = process_frame_mock(image, f"Image {global_frame_idx}")
                processed_results.append((processed_image, file_name))
                global_frame_idx += 1

                # Stream an update for the finished image.
                yield processed_results, status_log
                time.sleep(0.5)  # Simulate processing time

                status_log += f"Finished image: {file_name}\n"

    status_log += "All processing complete."
    yield processed_results, status_log

# --- Gradio 6 Application Structure ---

with gr.Blocks() as demo:
    # Page title followed by the attribution link.
    gr.Markdown("# SAM3D Multi-File Processor")
    gr.Markdown("[Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")

    with gr.Row():
        # Narrow column: upload widget, trigger button, and text log.
        with gr.Column(scale=1):
            input_files = gr.File(
                label="Input Images/Videos",
                file_types=["image", "video"],
                file_count="multiple",
            )
            process_btn = gr.Button("Start Processing", variant="primary")
            log_output = gr.Textbox(
                label="Processing Log",
                interactive=False,
                lines=10,
            )

        # Wide column: gallery that receives the streamed frames.
        with gr.Column(scale=2):
            output_gallery = gr.Gallery(
                label="Processed Stream",
                object_fit="contain",
                height=600,
                columns=3,
            )

    # Each value yielded by the handler updates the gallery and the log, in that order.
    process_btn.click(
        sam3d_prediction_fn,
        inputs=[input_files],
        outputs=[output_gallery, log_output],
        api_visibility="public",
    )

# Launch with Gradio 6 parameters
if __name__ == "__main__":
    # Footer entry pointing back to the anycoder space.
    anycoder_link = {
        "label": "Built with anycoder",
        "url": "https://huggingface.co/spaces/akhaliq/anycoder",
    }
    demo.launch(theme=gr.themes.Soft(), footer_links=[anycoder_link])