fanduluhf committed on
Commit
b2f3ea1
·
verified ·
1 Parent(s): 0efd682

Upload 6 files

Browse files
Files changed (6) hide show
  1. README.md +31 -14
  2. app.py +149 -154
  3. periodic_detection_function.py +347 -0
  4. preprocess_videos.py +48 -0
  5. requirements.txt +8 -6
  6. verify_app.py +39 -0
README.md CHANGED
@@ -1,14 +1,31 @@
1
- ---
2
- title: LSPW
3
- emoji: 🖼
4
- colorFrom: purple
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.44.0
8
- app_file: app.py
9
- pinned: false
10
- license: lgpl-3.0
11
- short_description: Long-Term Spatiotemporal Periodic Workflows
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Unsupervised Discovery of Long-Term Spatiotemporal Periodic Workflows in Human Activities
2
+
3
+ [Project Page](https://sites.google.com/view/periodicworkflow) | [arXiv](https://www.arxiv.org/abs/2511.14945)
4
+
5
+ ## Abstract
6
+
7
+ Periodic human activities with implicit workflows are common in manufacturing, sports, and daily life. While short-term periodic activities—characterized by simple structures and high-contrast patterns—have been widely studied, long-term periodic workflows with low-contrast patterns remain largely underexplored.
8
+
9
+ To bridge this gap, we introduce the first benchmark comprising 580 multimodal human activity sequences featuring long-term periodic workflows. The benchmark supports three evaluation tasks aligned with real-world applications: unsupervised periodic workflow detection, task completion tracking, and procedural anomaly detection. We also propose a lightweight, training-free baseline for modeling diverse periodic workflow patterns.
10
+
11
+
12
+ ## Usage
13
+
14
+ ### Dependencies
15
+ Ensure you have the following Python packages installed:
16
+ - `numpy`
17
+ - `scikit-learn`
18
+ - `tqdm`
19
+ - `matplotlib`
20
+ - `scipy`
21
+
22
+ You can install them using pip:
23
+ ```bash
24
+ pip install numpy scikit-learn tqdm matplotlib scipy
25
+ ```
26
+
27
+ ### Estimation
28
+ Run the workflow detection function (`run_periodic_detection` in `periodic_detection_function.py`) to perform unsupervised periodic workflow detection on the dataset.
29
+
30
+
31
+
app.py CHANGED
@@ -1,154 +1,149 @@
1
- import gradio as gr
2
- import numpy as np
3
- import random
4
-
5
- # import spaces #[uncomment to use ZeroGPU]
6
- from diffusers import DiffusionPipeline
7
- import torch
8
-
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
- model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
11
-
12
- if torch.cuda.is_available():
13
- torch_dtype = torch.float16
14
- else:
15
- torch_dtype = torch.float32
16
-
17
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
18
- pipe = pipe.to(device)
19
-
20
- MAX_SEED = np.iinfo(np.int32).max
21
- MAX_IMAGE_SIZE = 1024
22
-
23
-
24
- # @spaces.GPU #[uncomment to use ZeroGPU]
25
- def infer(
26
- prompt,
27
- negative_prompt,
28
- seed,
29
- randomize_seed,
30
- width,
31
- height,
32
- guidance_scale,
33
- num_inference_steps,
34
- progress=gr.Progress(track_tqdm=True),
35
- ):
36
- if randomize_seed:
37
- seed = random.randint(0, MAX_SEED)
38
-
39
- generator = torch.Generator().manual_seed(seed)
40
-
41
- image = pipe(
42
- prompt=prompt,
43
- negative_prompt=negative_prompt,
44
- guidance_scale=guidance_scale,
45
- num_inference_steps=num_inference_steps,
46
- width=width,
47
- height=height,
48
- generator=generator,
49
- ).images[0]
50
-
51
- return image, seed
52
-
53
-
54
- examples = [
55
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
56
- "An astronaut riding a green horse",
57
- "A delicious ceviche cheesecake slice",
58
- ]
59
-
60
- css = """
61
- #col-container {
62
- margin: 0 auto;
63
- max-width: 640px;
64
- }
65
- """
66
-
67
- with gr.Blocks(css=css) as demo:
68
- with gr.Column(elem_id="col-container"):
69
- gr.Markdown(" # Text-to-Image Gradio Template")
70
-
71
- with gr.Row():
72
- prompt = gr.Text(
73
- label="Prompt",
74
- show_label=False,
75
- max_lines=1,
76
- placeholder="Enter your prompt",
77
- container=False,
78
- )
79
-
80
- run_button = gr.Button("Run", scale=0, variant="primary")
81
-
82
- result = gr.Image(label="Result", show_label=False)
83
-
84
- with gr.Accordion("Advanced Settings", open=False):
85
- negative_prompt = gr.Text(
86
- label="Negative prompt",
87
- max_lines=1,
88
- placeholder="Enter a negative prompt",
89
- visible=False,
90
- )
91
-
92
- seed = gr.Slider(
93
- label="Seed",
94
- minimum=0,
95
- maximum=MAX_SEED,
96
- step=1,
97
- value=0,
98
- )
99
-
100
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
101
-
102
- with gr.Row():
103
- width = gr.Slider(
104
- label="Width",
105
- minimum=256,
106
- maximum=MAX_IMAGE_SIZE,
107
- step=32,
108
- value=1024, # Replace with defaults that work for your model
109
- )
110
-
111
- height = gr.Slider(
112
- label="Height",
113
- minimum=256,
114
- maximum=MAX_IMAGE_SIZE,
115
- step=32,
116
- value=1024, # Replace with defaults that work for your model
117
- )
118
-
119
- with gr.Row():
120
- guidance_scale = gr.Slider(
121
- label="Guidance scale",
122
- minimum=0.0,
123
- maximum=10.0,
124
- step=0.1,
125
- value=0.0, # Replace with defaults that work for your model
126
- )
127
-
128
- num_inference_steps = gr.Slider(
129
- label="Number of inference steps",
130
- minimum=1,
131
- maximum=50,
132
- step=1,
133
- value=2, # Replace with defaults that work for your model
134
- )
135
-
136
- gr.Examples(examples=examples, inputs=[prompt])
137
- gr.on(
138
- triggers=[run_button.click, prompt.submit],
139
- fn=infer,
140
- inputs=[
141
- prompt,
142
- negative_prompt,
143
- seed,
144
- randomize_seed,
145
- width,
146
- height,
147
- guidance_scale,
148
- num_inference_steps,
149
- ],
150
- outputs=[result, seed],
151
- )
152
-
153
- if __name__ == "__main__":
154
- demo.launch()
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import os
4
+ import glob
5
+ import pickle
6
+ import json
7
+ from utils.render import render_smpl
8
+ from periodic_detection_function import run_periodic_detection
9
+
10
+ DATA_DIR = "data"
11
+ OUTPUT_DIR = "outputs"
12
+
13
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
14
+
15
+ def get_candidates():
16
+ """List all pickle files in data directory."""
17
+ files = glob.glob(os.path.join(DATA_DIR, "*.pkl"))
18
+ return [os.path.basename(f) for f in files]
19
+
20
+ def load_and_render(candidate_file):
21
+ """
22
+ Load the selected pickle file, render it to a video, and return the video path.
23
+ """
24
+ if not candidate_file:
25
+ return None
26
+
27
+ pkl_path = os.path.join(DATA_DIR, candidate_file)
28
+ output_video_path = os.path.join(OUTPUT_DIR, f"{candidate_file.replace('.pkl', '')}_rendered.mp4")
29
+
30
+ # Check for pre-rendered video in data/
31
+ pre_rendered_path = os.path.join(DATA_DIR, candidate_file.replace('.pkl', '.mp4'))
32
+ if os.path.exists(pre_rendered_path):
33
+ print(f"Using pre-rendered video: {pre_rendered_path}")
34
+ return pre_rendered_path
35
+
36
+ # If not found, fall back to rendering (or re-render if desired, but user wants direct use)
37
+ # Keeping fallback just in case
38
+ try:
39
+ with open(pkl_path, 'rb') as f:
40
+ data = pickle.load(f)
41
+
42
+ # Data shape check
43
+ if len(data.shape) != 3 or data.shape[1] != 24 or data.shape[2] != 3:
44
+ raise ValueError(f"Unexpected data shape: {data.shape}. Expected (Frames, 24, 3)")
45
+
46
+ print(f"Rendering {candidate_file}...")
47
+ render_smpl(data, output_video_path, fps=30)
48
+ return output_video_path
49
+
50
+ except Exception as e:
51
+ print(f"Error rendering {candidate_file}: {e}")
52
+ return None
53
+
54
+ def run_analysis(candidate_file, rendered_video_path):
55
+ """
56
+ Run periodic detection on the rendered video and trajectory data.
57
+ """
58
+ if not candidate_file or not rendered_video_path:
59
+ return None, "Please select a candidate and wait for rendering first."
60
+
61
+ pkl_path = os.path.join(DATA_DIR, candidate_file)
62
+ output_video_path = os.path.join(OUTPUT_DIR, f"{candidate_file.replace('.pkl', '')}_result.mp4")
63
+
64
+ try:
65
+ print(f"Running detection on {candidate_file}...")
66
+ # Note: run_periodic_detection expects [Frames, N_feats] usually or generic trajectory.
67
+ # The pickle contains (Frames, 24, 3).
68
+ # The spatiotemporal_clustering in helper seems to handle reshaping or expects specific shape.
69
+ # Looking at periodic_detection_function.py line 46:
70
+ # trajectories = trajectories.reshape(trajectories.shape[0],-1)
71
+ # So it flattens (Frames, 24, 3) to (Frames, 72), which is fine.
72
+
73
+ results = run_periodic_detection(
74
+ video_path=rendered_video_path,
75
+ trajectory_path=pkl_path,
76
+ output_video_path=output_video_path,
77
+ n_clusters=9,
78
+ sampling_rate=1,
79
+ make_video=True
80
+ )
81
+
82
+ if "error" in results:
83
+ return None, json.dumps(results, indent=2)
84
+
85
+ # Format results for display
86
+ display_results = {
87
+ "workflow branches": results.get("workflow"),
88
+ "period_boundaries": results.get("period_boundaries"),
89
+ "num_periods": results.get("num_periods"),
90
+ "window_size": results.get("window_size")
91
+ }
92
+
93
+ return results.get("output_video"), json.dumps(display_results, indent=2)
94
+
95
+ except Exception as e:
96
+ import traceback
97
+ traceback.print_exc()
98
+ return None, f"Error during analysis: {str(e)}"
99
+
100
+ def reset_all():
101
+ return None, None, None, None
102
+
103
+ # Gradio Interface
104
+ with gr.Blocks(title="Periodic Workflow Detection Demo") as demo:
105
+ gr.Markdown("# Periodic Workflow Detection Demo")
106
+
107
+ with gr.Row():
108
+ with gr.Column(scale=1):
109
+ gr.Markdown("### 1. Select Input")
110
+ candidate_dropdown = gr.Dropdown(
111
+ choices=get_candidates(),
112
+ label="Select Candidates",
113
+ value=None
114
+ )
115
+
116
+ gr.Markdown("### Input Visualization")
117
+ input_video = gr.Video(label="Spatiotemporal Sequence", interactive=False)
118
+
119
+ with gr.Column(scale=1):
120
+ gr.Markdown("### 2. Run Detection")
121
+ run_btn = gr.Button("Run Analysis", variant="primary")
122
+
123
+ gr.Markdown("### Results")
124
+ text_output = gr.JSON(label="Numerical Results")
125
+ result_video = gr.Video(label="Detection Visualization", interactive=False)
126
+
127
+ reset_btn = gr.Button("Reset", variant="secondary")
128
+
129
+ # Interactions
130
+ candidate_dropdown.change(
131
+ fn=load_and_render,
132
+ inputs=[candidate_dropdown],
133
+ outputs=[input_video]
134
+ )
135
+
136
+ run_btn.click(
137
+ fn=run_analysis,
138
+ inputs=[candidate_dropdown, input_video],
139
+ outputs=[result_video, text_output]
140
+ )
141
+
142
+ reset_btn.click(
143
+ fn=reset_all,
144
+ inputs=[],
145
+ outputs=[candidate_dropdown, input_video, result_video, text_output]
146
+ )
147
+
148
+ if __name__ == "__main__":
149
+ demo.launch()
 
 
 
 
 
periodic_detection_function.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pickle
3
+ import json
4
+ import string
5
+ import cv2
6
+ from tqdm import tqdm
7
+ import os
8
+ from utils.periodic_detection_helper import *
9
+ from utils.plot import *
10
+ def run_periodic_detection(video_path, trajectory_path, output_video_path=None, n_clusters=8, sampling_rate=1, make_video=True):
11
+ """
12
+ Run periodic detection on a video and its associated trajectories
13
+
14
+ Parameters:
15
+ - video_path: Path to the video file
16
+ - trajectory_path: Path to the trajectory file (pickle or json)
17
+ - output_video_path: Path where the output video will be saved (default: same as input with _periodic suffix)
18
+ - n_clusters: Number of clusters for spatiotemporal clustering (default: 9)
19
+ - sampling_rate: Sampling rate for trajectories (default: 1)
20
+ - make_video: Whether to create a visualization video (default: True)
21
+
22
+ Returns:
23
+ - Dictionary containing workflow, period boundaries, and other results
24
+ """
25
+
26
+ # Main function execution starts here
27
+ # Setup output video path if not provided
28
+ if output_video_path is None:
29
+ base_name = os.path.splitext(video_path)[0]
30
+ output_video_path = f"{base_name}_periodic.mp4"
31
+
32
+ # Load trajectories from either pickle or json
33
+ file_ext = os.path.splitext(trajectory_path)[1].lower()
34
+ try:
35
+ if file_ext == '.pkl':
36
+ with open(trajectory_path, 'rb') as f:
37
+ trajectories = pickle.load(f)
38
+ elif file_ext == '.json':
39
+ with open(trajectory_path, 'r') as f:
40
+ trajectories = np.array(json.load(f))
41
+ else:
42
+ raise ValueError(f"Unsupported trajectory file format: {file_ext}. Use .pkl or .json")
43
+ except Exception as e:
44
+ return {"error": f"Failed to load trajectories: {str(e)}"}
45
+
46
+ trajectories = trajectories.reshape(trajectories.shape[0],-1)
47
+ trajectories = trajectories[::sampling_rate, :]
48
+ cluster_labels, hard_token, soft_token, centroids = spatiotemporal_clustering(trajectories, 9)
49
+ sequence = number_to_alpha(cluster_labels)
50
+ num_frames = len(sequence)
51
+
52
+ window_sizes, magnitudes = dominant_fourier_frequency_2d(soft_token, lbound=10, ubound=max(len(soft_token.T), len(soft_token))//2)
53
+
54
+ if len(window_sizes) == 0:
55
+ return {"error": "No dominant frequencies found"}
56
+
57
+
58
+ ### optimize win size
59
+ scores = []
60
+ for win in window_sizes[:10]: # select top 10 window sizes
61
+ temporal_buffer = int(win*0.2)
62
+ periods = []
63
+ for i in range(num_frames//win):
64
+ clip = sequence[max(0, win*i-temporal_buffer):min(num_frames, win*(i+1)+temporal_buffer )]
65
+ periods.append(clip)
66
+
67
+ compressed_periods = []
68
+ for p in periods:
69
+ compressed_periods.append(fuse_adjacent(p))
70
+ score = calculate_similarity_score(compressed_periods)
71
+ scores.append(score)
72
+ if not scores:
73
+ return {"error": "Failed to calculate similarity scores"}
74
+
75
+
76
+ win = window_sizes[np.argmax(scores)]
77
+ print('selected_win:{}'.format(win))
78
+ temporal_buffer = int(win*0.2)
79
+ periods = []
80
+ for i in range(num_frames//win):
81
+ clip = sequence[max(0, win*i-temporal_buffer):min(num_frames, win*(i+1)+temporal_buffer )]
82
+ periods.append(clip)
83
+
84
+ compressed_periods = []
85
+ for p in periods:
86
+ compressed_periods.append(fuse_adjacent(p))
87
+
88
+ aligned_sequences = msa(compressed_periods[:3])
89
+
90
+ while '-' in [x[-1] for x in aligned_sequences]:
91
+ i = find_dash_end_index(aligned_sequences)
92
+ if i!=0:
93
+ aligned_sequences = [s[:i] for s in aligned_sequences]
94
+ else:
95
+ aligned_sequences = aligned_sequences
96
+
97
+ i = find_longest_repeated_ends(aligned_sequences)
98
+ if i!=0:
99
+ aligned_sequences = [s[:-i] for s in aligned_sequences]
100
+ else:
101
+ aligned_sequences = aligned_sequences
102
+ aligned_sequences
103
+
104
+ workflow_str = summarize_strings(aligned_sequences)
105
+
106
+ if not workflow_str:
107
+ return {"error": "Empty workflow string after summary"}
108
+
109
+ while workflow_str and workflow_str[0]=='_':
110
+ workflow_str = workflow_str[1:]
111
+
112
+ while workflow_str and workflow_str[-1]=='_':
113
+ workflow_str = workflow_str[:-1]
114
+
115
+ if not workflow_str:
116
+ return {"error": "Empty workflow string"}
117
+
118
+ workflow_str_len = len(workflow_str)
119
+
120
+ workflow = [[] for _ in range(workflow_str_len)]
121
+ for seq in aligned_sequences:
122
+ pointer = 0
123
+ Flag = False
124
+
125
+ pos_skip_sign = seq.find('-')
126
+ if pos_skip_sign==-1: pos_skip_sign = workflow_str_len //2
127
+ pos_skip_sign = min(pos_skip_sign, workflow_str.find('_'))
128
+ pos_skip_sign = max(pos_skip_sign, 1)
129
+
130
+ for i in range(len(seq)):
131
+ l = seq[i]
132
+ if pointer==workflow_str_len:
133
+ break
134
+ if seq[i:i+pos_skip_sign] == workflow_str[:pos_skip_sign]:
135
+ Flag = True
136
+ if Flag:
137
+ workflow[pointer].append(l.replace("-", "_")+'{:02}'.format(pointer))
138
+ pointer += 1
139
+
140
+ # Create multi-path workflow
141
+ try:
142
+ workflow_multi_paths = np.stack([''.join([y[0] for i, y in enumerate(x)]) for x in np.stack(workflow).T])
143
+ except:
144
+ workflow_multi_paths = []
145
+
146
+ seg_labels = {}
147
+ seg_ind = -1
148
+ transcript_pointer = -1
149
+ workflow_str_len = len(workflow_str)
150
+ workflow_section_len = {}
151
+ for frame_number, l in enumerate(sequence):
152
+ # Only start new segment if current one is long enough (approx win size) or it's the first one
153
+ if l==workflow_str[0] and workflow_str[transcript_pointer]==workflow_str[-1]:
154
+ if seg_ind == -1 or len(seg_labels[seg_ind]) > 0.5 * win:
155
+ transcript_pointer = 0
156
+ seg_ind += 1
157
+ seg_labels[seg_ind] = {}
158
+ workflow_section_len[seg_ind] = {}
159
+ workflow_section_len[seg_ind][transcript_pointer] = 0
160
+ if transcript_pointer==-1: continue
161
+ if transcript_pointer < workflow_str_len-1:
162
+ if l == workflow_str[transcript_pointer+1]:
163
+ transcript_pointer += 1
164
+ workflow_section_len[seg_ind][transcript_pointer] = 0
165
+ if transcript_pointer < workflow_str_len-1:
166
+ if workflow_str[transcript_pointer+1]=='_':
167
+ transcript_pointer += 1
168
+ workflow_section_len[seg_ind][transcript_pointer] = 0
169
+
170
+ if transcript_pointer == workflow_str_len-1 and workflow_section_len[seg_ind][transcript_pointer]>1 and l != workflow_str[transcript_pointer]:
171
+ continue
172
+
173
+ seg_labels[seg_ind][frame_number] = l
174
+ workflow_section_len[seg_ind][transcript_pointer] +=1
175
+
176
+ workflow_section_len = [v for k,v in workflow_section_len.items() if len(v)>workflow_str_len*0.3]
177
+ workflow_section_len_array = []
178
+ for idx in range(len(workflow_section_len)):
179
+ workflow_section_len_array.append(list(workflow_section_len[idx].values()))
180
+
181
+ if len(workflow_section_len_array)>0:
182
+
183
+ sublist_max_len = max(len(sublist) for sublist in workflow_section_len_array)
184
+ workflow_section_len_array = [sublist for sublist in workflow_section_len_array if len(sublist)==sublist_max_len]
185
+ workflow_section_len_array = np.stack(workflow_section_len_array)
186
+ workflow_section_len = np.median(workflow_section_len_array,0)
187
+ else:
188
+ workflow_section_len = np.zeros(workflow_str_len)
189
+
190
+ ### Task 1
191
+ period_num = len([x for x in seg_labels.values() if len(x)>0.5*win])
192
+ print("period_num: {}".format(period_num))
193
+ print("seg_labels_index: {}".format(seg_labels.keys()))
194
+ if period_num>0:
195
+ period_boundaries = {}
196
+ for p_id, (k,v) in enumerate(seg_labels.items()):
197
+ frame_list = np.sort(list(v.keys()))
198
+ # Convert to python int for JSON serialization
199
+ period_boundaries[p_id] = [int(frame_list[0]), int(frame_list[-1])]
200
+ if p_id > 0: period_boundaries[p_id-1][1] = int(frame_list[0]-1)
201
+
202
+ else:
203
+ period_num = num_frames//win
204
+ period_boundaries = [[int((i-1)*win), int(i*win)] for i in range(1,period_num+1)]
205
+
206
+ print(f'Workflow: {workflow_str}')
207
+ for i, boundary in period_boundaries.items():
208
+ print(f"Priod {i+1}: with boundaries of {boundary} ")
209
+
210
+
211
+ # Make visualization video if requested
212
+ if make_video and os.path.exists(video_path):
213
+ print("Generating Video...")
214
+
215
+ cap = cv2.VideoCapture(video_path)
216
+ if not cap.isOpened():
217
+ print("Error opening video file")
218
+ cap.release()
219
+ return {
220
+ "workflow": workflow_str,
221
+ "period_boundaries": period_boundaries,
222
+ "error_video": "Failed to open video file"
223
+ }
224
+ # Make token legends
225
+ images = []
226
+ tokens = []
227
+ #for c in all_chars:
228
+ for c in np.unique(list(sequence)):
229
+ if c=='_': continue
230
+ tokens.append(c)
231
+ c = alpha_to_number(c)
232
+ frame_number = np.where(cluster_labels==c)[0][0]
233
+ cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
234
+ ret, frame = cap.read()
235
+ images.append(frame[:,:,::-1])
236
+ plot_images_with_token(images, ''.join(tokens))
237
+
238
+ W = 640
239
+ H = 640
240
+ height = 80
241
+ video_sampling_rate = 10
242
+
243
+ unique_labels = sorted(set(list(sequence)))
244
+ unique_chars = sorted(set(string.ascii_lowercase))[:15]
245
+ hues = np.linspace(0, 1, len(unique_chars), endpoint=False)
246
+ color_map = {char: hsv_to_rgb(hue, 0.8, 0.9) for char, hue in zip(unique_chars, hues)}
247
+
248
+ if seg_labels:
249
+ max_period_len = max([len(v) for v in seg_labels.values()])
250
+ else:
251
+ max_period_len = win
252
+
253
+ prog_bar_w = int(max_period_len // video_sampling_rate) + 300 + 50 # Add 50 px buffer
254
+ progress_bar = np.ones((H, prog_bar_w, 3), dtype=np.float32)
255
+
256
+ # Try to load anchor image or create a blank one
257
+ try:
258
+ if os.path.exists("anchors.jpg"):
259
+ anchor = cv2.imread("anchors.jpg")
260
+ anchor = cv2.resize(anchor, (W + prog_bar_w, 380))
261
+ else:
262
+ anchor = np.ones((380, W + prog_bar_w, 3), dtype=np.uint8) * 255
263
+ except:
264
+ anchor = np.ones((380, W + prog_bar_w, 3), dtype=np.uint8) * 255
265
+
266
+ # Setup video writer
267
+ # Setup video writer with robust codec handling
268
+
269
+ # Try H.264 (avc1) first
270
+ fourcc_code = 'avc1'
271
+ fourcc = cv2.VideoWriter_fourcc(*fourcc_code)
272
+ out = cv2.VideoWriter(output_video_path, fourcc, 30, (anchor.shape[1], H + anchor.shape[0]))
273
+
274
+ if not out.isOpened():
275
+ print(f"{fourcc_code} failed. Trying h264...")
276
+ fourcc_code = 'h264'
277
+ fourcc = cv2.VideoWriter_fourcc(*fourcc_code)
278
+ out = cv2.VideoWriter(output_video_path, fourcc, 30, (anchor.shape[1], H + anchor.shape[0]))
279
+
280
+ if not out.isOpened():
281
+ print(f"{fourcc_code} failed. Trying vp80...")
282
+ fourcc_code = 'vp80'
283
+ fourcc = cv2.VideoWriter_fourcc(*fourcc_code)
284
+ out = cv2.VideoWriter(output_video_path, fourcc, 30, (anchor.shape[1], H + anchor.shape[0]))
285
+
286
+ if not out.isOpened():
287
+ print(f"{fourcc_code} failed. Trying mp4v (less compatible)...")
288
+ fourcc_code = 'mp4v'
289
+ fourcc = cv2.VideoWriter_fourcc(*fourcc_code)
290
+ out = cv2.VideoWriter(output_video_path, fourcc, 30, (anchor.shape[1], H + anchor.shape[0]))
291
+
292
+ if not out.isOpened():
293
+ print("Error: Could not open video writer with any compatible codec.")
294
+
295
+ i, j = 0, 0
296
+ for k in tqdm(list(seg_labels.keys())):
297
+ if not seg_labels[k]: # Skip empty segments
298
+ continue
299
+
300
+ labels = list(seg_labels[k].values())
301
+ frame_ids = list(seg_labels[k].keys())
302
+ j += len(seg_labels[k])
303
+
304
+ cv2.putText(progress_bar, f'Period {k+1}', (5, height*k+30),
305
+ cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
306
+
307
+ for m, (l, frame_id) in enumerate(zip(labels[::video_sampling_rate], frame_ids[::video_sampling_rate])):
308
+ try:
309
+ progress_bar[height*k:height*(k+1), 300+m, :] = color_map[l.lower()]
310
+
311
+ cap.set(cv2.CAP_PROP_POS_FRAMES, frame_id)
312
+ ret, frame = cap.read()
313
+ if not ret:
314
+ continue
315
+
316
+ frame = cv2.resize(frame, (W, H))
317
+ cv2.putText(frame, f"Frame: {frame_id}", (50, 50),
318
+ cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
319
+
320
+ frame = np.concatenate([frame, (progress_bar*255).astype(np.uint8)[:,:,::-1]], axis=1)
321
+ frame = np.concatenate([frame, anchor], axis=0)
322
+ out.write(frame)
323
+ except Exception as e:
324
+ print(f"Error in video generation: {str(e)}")
325
+ continue
326
+
327
+ cv2.putText(progress_bar, f'Frame: {(i+1):04d}-{j:04d}', (5, height*k+52),
328
+ cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
329
+ i += len(seg_labels[k])
330
+
331
+ try:
332
+ out.write(frame)
333
+ except:
334
+ pass
335
+
336
+ # Release resources
337
+ cap.release()
338
+ out.release()
339
+
340
+ # Return results
341
+ return {
342
+ "workflow": workflow_multi_paths.tolist() if isinstance(workflow_multi_paths, np.ndarray) else workflow_multi_paths,
343
+ "period_boundaries": period_boundaries,
344
+ "window_size": int(win),
345
+ "num_periods": int(period_num),
346
+ "output_video": output_video_path if make_video else None
347
+ }
preprocess_videos.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import glob
4
+ import pickle
5
+ import sys
6
+
7
+ # Add current dir to path
8
+ sys.path.append(os.getcwd())
9
+
10
+ from utils.render import render_smpl
11
+
12
+ DATA_DIR = "data"
13
+
14
+ def batch_render():
15
+ if not os.path.exists(DATA_DIR):
16
+ print(f"Data directory {DATA_DIR} not found.")
17
+ return
18
+
19
+ pkl_files = glob.glob(os.path.join(DATA_DIR, "*.pkl"))
20
+ print(f"Found {len(pkl_files)} pickle files.")
21
+
22
+ for pkl_path in pkl_files:
23
+ base_name = os.path.splitext(os.path.basename(pkl_path))[0]
24
+ mp4_path = os.path.join(DATA_DIR, f"{base_name}.mp4")
25
+
26
+ # Skip if already exists (optional, but good for speed if re-running)
27
+ # User requested render all, so maybe force?
28
+ # "Render all pkl files ... and save them" implies doing it.
29
+ # But if we want to update them with new rendering logic, we must overwrite.
30
+
31
+ print(f"Processing {base_name}...")
32
+ try:
33
+ with open(pkl_path, 'rb') as f:
34
+ data = pickle.load(f)
35
+
36
+ # Data shape check
37
+ if len(data.shape) != 3 or data.shape[1] != 24 or data.shape[2] != 3:
38
+ print(f"Skipping {base_name}: Unexpected shape {data.shape}")
39
+ continue
40
+
41
+ render_smpl(data, mp4_path, fps=30)
42
+ print(f"Saved {mp4_path}")
43
+
44
+ except Exception as e:
45
+ print(f"Failed to render {base_name}: {e}")
46
+
47
+ if __name__ == "__main__":
48
+ batch_render()
requirements.txt CHANGED
@@ -1,6 +1,8 @@
1
- accelerate
2
- diffusers
3
- invisible_watermark
4
- torch
5
- transformers
6
- xformers
 
 
 
1
+ gradio
2
+ matplotlib
3
+ opencv-python
4
+ networkx
5
+ numpy
6
+ scikit-learn
7
+ tqdm
8
+ scipy
verify_app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import glob
4
+ from app import load_and_render, run_analysis, DATA_DIR
5
+
6
+ def verify():
7
+ print("Verifying data availability...")
8
+ candidates = glob.glob(os.path.join(DATA_DIR, "*.pkl"))
9
+ if not candidates:
10
+ print("No candidates found in data directory!")
11
+ return
12
+
13
+ # Prioritize full samples over test_small.pkl
14
+ candidate_file = "p_005.pkl"
15
+
16
+ print(f"Testing with candidate: {candidate_file}")
17
+
18
+ # Test Loading and Rendering
19
+ print("\n--- Testing load_and_render ---")
20
+ video_path = load_and_render(candidate_file)
21
+
22
+ if not video_path or not os.path.exists(video_path):
23
+ print(f"FAILED: Video rendering failed for {candidate_file}")
24
+ return
25
+ print(f"SUCCESS: Video rendered as {video_path}")
26
+
27
+ # Test Analysis
28
+ print("\n--- Testing run_analysis ---")
29
+ output_video, output_json = run_analysis(candidate_file, video_path)
30
+
31
+ if not output_video:
32
+ print(f"FAILED: Analysis failed. Error: {output_json}")
33
+ else:
34
+ print(f"SUCCESS: Analysis complete.")
35
+ print(f"Output Video: {output_video}")
36
+ # print(f"JSON Result: {output_json}")
37
+
38
+ if __name__ == "__main__":
39
+ verify()