Spaces:

fanduluhf
/

LSPW

Sleeping

App Files Files Community

fanduluhf committed about 1 month ago

Commit

f460dc5

verified ·

1 Parent(s): 6936c46

Upload 4 files

Browse files

Files changed (4) hide show

utils/eval.py +180 -0
utils/periodic_detection_helper.py +641 -0
utils/plot.py +532 -0
utils/render.py +242 -0

utils/eval.py ADDED Viewed

	@@ -0,0 +1,180 @@

+import numpy as np
+from scipy.optimize import linear_sum_assignment
def temporal_iou(pred_span, gt_span):
    """
    Compute the 1D Intersection over Union (IoU) of two temporal spans.
    Args:
        pred_span (tuple/list): Predicted span given as (start, end)
        gt_span (tuple/list): Ground-truth span given as (start, end)
    Returns:
        float: IoU in [0, 1]; 0.0 when the spans do not overlap or merely touch
    Raises:
        ValueError: If either span ends before it starts
    """
    p_lo, p_hi = pred_span
    g_lo, g_hi = gt_span
    if p_hi < p_lo or g_hi < g_lo:
        raise ValueError("End time cannot be before start time")
    # The overlap runs from the later start to the earlier end.
    overlap_lo = max(p_lo, g_lo)
    overlap_hi = min(p_hi, g_hi)
    if overlap_hi <= overlap_lo:
        return 0.0
    overlap = overlap_hi - overlap_lo
    # Union = both durations minus the part counted twice.
    covered = (p_hi - p_lo) + (g_hi - g_lo) - overlap
    return float(overlap / covered)
def match_temporal_iou(preds, gts):
    """
    Find the optimal one-to-one matching between predicted and ground truth
    temporal spans using the Hungarian algorithm.
    Args:
        preds (list/array): Predicted temporal spans, each span is [start, end]
        gts (list/array): Ground truth temporal spans, each span is [start, end]
    Returns:
        tuple: (matched_pairs, avg_iou)
            - matched_pairs: List of (pred_idx, gt_idx) pairs as plain ints
            - avg_iou: Sum of the matched IoUs divided by the number of ground
              truth spans (unmatched ground truths therefore count as 0)
    Raises:
        ValueError: Propagated from temporal_iou when a span ends before it starts
    """
    # len() instead of truthiness so that numpy arrays are accepted as well.
    if preds is None or gts is None or len(preds) == 0 or len(gts) == 0:
        return [], 0.0
    # Cost is the negative IoU because linear_sum_assignment minimizes cost.
    cost_matrix = np.zeros((len(preds), len(gts)))
    for i, pred in enumerate(preds):
        for j, gt in enumerate(gts):
            cost_matrix[i, j] = -temporal_iou(pred, gt)
    pred_indices, gt_indices = linear_sum_assignment(cost_matrix)
    matched_pairs = [(int(p), int(g)) for p, g in zip(pred_indices, gt_indices)]
    total_iou = -cost_matrix[pred_indices, gt_indices].sum()  # back to positive
    avg_iou = float(total_iou / len(gts))
    return matched_pairs, avg_iou
+'''
+# Example usage:
+if __name__ == "__main__":
+    # Example predictions and ground truths
+    predictions = [[10, 20], [25, 35], [40, 50], [50, 55]]
+    ground_truths = [[15, 25], [30, 40], [45, 55]]
+    # Find optimal matching
+    matches, avg_iou = match_temporal_iou(predictions, ground_truths)
+    print("Matched pairs (pred_idx, gt_idx):", matches)
+    print("Avg IoU:", avg_iou)
+    # Print individual IoUs for matched pairs
+    print("\nIndividual IoUs:")
+    for pred_idx, gt_idx in matches:
+        iou = temporal_iou(predictions[pred_idx], ground_truths[gt_idx])
+        print(f"Pred {pred_idx} - GT {gt_idx}: {iou:.3f}")
+'''
def find_difference_range(s1, s2):
    """
    Locate the span in which two equal-length strings differ, ignoring the
    first and the last character of each string.
    Args:
        s1 (str): First string
        s2 (str): Second string
    Returns:
        list or None: [first, last] inclusive indices (relative to the full
        strings) of the differing region, or None when the inner parts are
        identical or have different lengths
    """
    inner_a, inner_b = s1[1:-1], s2[1:-1]
    if len(inner_a) != len(inner_b):
        return None
    mismatches = [k for k, (a, b) in enumerate(zip(inner_a, inner_b)) if a != b]
    if not mismatches:
        return None
    # +1 compensates for the leading character that was sliced away.
    return [mismatches[0] + 1, mismatches[-1] + 1]
+'''
+# Test with your example
+s1 = "GIBJBIGCHEHCGIBFAD-"
+s2 = "GIBJBIGCHED----FADG"
+result = find_difference_range(s1, s2)
+print(f"Different substrings: '{s1[result[0]:result[1]+1]}' and '{s2[result[0]:result[1]+1]}'")
+'''
def get_overlapping_substring(s1, s2, best_offset, max_matches):
    """
    Return the slice of s1 that starts at the first character matching s2
    under the given alignment offset.
    Args:
        s1 (str): First string
        s2 (str): Second string
        best_offset (int): Shift such that s1[i] is compared with s2[i - best_offset]
        max_matches (int): Length of the slice to return
    Returns:
        str: s1[start:start + max_matches] for the first matching index, or ""
        when no position matches
    """
    for idx, ch in enumerate(s1):
        other = idx - best_offset
        if 0 <= other < len(s2) and ch == s2[other]:
            # Only the first matching position is of interest.
            return s1[idx:idx + max_matches]
    return ""
+'''
+string1 = 'JBHKHBJGCEID'
+string2 = 'BJGCEIDIALFKCGJ'
+best_offset, max_matches = align_strings(string1, string2)
+overlapping_part = get_overlapping_substring(string1, string2, best_offset, max_matches)
+print("String 1:", string1)
+print("String 2:", string2)
+print("\nOverlapping part:", overlapping_part)
+'''
def find_difference_range(s1, s2):
    """
    Find the inclusive index range over which two strings differ.
    The first and last characters of both strings are excluded from the
    comparison. This definition repeats an identical one earlier in the
    module and is the one in effect after import.
    Args:
        s1 (str): First string
        s2 (str): Second string
    Returns:
        list or None: [start, end] indices into the original strings, or None
        if the trimmed strings are equal or differ in length
    """
    core_a = s1[1:-1]
    core_b = s2[1:-1]
    span = len(core_a)
    if span != len(core_b):
        return None  # Strings of different lengths
    # First mismatch scanning left to right.
    first = next((k for k in range(span) if core_a[k] != core_b[k]), None)
    if first is None:
        return None
    # Last mismatch scanning right to left; guaranteed to exist at or after `first`.
    last = next(k for k in range(span - 1, first - 1, -1) if core_a[k] != core_b[k])
    # Shift by one to account for the ignored first character.
    return [first + 1, last + 1]
+'''
+# Test with your example
+s1 = "GIBJBIGCHEHCGIBFAD-"
+s2 = "GIBJBIGCHED----FADG"
+result = find_difference_range(s1, s2)
+print(f"Different substrings: '{s1[result[0]:result[1]+1]}' and '{s2[result[0]:result[1]+1]}'")
+print(f"Range index: {result}")
+'''

utils/periodic_detection_helper.py ADDED Viewed

	@@ -0,0 +1,641 @@

+import numpy as np
+import math
+from itertools import product
+from tqdm import tqdm
+from sklearn.cluster import KMeans, MeanShift
+from sklearn.preprocessing import StandardScaler
+from typing import List, Tuple
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+from sklearn.cluster import KMeans
+import copy
def smooth(period_labels, gap = 1):
    """
    Majority-vote smoothing of a sequence of non-negative integer labels.
    Each interior label is replaced by the most frequent label inside the
    symmetric window [i - gap, i + gap]. The first and last `gap` entries are
    left unchanged. Votes are always taken from the original sequence, never
    from already-smoothed values.
    Args:
        period_labels (array-like): 1D sequence of non-negative integer labels
        gap (int): Half-width of the voting window. Default is 1.
    Returns:
        A smoothed copy of `period_labels` (same container type); the input
        is not modified. Ties are resolved in favour of the smallest label.
    """
    smoothed = copy.deepcopy(period_labels)
    for i in range(gap, len(period_labels) - gap):
        # +1 so that the right neighbour(s) take part in the vote; without it
        # the window is lopsided (for gap=1 just [previous, current]).
        window = period_labels[i - gap:i + gap + 1]
        smoothed[i] = np.argmax(np.bincount(window))
    return smoothed
def spatiotemporal_clustering(spatiotemporal_data: np.ndarray, n_clusters: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Cluster per-frame feature vectors with KMeans and tokenize the trajectory.
    Args:
        spatiotemporal_data: An array of [frame, n_feats].
        n_clusters: Number of KMeans clusters (i.e. size of the token vocabulary).
    Returns:
        A tuple containing:
          - cluster_labels: Per-frame cluster labels after majority smoothing.
          - hard_tokenized_trajectory: The hard-encoded trajectory (the same
            smoothed cluster labels).
          - soft_tokenized_trajectory: Array of [n_clusters, frame]; each column
            holds the normalized similarities exp(-distance) of that frame to
            every centroid and sums to 1.
          - centroids: The KMeans cluster centers, [n_clusters, n_feats].
    """
    kmeans = KMeans(n_clusters=n_clusters, random_state=20, n_init='auto')
    cluster_labels = kmeans.fit_predict(spatiotemporal_data)
    cluster_labels = smooth(cluster_labels, gap = 1)
    # Hard-encoded tokenization for the trajectory using cluster labels.
    hard_tokenized_trajectory = cluster_labels
    centroids = kmeans.cluster_centers_
    points = np.asarray(spatiotemporal_data)
    # Euclidean distance of every frame to every centroid -> [frame, n_clusters].
    distances = np.stack(
        [np.linalg.norm(points - centroid, axis=1) for centroid in centroids],
        axis=1,
    )
    # Subtracting the per-frame minimum leaves the normalized result unchanged
    # mathematically but keeps at least one exponent at exp(0) = 1, so rows can
    # no longer underflow to all zeros (which produced NaN after normalizing).
    shifted = distances - distances.min(axis=1, keepdims=True)
    similarities = np.exp(-shifted)
    soft_tokenized_trajectory = similarities / similarities.sum(axis=1, keepdims=True)
    return cluster_labels, hard_tokenized_trajectory.T, soft_tokenized_trajectory.T, centroids
def create_path(nodes):
    """
    Render a list of node levels as a comma-separated edge string.
    Args:
    nodes (list): List of lists of node IDs; consecutive levels are
        connected by edges.
    Returns:
    str: Edges such as "A->B"; steps involving more than one source or
        target are grouped in parentheses, e.g. "(B->C, B->D)".
    """
    # The very first edge always links the first node of level 0 and level 1.
    segments = [f"{nodes[0][0]}->{nodes[1][0]}"]
    for step in range(1, len(nodes) - 1):
        sources, targets = nodes[step], nodes[step + 1]
        if len(sources) == 1:
            # One source fans out to every target.
            edges = [f"{sources[0]}->{target}" for target in targets]
        elif len(targets) == 1:
            # Every source converges on the single target.
            edges = [f"{source}->{targets[0]}" for source in sources]
        else:
            # Several on both sides: pair them up by position.
            edges = [f"{source}->{targets[k]}" for k, source in enumerate(sources)]
        if len(sources) == 1 and len(targets) == 1:
            segments.append(edges[0])
        else:
            segments.append(f"({', '.join(edges)})")
    return ', '.join(segments)
def summarize_strings(strings):
    """
    Build a position-wise consensus of several strings.
    A position keeps its character when every string agrees on it and
    becomes an underscore otherwise. Only positions present in all strings
    (i.e. up to the shortest length) are considered.
    Args:
    strings (list): List of strings to summarize
    Returns:
    str: Consensus string, or "" for an empty list
    """
    if not strings:
        return ""
    reference = strings[0]
    consensus = []
    # zip stops at the shortest string, which bounds the comparison.
    for column, chars in enumerate(zip(*strings)):
        agreed = len(set(chars)) <= 1
        consensus.append(reference[column] if agreed else "_")
    return "".join(consensus)
def find_dash_end_index(strings):
    """
    Find the right-most index at which any of the strings has a dash that is
    immediately preceded by a letter.
    Args:
        strings (list): List of strings with same length
    Returns:
        int: The right-most such index, or -1 if there is none (including
        when `strings` is empty)
    Raises:
        ValueError: If the strings do not all have the same length
    """
    if not strings:
        return -1
    width = len(strings[0])
    if any(len(s) != width for s in strings):
        raise ValueError("Strings must be of equal length")
    # Index 0 can never qualify because it has no preceding character.
    for i in range(width - 1, 0, -1):
        for s in strings:
            if s[i] == '-' and s[i - 1].isalpha():
                return i
    return -1  # No matching pattern found
def find_longest_repeated_ends(strings):
    """
    Find the longest substring that is both a prefix and a suffix of the
    first string and that every string in the list starts and ends with.
    Args:
    strings (list): List of strings to check.
    Returns:
    int: Length of that shared prefix/suffix (at most half the length of
        the first string), or 0 if none exists or the list is empty.
    """
    if not strings:
        return 0
    reference = strings[0]
    longest = 0
    size = 1
    # Candidates may not overlap, hence the limit of half the string.
    while size <= len(reference) // 2:
        edge = reference[:size]
        if reference[-size:] == edge:
            shared = True
            for candidate in strings:
                if not (candidate.startswith(edge) and candidate.endswith(edge)):
                    shared = False
                    break
            if shared:
                longest = size
        size += 1
    return longest
def create_path(nodes):
    """
    Create a string representing a path from a list of node levels.
    Every pair of consecutive levels contributes exactly one entry: a plain
    "X->Y" edge when both levels hold a single node, otherwise a
    parenthesised group of edges.
    Args:
    nodes (list): List of lists of node IDs. Consecutive lists are
        connected by edges.
    Returns:
    str: Comma-separated path description; "" when fewer than two levels
        are given.
    Raises:
    IndexError: If a step has several sources and several targets but
        fewer targets than sources (they are paired by position).
    """
    result = []
    # Walk every consecutive pair starting at the first level. Pre-seeding
    # the first edge separately would emit it twice.
    for sources, targets in zip(nodes, nodes[1:]):
        if len(sources) == 1 and len(targets) == 1:
            # One source, one target
            result.append(f"{sources[0]}->{targets[0]}")
            continue
        if len(sources) == 1:
            # One source, multiple targets
            paths = [f"{sources[0]}->{target}" for target in targets]
        elif len(targets) == 1:
            # Multiple sources, one target
            paths = [f"{source}->{targets[0]}" for source in sources]
        else:
            # Multiple sources, multiple targets: pair by position
            paths = [f"{source}->{targets[i]}" for i, source in enumerate(sources)]
        result.append(f"({', '.join(paths)})")
    return ', '.join(result)
def dominant_fourier_frequency_2d(matrix, lbound=10, ubound=1000):
    """
    Rank candidate periods along the second axis of a 2D matrix by the
    magnitude of their Fourier components.
    Parameters
    ----------
    matrix : array-like
        The input 2D matrix; periodicity is analysed along axis 1.
    lbound : int, optional
        The lower bound (inclusive) of the window size range. Default is 10.
    ubound : int, optional
        The upper bound (exclusive) of the window size range. Default is 1000.
    Returns
    -------
    tuple
        period_candidates : window sizes (int(1 / frequency)) inside
            [lbound, ubound), ordered by decreasing magnitude
        period_candidates_magnitudes : the matching magnitudes, each being
            the sum of |FFT2| over all rows for that horizontal frequency
        Both arrays are empty when no frequency falls inside the range.
    """
    matrix = np.asarray(matrix)  # honour the documented array-like input
    fourier = np.fft.fft2(matrix)
    # Frequencies along the horizontal (axis 1) dimension.
    freq_x = np.fft.fftfreq(matrix.shape[1], 1)
    # Magnitude per horizontal frequency, accumulated over all rows at once.
    column_magnitudes = np.abs(fourier).sum(axis=0)
    window_sizes_x = []
    magnitudes_x = []
    for j, freq in enumerate(freq_x):
        if freq > 0:  # Only consider positive frequencies
            window_size = int(1 / freq)
            if lbound <= window_size < ubound:
                window_sizes_x.append(window_size)
                magnitudes_x.append(column_magnitudes[j])
    order = np.argsort(magnitudes_x)[::-1]
    return np.array(window_sizes_x)[order], np.array(magnitudes_x)[order]
def dominant_fourier_frequency_1d(time_series, lbound=10, ubound=1000):
    """
    Rank candidate periods of a 1D time series by Fourier magnitude.
    Parameters
    ----------
    time_series : array-like
        The input time series.
    lbound : int, optional
        The lower bound (inclusive) of the window size range. Default is 10.
    ubound : int, optional
        The upper bound (exclusive) of the window size range. Default is 1000.
    Returns
    -------
        Normally a tuple of two arrays:
        period_candidates : window sizes (int(1 / frequency)) inside
            [lbound, ubound), ordered by decreasing magnitude
        period_candidates_magnitudes : the matching magnitudes
        NOTE: the two fallback paths return a single int instead of a tuple:
        the series length when it is shorter than 2 * lbound, and `lbound`
        when no frequency falls inside the range. A warning is issued in
        both cases.
    """
    # Imported locally because the module itself never imports `warnings`;
    # without this both fallback paths died with a NameError.
    import warnings

    time_series = np.asarray(time_series)
    n_samples = time_series.shape[0]
    if n_samples < 2 * lbound:
        warnings.warn(
            f"Time series must at least have 2*lbound much data points. Using window_size={n_samples}.")
        return n_samples
    fourier = np.fft.fft(time_series)
    frequencies = np.fft.fftfreq(n_samples, 1)
    magnitudes = []
    window_sizes = []
    for coef, frequency in zip(fourier, frequencies):
        # Skip exactly-zero coefficients and the non-positive half of the spectrum.
        if coef and frequency > 0:
            window_size = int(1 / frequency)
            if lbound <= window_size < ubound:
                window_sizes.append(window_size)
                magnitudes.append(abs(coef))
    if len(magnitudes) == 0:
        warnings.warn(f"Could not extract valid frequencies. Using window_size={lbound}.")
        return lbound
    order = np.argsort(magnitudes)[::-1]
    return np.array(window_sizes)[order], np.array(magnitudes)[order]
+from difflib import SequenceMatcher
+from collections import Counter
def calculate_similarity_score(strings_list):
    """
    Score how similar a collection of strings is overall.
    Every unordered pair is compared with three metrics (difflib sequence
    ratio, character-set Jaccard index, shorter/longer length ratio); the
    per-metric averages are blended with weights 0.5 / 0.3 / 0.2.
    Args:
        strings_list: List of strings to compare
    Returns:
        float: Overall similarity score between 0 and 1 (1.0 for fewer
        than two strings)
    """
    if not strings_list or len(strings_list) < 2:
        return 1.0  # nothing to compare against
    sequence_scores, jaccard_scores, length_ratio_scores = [], [], []
    for position, left in enumerate(strings_list):
        for right in strings_list[position + 1:]:
            # difflib similarity of the two sequences
            sequence_scores.append(SequenceMatcher(None, left, right).ratio())
            # Jaccard index over the distinct characters
            chars_left, chars_right = set(left), set(right)
            if chars_left or chars_right:
                jaccard = len(chars_left & chars_right) / len(chars_left | chars_right)
            else:
                jaccard = 1.0
            jaccard_scores.append(jaccard)
            # Shorter length relative to longer length
            longer = max(len(left), len(right))
            shorter = min(len(left), len(right))
            length_ratio_scores.append(shorter / longer if longer > 0 else 1.0)
    avg_sequence = np.mean(sequence_scores)
    avg_jaccard = np.mean(jaccard_scores)
    avg_length_ratio = np.mean(length_ratio_scores)
    return 0.5 * avg_sequence + 0.3 * avg_jaccard + 0.2 * avg_length_ratio
def fuse_adjacent(s):
    """
    Collapse every run of identical consecutive characters into one.
    Args:
        s (str): Input string
    Returns:
        str: The string with adjacent duplicates removed ('' for empty input)
    """
    if not s:
        return ''
    # Keep a character only when it differs from the one right before it.
    tail = ''.join(cur for prev, cur in zip(s, s[1:]) if cur != prev)
    return s[0] + tail
def find_longest_identical_pair(s):
    """
    Find the two equal elements of a sequence that lie farthest apart.
    Args:
        s (str): Sequence to search
    Returns:
        tuple or None: (element, left_index, right_index) of the widest pair;
        on ties the pair encountered first (smallest left index) wins.
        None when no element occurs twice.
    """
    widest = None
    widest_span = 0
    for left in range(len(s) - 1):
        for right in range(left + 1, len(s)):
            # Strict comparison keeps the earliest pair among equal spans.
            if s[left] == s[right] and right - left > widest_span:
                widest = (s[left], left, right)
                widest_span = right - left
    return widest
+'''def number_to_alpha(numbers):
+    # Create a mapping of numbers to alphabetic characters
+    alpha_map = {i: chr(97 + i) for i in range(26)}  # a-z
+    alpha_map.update({i + 26: chr(65 + i) for i in range(26)})  # A-Z
+    # Convert numbers to characters
+    result = ''
+    for num in numbers:
+        if num in alpha_map:
+            result += alpha_map[num]
+        else:
+            result += '?'  # For numbers outside the range 0-51
+    return result'''
def number_to_alpha(numbers):
    """
    Encode a sequence of integers as uppercase letters (0 -> 'A' ... 25 -> 'Z').
    Args:
        numbers (iterable): Integers to encode
    Returns:
        str: One character per number; values outside 0-25 become '?'
    """
    letter_for = {code: chr(65 + code) for code in range(26)}  # A-Z
    return ''.join(letter_for.get(num, '?') for num in numbers)
def alpha_to_number(sequence):
    """
    Decode letters into their zero-based alphabet positions, ignoring case
    ('A'/'a' -> 0, 'B'/'b' -> 1, ...).
    Args:
        sequence (str): Letters to decode
    Returns:
        list: One integer per character
    """
    base = ord('A')
    numbers = []
    for symbol in sequence:
        numbers.append(ord(symbol.upper()) - base)
    return numbers
def score_match(chars):
    """
    Score one alignment column.
    Args:
        chars (list): The characters (or '-' gaps) aligned in this column
    Returns:
        int: Minus the number of gaps if the column contains any gap;
        otherwise the number of ordered pairs of equal characters, not
        counting each character paired with itself
    """
    if '-' in chars:
        return -sum(c == '-' for c in chars)  # Gap penalty
    # product() also pairs every character with itself, hence the subtraction.
    equal_pairs = sum(a == b for a, b in product(chars, chars))
    return equal_pairs - len(chars)
def initialize_matrix(sequences):
    """
    Create the N-dimensional score and pointer matrices for the alignment.
    Args:
        sequences (list): The sequences to align
    Returns:
        tuple: (F, P) where F is a float array with one axis of length
        len(seq) + 1 per sequence, zero everywhere except along each axis
        (all other indices 0), which runs linearly from 0 down to
        -len(sequences) * axis_length; P is an object array of the same
        shape holding an independent empty list in every cell
    """
    shape = [len(seq) + 1 for seq in sequences]
    scores = np.zeros(shape)
    # Fill cell by cell so that no two cells share the same list object.
    pointers = np.empty(shape, dtype=object)
    for cell in np.ndindex(*shape):
        pointers[cell] = []
    n_sequences = len(sequences)
    for axis, extent in enumerate(shape):
        # Select the full line along `axis` with every other index fixed at 0.
        edge = tuple(slice(None) if k == axis else 0 for k in range(len(shape)))
        scores[edge] = np.linspace(0, -n_sequences * extent, extent)
    return scores, pointers
def get_neighbors(current_pos, dims):
    """
    List every cell from which `current_pos` can be reached in one step.
    A step decrements any non-empty subset of the coordinates by one;
    subsets that would push a coordinate below zero are skipped.
    Args:
        current_pos (tuple): Current index into the DP matrix
        dims (tuple): Shape of the DP matrix (only its length is used)
    Returns:
        list: Predecessor index tuples, ordered by the bitmask of
        decremented axes (bit j set = axis j decremented)
    """
    rank = len(dims)
    predecessors = []
    # Mask 0 would be the current position itself, so start at 1.
    for mask in range(1, 2 ** rank):
        candidate = []
        for axis, coord in enumerate(current_pos):
            if not mask & (1 << axis):
                candidate.append(coord)
            elif coord > 0:
                candidate.append(coord - 1)
            else:
                break  # would step outside the matrix
        if len(candidate) == rank:
            predecessors.append(tuple(candidate))
    return predecessors
def msa(sequences, gap_penalty=-1):
    """
    Perform multiple sequence alignment using N-dimensional Needleman-Wunsch.
    Args:
        sequences (list): Strings to align
        gap_penalty (int): Currently unused; gap costs come from score_match
            and initialize_matrix. Kept for interface compatibility.
    Returns:
        list: The aligned sequences, padded with '-' so that all have the
        same length and each still contains all of its original characters
    NOTE: only interior cells (all indices >= 1) are filled by the dynamic
    program. When the traceback reaches a boundary cell it falls back to
    consuming one character from every sequence that is not yet exhausted.
    """
    F, P = initialize_matrix(sequences)
    dims = F.shape

    def column(curr_pos, prev_pos):
        # Characters emitted by the step prev_pos -> curr_pos: the consumed
        # character for every axis that moved, a gap for every axis that did not.
        return [sequences[i][curr - 1] if curr != prev else '-'
                for i, (curr, prev) in enumerate(zip(curr_pos, prev_pos))]

    # Fill the DP matrix
    for pos in product(*[range(1, dim) for dim in dims]):
        max_score = float('-inf')
        best_moves = []
        for neighbor in get_neighbors(pos, dims):
            score = F[neighbor] + score_match(column(pos, neighbor))
            if score > max_score:
                max_score = score
                best_moves = [neighbor]
            elif score == max_score:
                best_moves.append(neighbor)
        F[pos] = max_score
        P[pos] = best_moves  # Store list of best moves

    # Traceback
    aligned_sequences = [[] for _ in sequences]
    current_pos = tuple(dim - 1 for dim in dims)
    while any(coord > 0 for coord in current_pos):
        moves = P[current_pos]
        if moves:
            prev_pos = moves[0]  # Take first best move
        else:
            # Boundary cell without a stored pointer. Stopping here would
            # silently drop the leading characters of the unfinished
            # sequences, so keep walking towards the origin instead.
            prev_pos = tuple(coord - 1 if coord > 0 else 0 for coord in current_pos)
        for i, char in enumerate(column(current_pos, prev_pos)):
            aligned_sequences[i].append(char)
        current_pos = prev_pos
    # The traceback collected the columns back to front.
    return [(''.join(seq))[::-1] for seq in aligned_sequences]
+# Example usage
+#sequences = ['ACBAFDECBAECFACBA', 'CFACBAFDECBAECFA', 'ECFACBAFBECBAECFA']
+#aligned_sequences = msa(sequences)
+#print('\n'.join(aligned_sequences))
def align_strings(s1, s2):
    """
    Find the relative shift of two strings that maximizes the number of
    positions holding equal characters.
    Args:
        s1 (str): First string
        s2 (str): Second string
    Returns:
        tuple: (best_offset, max_matches) where s1[i] is compared with
        s2[i - best_offset]; the smallest offset wins on ties and (0, 0)
        is returned when nothing matches
    """
    best_offset, max_matches = 0, 0
    for shift in range(1 - len(s2), len(s1)):
        # Indices of s1 that overlap s2 under this shift.
        first = max(0, shift)
        stop = min(len(s1), len(s2) + shift)
        hits = sum(1 for i in range(first, stop) if s1[i] == s2[i - shift])
        if hits > max_matches:
            best_offset, max_matches = shift, hits
    return best_offset, max_matches
def get_overlapping_substring(s1, s2, best_offset, max_matches):
    """
    Extract the part of s1 that begins at its first match with s2.
    Args:
        s1 (str): First string
        s2 (str): Second string
        best_offset (int): Alignment shift; s1[i] lines up with s2[i - best_offset]
        max_matches (int): Number of characters to take from s1
    Returns:
        str: s1[start:start + max_matches], with `start` the first index whose
        character equals its counterpart in s2; "" if there is no such index
    """
    matching_indices = (
        i for i in range(len(s1))
        if 0 <= i - best_offset < len(s2) and s1[i] == s2[i - best_offset]
    )
    start = next(matching_indices, -1)  # first match only
    if start == -1:
        return ""
    return s1[start:start + max_matches]
+'''
+def align_multiple_strings(strings):
+    if not strings:
+        return []
+    reference = strings[0]
+    for next_string in strings[1:]:
+        ref_align, _ = align_two_strings(reference, next_string)
+        # Collapse underscores to match specification
+        reference = ''.join(char for i, char in enumerate(ref_align) if char != '_' or (i > 0 and ref_align[i - 1] != '_'))
+    return reference
+def align_two_strings(str1, str2):
+    # Alignment code (based on previously defined)
+    n, m = len(str1), len(str2)
+    dp = [[0] * (m + 1) for _ in range(n + 1)]
+    for i in range(n + 1): dp[i][0] = i
+    for j in range(m + 1): dp[0][j] = j
+    for i in range(1, n + 1):
+        for j in range(1, m + 1):
+            if str1[i - 1] == str2[j - 1]:
+                dp[i][j] = dp[i - 1][j - 1]
+            else:
+                dp[i][j] = min(dp[i - 1][j - 1], dp[i][j - 1], dp[i - 1][j]) + 1
+    aligned1, aligned2 = [], []
+    i, j = n, m
+    while i > 0 and j > 0:
+        if str1[i - 1] == str2[j - 1]:
+            aligned1.append(str1[i - 1])
+            aligned2.append(str2[j - 1])
+            i -= 1
+            j -= 1
+        elif dp[i][j] == dp[i - 1][j - 1] + 1:
+            aligned1.append('_')
+            aligned2.append('_')
+            i -= 1
+            j -= 1
+        elif dp[i][j] == dp[i - 1][j]+1:
+            aligned1.append('_')
+            i -= 1
+        else:
+            aligned2.append('_')
+            j -= 1
+    while i > 0:
+        aligned1.append('_')
+        i -= 1
+    while j > 0:
+        aligned2.append('_')
+        j -= 1
+    aligned1.reverse()
+    aligned2.reverse()
+    return ''.join(aligned1), ''.join(aligned2)
+'''

utils/plot.py ADDED Viewed

	@@ -0,0 +1,532 @@

+import matplotlib.pyplot as plt
+import numpy as np
+from colorsys import hsv_to_rgb
+import string
+import networkx as nx
+import re
+# import osmnx as ox
+import matplotlib.animation as animation
+from itertools import combinations
+import random
+#################plot for transcripts############################
def plot_string(text, figsize=(18, 4)):
    """
    Plot a string with one colour per letter and save it as 'transcript.svg'.
    Identical letters (case-insensitive) share a colour: a-j get vivid hues,
    k-z reuse the same hue wheel with muted saturation. Dashes, underscores
    and any other character are drawn in gray; an underscore is rendered as
    a short line below the baseline. Letters are spaced 0.1x the letter width
    apart.
    Args:
        text (str): The string to draw
        figsize (tuple): Matplotlib figure size. Default is (18, 4).
    Side effects:
        Sets the global matplotlib font family to Times New Roman, opens a
        new figure and writes 'transcript.svg' to the working directory.
    """
    plt.rcParams['font.family'] = 'Times New Roman'
    plt.figure(figsize=figsize)
    gray = (0.5, 0.5, 0.5)
    letters = string.ascii_lowercase
    hues = np.linspace(0, 1, 10, endpoint=False)
    color_map = {char: hsv_to_rgb(hue, 0.8, 0.9)
                 for char, hue in zip(letters[:10], hues)}
    # Extend (not replace) the map: the remaining letters cycle through the
    # same hues in a muted tone so that every letter has an entry.
    color_map.update({char: hsv_to_rgb(hues[k % len(hues)], 0.3, 0.8)
                      for k, char in enumerate(letters[10:])})
    color_map['-'] = gray
    color_map['_'] = gray
    spacing = 0.1  # Space between letters relative to letter width
    width = 1.0    # Width of each letter
    total_width = len(text) * width * (1 + spacing) - spacing
    for i, char in enumerate(text):
        x_pos = i * width * (1 + spacing)
        if char == '_':
            # Draw underscore as a line slightly below the baseline
            plt.plot([x_pos - width/3, x_pos + width/3],
                    [-0.2, -0.2],
                    color=color_map['_'],
                    linewidth=2)
        else:
            # Unknown symbols (digits, '?', ...) fall back to gray.
            color = color_map.get(char.lower(), gray)
            plt.text(x_pos, 0, char, fontsize=14, color=color,
                    ha='center', va='center')
    plt.xlim(-width/2, total_width - width/2)
    plt.ylim(-0.5, 0.5)
    plt.axis('off')
    plt.tight_layout()
    plt.savefig('transcript.svg', format='svg')
    #plt.show()
+# Example usage
+#text =  "AACCCBBAAFFDDDEEEECCCCBBAAAEEECCCFFFFAAACCBBAAFFDDDDEEEEECCCCCBBBBAAAEECCCCCFFFAAAACCCBBBBAAAFFFBBBEECCBBBAAEECCCFFAA"
+#plot_string(text)
def parse_sequence(sequence):
    """
    Parse a path string into main-path edges and branch groups.
    The input is a comma-separated list of edges such as "A1->C2"; edges
    enclosed in parentheses, e.g. "(F5->B6, F5->D6)", form one branch group.
    Args:
        sequence (str): Path description
    Returns:
        tuple: (main_path, branches)
            - main_path: Flat list [src, dst, src, dst, ...] of the edges
              outside parentheses
            - branches: One flat [src, dst, ...] list per parenthesised group
    """
    branches = []
    main_path = []
    current_branch = []
    in_branch = False
    for part in sequence.split(','):
        part = part.strip()
        if '(' in part:
            in_branch = True
        # Decide membership before handling ')': the edge that carries the
        # closing parenthesis still belongs to the group it closes.
        part_in_branch = in_branch
        closes_group = ')' in part
        if closes_group:
            in_branch = False
        # The pattern skips parentheses and arrows on its own.
        nodes = re.findall(r'([A-Za-z_]\d+)', part)
        if len(nodes) == 2:
            if part_in_branch:
                current_branch.extend(nodes)
            else:
                main_path.extend(nodes)
        if closes_group and current_branch:
            branches.append(current_branch)
            current_branch = []
    # An unterminated "(" still yields its collected edges.
    if current_branch:
        branches.append(current_branch)
    return main_path, branches
def get_node_number(node):
    """
    Return the first run of digits in a node label as an integer
    (e.g. "A10" -> 10).
    """
    digit_runs = re.findall(r'\d+', node)
    return int(digit_runs[0])
def plot_sequence(sequence, figsize=(8, 3)):
    """
    Draw a path string (see parse_sequence) as a left-to-right graph and
    save it as 'transcript.svg'.
    Nodes are placed horizontally by the number in their label; nodes that
    share a number are stacked vertically around the centre line. Each node
    is coloured by its leading letter (case-insensitive): A-J vivid, K-Z
    muted on the same hue wheel, anything else gray.
    Args:
        sequence (str): Path description, e.g. "A1->C2, (C2->B3, C2->D3)"
        figsize (tuple): Matplotlib figure size. Default is (8, 3).
    Side effects:
        Opens a new matplotlib figure and writes 'transcript.svg' to the
        working directory.
    """
    plt.figure(figsize=figsize)
    # Assign colors
    gray = (0.5, 0.5, 0.5)
    letters = string.ascii_uppercase
    hues = np.linspace(0, 1, 10, endpoint=False)
    color_map = {char: hsv_to_rgb(hue, 0.8, 0.9)
                 for char, hue in zip(letters[:10], hues)}
    # Cycle through the hues so that all 16 remaining letters get a colour,
    # not only the first ten of them.
    color_map.update({char: hsv_to_rgb(hues[k % len(hues)], 0.3, 0.8)
                      for k, char in enumerate(letters[10:])})
    color_map['_'] = gray
    main_path, branches = parse_sequence(sequence)
    G = nx.DiGraph()
    # Calculate positions
    pos = {}
    x_spacing = 1
    y_spacing = 0.5
    # Group nodes by their number
    nodes_by_number = {}
    all_nodes = set(main_path)
    for branch in branches:
        all_nodes.update(branch)
    for node in all_nodes:
        nodes_by_number.setdefault(get_node_number(node), []).append(node)
    # Position nodes
    for num in sorted(nodes_by_number):
        nodes = nodes_by_number[num]
        x = (num - 1) * x_spacing
        if len(nodes) == 1:
            pos[nodes[0]] = (x, 0)
        else:
            # Center branching nodes vertically
            total_height = (len(nodes) - 1) * y_spacing
            start_y = -total_height / 2
            for i, node in enumerate(sorted(nodes)):
                pos[node] = (x, start_y + i * y_spacing)
    # Add edges: both lists are flat [src, dst, src, dst, ...] sequences
    for i in range(0, len(main_path)-1, 2):
        G.add_edge(main_path[i], main_path[i+1])
    for branch in branches:
        for i in range(0, len(branch)-1, 2):
            G.add_edge(branch[i], branch[i+1])
    # Draw arrows
    nx.draw_networkx_edges(G, pos, edge_color='gray',
                          arrowsize=10, width=1.5,
                          arrowstyle='->')
    # Draw nodes
    for node in G.nodes():
        letter = node[0]
        # Lowercase labels share the colour of their uppercase letter.
        color = color_map.get(letter.upper(), gray)
        circle = plt.Circle(pos[node], 0.1,
                          color=color, alpha=0.3)
        plt.gca().add_patch(circle)
        plt.text(pos[node][0], pos[node][1], node[0],
                color=color, fontsize=8,
                ha='center', va='center',
                fontweight='bold')
    plt.axis('equal')
    plt.axis('off')
    plt.tight_layout()
    plt.savefig('transcript.svg', format='svg')
+# Example usage
+#sequence =  "A1->C2, C2->B3, B3->A4, A4->F5, (F5->B6, F5->D6), (B6->E7, D6->E7), E7->C8, C8->B9, B9->A10, A10->E11, E11->C12, C12->F13"
+#plot_sequence(sequence, (5.4,2))
+#################plot for transcripts############################
+#################plot for 2D trajectories############################
+def plot_routes_animation(G, routes, colors, output_file, fps=20, duration_sec=10):
+    """
+    Create an animation showing routes appearing dynamically.
+    Args:
+        G (networkx.MultiDiGraph): Street network graph
+        routes (list): List of routes (each route is a list of nodes)
+        colors (list): List of colors for each route
+        output_file (str): Output filename (should end with .gif or .mp4)
+        fps (int): Frames per second
+        duration_sec (int): Total animation duration in seconds
+    """
+    # Create figure and axis
+    fig, ax = plt.subplots(figsize=(10, 8))
+    plt.rcParams['font.family'] = 'Times New Roman'
+    # Plot the base map
+    # ox.plot_graph(G, ax=ax, show=False, close=False,
+    #              edge_color='gray', edge_alpha=0.2, node_size=0)
+    # Create empty route lines
+    route_lines = []
+    route_points = []
+    # Initialize all routes as empty
+    for color in colors:
+        line, = ax.plot([], [], marker='D', color=color, linewidth=2, alpha=0.8, zorder=2)
+        route_lines.append(line)
+        route_points.append([])
+    # Extract coordinates for all routes
+    all_route_coords = []
+    for route in routes:
+        coords = []
+        for node in route:
+            x = G.nodes[node]['x']
+            y = G.nodes[node]['y']
+            coords.append((x, y))
+        all_route_coords.append(coords)
+    # Calculate total number of frames
+    total_frames = fps * duration_sec
+    # Animation update function
+    def update(frame):
+        # Calculate progress (0 to 1)
+        progress = frame / total_frames
+        # Update each route
+        for i, coords in enumerate(all_route_coords):
+            # Determine how many points to show for this route
+            route_progress = min(1.0, progress * len(routes) - i)
+            if route_progress <= 0:
+                # Route hasn't started yet
+                route_lines[i].set_data([], [])
+                continue
+            # Calculate number of points to show
+            num_points = max(2, int(route_progress * len(coords)))
+            # Get coordinates to display
+            visible_coords = coords[:num_points]
+            xs, ys = zip(*visible_coords) if visible_coords else ([], [])
+            # Update line data
+            route_lines[i].set_data(xs, ys)
+        # Update legend based on which routes are visible
+        visible_routes = [i for i, line in enumerate(route_lines)
+                         if len(line.get_xdata()) > 0]
+        if visible_routes:
+            # Update legend with only visible routes
+            ax.legend([route_lines[i] for i in visible_routes],
+                     [f'Period {i+1}' for i in visible_routes],
+                     loc='upper right', prop={'size': 14},
+                     bbox_to_anchor=(1, 1))
+        return route_lines
+    # Create animation
+    ani = animation.FuncAnimation(
+        fig, update, frames=total_frames,
+        interval=1000/fps, blit=True
+    )
+    # Tight layout
+    plt.tight_layout()
+    # Save animation
+    if output_file.endswith('.gif'):
+        ani.save(output_file, writer='pillow', fps=fps, dpi=150)
+    else:
+        # For MP4, use ffmpeg
+        writer = animation.FFMpegWriter(fps=fps, bitrate=5000)
+        ani.save(output_file, writer=writer, dpi=150)
+    plt.close()
+    print(f"Animation saved to {output_file}")
+# Example usage:
+# plot_routes_animation(G, routes, colors, "route_animation.gif")
+# For MP4: plot_routes_animation(G, routes, colors, "route_animation.mp4")
+def plot_routes(G, routes, colors, output_file):
+    """
+    Plot multiple routes on the same map.
+    Args:
+        G (networkx.MultiDiGraph): Street network graph
+        routes (list): List of routes (each route is a list of nodes)
+        colors (list): List of colors for each route
+        output_file (str): Output filename
+    """
+    # Create figure and axis
+    fig, ax = plt.subplots(figsize=(8, 6))
+    plt.rcParams['font.family'] = 'Times New Roman'
+    # Plot the base map
+    # ox.plot_graph(G, ax=ax, show=False, close=False,
+    #              edge_color='gray', edge_alpha=0.2, node_size=0)
+    # Create empty list to store route lines for legend
+    route_lines = []
+    # Plot each route
+    for route, color in zip(routes, colors):
+        # Extract the coordinates for each node in the route
+        xs = []
+        ys = []
+        for node in route:
+            # Get node coordinates
+            x = G.nodes[node]['x']
+            y = G.nodes[node]['y']
+            xs.append(x)
+            ys.append(y)
+        # Plot the route
+        line = ax.plot(xs, ys, marker='D', color=color, linewidth=2, alpha=0.2, zorder=2)[0]
+        route_lines.append(line)
+    # Add legend
+    ax.legend(route_lines,
+             [f'Period {i+1}' for i in range(len(routes))],
+             loc='upper right', prop={'size': 14},
+             bbox_to_anchor=(1.25, 0.85))
+    # Adjust layout and save
+    plt.tight_layout()
+    plt.savefig(output_file, dpi=100, bbox_inches='tight')
+    plt.show()
+    plt.close()
+#################plot for 2D trajectories############################
+def plot_task_2(obs_len, gt_seq_len, pred_seq_len, figsize_w=10, title=None):
+    """
+    Plot both GT and Pred timelines in the same figure with aligned scales.
+    Args:
+        obs_len: Length of observation period
+        gt_seq_len: Total sequence length for ground truth
+        pred_seq_len: Total sequence length for prediction
+        figsize_w: Width of the figure
+        title: Optional title for the figure
+    """
+    # Use the maximum sequence length to determine the x-axis limits
+    max_seq_len = max(gt_seq_len, pred_seq_len)
+    # Create figure with two subplots, one for GT and one for Pred
+    fig, axes = plt.subplots(2, 1, figsize=(figsize_w, 2.5), gridspec_kw={'hspace': 0.3})
+    plt.rcParams['font.family'] = 'Times New Roman'
+    # Add title if provided
+    if title:
+        fig.suptitle(title, fontsize=14, fontweight='bold')
+    # Create consistent bar heights and label offset
+    bar_height = 0.5
+    label_offset = max_seq_len * 0.15  # Proportional offset based on sequence length
+    # GT plot (top)
+    y_position = 0
+    axes[0].barh(y_position, obs_len, height=bar_height, left=0, color='lightgray')
+    axes[0].barh(y_position, gt_seq_len+1-obs_len, height=bar_height, left=obs_len, color='lightgreen')
+    axes[0].text(-label_offset, y_position, "GT:", fontsize=12, fontweight='bold', verticalalignment='center')
+    # Pred plot (bottom)
+    axes[1].barh(y_position, obs_len, height=bar_height, left=0, color='lightgray')
+    axes[1].barh(y_position, pred_seq_len+1-obs_len, height=bar_height, left=obs_len, color='lightblue')
+    axes[1].text(-label_offset, y_position, "Pred:", fontsize=12, fontweight='bold', verticalalignment='center')
+    # Configure both axes consistently
+    for ax in axes:
+        # Set consistent x-limits for alignment
+        ax.set_xlim(-label_offset, max_seq_len+1)
+        ax.set_ylim(-0.5, 0.5)
+        ax.set_yticks([])
+        # Remove the box/frame
+        for spine in ax.spines.values():
+            spine.set_visible(False)
+        # Add a thin line below the bar for better visibility
+        ax.axhline(y_position - bar_height/2, color='black', linewidth=0.5)
+    # Set tick marks for each plot
+    axes[0].set_xticks([0, obs_len, gt_seq_len])
+    axes[1].set_xticks([0, obs_len, pred_seq_len])
+    plt.tight_layout(rect=[0, 0, 1, 0.95] if title else [0, 0, 1, 1])
+    return fig
+def plot_task_3(gt_seq_len, GT_start, GT_end, pred_start, pred_end, figsize_w=10, title=None):
+    """
+    Plot both GT and Pred timelines in the same figure with aligned scales.
+    Args:
+        gt_seq_len: Total sequence length for ground truth
+        GT_start: Start position of GT highlight bar
+        GT_end: End position of GT highlight bar
+        pred_start: Start position of prediction highlight bar
+        pred_end: End position of prediction highlight bar
+        figsize_w: Width of the figure
+        title: Optional title for the figure
+    """
+    # Use the maximum sequence length to determine the x-axis limits
+    max_seq_len = gt_seq_len
+    # Create figure with two subplots, one for GT and one for Pred
+    fig, axes = plt.subplots(2, 1, figsize=(figsize_w, 2.5), gridspec_kw={'hspace': 0.3})
+    plt.rcParams['font.family'] = 'Times New Roman'
+    # Add title if provided
+    if title:
+        fig.suptitle(title, fontsize=14, fontweight='bold')
+    # Create consistent bar heights and label offset
+    bar_height = 0.5
+    label_offset = max_seq_len * 0.15  # Proportional offset based on sequence length
+    # GT plot (top)
+    y_position = 0
+    # Plot full lightgray bar for GT
+    axes[0].barh(y_position, gt_seq_len, height=bar_height, left=0, color='lightgray')
+    # Plot lightgreen bar within GT from GT_start to GT_end
+    axes[0].barh(y_position, GT_end - GT_start, height=bar_height, left=GT_start, color='lightgreen')
+    axes[0].text(-label_offset, y_position, "GT:", fontsize=12, fontweight='bold', verticalalignment='center')
+    # Pred plot (bottom)
+    # Plot full lightgray bar for Pred
+    axes[1].barh(y_position, gt_seq_len, height=bar_height, left=0, color='lightgray')
+    # Plot lightblue bar within Pred from pred_start to pred_end
+    axes[1].barh(y_position, pred_end - pred_start, height=bar_height, left=pred_start, color='lightblue')
+    axes[1].text(-label_offset, y_position, "Pred:", fontsize=12, fontweight='bold', verticalalignment='center')
+    # Configure both axes consistently
+    for ax in axes:
+        # Set consistent x-limits for alignment
+        ax.set_xlim(-label_offset, max_seq_len+1)
+        ax.set_ylim(-0.5, 0.5)
+        ax.set_yticks([])
+        # Remove the box/frame
+        for spine in ax.spines.values():
+            spine.set_visible(False)
+        # Add a thin line below the bar for better visibility
+        ax.axhline(y_position - bar_height/2, color='black', linewidth=0.5)
+    # Set tick marks for each plot
+    axes[0].set_xticks([0, GT_start, GT_end, gt_seq_len])
+    axes[1].set_xticks([0, pred_start, pred_end, gt_seq_len])
+    plt.tight_layout(rect=[0, 0, 1, 0.95] if title else [0, 0, 1, 1])
+    return fig
+import string
+def plot_images_with_token(images, tokens, n_rows = 2):
+    assert len(images) == len(tokens), "Each image must have a corresponding token"
+    n_images = len(images)
+    # Calculate rows and columns for grid layout
+    n_cols = (n_images + 1) // n_rows  # Ceiling division to handle odd number of images
+    # Create a figure to display the images
+    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows))
+    plt.rcParams['font.family'] = 'Times New Roman'
+    unique_chars = (sorted(set(string.ascii_lowercase))[:10])
+    hues = np.linspace(0, 1, len(unique_chars), endpoint=False)
+    color_map = {char: hsv_to_rgb(hue, 0.8, 0.9)
+                 for char, hue in zip(unique_chars, hues)}
+    # Make axes a 2D array even if there's just one column
+    if n_cols == 1:
+        axes = axes.reshape(-1, 1)
+    # Flatten axes for easy iteration if there are multiple columns
+    axes_flat = axes.flatten()
+    for i, (image, token) in enumerate(zip(images, tokens)):
+        if i < len(axes_flat):
+            color = color_map[token.lower()]
+            axes_flat[i].imshow(image)
+            axes_flat[i].set_title(token, color=color, size=50)
+            axes_flat[i].axis('off')  # Hide axes
+    # Hide any unused subplots
+    for j in range(i + 1, n_rows * n_cols):
+        if j < len(axes_flat):
+            axes_flat[j].axis('off')
+            fig.delaxes(axes_flat[j])
+    plt.tight_layout()
+    plt.savefig('anchors.jpg', bbox_inches='tight', pad_inches=0)
+    #plt.show()

utils/render.py ADDED Viewed

	@@ -0,0 +1,242 @@

+import matplotlib
+matplotlib.use('Agg') # Non-interactive backend
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
+from sklearn.decomposition import PCA
+from scipy.spatial.transform import Rotation as R
+def render_smpl(pose_data, output_path, fps=30):
+    """
+    Render SMPL 3D pose data to a video file.
+    Args:
+        pose_data (np.ndarray): Shape (Frames, 24, 3)
+        output_path (str): Path to save the MP4 video.
+        fps (int): Frames per second.
+    """
+    # SMPL kinematic tree (approximate for visualization)
+    # 0: Pelvis
+    # 1: L_Hip, 2: R_Hip, 3: Spine1
+    # 4: L_Knee, 5: R_Knee, 6: Spine2
+    # 7: L_Ankle, 8: R_Ankle, 9: Spine3
+    # 10: L_Foot, 11: R_Foot, 12: Neck
+    # 13: L_Collar, 14: R_Collar, 15: Head
+    # 16: L_Shoulder, 17: R_Shoulder
+    # 18: L_Elbow, 19: R_Elbow
+    # 20: L_Wrist, 21: R_Wrist
+    # 22: L_Hand, 23: R_Hand
+    # Connectivity for drawing bones
+    connections = [
+        (0, 1), (0, 2), (0, 3),
+        (1, 4), (2, 5), (3, 6),
+        (4, 7), (5, 8), (6, 9),
+        (7, 10), (8, 11), (9, 12),
+        (9, 13), (9, 14), (12, 15),
+        (13, 16), (14, 17),
+        (16, 18), (17, 19),
+        (18, 20), (19, 21),
+        (20, 22), (21, 23)
+    ]
+    fig = plt.figure(figsize=(10, 10))
+    ax = fig.add_subplot(111, projection='3d')
+    # --- Alignment & Centering ---
+    # 1. Fit plane to feet to find ground orientation
+    feet_indices = [10, 11] # L_Foot, R_Foot
+    feet_points = pose_data[:, feet_indices, :].reshape(-1, 3)
+    pca = PCA(n_components=3)
+    pca.fit(feet_points)
+    normal = pca.components_[2] # Component with least variance is the normal
+    # Calculate Body Up vector (Pelvis to Head) to determine correct up direction
+    # Pelvis is 0, Head is 15
+    pelvis_head_vector = pose_data[:, 15, :] - pose_data[:, 0, :]
+    avg_body_up = np.mean(pelvis_head_vector, axis=0)
+    # Ensure normal points in same direction as body up
+    if np.dot(normal, avg_body_up) < 0:
+        normal = -normal
+    # 2. Compute rotation to align normal to Z-axis [0, 0, 1]
+    target_normal = np.array([0, 0, 1])
+    # Use scipy to find rotation
+    # We want R such that R * normal = target_normal
+    # align_vectors finds rotation that maps vectors_b to vectors_a.
+    # So we map normal (b) to target (a).
+    rot, rssd = R.align_vectors([target_normal], [normal])
+    rot_matrix = rot.as_matrix()
+    # Apply rotation to all points
+    # Points are (Frames, Joints, 3). Flatten for transform
+    original_shape = pose_data.shape
+    flat_data = pose_data.reshape(-1, 3)
+    # Apply rotation: (R @ v.T).T = v @ R.T
+    # Scipy apply: rot.apply(vectors) handles the broadcasting
+    pose_data_rotated = rot.apply(flat_data)
+    pose_data = pose_data_rotated.reshape(original_shape)
+    # 3. Center trajectory
+    # Center X/Y at 0
+    all_x = pose_data[:, :, 0]
+    all_y = pose_data[:, :, 1]
+    all_z = pose_data[:, :, 2]
+    # Mean of all points as center (or could use root joint mean)
+    center_x = np.mean(all_x)
+    center_y = np.mean(all_y)
+    pose_data[:, :, 0] -= center_x
+    pose_data[:, :, 1] -= center_y
+    # Shift Z so min is 0 (Ground level)
+    min_z = np.min(all_z)
+    pose_data[:, :, 2] -= min_z
+    # Update bounds variables for plotting
+    all_x = pose_data[:, :, 0]
+    all_y = pose_data[:, :, 1]
+    all_z = pose_data[:, :, 2]
+    mid_x = (np.min(all_x) + np.max(all_x)) / 2
+    mid_y = (np.min(all_y) + np.max(all_y)) / 2
+    mid_z = (np.min(all_z) + np.max(all_z)) / 2
+    max_range = np.array([np.ptp(all_x), np.ptp(all_y), np.ptp(all_z)]).max() / 2.0
+    # Recalculate bounds after shift
+    all_x = pose_data[:, :, 0]
+    all_y = pose_data[:, :, 1]
+    all_z = pose_data[:, :, 2]
+    # Use (min+max)/2 for center to ensure bounding box is centered
+    mid_x = (np.min(all_x) + np.max(all_x)) / 2
+    mid_y = (np.min(all_y) + np.max(all_y)) / 2
+    mid_z = (np.min(all_z) + np.max(all_z)) / 2
+    # Dynamic ground plane bounds covering all trajectory
+    padding = 1.0 # Increase padding
+    gp_min_x = np.min(all_x) - padding
+    gp_max_x = np.max(all_x) + padding
+    gp_min_y = np.min(all_y) - padding
+    gp_max_y = np.max(all_y) + padding
+    def update(frame):
+        ax.clear()
+        ax.set_axis_off()
+        # Transparent gray ground plane at z=0
+        x = np.linspace(gp_min_x, gp_max_x, 2)
+        y = np.linspace(gp_min_y, gp_max_y, 2)
+        X, Y = np.meshgrid(x, y)
+        Z = np.zeros_like(X) # Ground at z=0
+        ax.plot_surface(X, Y, Z, color='gray', alpha=0.2, shade=False)
+        current_pose = pose_data[frame]
+        # Scatter points for joints
+        ax.scatter(current_pose[:, 0], current_pose[:, 1], current_pose[:, 2], c='blue', s=20)
+        # Draw bones
+        for start, end in connections:
+            xs = [current_pose[start, 0], current_pose[end, 0]]
+            ys = [current_pose[start, 1], current_pose[end, 1]]
+            zs = [current_pose[start, 2], current_pose[end, 2]]
+            ax.plot(xs, ys, zs, c='red')
+        # Set limits
+        ax.set_xlim(mid_x - max_range, mid_x + max_range)
+        ax.set_ylim(mid_y - max_range, mid_y + max_range)
+        ax.set_zlim(mid_z - max_range, mid_z + max_range)
+        # ax.set_xlabel('X')
+        # ax.set_ylabel('Y')
+        # ax.set_zlabel('Z')
+        ax.set_title(f"Frame {frame}")
+    ani = animation.FuncAnimation(fig, update, frames=len(pose_data), interval=1000/fps)
+    # Save using ffmpeg writer
+    print(f"Saving video to {output_path}...")
+    try:
+        if animation.writers.is_available('ffmpeg'):
+            writer = animation.FFMpegWriter(fps=fps, bitrate=5000)
+            ani.save(output_path, writer=writer)
+        else:
+            raise RuntimeError("ffmpeg not available")
+    except Exception as e:
+        print(f"ffmpeg failed or not found ({e}). Using OpenCV fallback...")
+        try:
+            import cv2
+            plt.close(fig) # Close the animation fig
+            # Re-setup figure for opencv loop
+            fig = plt.figure(figsize=(10, 10))
+            ax = fig.add_subplot(111, projection='3d')
+            # Figure size in pixels approx (10*100 = 1000x1000 usually dpi=100)
+            fig.canvas.draw()
+            width, height = fig.canvas.get_width_height()
+            # Setup video writer - Try H.264 (avc1) first
+            fourcc = cv2.VideoWriter_fourcc(*'avc1')
+            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+            if not out.isOpened():
+                print("avc1 failed. Trying h264...")
+                fourcc = cv2.VideoWriter_fourcc(*'h264')
+                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+            if not out.isOpened():
+                print("h264 failed. Trying vp80...")
+                fourcc = cv2.VideoWriter_fourcc(*'vp80')
+                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+            if not out.isOpened():
+                print("vp80 failed. Trying mp4v (less compatible)...")
+                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+            if not out.isOpened():
+                raise RuntimeError("Failed to open VideoWriter with any compatible codec.")
+            print("Rendering frames directly to OpenCV VideoWriter...")
+            for frame in range(len(pose_data)):
+                update(frame)
+                fig.canvas.draw()
+                # Convert canvas to image
+                # Check for buffer_rgba support (matplotlib 3.x)
+                try:
+                    img = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8)
+                    img = img.reshape(height, width, 4)[:, :, :3] # RGBA -> RGB
+                except AttributeError:
+                    # Fallback for older matplotlib or different backend
+                    img = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
+                    img = img.reshape(height, width, 3)
+                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+                out.write(img)
+            out.release()
+            plt.close(fig)
+            print("OpenCV fallback rendering complete.")
+        except Exception as cv_e:
+            print(f"OpenCV fallback also failed: {cv_e}")
+            raise cv_e
+    return output_path