File size: 4,995 Bytes
c23c50e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""
PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation

Official implementation of the paper:
"PRIMA: Boosting Animal Mesh Recovery with Biological Priors and Test-Time Adaptation"
by Xiaohang Yu, Ti Wang, and Mackenzie Weygandt Mathis
Licensed under a modified MIT license
"""

"""
Split acinoset multiview_mapping.json into train and test sets (7:3 ratio).

Usage:
    python split_acinoset.py \
        --input_json /path/to/multiview_mapping.json \
        --output_dir /path/to/output \
        --train_ratio 0.7 \
        --seed 42
"""

import argparse
import json
import random
from pathlib import Path
from collections import defaultdict

# ------------------------------------------------------------------
# Dataset root directory. EDIT THIS to point to your dataset root.
# Paths passed explicitly on the command line (--input_json,
# --output_dir) are used as given and are not resolved against it.
# NOTE(review): intended as the root for the script's default paths --
# confirm that the CLI defaults are actually derived from this value.
# ------------------------------------------------------------------
BASE_DIR = Path("datasets")


def split_multiview_data(input_json, output_dir, train_ratio=0.7, seed=42):
    """
    Split multiview mapping data into train and test sets.

    Frames are shuffled and split independently within each behavior, so
    the requested ratio is applied per behavior (rounded down for train).
    The results are written to ``train.json`` and ``test.json`` inside
    ``output_dir``; a behavior that ends up with no frames in a split is
    omitted from that split's file.

    Args:
        input_json: Path to multiview_mapping.json, a JSON object mapping
            behavior name -> {frame index -> frame entry}.
        output_dir: Directory to save train.json and test.json
            (created if it does not exist).
        train_ratio: Ratio of training data, within [0, 1]
            (default 0.7 for 70%).
        seed: Random seed for reproducibility.

    Raises:
        ValueError: If train_ratio is outside [0, 1].
    """
    # Reject bad ratios up front: a negative ratio would otherwise turn
    # into a negative slice index and silently produce a nonsense split.
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio}")

    # Seed the module-level RNG so the shuffles below are reproducible.
    random.seed(seed)

    # Load data
    print(f"Loading data from {input_json}...")
    with open(input_json, 'r', encoding='utf-8') as f:
        data = json.load(f)

    train_data = {}
    test_data = {}
    # (train_count, test_count) for every behavior, including those that
    # end up empty in one split, so the summary never drops a behavior.
    split_counts = {}

    # Process each behavior
    for behavior, frames in data.items():
        print(f"\nProcessing behavior: {behavior}")

        # Shuffle a copy of the frame indices, then cut at the split point.
        frame_indices = list(frames)
        random.shuffle(frame_indices)
        train_size = int(len(frame_indices) * train_ratio)
        train_frames = frame_indices[:train_size]
        test_frames = frame_indices[train_size:]

        print(f"  Total frames: {len(frame_indices)}")
        print(f"  Train frames: {len(train_frames)}")
        print(f"  Test frames: {len(test_frames)}")

        # Only non-empty splits are stored in the output mappings.
        if train_frames:
            train_data[behavior] = {idx: frames[idx] for idx in train_frames}
        if test_frames:
            test_data[behavior] = {idx: frames[idx] for idx in test_frames}
        split_counts[behavior] = (len(train_frames), len(test_frames))

    # Save train and test splits
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    train_json = output_dir / "train.json"
    test_json = output_dir / "test.json"

    print(f"\nSaving train data to {train_json}...")
    with open(train_json, 'w', encoding='utf-8') as f:
        json.dump(train_data, f, indent=4)

    print(f"Saving test data to {test_json}...")
    with open(test_json, 'w', encoding='utf-8') as f:
        json.dump(test_data, f, indent=4)

    # Print summary
    print("\n" + "="*50)
    print("Summary:")
    print("="*50)

    total_train_frames = sum(n_train for n_train, _ in split_counts.values())
    total_test_frames = sum(n_test for _, n_test in split_counts.values())
    total_frames = total_train_frames + total_test_frames

    # Guard against an empty dataset to avoid dividing by zero.
    train_pct = total_train_frames / total_frames * 100 if total_frames else 0.0
    test_pct = total_test_frames / total_frames * 100 if total_frames else 0.0

    print(f"Total frames: {total_frames}")
    # A single '%' is the literal percent sign in an f-string.
    print(f"Train frames: {total_train_frames} ({train_pct:.1f}%)")
    print(f"Test frames: {total_test_frames} ({test_pct:.1f}%)")
    print("\nPer behavior:")
    for behavior, (train_count, test_count) in split_counts.items():
        total_count = train_count + test_count
        print(f"  {behavior}: train={train_count}, test={test_count}, total={total_count}")

    print("\nDone!")


if __name__ == "__main__":
    # Command-line entry point: parse the options and run the split.
    # Default locations are derived from BASE_DIR so that editing that
    # constant actually changes where the script looks by default.
    acinoset_dir = BASE_DIR / "acinoset"

    parser = argparse.ArgumentParser(
        description="Split multiview_mapping.json into train/test sets (default 7:3)."
    )
    # Help texts use argparse's %(default)s so they always show the
    # effective default instead of a hand-copied value.
    parser.add_argument(
        "--input_json", type=str,
        default=str(acinoset_dir / "multiview_mapping.json"),
        help="Path to multiview_mapping.json (default: %(default)s)."
    )
    parser.add_argument(
        "--output_dir", type=str,
        default=str(acinoset_dir),
        help="Directory to save train.json and test.json (default: %(default)s)."
    )
    parser.add_argument(
        "--train_ratio", type=float, default=0.7,
        help="Fraction of data for training (default: %(default)s)."
    )
    parser.add_argument(
        "--seed", type=int, default=42,
        help="Random seed for reproducibility (default: %(default)s)."
    )
    args = parser.parse_args()

    split_multiview_data(
        input_json=args.input_json,
        output_dir=args.output_dir,
        train_ratio=args.train_ratio,
        seed=args.seed,
    )