"""Launcher for deepfake detector training and testing.

Reads a per-detector YAML configuration, builds the list of train/test tasks
and runs each detector's ``train.py`` / ``test.py`` in a subprocess. Also
offers a demo mode (``--demo``) and a single-image detect mode (``--detect``).
"""

import os
import subprocess
import time
import argparse
import yaml
import glob
import shutil


def load_config(config_path):
    """Load a launcher configuration from a YAML file.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        The parsed document. An empty file (which ``yaml.safe_load`` parses
        to ``None``) yields an empty dict so callers can keep using ``.get``.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)
    # An empty document parses to None; normalise it so `.get(...)` works.
    return {} if config is None else config

def get_gpus():
    """Take one ``nvidia-smi`` snapshot and report idle GPUs and free memory.

    This function does not wait: polling until a GPU is free enough (the
    ``min_vram`` threshold) is the caller's job.

    Returns:
        tuple: ``(free, memory)`` where

        * ``free`` is a list of GPU indices, as strings in ascending order,
          for which the process table lists no compute (``C``) process;
        * ``memory`` is a 1-D integer array with one entry per GPU: the second
          MiB value of the memory column minus the first (total minus used in
          nvidia-smi's ``used / total`` layout).

    Raises:
        IndexError: If the output lacks the expected table dividers (for
            example when ``nvidia-smi`` is missing or printed an error).
    """
    from numpy import argwhere, asarray, diff
    import re

    # Close the pipe deterministically instead of leaving it to the GC.
    with os.popen('nvidia-smi') as pipe:
        smi = pipe.readlines()

    # --- GPU table: rows between the '|===+===+===|' rule and the last
    # '+---+---+---+' rule; the per-GPU status row is the one with '%' and 'MiB'.
    udiv = re.compile('[|]={3,}[+]={3,}[+]={3,}[|]')
    ldiv = re.compile('[+]-{3,}[+]-{3,}[+]-{3,}[+]')
    divider_up = argwhere([udiv.match(line) is not None for line in smi])[0, 0]
    divider_down = argwhere([ldiv.match(line) is not None for line in smi])[-1, 0]

    gpus = [line for line in smi[divider_up+1:divider_down] if '%' in line and 'MiB' in line]
    gpus = [gpu.split('|')[2].replace(' ', '').replace('MiB', '').split('/') for gpu in gpus]
    # Index the single diff column instead of squeeze(): squeeze() collapses a
    # one-GPU result to a 0-d array, which cannot be passed to max() or iterated.
    memory = diff(asarray(gpus).astype(int).reshape(-1, 2), axis=1)[:, 0]

    # --- Process table: rows between the last two full-width dividers.
    div = re.compile('[+]-{3,}[+]|[|]={3,}[|]')
    dividers = argwhere([div.match(line) is not None for line in smi])[-2:, 0]
    processes = [line for line in smi[dividers[0]+1:dividers[1]] if ' C ' in line]
    busy = {process.split()[1] for process in processes}

    # Derive the candidate indices from the number of GPUs actually listed
    # rather than assuming exactly two devices (0 and 1).
    free = [str(index) for index in range(len(gpus)) if str(index) not in busy]

    return free, memory

def autotest(train_list, data_list, detector_name, checkpoint_name):
    """Generate the ordered task list from training and testing configurations.

    For every entry of ``train_list`` one ``train`` task is emitted, followed
    by one ``test`` task per entry of ``data_list``.

    Args:
        train_list: Iterable of mappings, each with a ``'data'`` key naming
            the training data.
        data_list: List of test data keys.
        detector_name: Detector identifier stored in every task.
        checkpoint_name: Model name stored in every ``test`` task.

    Returns:
        list[dict]: Tasks shaped ``{'type': 'train' | 'test', 'details':
        {'detector', 'model', 'data'}}``; ``'model'`` is ``None`` for train
        tasks.

    Raises:
        TypeError: If ``data_list`` is not a list.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not isinstance(data_list, list):
        raise TypeError(f"data_list must be a list, got {type(data_list).__name__}")

    task_list = []
    for train_config in train_list:
        task_list.append({
            'type': 'train',
            'details': {'detector': detector_name, 'model': None, 'data': train_config['data']},
        })
        task_list.extend(
            {'type': 'test', 'details': {'detector': detector_name, 'model': checkpoint_name, 'data': data}}
            for data in data_list
        )

    return task_list


def parse_phases(phases_str):
    """Translate a phases option into the list of task types to run.

    The comparison is case-insensitive. ``'both'`` expands to
    ``['train', 'test']``; ``'train'`` and ``'test'`` map to one-element lists.

    Raises:
        ValueError: If the value is none of ``train``, ``test`` or ``both``.
    """
    phase_table = {
        'both': ('train', 'test'),
        'train': ('train',),
        'test': ('test',),
    }
    selected = phase_table.get(phases_str.lower())
    if selected is None:
        raise ValueError(f"Invalid phases: {phases_str}. Must be 'train', 'test', or 'both'")
    # Hand back a fresh list on every call so callers may mutate it freely.
    return list(selected)


def run_demo(args):
    """Run the selected detector(s) in test mode on the ``demo_images`` folder.

    Steps, as implemented below:
      1. Scan ``demo_images`` (next to this file) and write ``split_demo.json``.
      2. For each detector directory under ``detectors/``, copy a pretrained
         weight file to ``checkpoint/demo/weights/best.pt``.
      3. Launch that detector's ``test.py`` once per entry of the ``testing``
         section of its YAML config (``all:all`` when absent or empty).
      4. Remove the temporary ``checkpoint/demo`` directory.

    Args:
        args: Parsed command-line namespace; ``demo_detector``,
            ``weights_name`` and ``config_dir`` are read.

    Raises:
        AssertionError: If the ``demo_images`` folder does not exist.
    """
    import json
    import torch

    project_root = os.path.abspath(os.path.dirname(__file__))
    demo_root = os.path.join(project_root, 'demo_images')
    assert os.path.isdir(demo_root), f"Demo folder not found: {demo_root}"

    def build_demo_split_json(root_path, out_path):
        """Write ``{'test': [...]}`` listing demo images as ``gen/sub/stem``."""
        test_entries = set()
        for mod in ['PreSocial', 'Facebook', 'Telegram', 'X']:
            mod_path = os.path.join(root_path, mod)
            if not os.path.isdir(mod_path):
                continue
            for dirpath, dirnames, filenames in os.walk(mod_path, topdown=True, followlinks=True):
                # Only leaf directories are scanned for images.
                if dirnames:
                    continue
                rel_dir = f"{dirpath}/".replace(mod_path + os.sep, '')
                parts = rel_dir.split(os.sep)[:3]
                if len(parts) < 3:
                    continue
                # The first component is not part of the split entry.
                _, gen, sub = parts
                for fname in filenames:
                    stem, ext = os.path.splitext(fname)
                    if ext.lower() not in ('.png', '.jpg', '.jpeg'):
                        continue
                    test_entries.add(os.path.join(gen, sub, stem))

        with open(out_path, 'w') as f:
            json.dump({'test': sorted(test_entries)}, f)

    split_demo_file = os.path.join(project_root, 'split_demo.json')
    build_demo_split_json(demo_root, split_demo_file)

    def prepare_best_checkpoint(detector_dir, preferred_path=None):
        """Copy a weight file to ``checkpoint/demo/weights/best.pt``.

        ``preferred_path`` (absolute, or relative to ``detector_dir``) is used
        when it exists; otherwise the alphabetically first ``*.pt`` / ``*.pth``
        file under ``checkpoint/pretrained/weights`` is taken. Returns the
        destination path, or ``None`` when no weight file is available.
        """
        weights_dir = os.path.join(detector_dir, 'checkpoint', 'pretrained', 'weights')
        src_weight = None

        if preferred_path:
            src_weight = preferred_path if os.path.isabs(preferred_path) else os.path.normpath(os.path.join(detector_dir, preferred_path))
            if not os.path.isfile(src_weight):
                print(f"[demo] Preferred weights not found at {src_weight}, falling back to search")
                src_weight = None

        if src_weight is None:
            if not os.path.isdir(weights_dir):
                return None
            candidates = []
            for ext in ('*.pt', '*.pth'):
                candidates.extend(glob.glob(os.path.join(weights_dir, ext)))
            if not candidates:
                return None
            src_weight = sorted(candidates)[0]

        run_dir = os.path.join(detector_dir, 'checkpoint', 'demo', 'weights')
        os.makedirs(run_dir, exist_ok=True)
        dst_weight = os.path.join(run_dir, 'best.pt')
        shutil.copy2(src_weight, dst_weight)
        return dst_weight

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    name = 'demo'

    detectors_root = os.path.join(project_root, 'detectors')
    all_methods = ['R50_nodown', 'CLIP-D', 'R50_TF', 'P2G', 'NPR']
    methods = all_methods if args.demo_detector == 'all' else [args.demo_detector]

    os.makedirs(os.path.join(project_root, 'logs'), exist_ok=True)

    for method in methods:
        det_dir = os.path.join(detectors_root, method)
        if not os.path.isdir(det_dir):
            continue

        preferred_weights = args.weights_name or './checkpoint/pretrained/weights/best.pt'
        best_path = prepare_best_checkpoint(det_dir, preferred_weights)
        if best_path is None:
            print(f"[demo] Skipping {method}: no pretrained weights found under checkpoint/pretrained/weights/")
            continue

        try:
            config_path = os.path.join(args.config_dir, f'{method}.yaml')
            # `or` fallbacks: an empty YAML file or a null section parses to None.
            config = (load_config(config_path) if os.path.exists(config_path) else {}) or {}
            detector_args = config.get('detector_args') or []
            testing_keys = config.get('testing') or ['all:all']
            global_cfg = config.get('global') or {}
            num_threads = global_cfg.get('num_threads', 8)

            for data_keys in testing_keys:
                # Both paths are absolute and may contain spaces, so they are
                # quoted like the other string arguments of the shell command.
                args_list = [
                    f'--name "{name}"',
                    '--task test',
                    f'--device {device}',
                    f'--split_file "{split_demo_file}"',
                    f'--data_root "{demo_root}"',
                    f'--data_keys "{data_keys}"',
                    f'--num_threads {num_threads}',
                ] + list(detector_args)

                cmd_args = ' '.join(args_list)
                log_file = os.path.join(project_root, 'logs', f'demo_{method}_{data_keys.replace(":","-")}.log')
                # Output redirection into the log file is currently disabled,
                # so the subprocess prints to the console and this only
                # creates/truncates the file.
                with open(log_file, 'w'):
                    print(f"[demo] Running {method} test with args: {cmd_args}")
                    runner = 'test.py'
                    # `cwd=` runs the script inside the detector directory
                    # without changing this process's working directory.
                    subprocess.run(f'python -u {runner} {cmd_args}', shell=True, cwd=det_dir)
        finally:
            # Always drop the temporary checkpoint copy, even if a run failed.
            shutil.rmtree(os.path.join(det_dir, 'checkpoint', 'demo'))

    print('[demo] Completed. Results saved under detectors/<method>/results/demo/<scenario>/results.csv')

def main():
    """Parse the command line and run the requested mode.

    Modes, checked in this order:
      * ``--demo``: delegate to ``run_demo``.
      * ``--detect``: delegate to ``support.detect.run_detect`` (requires
        ``--detector`` and ``--image``).
      * otherwise: load ``<config-dir>/<detector>.yaml``, build the task list
        with ``autotest`` and, for every task whose type is in ``--phases``,
        call ``train.py`` / ``test.py`` inside ``./detectors/<detector>``.

    Raises:
        FileNotFoundError: If the detector's YAML configuration is missing.
        SystemExit: Raised by argparse on invalid or missing arguments.
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='Launcher for deepfake detector training and testing')
    parser.add_argument('--detector', type=str, required=False,
                        choices=['R50_TF', 'R50_nodown', 'CLIP-D', 'P2G', 'NPR'],
                        help='Detector to use')
    parser.add_argument('--phases', type=str, default='both',
                        choices=['train', 'test', 'both'],
                        help='Phases to run: train, test, or both (default: both)')
    parser.add_argument('--config-dir', type=str, default='configs',
                        help='Path to configs directory (default: configs/)')
    parser.add_argument('--weights_name', type=str, default=None,
                        help='Name of the weights directory')
    parser.add_argument('--demo', action='store_true', help='Run demo on demo_images across detectors')
    parser.add_argument('--demo-detector', type=str, default='all', choices=['all', 'R50_TF', 'R50_nodown', 'CLIP-D', 'P2G', 'NPR'], help='Which detector to demo (default: all)')

    # Add detect mode arguments
    detect_group = parser.add_argument_group('detect', 'Single image detection options')
    detect_group.add_argument('--detect', action='store_true', help='Run single image detection mode')
    detect_group.add_argument('--image', type=str, help='Path to image file for detection')
    detect_group.add_argument('--weights', type=str, default='pretrained', help='Path to model weights for detection')
    detect_group.add_argument('--output', type=str, help='Path to save detection results')
    detect_group.add_argument('--dry-run', action='store_true', help='Print commands without executing')

    args = parser.parse_args()

    if args.demo:
        return run_demo(args)

    if args.detect:
        if args.detector is None:
            parser.error('--detector is required for detect mode')
        if args.image is None:
            parser.error('--image is required for detect mode')
        from support.detect import run_detect
        return run_detect(args)

    if args.detector is None:
        parser.error('--detector is required unless --demo is specified')

    # Load configuration from YAML
    config_path = os.path.join(args.config_dir, f'{args.detector}.yaml')
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Configuration file not found: {config_path}")

    # `or` fallbacks: an empty YAML file or a null section parses to None.
    config = load_config(config_path) or {}

    # Extract configuration values
    global_config = config.get('global') or {}
    detector_args = config.get('detector_args') or []
    training_configs = config.get('training') or []
    test_list = config.get('testing') or []

    dataset_path = global_config.get('dataset_path')
    device_override = global_config.get('device_override')  # Can be None

    # Model name: explicit --weights_name wins, else the first training entry.
    # With no training entries there are no tasks either, so None is harmless.
    if args.weights_name is not None:
        model_name = args.weights_name
    elif training_configs:
        model_name = training_configs[0]['data']
    else:
        model_name = None
    global_config['name'] = model_name

    # Handle string "null" as None
    if device_override == "null" or device_override == "":
        device_override = None
    min_vram = global_config.get('min_vram', 16000)
    split_file = os.path.abspath(global_config.get('split_file', 'split.json'))
    num_threads = global_config.get('num_threads', 8)
    dry_run = global_config.get('dry_run', False)
    only_list = global_config.get('only_list', False)
    phases = parse_phases(args.phases)

    os.makedirs('logs', exist_ok=True)

    # Generate tasks
    tasks = []
    if training_configs:
        tasks.extend(autotest(training_configs, test_list, args.detector, model_name))

    print('Number of tasks:', len(tasks))
    for task in tasks:
        print(task)

    if only_list:
        return

    # From here the launcher builds the arguments for each train/test call
    for task in tasks:
        if task['type'] not in phases:
            continue

        details = task['details']
        # Train tasks carry no model name of their own; use the configured one.
        name = model_name if task['type'] == 'train' else details['model']

        cmd_args = [
            f'--name "{name}"',
            # Absolute path that may contain spaces, hence quoted.
            f'--split_file "{split_file}"',
            f'--task {task["type"]}',
            f'--num_threads {num_threads}',
            f'--data_keys "{details["data"]}"',
            # Left unquoted so the shell can still expand e.g. a leading '~'.
            f'--data_root {dataset_path}',
        ]

        device = device_override
        if device is None and not dry_run:
            print('Waiting for GPU')
            while True:
                free, memory = get_gpus()
                if len(free):
                    device = "cuda:" + free[0]
                else:
                    most_free = max(memory)
                    if most_free > min_vram:
                        # First GPU holding the largest amount of free memory.
                        device = "cuda:" + str([i for i, mem in enumerate(memory) if mem == most_free][0])
                if device is not None:
                    break
                time.sleep(1)
            print('GPU found')

        cmd_args.append(f'--device {device}')

        # Add detector-specific arguments
        cmd_args.extend(detector_args)

        cmd_args_str = ' '.join(cmd_args)

        # Call train.py or test.py
        if not dry_run:
            log_file = f'logs/{task["type"]}_{details["detector"]}_{model_name}_{details["data"]}.log'
            # Output redirection into the log file is currently disabled, so
            # the subprocess prints to the console and this only
            # creates/truncates the file.
            with open(log_file, 'w'):
                start_time = time.time()

                runner = f'{task["type"]}.py'
                print(f'Call to {runner} with: {cmd_args_str}')

                # `cwd=` runs the script inside the detector directory without
                # changing (and having to restore) this process's own cwd.
                subprocess.run(f'python -u {runner} {cmd_args_str}', shell=True,
                               cwd=f'./detectors/{details["detector"]}')

                end_time = time.time()
                print(f'Execution time: {end_time-start_time:.2f} seconds')

                print('#'*80)
                print('#'*80)


# Run the launcher only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()