---
# Weights & Biases (wandb) experiment-tracking settings.
# Tracking is switched off in this file (enable: false).
wandb:
  enable: false
  project: cosmos_embed1
  group: ''
  name: ''
  tags: []
  save_code: false
  # Empty in this file; avoid committing a real key to version control.
  api_key: ''
# Model section: network architecture plus checkpoint-loading, precision and
# FSDP options.
# NOTE(review): the source had lost all indentation, so the nesting below is
# reconstructed from key grouping. Assumed boundaries — confirm against the
# consumer's schema:
#   * visual_encoder ends after checkpoint_attention
#   * network ends after num_heldout_frames
#   * fsdp holds enabled / shard_size / replica_size; fsdp_shard_size is a
#     sibling of fsdp, not a child
model:
  network:
    visual_encoder:
      type: eva_vit_g
      img_size: 224
      pretrained: false
      use_fp8: false
      transformer_engine: false
      checkpoint_activations: false
      checkpoint_attention: false
    embed_dim: 768
    num_query_tokens: 32
    max_txt_len: 128
    num_video_frames: 8
    # NOTE(review): 448x448 here, 512x512 for input_hw below, and 224x224 for
    # visual_encoder.img_size and every dataset resolution in this file —
    # confirm these sizes are meant to differ.
    spatial_resolution:
      - 448
      - 448
    temporal_encoding_type: neighboring_token_propagation
    contrastive_type: clip
    qformer_pretrain_ckpt: null
    query_pooling_type: avg
    pretrained_text_encoder: false
    pretrained_visual_encoder: false
    num_heldout_frames: 0
  pretrained_model_path: null
  pretrained_model_strict: true
  # NOTE(review): train.precision in this file is bf16 while this is fp32 —
  # confirm which one governs.
  precision: fp32
  input_hw:
    - 512
    - 512
  fsdp:
    enabled: false
    shard_size: null
    replica_size: null
  fsdp_shard_size: 8
# Dataset section: one sub-mapping per split (train / val / test / inference).
# All four splits carry identical values in this file and use the `mock`
# dataset type with no metadata or data_root set.
# NOTE(review): indentation was lost in the source; every key between one
# *_dataset header and the next is assumed to belong to that split.
# NOTE(review): drop_last is true for the val/test/inference splits as well;
# if it drops the final partial batch, some samples would be skipped during
# evaluation — confirm this is intended.
dataset:
  train_dataset:
    dataset_type: mock
    metadata: null
    data_root: null
    num_video_frames: 8
    resolution:
      - 224
      - 224
    batch_size: 4
    workers: 4
    drop_last: true
    prefetch_factor: 2
    pin_memory: true
    split: null
    random_caption: false
    path_prefix_mapping: {}
    skip_missing_files: true
    caption_field: anomaly_type
    mp4_urls: null
    caption_to_label: {}
    chunk_size_sec: 5.0
  val_dataset:
    dataset_type: mock
    metadata: null
    data_root: null
    num_video_frames: 8
    resolution:
      - 224
      - 224
    batch_size: 4
    workers: 4
    drop_last: true
    prefetch_factor: 2
    pin_memory: true
    split: null
    random_caption: false
    path_prefix_mapping: {}
    skip_missing_files: true
    caption_field: anomaly_type
    mp4_urls: null
    caption_to_label: {}
    chunk_size_sec: 5.0
  test_dataset:
    dataset_type: mock
    metadata: null
    data_root: null
    num_video_frames: 8
    resolution:
      - 224
      - 224
    batch_size: 4
    workers: 4
    drop_last: true
    prefetch_factor: 2
    pin_memory: true
    split: null
    random_caption: false
    path_prefix_mapping: {}
    skip_missing_files: true
    caption_field: anomaly_type
    mp4_urls: null
    caption_to_label: {}
    chunk_size_sec: 5.0
  inference_dataset:
    dataset_type: mock
    metadata: null
    data_root: null
    num_video_frames: 8
    resolution:
      - 224
      - 224
    batch_size: 4
    workers: 4
    drop_last: true
    prefetch_factor: 2
    pin_memory: true
    split: null
    random_caption: false
    path_prefix_mapping: {}
    skip_missing_files: true
    caption_field: anomaly_type
    mp4_urls: null
    caption_to_label: {}
    chunk_size_sec: 5.0
# Training section: optimizer/schedule, loss weights, LoRA, run control,
# callbacks, and EMA / DAMP options.
# NOTE(review): the source had lost all indentation, so the nesting below is
# reconstructed from key grouping. Assumed placements — confirm against the
# consumer's schema:
#   * warmup_steps / policy / lr_decay_iters belong to optim
#   * loss_weights and lora are children of train (not of optim)
#   * callbacks ends after pca_feature_map_visualizer
#   * spectral_reparam, load_training_state and strict_resume are train-level
train:
  optim:
    optim: adamw
    lr: 1.0e-05
    weight_decay: 1.0e-05
    betas:
      - 0.9
      - 0.98
    warmup_steps: 1000
    policy: cosine
    lr_decay_iters: 50000
  # NOTE(review): matching_loss has weight 1.0 while use_text_matching_loss
  # further down is false — confirm the weight is simply unused in that case.
  loss_weights:
    contrastive_loss: 1.0
    captioning_loss: 1.0
    matching_loss: 1.0
  lora:
    enabled: false
    lora_rank: 8
    lora_alpha: 16
    lora_dropout: 0.1
    # Plain string "none" (YAML does not read this as null).
    bias: none
    use_rslora: false
    use_dora: false
    target_modules: []
    modules_to_save: []
  seed: 1234
  max_iter: 50000
  num_nodes: 1
  num_gpus: 1
  gpu_ids:
    - 0
  validation_iter: 1000
  checkpoint_iter: 1000
  # NOTE(review): 0.0 here, but callbacks.gradient_clip.clip_norm is 3.0 —
  # two clipping settings with different values; confirm which takes effect.
  clip_grad_norm: 0.0
  precision: bf16
  resume_training_checkpoint_path: null
  callbacks:
    wandb: {}
    clamp_logit_scale: {}
    logit_parameters_monitor: {}
    iter_speed:
      every_n: 50
      save_s3: false
    gradient_clip:
      clip_norm: 3.0
    grad_norm_monitor:
      every_n: 500
      verbose: false
    spectral_norm_monitor:
      every_n: 1000
      verbose: true
    ema: {}
    log_losses:
      every_n: 50
      verbose: true
    text_frames_visualizer:
      every_n: 500
    pca_feature_map_visualizer:
      every_n: 500
  max_val_iter: null
  freeze_visual_encoder: true
  use_captioning_loss: true
  use_text_matching_loss: false
  ema:
    enabled: false
    beta: 0.9999
  spectral_reparam: false
  damp:
    enabled: false
    beta: 0.1
    mode: const
  load_training_state: false
  strict_resume: false
# Evaluation section: checkpoint to evaluate, batch limit, GPU count and
# evaluation callbacks.
# NOTE(review): the source had lost all indentation. The extent of `callbacks`
# is ambiguous: top_k_values and max_eval_samples are placed inside it here
# because they read as parameters of the two toggles above them, while
# load_dataset_pkl / save_dataset_pkl are placed at the evaluate level to
# mirror the same pair in the inference section. Confirm against the
# consumer's schema before relying on this layout.
evaluate:
  checkpoint: null
  max_val_batches: -1
  num_gpus: 1
  callbacks:
    topk_classification: true
    embedding_visualization: false
    top_k_values:
      - 1
      - 3
      - 5
      - 10
    max_eval_samples: 2000
  load_dataset_pkl: null
  save_dataset_pkl: null
# Inference section: checkpoint, query inputs (videos and/or texts), GPU count
# and the `k` setting.
# NOTE(review): the source had lost all indentation. `query` is assumed to
# hold only input_videos and input_texts; num_gpus, k and the *_dataset_pkl
# keys are assumed to be inference-level — confirm against the consumer's
# schema.
inference:
  checkpoint: null
  query:
    input_videos: []
    input_texts: []
  num_gpus: 1
  k: 5
  load_dataset_pkl: null
  save_dataset_pkl: null
# Export section: source checkpoint, output format and destination.
# With mode `huggingface` and onnx_file null, hf_output_dir is the populated
# output location in this file.
# NOTE(review): the paths below are absolute and specific to one workspace
# (/workspace/alicli/...); adjust them before running elsewhere.
# NOTE(review): the source had lost all indentation. export is assumed to end
# after hf_output_dir; results_dir, encryption_key and model_name are placed
# at the top level because results_dir is the parent directory of both the
# checkpoint and hf_output_dir paths — confirm against the consumer's schema.
export:
  checkpoint: /workspace/alicli/experiments/cosmos_embed1_finetune/finetune/finetune_448p_hf/train/checkpoints/iter_000006000.pt
  onnx_file: null
  mode: huggingface
  opset_version: 17
  batch_size: 1
  on_cpu: true
  verbose: false
  simplify: false
  hf_output_dir: /workspace/alicli/experiments/cosmos_embed1_finetune/finetune/finetune_448p_hf/cosmos_embed1_448p_6000iter_hf

# Experiment-wide settings.
results_dir: /workspace/alicli/experiments/cosmos_embed1_finetune/finetune/finetune_448p_hf
encryption_key: null
model_name: cosmos_embed1