---
# Configuration with three top-level sections: `model`, `datasets`, `run`.
# NOTE(review): the nesting below was reconstructed from key order after the
# file had been collapsed onto one line - confirm against the config loader.

model:
  arch: mini_gpt4_llama_v2
  freeze_vit: true
  freeze_qformer: true
  max_txt_len: 256
  low_resource: true
  image_size: 224
  # NOTE(review): end_sym is an empty string - confirm this is intended for
  # the selected language model.
  end_sym: ""
  # Use a smaller model to save GPU memory.
  llama_model: "microsoft/Phi-3.5-mini-instruct"
  ckpt: "checkpoints/video_llama_checkpoint_last.pth"
  use_grad_checkpoint: true
  chat_template: true
  lora_r: 64
  lora_alpha: 16
  length: 50
  use_grad_checkpoint_llm: true
  max_context_len: 3600
  architectures: ["MiniGPT4_Video"]
  device: "cuda"
  drop_path_rate: 0
  img_size: 224
  model_type: "minigpt4_video"
  num_query_token: 32
  prompt: ""
  # Use float32 to avoid precision issues.
  torch_dtype: "float32"
  transformers_version: "4.42.3"
  vit_precision: "fp16"
  vit_model: "eva_clip_g"
  token_pooling: true
  lora_target_modules: ["q_proj", "v_proj"]
  lora_dropout: 0.05
  remove_template: false
  prompt_path: ""
  minigpt4_gpu_id: 0
  whisper_gpu_id: 0
  answer_module_gpu_id: 0

datasets:
  video_chatgpt:
    batch_size: 2  # Reduced batch size to save GPU memory.
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 200

run:
  seed: 42
  amp: false  # AMP disabled to avoid compatibility issues.