File size: 7,424 Bytes

f8ba0eb

import os
import uuid
import time
import psutil
import torch
import cv2
import shutil
from models.qwen import Qwen2VL
from models.gemma import Gemma
from models.minicpm import MiniCPM
from models.lfm import LFM2
from video_processor import extract_frames, FrameSamplingMethod
import argparse
import json
import logging
from tqdm import tqdm
TEMP_VIDEO_DIR = "temp_videos"
def process_video(model, video_path, prompt, sampling_method_str="CONTENT_AWARE", sampling_rate=5):
    """
    直接处理视频和文本提示，进行推理并返回结果。
    
    Args:
        video_path (str): 视频文件路径
        prompt (str): 文本提示
        sampling_method_str (str): 采样方法字符串
        sampling_rate (int): 采样率或阈值
    
    Returns:
        dict: 推理结果
    """
    request_start_time = time.time()
    request_id = str(uuid.uuid4())
    logging.info(f"[{request_id}] Processing video: '{video_path}', Prompt: '{prompt}'")

    # 验证视频文件
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")
    
    if not video_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
        logging.warning(f"[{request_id}] File '{video_path}' may not be a video file.")

    # 转换采样方法字符串为枚举
    sampling_method_map = {
        "CONTENT_AWARE": FrameSamplingMethod.CONTENT_AWARE,
        "UNIFORM": FrameSamplingMethod.UNIFORM,
    }
    sampling_method = sampling_method_map.get(sampling_method_str, FrameSamplingMethod.CONTENT_AWARE)
    
    # 创建临时目录
    temp_frame_dir = os.path.join(TEMP_VIDEO_DIR, request_id)
    os.makedirs(temp_frame_dir, exist_ok=True)

    try:
        logging.info(f"[{request_id}] Extracting frames using method: {sampling_method.value}, rate/threshold: {sampling_rate}")
        
        frames = extract_frames(video_path, sampling_method, sampling_rate)
        if not frames:
            raise ValueError(f"Could not extract any frames from the video: {video_path}")
        
        logging.info(f"[{request_id}] Extracted {len(frames)} frames successfully. Saving to temporary files...")

        # 将帧保存到临时文件并获取其路径
        frame_paths = []
        for i, frame in enumerate(frames):
            frame_path = os.path.join(temp_frame_dir, f"frame_{i:04d}.jpg")
            cv2.imwrite(frame_path, frame)
            abs_frame_path = os.path.abspath(frame_path)
            frame_paths.append(abs_frame_path)

        logging.info(f"[{request_id}] {len(frame_paths)} frames saved to {temp_frame_dir}")

        # 进行推理
        output = model.generate(frame_paths, prompt)

        logging.info(f"Tokens per second: {output['tokens_per_second']}, Peak GPU memory MB: {output['peak_gpu_memory_mb']}")

        inference_end_time = time.time()
        cpu_usage = psutil.cpu_percent(interval=None)
        cpu_core_utilization = psutil.cpu_percent(interval=None, percpu=True)
        logging.info(f"[{request_id}] Inference time: {inference_end_time - request_start_time:.2f} seconds, CPU usage: {cpu_usage}%, CPU core utilization: {cpu_core_utilization}")
        
        # 添加性能指标到输出
        output["inference_time"] = inference_end_time - request_start_time
        output["cpu_usage"] = cpu_usage
        output["cpu_core_utilization"] = cpu_core_utilization
        output["num_generated_tokens"] = output["num_generated_tokens"]
        output["request_id"] = request_id
        
        return output

    except Exception as e:
        logging.error(f"[{request_id}] An error occurred during processing: {str(e)}", exc_info=True)
        raise e
    finally:
        # 清理临时文件
        if os.path.exists(temp_frame_dir):
            shutil.rmtree(temp_frame_dir)
            logging.info(f"[{request_id}] Cleaned up temporary frame directory: {temp_frame_dir}")


def main():
    """主函数"""
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument("--model_path", type=str, default="Qwen/Qwen2.5-VL-3B-Instruct-AWQ")
        parser.add_argument("--video_dir", type=str, default="videos", help="视频")
        parser.add_argument("--prompt", type=str, default="Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.", help="文本提示")
        parser.add_argument("--sampling_method", type=str, default="UNIFORM", 
                        choices=["CONTENT_AWARE", "UNIFORM", "RANDOM"], 
                        help="帧采样方法")
        parser.add_argument("--sampling_rate", type=int, default=30, help="采样率或阈值")
        args = parser.parse_args()


        # --- 日志和临时文件目录配置 ---
        LOG_DIR = f"logs/{args.model_path.split('/')[-1]}"
        OUTPUT_DIR = f"outputs/{args.model_path.split('/')[-1]}"
        os.makedirs(LOG_DIR, exist_ok=True)
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        os.makedirs(TEMP_VIDEO_DIR, exist_ok=True)
        start_time = time.strftime('%Y%m%d_%H%M%S')
        log_filename = f"{LOG_DIR}/{start_time}.log"
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', filename=log_filename, filemode='a')

        # --- 加载模型和处理器 ---
        logging.info(f"Loading model: {args.model_path}")
        model_load_start = time.time()
        if "qwen" in args.model_path.lower():
            model = Qwen2VL(args.model_path)
        elif "gemma" in args.model_path.lower():
            model = Gemma(args.model_path)
        elif "minicpm" in args.model_path.lower():
            model = MiniCPM(args.model_path)
        elif "lfm" in args.model_path.lower():
            model = LFM2(args.model_path)
        model_load_end = time.time()
        GPU_MEMORY_USAGE = f"{torch.cuda.memory_allocated(0)/1024**2:.2f} MB" if torch.cuda.is_available() else "N/A"
        logging.info(f"Model loaded in {model_load_end - model_load_start:.2f} seconds")
        logging.info(f"GPU Memory Usage after model load: {GPU_MEMORY_USAGE}")
        # 处理视频
        total_output = {}
        for video_path in tqdm(os.listdir(args.video_dir)):
            result = process_video(
                model=model,
                video_path=os.path.join(args.video_dir, video_path),
                prompt=args.prompt,
                sampling_method_str=args.sampling_method,
                sampling_rate=args.sampling_rate
            )
            total_output[video_path] = result
            # 保存结果到文件
            output_filename = f"{OUTPUT_DIR}/{start_time}.json"
            with open(output_filename, 'w', encoding='utf-8') as f:
                json.dump(total_output, f, ensure_ascii=False, indent=2)
            
            print(f"处理完成！结果已保存到: {output_filename}")
            print(f"推理时间: {result['inference_time']:.2f} 秒")
            print(f"生成的内容: {result.get('generated_text', 'N/A')}")
        
    except Exception as e:
        logging.error(f"处理失败: {str(e)}", exc_info=True)
        print(f"处理失败: {str(e)}")


if __name__ == "__main__":
    main()