import os
from PIL import Image

def _high_detail_tokens(width, height):
    """Return the high-detail token cost for an image of ``width`` x ``height`` px.

    The image is first fitted inside a 2048 x 2048 square, then shrunk so its
    short side is at most 768 px, and finally covered with 512 px tiles.
    Integer arithmetic is used throughout so floating-point rounding can
    never shift a dimension across a tile boundary.
    """
    max_side = 2048      # bounding square applied before anything else
    short_target = 768   # short side after the second resize
    tile = 512           # edge length of one billing tile

    # Step 1: fit inside the bounding square, preserving the aspect ratio.
    long_side = max(width, height)
    if long_side > max_side:
        # max(1, ...) keeps extremely elongated images from collapsing to 0 px.
        width = max(1, width * max_side // long_side)
        height = max(1, height * max_side // long_side)

    # Step 2: shrink so the short side is 768 px; smaller images are not
    # upscaled.
    short_side = min(width, height)
    if short_side > short_target:
        width = width * short_target // short_side
        height = height * short_target // short_side

    # Step 3: count the tiles needed to cover the image (ceiling division).
    tiles_wide = (width + tile - 1) // tile
    tiles_high = (height + tile - 1) // tile

    # Total = base 85 tokens + 170 tokens per tile.
    return 85 + 170 * tiles_wide * tiles_high


def calculate_image_tokens(image_path, detail="high"):
    """Estimate how many tokens a single image costs.

    Args:
        image_path: Path of the image file. Only opened in high-detail mode.
        detail: ``"low"`` returns the flat 85-token cost; any other value is
            treated as high detail.

    Returns:
        The token count as an ``int``: 85 in low-detail mode, otherwise
        85 plus 170 for each 512 px tile of the resized image.

    Raises:
        Any exception raised by ``PIL.Image.open`` for a missing or
        unreadable file propagates to the caller (high-detail mode only).
    """
    if detail == "low":
        return 85  # low-detail mode is a fixed 85 tokens

    # High-detail mode: only the pixel dimensions are needed, so the file is
    # closed again as soon as the size has been read.
    with Image.open(image_path) as img:
        width, height = img.size

    return _high_detail_tokens(width, height)

def calculate_folder_tokens(folder_path, detail="high"):
    """Sum the token cost of every image file directly inside a folder.

    Entries are selected purely by file name: anything whose lower-cased name
    ends in .png, .jpg, .jpeg, .gif or .webp is counted. Sub-folders are not
    traversed.

    Args:
        folder_path: Directory whose entries are listed.
        detail: Passed through unchanged to ``calculate_image_tokens``.

    Returns:
        The combined token count, or 0 when no entry matches.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
    return sum(
        calculate_image_tokens(os.path.join(folder_path, entry), detail)
        for entry in os.listdir(folder_path)
        if entry.lower().endswith(image_suffixes)
    )

# Example usage: total the tokens for one dataset folder and write a report.
folder_path = "/mnt/data/users/zys/proj/vlm_reasoning/dataset/data/EmbSpatial_sft"
output_file = "Embs.txt"

total_tokens = calculate_folder_tokens(folder_path, detail="high")

# Assemble the report lines first, then write them to the file in one call.
report_lines = [
    f"图片文件夹路径: {folder_path}\n",
    f"总Token数量: {total_tokens}\n",
    "计算规则：\n",
    "- 高质量模式：基础 85 Tokens + 每区块 170 Tokens\n",
    "- 低质量模式：固定 85 Tokens\n",
]
with open(output_file, "w", encoding="utf-8") as f:
    f.writelines(report_lines)

print(f"结果已保存至 {output_file}")