"""Estimate vision-model token cost for images using the 512px-tile model.

Low-detail images cost a flat 85 tokens.  High-detail images cost 85 base
tokens plus 170 tokens per 512x512 tile of the (down)scaled image.
"""

import os

# Pricing / geometry constants of the tile-based cost model.
BASE_TOKENS = 85          # flat cost of every image (and total cost in "low" mode)
TOKENS_PER_TILE = 170     # cost of each 512px tile in "high" mode
TILE_SIZE = 512           # tile edge length in pixels
MAX_LONG_SIDE = 2048      # image is first fitted inside a 2048 x 2048 square
TARGET_SHORT_SIDE = 768   # then the short side is scaled down to 768px

# File suffixes (compared case-insensitively) that are treated as images.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')

DEFAULT_FOLDER_PATH = "/mnt/data/users/zys/proj/vlm_reasoning/dataset/data/EmbSpatial_sft"
DEFAULT_OUTPUT_FILE = "Embs.txt"


def calculate_tokens_for_size(width: int, height: int, detail: str = "high") -> int:
    """Return the token cost of an image with the given pixel dimensions.

    Args:
        width: Image width in pixels (must be positive in high-detail mode).
        height: Image height in pixels (must be positive in high-detail mode).
        detail: ``"low"`` for the flat-rate mode; any other value is treated
            as high detail.

    Returns:
        ``85`` in low-detail mode, otherwise ``85 + 170 * tiles``.

    Raises:
        ValueError: If a dimension is not positive in high-detail mode.
    """
    if detail == "low":
        return BASE_TOKENS

    if width <= 0 or height <= 0:
        raise ValueError(f"image dimensions must be positive, got {width}x{height}")

    # Step 1: fit inside a 2048 x 2048 square, keeping the aspect ratio.
    # Integer arithmetic avoids float truncation errors such as
    # int(767.9999999) == 767; max(1, ...) keeps extreme aspect ratios from
    # collapsing a side to zero pixels (which would yield zero tiles).
    long_side = max(width, height)
    if long_side > MAX_LONG_SIDE:
        width = max(1, width * MAX_LONG_SIDE // long_side)
        height = max(1, height * MAX_LONG_SIDE // long_side)

    # Step 2: scale down so the short side is 768px.  Images whose short side
    # is already <= 768px are left as they are (never upscaled).
    short_side = min(width, height)
    if short_side > TARGET_SHORT_SIDE:
        width = width * TARGET_SHORT_SIDE // short_side
        height = height * TARGET_SHORT_SIDE // short_side

    # Step 3: count 512px tiles, rounding up in each direction.
    tiles_wide = (width + TILE_SIZE - 1) // TILE_SIZE
    tiles_high = (height + TILE_SIZE - 1) // TILE_SIZE

    return BASE_TOKENS + tiles_wide * tiles_high * TOKENS_PER_TILE


def calculate_image_tokens(image_path, detail="high"):
    """Return the token cost of the image stored at ``image_path``.

    Args:
        image_path: Path of the image file.  Not opened in low-detail mode.
        detail: ``"low"`` for the flat 85-token mode; any other value is
            treated as high detail.

    Returns:
        The token count as an ``int``.
    """
    if detail == "low":
        # Flat rate: the file does not need to be read at all.
        return BASE_TOKENS

    # Imported here so the module (and the low-detail path) stays usable
    # when Pillow is not installed.
    from PIL import Image

    with Image.open(image_path) as img:
        width, height = img.size

    return calculate_tokens_for_size(width, height, detail)


def calculate_folder_tokens(folder_path, detail="high"):
    """Return the summed token cost of every image file directly in a folder.

    Only regular files whose name ends with one of ``IMAGE_EXTENSIONS``
    (case-insensitive) are counted; sub-directories are not descended into.

    Args:
        folder_path: Directory to scan.
        detail: Detail mode forwarded to :func:`calculate_image_tokens`.

    Returns:
        Total token count (``0`` when the folder holds no images).
    """
    total = 0
    for filename in os.listdir(folder_path):
        if not filename.lower().endswith(IMAGE_EXTENSIONS):
            continue
        path = os.path.join(folder_path, filename)
        # Skip directories that merely happen to be named like an image.
        if not os.path.isfile(path):
            continue
        total += calculate_image_tokens(path, detail)
    return total


def main(folder_path=DEFAULT_FOLDER_PATH, output_file=DEFAULT_OUTPUT_FILE):
    """Compute the high-detail token total for a folder and write a report.

    Args:
        folder_path: Directory containing the images to measure.
        output_file: Path of the UTF-8 text report to (over)write.
    """
    total_tokens = calculate_folder_tokens(folder_path, detail="high")

    # Save the result to the report file.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(f"图片文件夹路径: {folder_path}\n")
        f.write(f"总Token数量: {total_tokens}\n")
        f.write("计算规则:\n")
        f.write("- 高质量模式:基础 85 Tokens + 每区块 170 Tokens\n")
        f.write("- 低质量模式:固定 85 Tokens\n")

    print(f"结果已保存至 {output_file}")


# Usage example: only runs when executed as a script, not on import.
if __name__ == "__main__":
    main()