# Source: tools/utils/cost/p_cost.py
# Uploaded by Adinosaur via huggingface_hub (commit 1c980b1, verified)
import os
from PIL import Image
def calculate_image_tokens(image_path, detail="high"):
    """Estimate the OpenAI vision token cost of a single image file.

    Args:
        image_path: Path to an image file readable by PIL.
        detail: "low" for the flat low-detail cost; anything else
            (default "high") uses the tiled high-detail calculation.

    Returns:
        int: estimated token count for the image.
    """
    if detail == "low":
        return 85  # Low-detail mode is a flat 85 tokens regardless of size.

    # High-detail mode: base 85 tokens + 170 tokens per 512px tile.
    with Image.open(image_path) as img:
        width, height = img.size

    # Step 1 (per OpenAI's documented algorithm): first scale the image to
    # fit within a 2048x2048 square, preserving aspect ratio.  The original
    # code skipped this cap, which over-counts tiles for very elongated
    # images (e.g. a 100x4000 strip was billed 8 tiles instead of 4).
    longest_side = max(width, height)
    if longest_side > 2048:
        scale = 2048 / longest_side
        width = int(width * scale)
        height = int(height * scale)

    # Step 2: if the shortest side still exceeds 768px, shrink so the
    # shortest side becomes exactly 768px; smaller images are left as-is.
    shortest_side = min(width, height)
    if shortest_side > 768:
        scale = 768 / shortest_side
        width = int(width * scale)
        height = int(height * scale)

    # Step 3: count 512x512 tiles (ceiling division on each axis).
    tiles_w = (width + 511) // 512
    tiles_h = (height + 511) // 512
    total_tiles = tiles_w * tiles_h

    # Total = base 85 + 170 per tile.
    return 85 + (total_tiles * 170)
def calculate_folder_tokens(folder_path, detail="high"):
    """Sum the estimated token cost of every image file in *folder_path*.

    Only files with a recognized image extension (case-insensitive) are
    counted; everything else in the directory is ignored.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
    return sum(
        calculate_image_tokens(os.path.join(folder_path, entry), detail)
        for entry in os.listdir(folder_path)
        if entry.lower().endswith(image_exts)
    )
# --- Example usage ---------------------------------------------------------
if __name__ == "__main__":
    # Guarded so importing this module no longer triggers a filesystem scan
    # against the hard-coded dataset path below.
    folder_path = "/mnt/data/users/zys/proj/vlm_reasoning/dataset/data/EmbSpatial_sft"
    output_file = "Embs.txt"

    total_tokens = calculate_folder_tokens(folder_path, detail="high")

    # Persist the total plus a short reminder of the pricing rules used.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(f"图片文件夹路径: {folder_path}\n")
        f.write(f"总Token数量: {total_tokens}\n")
        f.write("计算规则:\n")
        f.write("- 高质量模式:基础 85 Tokens + 每区块 170 Tokens\n")
        f.write("- 低质量模式:固定 85 Tokens\n")
    print(f"结果已保存至 {output_file}")