import os

from PIL import Image


def calculate_image_tokens(image_path, detail="high"):
    """Estimate the token cost of one image under tile-based accounting.

    Args:
        image_path: Path of the image file; it is opened only to read its
            pixel dimensions.
        detail: ``"low"`` charges a flat 85 tokens without opening the file.
            Any other value is treated as high detail.

    Returns:
        The estimated token count: 85 for low detail, otherwise
        ``85 + 170 * tiles`` where ``tiles`` is the number of 512px tiles
        covering the resized image.
    """
    if detail == "low":
        return 85

    with Image.open(image_path) as img:
        width, height = img.size

    return 85 + _count_high_detail_tiles(width, height) * 170


def _count_high_detail_tiles(width, height):
    """Return how many 512px tiles cover a width x height image after resizing."""
    # Step 1: shrink (never enlarge) so the image fits in a 2048x2048 square.
    # Without this step very elongated images are heavily over-counted.
    long_side = max(width, height)
    if long_side > 2048:
        # Integer floor division avoids float rounding; clamp so an extreme
        # aspect ratio cannot collapse a side to zero pixels.
        width = max(1, width * 2048 // long_side)
        height = max(1, height * 2048 // long_side)

    # Step 2: shrink so the short side is 768px; smaller images are left as-is.
    short_side = min(width, height)
    if short_side >= 768:
        width = max(1, width * 768 // short_side)
        height = max(1, height * 768 // short_side)

    # Step 3: ceil-divide each side by the 512px tile edge.
    tiles_wide = (width + 511) // 512
    tiles_high = (height + 511) // 512
    return tiles_wide * tiles_high


def calculate_folder_tokens(folder_path, detail="high"):
    """Sum the estimated token cost of every image directly inside a folder.

    Only regular files whose name ends (case-insensitively) in .png, .jpg,
    .jpeg, .gif or .webp are counted; sub-folders are not descended into.

    Args:
        folder_path: Directory to scan.
        detail: Detail mode forwarded to ``calculate_image_tokens``.

    Returns:
        The total token count, or 0 when the folder holds no matching files.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
    total = 0
    with os.scandir(folder_path) as entries:
        for entry in entries:
            if not entry.name.lower().endswith(image_suffixes):
                continue
            # A directory named like an image (e.g. "thumbs.png") cannot be
            # opened as an image, so skip anything that is not a file.
            if not entry.is_file():
                continue
            total += calculate_image_tokens(entry.path, detail)
    return total


# Folder of images to measure and the text report to produce.
folder_path = "/mnt/data/users/zys/proj/vlm_reasoning/dataset/data/EmbSpatial_sft"
output_file = "Embs.txt"

# Total estimated cost of every image in the folder, in high-detail mode.
total_tokens = calculate_folder_tokens(folder_path, detail="high")

# Write the report: the scanned folder, the total, and the counting rules.
with open(output_file, "w", encoding="utf-8") as f:
    f.write(f"图片文件夹路径: {folder_path}\n")
    f.write(f"总Token数量: {total_tokens}\n")
    f.write("计算规则:\n")
    f.write("- 高质量模式:基础 85 Tokens + 每区块 170 Tokens\n")
    f.write("- 低质量模式:固定 85 Tokens\n")

print(f"结果已保存至 {output_file}")