Spaces:

MashiroLn
/

custom_toolbox

Running

App Files Community

MashiroLn committed 18 days ago

Commit

7442a76

verified ·

1 Parent(s): 4f4e23f

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

apps/text_tools.py +92 -28

apps/text_tools.py CHANGED Viewed

@@ -39,18 +39,21 @@ def calculate_qwen2_vl_tokens(text, images, videos, tokenizer):
     """
     Qwen2-VL / Qwen2.5-VL Token 计算公式
     """
-    total_tokens = 0
     # 1. 文本 Token (真实计算)
-    text_tokens = []
     if tokenizer:
-        text_tokens = tokenizer.encode(text)
-        total_tokens += len(text_tokens)
     else:
         # Fallback
-        total_tokens += len(text) // 2
     # 2. 图片 Token
     for img in images:
         width, height = img['width'], img['height']
         new_w = int(round(width / 28.0) * 28)
@@ -58,9 +61,16 @@ def calculate_qwen2_vl_tokens(text, images, videos, tokenizer):
         grid_w = new_w // 14
         grid_h = new_h // 14
         img_tokens = grid_h * grid_w
-        total_tokens += img_tokens
     # 3. 视频 Token
     for vid in videos:
         frames = vid['frames']
         width, height = vid['width'], vid['height']
@@ -69,25 +79,48 @@ def calculate_qwen2_vl_tokens(text, images, videos, tokenizer):
         grid_w = new_w // 14
         grid_h = new_h // 14
         frame_tokens = grid_h * grid_w
-        total_tokens += frames * frame_tokens
-    return total_tokens, text_tokens
 def calculate_llava_next_tokens(text, images, tokenizer):
     """
     Llava-1.6 (Next) Token 计算公式
     """
-    total_tokens = 0
     # 1. 文本 Token
-    text_tokens = []
     if tokenizer:
-        text_tokens = tokenizer.encode(text)
-        total_tokens += len(text_tokens)
     else:
-        total_tokens += len(text) // 2
     # 2. 图片 Token
     for img in images:
         width, height = img['width'], img['height']
         scale_res = 336
@@ -95,9 +128,28 @@ def calculate_llava_next_tokens(text, images, tokenizer):
         patch_y = math.ceil(height / scale_res)
         num_patches = patch_x * patch_y
         img_tokens = (num_patches + 1) * 576
-        total_tokens += img_tokens
-    return total_tokens, text_tokens
 # --- 实际 UI 逻辑 ---
@@ -108,19 +160,25 @@ def run_calculation(text, model, img_count, img_w, img_h, vid_count, vid_frames,
     # 获取 Tokenizer
     tokenizer = get_tokenizer(model)
-    tokenizer_status = "✅ 已加载真实 Tokenizer" if tokenizer else "⚠️ Tokenizer 加载失败，使用估算值"
     text_tokens_ids = []
     if model == "Qwen2.5-VL / Qwen2-VL":
-        tokens, text_tokens_ids = calculate_qwen2_vl_tokens(text, images, videos, tokenizer)
-        info = "Qwen2-VL 使用 Naive Dynamic Resolution (patch 14x14)。\n图片会被 resize 为 28 的倍数。"
     elif model == "Llava-1.6 (Next)":
-        tokens, text_tokens_ids = calculate_llava_next_tokens(text, images, tokenizer)
-        info = "Llava-1.6 使用 AnyRes 技术 (base 336x336)。\n包含 Base Image + Grid Patches。"
     else:
         tokens = 0
-        info = "未知模型"
     # 生成 Token 对应文件
     token_file_path = None
@@ -135,13 +193,19 @@ def run_calculation(text, model, img_count, img_w, img_h, vid_count, vid_frames,
         with open(token_file_path, "w", encoding="utf-8") as f:
             json.dump({"text": text, "tokens": token_data}, f, ensure_ascii=False, indent=2)
-    return {
-        "总 Token 数": tokens,
-        "自然语言字符数": len(text),
-        "Tokenizer 状态": tokenizer_status,
-        "模型": model,
-        "说明": info
-    }, token_file_path
 def create_ui():
     with gr.Row():

     """
     Qwen2-VL / Qwen2.5-VL Token 计算公式
     """
+    text_tokens_count = 0
+    image_tokens_count = 0
+    video_tokens_count = 0
     # 1. 文本 Token (真实计算)
+    text_tokens_ids = []
     if tokenizer:
+        text_tokens_ids = tokenizer.encode(text)
+        text_tokens_count = len(text_tokens_ids)
     else:
         # Fallback
+        text_tokens_count = len(text) // 2
     # 2. 图片 Token
+    image_details = []
     for img in images:
         width, height = img['width'], img['height']
         new_w = int(round(width / 28.0) * 28)
         grid_w = new_w // 14
         grid_h = new_h // 14
         img_tokens = grid_h * grid_w
+        image_tokens_count += img_tokens
+        image_details.append({
+            "original_size": [width, height],
+            "resized_size": [new_w, new_h],
+            "tokens": img_tokens
+        })
     # 3. 视频 Token
+    video_details = []
     for vid in videos:
         frames = vid['frames']
         width, height = vid['width'], vid['height']
         grid_w = new_w // 14
         grid_h = new_h // 14
         frame_tokens = grid_h * grid_w
+        vid_total = frames * frame_tokens
+        video_tokens_count += vid_total
+        video_details.append({
+            "original_size": [width, height],
+            "resized_size": [new_w, new_h],
+            "frames": frames,
+            "tokens": vid_total
+        })
+    total_tokens = text_tokens_count + image_tokens_count + video_tokens_count
+    breakdown = {
+        "text_tokens": text_tokens_count,
+        "image_tokens": image_tokens_count,
+        "video_tokens": video_tokens_count
+    }
+    media_details = {
+        "images": image_details,
+        "videos": video_details
+    }
+    return total_tokens, text_tokens_ids, breakdown, media_details
 def calculate_llava_next_tokens(text, images, tokenizer):
     """
     Llava-1.6 (Next) Token 计算公式
     """
+    text_tokens_count = 0
+    image_tokens_count = 0
     # 1. 文本 Token
+    text_tokens_ids = []
     if tokenizer:
+        text_tokens_ids = tokenizer.encode(text)
+        text_tokens_count = len(text_tokens_ids)
     else:
+        text_tokens_count = len(text) // 2
     # 2. 图片 Token
+    image_details = []
     for img in images:
         width, height = img['width'], img['height']
         scale_res = 336
         patch_y = math.ceil(height / scale_res)
         num_patches = patch_x * patch_y
         img_tokens = (num_patches + 1) * 576
+        image_tokens_count += img_tokens
+        image_details.append({
+            "original_size": [width, height],
+            "resized_size": ["Dynamic Grid", f"{patch_x}x{patch_y} patches"],
+            "tokens": img_tokens
+        })
+    total_tokens = text_tokens_count + image_tokens_count
+    breakdown = {
+        "text_tokens": text_tokens_count,
+        "image_tokens": image_tokens_count,
+        "video_tokens": 0
+    }
+    media_details = {
+        "images": image_details,
+        "videos": []
+    }
+    return total_tokens, text_tokens_ids, breakdown, media_details
 # --- 实际 UI 逻辑 ---
     # 获取 Tokenizer
     tokenizer = get_tokenizer(model)
+    # 确定真实模型 ID
+    model_id_map = {
+        "Qwen2.5-VL / Qwen2-VL": "Qwen/Qwen2.5-VL-7B-Instruct",
+        "Llava-1.6 (Next)": "llava-hf/llava-v1.6-vicuna-7b-hf"
+    }
+    real_model_id = model_id_map.get(model, model)
     text_tokens_ids = []
+    breakdown = {}
+    media_details = {}
+    tokens = 0
     if model == "Qwen2.5-VL / Qwen2-VL":
+        tokens, text_tokens_ids, breakdown, media_details = calculate_qwen2_vl_tokens(text, images, videos, tokenizer)
     elif model == "Llava-1.6 (Next)":
+        tokens, text_tokens_ids, breakdown, media_details = calculate_llava_next_tokens(text, images, tokenizer)
     else:
         tokens = 0
     # 生成 Token 对应文件
     token_file_path = None
         with open(token_file_path, "w", encoding="utf-8") as f:
             json.dump({"text": text, "tokens": token_data}, f, ensure_ascii=False, indent=2)
+    # 构造最终返回的 JSON
+    result = {
+        "model_id": real_model_id,
+        "tokenizer_loaded": tokenizer is not None,
+        "total_tokens": tokens,
+        "breakdown": breakdown,
+        "text_stats": {
+            "char_count": len(text)
+        },
+        "media_details": media_details
+    }
+    return result, token_file_path
 def create_ui():
     with gr.Row():