MashiroLn committed on
Commit
7442a76
·
verified ·
1 Parent(s): 4f4e23f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. apps/text_tools.py +92 -28
apps/text_tools.py CHANGED
@@ -39,18 +39,21 @@ def calculate_qwen2_vl_tokens(text, images, videos, tokenizer):
39
  """
40
  Qwen2-VL / Qwen2.5-VL Token 计算公式
41
  """
42
- total_tokens = 0
 
 
43
 
44
  # 1. 文本 Token (真实计算)
45
- text_tokens = []
46
  if tokenizer:
47
- text_tokens = tokenizer.encode(text)
48
- total_tokens += len(text_tokens)
49
  else:
50
  # Fallback
51
- total_tokens += len(text) // 2
52
 
53
  # 2. 图片 Token
 
54
  for img in images:
55
  width, height = img['width'], img['height']
56
  new_w = int(round(width / 28.0) * 28)
@@ -58,9 +61,16 @@ def calculate_qwen2_vl_tokens(text, images, videos, tokenizer):
58
  grid_w = new_w // 14
59
  grid_h = new_h // 14
60
  img_tokens = grid_h * grid_w
61
- total_tokens += img_tokens
 
 
 
 
 
 
62
 
63
  # 3. 视频 Token
 
64
  for vid in videos:
65
  frames = vid['frames']
66
  width, height = vid['width'], vid['height']
@@ -69,25 +79,48 @@ def calculate_qwen2_vl_tokens(text, images, videos, tokenizer):
69
  grid_w = new_w // 14
70
  grid_h = new_h // 14
71
  frame_tokens = grid_h * grid_w
72
- total_tokens += frames * frame_tokens
 
 
 
 
 
 
 
 
73
 
74
- return total_tokens, text_tokens
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  def calculate_llava_next_tokens(text, images, tokenizer):
77
  """
78
  Llava-1.6 (Next) Token 计算公式
79
  """
80
- total_tokens = 0
 
81
 
82
  # 1. 文本 Token
83
- text_tokens = []
84
  if tokenizer:
85
- text_tokens = tokenizer.encode(text)
86
- total_tokens += len(text_tokens)
87
  else:
88
- total_tokens += len(text) // 2
89
 
90
  # 2. 图片 Token
 
91
  for img in images:
92
  width, height = img['width'], img['height']
93
  scale_res = 336
@@ -95,9 +128,28 @@ def calculate_llava_next_tokens(text, images, tokenizer):
95
  patch_y = math.ceil(height / scale_res)
96
  num_patches = patch_x * patch_y
97
  img_tokens = (num_patches + 1) * 576
98
- total_tokens += img_tokens
99
 
100
- return total_tokens, text_tokens
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  # --- 实际 UI 逻辑 ---
103
 
@@ -108,19 +160,25 @@ def run_calculation(text, model, img_count, img_w, img_h, vid_count, vid_frames,
108
 
109
  # 获取 Tokenizer
110
  tokenizer = get_tokenizer(model)
111
- tokenizer_status = "✅ 已加载真实 Tokenizer" if tokenizer else "⚠️ Tokenizer 加载失败,使用估算值"
 
 
 
 
 
 
112
 
113
  text_tokens_ids = []
 
 
 
114
 
115
  if model == "Qwen2.5-VL / Qwen2-VL":
116
- tokens, text_tokens_ids = calculate_qwen2_vl_tokens(text, images, videos, tokenizer)
117
- info = "Qwen2-VL 使用 Naive Dynamic Resolution (patch 14x14)。\n图片会被 resize 为 28 的倍数。"
118
  elif model == "Llava-1.6 (Next)":
119
- tokens, text_tokens_ids = calculate_llava_next_tokens(text, images, tokenizer)
120
- info = "Llava-1.6 使用 AnyRes 技术 (base 336x336)。\n包含 Base Image + Grid Patches。"
121
  else:
122
  tokens = 0
123
- info = "未知模型"
124
 
125
  # 生成 Token 对应文件
126
  token_file_path = None
@@ -135,13 +193,19 @@ def run_calculation(text, model, img_count, img_w, img_h, vid_count, vid_frames,
135
  with open(token_file_path, "w", encoding="utf-8") as f:
136
  json.dump({"text": text, "tokens": token_data}, f, ensure_ascii=False, indent=2)
137
 
138
- return {
139
- "总 Token 数": tokens,
140
- "自然语言字符数": len(text),
141
- "Tokenizer 状态": tokenizer_status,
142
- "模型": model,
143
- "说明": info
144
- }, token_file_path
 
 
 
 
 
 
145
 
146
  def create_ui():
147
  with gr.Row():
 
39
  """
40
  Qwen2-VL / Qwen2.5-VL Token 计算公式
41
  """
42
+ text_tokens_count = 0
43
+ image_tokens_count = 0
44
+ video_tokens_count = 0
45
 
46
  # 1. 文本 Token (真实计算)
47
+ text_tokens_ids = []
48
  if tokenizer:
49
+ text_tokens_ids = tokenizer.encode(text)
50
+ text_tokens_count = len(text_tokens_ids)
51
  else:
52
  # Fallback
53
+ text_tokens_count = len(text) // 2
54
 
55
  # 2. 图片 Token
56
+ image_details = []
57
  for img in images:
58
  width, height = img['width'], img['height']
59
  new_w = int(round(width / 28.0) * 28)
 
61
  grid_w = new_w // 14
62
  grid_h = new_h // 14
63
  img_tokens = grid_h * grid_w
64
+
65
+ image_tokens_count += img_tokens
66
+ image_details.append({
67
+ "original_size": [width, height],
68
+ "resized_size": [new_w, new_h],
69
+ "tokens": img_tokens
70
+ })
71
 
72
  # 3. 视频 Token
73
+ video_details = []
74
  for vid in videos:
75
  frames = vid['frames']
76
  width, height = vid['width'], vid['height']
 
79
  grid_w = new_w // 14
80
  grid_h = new_h // 14
81
  frame_tokens = grid_h * grid_w
82
+
83
+ vid_total = frames * frame_tokens
84
+ video_tokens_count += vid_total
85
+ video_details.append({
86
+ "original_size": [width, height],
87
+ "resized_size": [new_w, new_h],
88
+ "frames": frames,
89
+ "tokens": vid_total
90
+ })
91
 
92
+ total_tokens = text_tokens_count + image_tokens_count + video_tokens_count
93
+
94
+ breakdown = {
95
+ "text_tokens": text_tokens_count,
96
+ "image_tokens": image_tokens_count,
97
+ "video_tokens": video_tokens_count
98
+ }
99
+
100
+ media_details = {
101
+ "images": image_details,
102
+ "videos": video_details
103
+ }
104
+
105
+ return total_tokens, text_tokens_ids, breakdown, media_details
106
 
107
  def calculate_llava_next_tokens(text, images, tokenizer):
108
  """
109
  Llava-1.6 (Next) Token 计算公式
110
  """
111
+ text_tokens_count = 0
112
+ image_tokens_count = 0
113
 
114
  # 1. 文本 Token
115
+ text_tokens_ids = []
116
  if tokenizer:
117
+ text_tokens_ids = tokenizer.encode(text)
118
+ text_tokens_count = len(text_tokens_ids)
119
  else:
120
+ text_tokens_count = len(text) // 2
121
 
122
  # 2. 图片 Token
123
+ image_details = []
124
  for img in images:
125
  width, height = img['width'], img['height']
126
  scale_res = 336
 
128
  patch_y = math.ceil(height / scale_res)
129
  num_patches = patch_x * patch_y
130
  img_tokens = (num_patches + 1) * 576
 
131
 
132
+ image_tokens_count += img_tokens
133
+ image_details.append({
134
+ "original_size": [width, height],
135
+ "resized_size": ["Dynamic Grid", f"{patch_x}x{patch_y} patches"],
136
+ "tokens": img_tokens
137
+ })
138
+
139
+ total_tokens = text_tokens_count + image_tokens_count
140
+
141
+ breakdown = {
142
+ "text_tokens": text_tokens_count,
143
+ "image_tokens": image_tokens_count,
144
+ "video_tokens": 0
145
+ }
146
+
147
+ media_details = {
148
+ "images": image_details,
149
+ "videos": []
150
+ }
151
+
152
+ return total_tokens, text_tokens_ids, breakdown, media_details
153
 
154
  # --- 实际 UI 逻辑 ---
155
 
 
160
 
161
  # 获取 Tokenizer
162
  tokenizer = get_tokenizer(model)
163
+
164
+ # 确定真实模型 ID
165
+ model_id_map = {
166
+ "Qwen2.5-VL / Qwen2-VL": "Qwen/Qwen2.5-VL-7B-Instruct",
167
+ "Llava-1.6 (Next)": "llava-hf/llava-v1.6-vicuna-7b-hf"
168
+ }
169
+ real_model_id = model_id_map.get(model, model)
170
 
171
  text_tokens_ids = []
172
+ breakdown = {}
173
+ media_details = {}
174
+ tokens = 0
175
 
176
  if model == "Qwen2.5-VL / Qwen2-VL":
177
+ tokens, text_tokens_ids, breakdown, media_details = calculate_qwen2_vl_tokens(text, images, videos, tokenizer)
 
178
  elif model == "Llava-1.6 (Next)":
179
+ tokens, text_tokens_ids, breakdown, media_details = calculate_llava_next_tokens(text, images, tokenizer)
 
180
  else:
181
  tokens = 0
 
182
 
183
  # 生成 Token 对应文件
184
  token_file_path = None
 
193
  with open(token_file_path, "w", encoding="utf-8") as f:
194
  json.dump({"text": text, "tokens": token_data}, f, ensure_ascii=False, indent=2)
195
 
196
+ # 构造最终返回的 JSON
197
+ result = {
198
+ "model_id": real_model_id,
199
+ "tokenizer_loaded": tokenizer is not None,
200
+ "total_tokens": tokens,
201
+ "breakdown": breakdown,
202
+ "text_stats": {
203
+ "char_count": len(text)
204
+ },
205
+ "media_details": media_details
206
+ }
207
+
208
+ return result, token_file_path
209
 
210
  def create_ui():
211
  with gr.Row():