def analyze_jsonl_size(input_path, *, limit_bytes: int = 1024 * 1024) -> dict:
    """Report how many lines of a JSONL file are larger than a byte limit.

    Each line of the file is treated as one object. Its size is the number
    of bytes the line occupies on disk, line terminator included.

    A short summary is printed to stdout: the number of oversized objects
    and, when there is at least one, their average size and the size of the
    largest line, both in MB.

    Args:
        input_path: Path of the JSONL file to scan.
        limit_bytes: Size threshold in bytes; a line is "oversized" when it
            is strictly larger than this. Defaults to 1 MB (1024 * 1024).

    Returns:
        A dict with three integer entries:
        ``over_count`` (number of oversized lines),
        ``total_over_size`` (sum of their sizes in bytes) and
        ``max_size`` (size in bytes of the largest line in the file,
        0 for an empty file).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    mib = 1024 * 1024
    over_count = 0
    total_over_size = 0  # combined size of all oversized lines, in bytes
    max_size = 0  # largest line seen so far, in bytes

    # Binary mode measures the bytes exactly as stored: no newline
    # translation (text mode turns "\r\n" into "\n" and also splits on a
    # lone "\r"), no decode/re-encode round trip that duplicates every
    # multi-megabyte line in memory, and no UnicodeDecodeError aborting the
    # scan on a single malformed byte.
    with open(input_path, 'rb') as file:
        for raw_line in file:
            current_size = len(raw_line)
            if current_size > max_size:
                max_size = current_size
            if current_size > limit_bytes:
                over_count += 1
                total_over_size += current_size

    # Whole-MB limits are rendered as e.g. "1MB" so the default output is
    # unchanged; any other limit is shown in plain bytes.
    if limit_bytes % mib == 0:
        limit_label = f"{limit_bytes // mib}MB"
    else:
        limit_label = f"{limit_bytes}B"

    print(f"Number of objects exceeding {limit_label}: {over_count}")
    if over_count > 0:
        avg_size_mb = (total_over_size / over_count) / mib
        max_size_mb = max_size / mib
        print(f"Average size of oversized objects: {avg_size_mb:.2f} MB")
        print(f"Largest object size: {max_size_mb:.2f} MB")
    else:
        print(f"No objects exceed the {limit_label} limit.")

    return {
        "over_count": over_count,
        "total_over_size": total_over_size,
        "max_size": max_size,
    }
# Example invocation. Guarded so that importing this module does not trigger
# a scan of a machine-specific path as a side effect.
if __name__ == "__main__":
    analyze_jsonl_size('/mnt/data/users/zys/proj/vlm_reasoning/request/vlm_batch_requests.jsonl')