_BYTES_PER_MB = 1024 * 1024


def analyze_jsonl_size(input_path, *, limit_bytes: int = _BYTES_PER_MB) -> dict:
    """Report how many lines of a JSONL file exceed a byte-size limit.

    Each line of the file is treated as one object. A line's size is the
    number of bytes it occupies in the file, including its line terminator.
    A summary is printed to stdout and the same figures are returned.

    Args:
        input_path: Path of the JSONL file to scan.
        limit_bytes: Size threshold in bytes; lines strictly larger than
            this are counted as oversized. Defaults to 1 MB.

    Returns:
        A dict with three integer entries:
        ``over_count`` (number of oversized lines),
        ``total_over_size`` (sum of their sizes in bytes) and
        ``max_size`` (size in bytes of the largest oversized line,
        0 when there is none).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    over_count = 0
    total_over_size = 0  # combined size of oversized lines, in bytes
    max_size = 0  # largest oversized line, in bytes

    # Binary mode measures the bytes actually stored in the file: text mode
    # would translate "\r\n" to "\n" (under-counting) and force a pointless
    # decode/encode round trip for every line.
    with open(input_path, 'rb') as file:
        for raw_line in file:
            current_size = len(raw_line)
            if current_size > limit_bytes:
                over_count += 1
                total_over_size += current_size
                max_size = max(max_size, current_size)

    # ":g" renders the default limit as "1MB", keeping the original wording.
    limit_label = f"{limit_bytes / _BYTES_PER_MB:g}MB"

    # Print the summary.
    print(f"Number of objects exceeding {limit_label}: {over_count}")
    if over_count > 0:
        avg_size_mb = (total_over_size / over_count) / _BYTES_PER_MB
        max_size_mb = max_size / _BYTES_PER_MB
        print(f"Average size of oversized objects: {avg_size_mb:.2f} MB")
        print(f"Largest object size: {max_size_mb:.2f} MB")
    else:
        print(f"No objects exceed the {limit_label} limit.")

    return {
        "over_count": over_count,
        "total_over_size": total_over_size,
        "max_size": max_size,
    }


if __name__ == "__main__":
    # Example invocation; guarded so importing this module has no side effects.
    analyze_jsonl_size('/mnt/data/users/zys/proj/vlm_reasoning/request/vlm_batch_requests.jsonl')