"""Compare two files bit by bit and report where they differ."""

import os
from itertools import islice

from bitstring import BitArray


def compare_bits(file1, file2, max_diff_display=20):
    """Compare two files at bit granularity.

    Only the common prefix (the first ``min(len1, len2)`` bits) is compared
    bit by bit; any length difference is reported separately through
    ``bit_length_mismatch`` and ``extra_bits``.

    :param file1: path of the first file
    :param file2: path of the second file
    :param max_diff_display: maximum number of differing bit positions to
        record in ``diff_positions`` (values <= 0 record none)
    :return: dict with the keys ``total_bits_file1``, ``total_bits_file2``,
        ``differing_bits``, ``diff_positions`` (0-based bit offsets, in
        ascending order), ``bit_length_mismatch`` and ``extra_bits``
    """
    # Load both files as bit arrays.
    bits1 = BitArray(filename=file1)
    bits2 = BitArray(filename=file2)

    len1, len2 = len(bits1), len(bits2)
    min_len = min(len1, len2)

    differing_bits = 0
    diff_positions = []
    if min_len:
        # XOR of the common prefix has a 1 exactly where the files differ.
        # This replaces a per-bit Python loop with library-level operations.
        delta = bits1[:min_len] ^ bits2[:min_len]
        differing_bits = delta.count(1)
        # findall() is lazy, so islice() stops the scan once enough
        # positions were collected.
        diff_positions = list(
            islice(
                delta.findall('0b1', bytealigned=False),
                max(max_diff_display, 0),
            )
        )

    return {
        'total_bits_file1': len1,
        'total_bits_file2': len2,
        'differing_bits': differing_bits,
        'diff_positions': diff_positions,
        'bit_length_mismatch': len1 != len2,
        # Number of trailing bits present in only one file (0 if equal).
        'extra_bits': abs(len1 - len2),
    }


def print_diff_report(diff_stats):
    """Print a human-readable report for a dict returned by compare_bits().

    :param diff_stats: statistics dict as produced by :func:`compare_bits`
    """
    print("Bit长度比较:")
    print(f"  File1: {diff_stats['total_bits_file1']} bits")
    print(f"  File2: {diff_stats['total_bits_file2']} bits")

    if diff_stats['bit_length_mismatch']:
        print(f"\n! 文件长度不一致,相差 {diff_stats['extra_bits']} bits")

    print(f"\n差异bit总数: {diff_stats['differing_bits']}")

    if diff_stats['differing_bits'] > 0:
        print(f"\n前 {len(diff_stats['diff_positions'])} 个差异位置 (0-based):")
        for pos in diff_stats['diff_positions']:
            print(f"  Bit位置 {pos}")


def main():
    """Run the example comparison and print its report."""
    # Example usage.
    file1 = "../malwares/generated_malware"
    file2 = "../malwares/generated_malware_extracted"

    # Compare the files.
    diff_stats = compare_bits(file1, file2)

    # Print the report.
    print_diff_report(diff_stats)

    # Advanced usage: access the raw difference data directly.
    print("\n高级访问:")
    print(f"总差异bit数: {diff_stats['differing_bits']}")
    # Guard on the list itself so an empty position list can never raise.
    if diff_stats['diff_positions']:
        print(f"第一个差异位置: {diff_stats['diff_positions'][0]}")


if __name__ == "__main__":
    main()