File size: 2,366 Bytes
391f8df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from bitstring import BitArray
import os

def compare_bits(file1, file2, max_diff_display=20):
    """
    Compare two files bit by bit and summarise where they differ.

    Both files are opened in binary mode and streamed in fixed-size chunks.
    Bits are numbered from 0, starting at the most significant bit of the
    first byte. Only the overlapping prefix (the length of the shorter file)
    is compared; the surplus of the longer file is reported via
    ``extra_bits`` and is not counted in ``differing_bits``.

    :param file1: path of the first file
    :param file2: path of the second file
    :param max_diff_display: maximum number of differing bit positions to
        record in ``diff_positions`` (the total count is never capped)
    :return: dict with the keys ``total_bits_file1``, ``total_bits_file2``,
        ``differing_bits``, ``diff_positions``, ``bit_length_mismatch`` and
        ``extra_bits``
    :raises OSError: if either file cannot be opened or read
    """
    chunk_size = 64 * 1024
    size1 = size2 = 0  # bytes consumed from each file so far
    differing_bits = 0
    diff_positions = []

    with open(file1, 'rb') as stream1, open(file2, 'rb') as stream2:
        while True:
            # A buffered binary read only returns a short chunk at EOF, so the
            # two streams stay byte-aligned until the shorter file runs out.
            chunk1 = stream1.read(chunk_size)
            chunk2 = stream2.read(chunk_size)
            if not chunk1 and not chunk2:
                break

            overlap = min(len(chunk1), len(chunk2))
            # Cheap bulk comparison first: identical chunks need no per-byte work.
            if overlap and chunk1[:overlap] != chunk2[:overlap]:
                base_bit = min(size1, size2) * 8
                for offset, (byte1, byte2) in enumerate(zip(chunk1, chunk2)):
                    delta = byte1 ^ byte2  # set bits mark the differing bits
                    if not delta:
                        continue
                    differing_bits += bin(delta).count('1')
                    # Walk the byte from its most significant bit so positions
                    # are appended in ascending order.
                    for bit in range(8):
                        if len(diff_positions) >= max_diff_display:
                            break
                        if delta & (0x80 >> bit):
                            diff_positions.append(base_bit + offset * 8 + bit)

            size1 += len(chunk1)
            size2 += len(chunk2)

    len1, len2 = size1 * 8, size2 * 8

    return {
        'total_bits_file1': len1,
        'total_bits_file2': len2,
        'differing_bits': differing_bits,
        'diff_positions': diff_positions,
        'bit_length_mismatch': len1 != len2,
        # Zero when the lengths match, so no separate branch is needed.
        'extra_bits': abs(len1 - len2),
    }

def print_diff_report(diff_stats):
    """
    Print a human-readable report of a bit comparison to stdout.

    :param diff_stats: statistics dict carrying the keys
        ``total_bits_file1``, ``total_bits_file2``, ``bit_length_mismatch``,
        ``extra_bits`` (read only when the lengths mismatch),
        ``differing_bits`` and ``diff_positions`` (read only when at least
        one bit differs)
    """
    # Section 1: total bit length of each file.
    print("Bit长度比较:")
    for label, key in (("File1", 'total_bits_file1'), ("File2", 'total_bits_file2')):
        print(f"  {label}: {diff_stats[key]} bits")

    # Section 2: warn when the files are not the same size.
    if diff_stats['bit_length_mismatch']:
        surplus = diff_stats['extra_bits']
        print(f"\n! 文件长度不一致,相差 {surplus} bits")

    # Section 3: overall count, followed by the recorded positions if any.
    total_diffs = diff_stats['differing_bits']
    print(f"\n差异bit总数: {total_diffs}")

    if total_diffs > 0:
        recorded = diff_stats['diff_positions']
        print(f"\n前 {len(recorded)} 个差异位置 (0-based):")
        for bit_index in recorded:
            print(f"  Bit位置 {bit_index}")

if __name__ == "__main__":
    # Command-line entry point: compare two files and print the report.
    # Usage: script [file1 [file2]]. Either path may be omitted, in which
    # case the original sample paths below are used.
    import sys

    cli_paths = sys.argv[1:]
    file1 = cli_paths[0] if len(cli_paths) >= 1 else "../malwares/generated_malware"
    file2 = cli_paths[1] if len(cli_paths) >= 2 else "../malwares/generated_malware_extracted"

    # Compare the two files.
    diff_stats = compare_bits(file1, file2)

    # Print the report.
    print_diff_report(diff_stats)

    # Advanced usage: read values straight from the returned statistics dict.
    print("\n高级访问:")
    print(f"总差异bit数: {diff_stats['differing_bits']}")
    if diff_stats['differing_bits'] > 0:
        print(f"第一个差异位置: {diff_stats['diff_positions'][0]}")