# File size: 3,646 Bytes
# 72c0672
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from difflib import Differ
import pandas as pd

# Example collision data
# Each entry is one collision case with four keys:
#   "colliding_token_sequence" - list of integers (presumably token ids; confirm against the producer)
#   "num_raw_variants"         - recorded number of raw variants
#   "raw_chunk_variants"       - the raw text strings recorded for the case
#   "levenshtein_analysis"     - a list of distances plus average/max/min summary fields
# NOTE(review): the first case is internally inconsistent (see the notes inside it);
# presumably it was trimmed by hand from a larger report - confirm before trusting the numbers.
collisions = [
    {
        "colliding_token_sequence": [265, 393, 320],
        # NOTE(review): 21 is recorded here, but only 19 strings are listed below.
        "num_raw_variants": 21,
        "raw_chunk_variants": [
            "\nif __name__ == '_", "\nif __n", "\nif __name__ == '__main",
            "\nif __name__ == '__main__'", "\nif __", "\nif _", "\nif __name__ ",
            "\nif __name__ =", "\nif __name__ == '__", "\nif __na", "\nif __name__",
            "\nif __name", "\nif __name__ == '__main__':", "\nif __name__ == '__main__':\n",
            "\nif __nam", "\nif __name_", "\nif __name__ == ", "\nif __name__ == '__ma",
            "\nif __name__ == '__main_"
        ],
        "levenshtein_analysis": {
            # NOTE(review): 20 values whose mean is 6.95 and whose maximum is 13;
            # the average_distance and max_distance fields below do not match them.
            "distances": [11, 5, 8, 12, 13, 5, 4, 1, 10, 6, 8, 9, 10, 9, 7, 2, 3, 6, 7, 3],
            "average_distance": 8.74,
            "max_distance": 23,
            "min_distance": 1
        }
    },
    {
        "colliding_token_sequence": [506, 354, 256],
        "num_raw_variants": 2,
        "raw_chunk_variants": [
            # The second string ends in U+FFFD (the Unicode replacement character);
            # presumably a character cut off mid-encoding - keep it byte-for-byte.
            "数据", "数�"
        ],
        "levenshtein_analysis": {
            "distances": [0, 1],
            "average_distance": 0.5,
            "max_distance": 1,
            "min_distance": 0
        }
    },
    {
        "colliding_token_sequence": [123, 456, 789],
        "num_raw_variants": 4,
        "raw_chunk_variants": [
            # Whitespace-sensitive strings: two of them differ only in trailing indentation.
            " } ", " }\r\n                ", "!", " }\r\n            "
        ],
        "levenshtein_analysis": {
            "distances": [2, 1, 4, 7],
            "average_distance": 3.5,
            "max_distance": 7,
            "min_distance": 1
        }
    }
]

# --- 1. Plot Text Diff (simplified) ---
def plot_text_diff(variants, title, save_path):
    """Render a line diff of the first two variants and save it as an image.

    Only ``variants[0]`` and ``variants[1]`` are compared; both are split with
    ``str.splitlines`` and fed to ``difflib.Differ``. Each diff line is drawn
    as one row of text, top to bottom, coloured by its first character:
    ``"+"`` red, ``"-"`` green, ``" "`` blue, anything else (Differ's ``"?"``
    hint lines) black.

    Args:
        variants: Sequence of strings holding at least two entries.
        title: Title drawn above the diff.
        save_path: Destination handed to ``Figure.savefig``.

    Raises:
        IndexError: If fewer than two variants are supplied.
    """
    if len(variants) < 2:
        raise IndexError("plot_text_diff needs at least two variants to compare")

    diff = list(Differ().compare(variants[0].splitlines(), variants[1].splitlines()))

    # NOTE(review): "+" red / "-" green is the reverse of the usual diff
    # colouring; kept as in the original - confirm whether it is intended.
    colors = {"+": "red", "-": "green", " ": "blue"}

    # Grow the figure with the diff (about 0.3in per row) so 12pt rows never
    # overlap, while keeping the original 8x6 size as the minimum.
    fig, ax = plt.subplots(figsize=(8, max(6, 0.3 * len(diff))))
    try:
        ax.set_title(title)
        # One data unit per row with y running downwards. Without this the
        # default 0..1 upward axis draws the rows bottom-to-top and places
        # every row after the second outside the axes.
        ax.set_ylim(max(len(diff), 1), 0)
        for row, line in enumerate(diff):
            ax.text(
                0,
                row,
                line,
                color=colors.get(line[0], "black"),
                fontsize=12,
                va="top",
                ha="left",
                # Differ's "?" hint lines point at columns of the line above,
                # which only lines up in a fixed-width font.
                family="monospace",
            )
        ax.axis("off")
        fig.savefig(save_path, bbox_inches="tight")
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)

# --- 2. Plot LCP Ratio for Variants ---
def plot_lcp_ratios(lcp_ratios, title, save_path):
    """Save a bar chart with one sky-blue bar per entry of *lcp_ratios*.

    Args:
        lcp_ratios: Sequence of bar heights; bar ``k`` is labelled with index ``k``.
        title: Chart title.
        save_path: Destination handed to ``savefig``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    variant_indices = [*range(len(lcp_ratios))]
    sns.barplot(x=variant_indices, y=lcp_ratios, color="skyblue", ax=ax)
    # Labels are applied after seaborn has drawn, exactly as before.
    ax.set_title(title)
    ax.set_xlabel('Variant Index')
    ax.set_ylabel('LCP Ratio')
    fig.savefig(save_path, bbox_inches="tight")
    plt.close(fig)

# --- 3. Levenshtein Distance Distribution ---
def plot_levenshtein_distances(distances, title, save_path):
    """Save a 10-bin histogram of *distances* with a KDE curve overlaid.

    Args:
        distances: Sequence of numeric distances to bin.
        title: Chart title.
        save_path: Destination handed to ``savefig``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.histplot(distances, bins=10, kde=True, color="salmon", ax=ax)
    # Set after plotting so the y label replaces the one seaborn assigns.
    ax.set(
        title=title,
        xlabel='Levenshtein Distance',
        ylabel='Frequency',
    )
    fig.savefig(save_path, bbox_inches="tight")
    plt.close(fig)

# Generate the three plots (text diff, LCP ratios, Levenshtein histogram)
# for every example collision case.
def _lcp_ratios(variants):
    """Return, per variant, the share of it covered by the prefix common to all.

    The longest common prefix (LCP) is measured in characters across *all*
    variants; each ratio is ``len(LCP) / len(variant)``, with 0.0 for an empty
    variant. An empty *variants* sequence yields an empty list.
    """
    prefix_len = 0
    # zip stops at the shortest variant, so the prefix can never overrun one.
    for chars in zip(*variants):
        if len(set(chars)) > 1:
            break
        prefix_len += 1
    return [prefix_len / len(variant) if variant else 0.0 for variant in variants]


for case_no, collision in enumerate(collisions, start=1):
    variants = collision["raw_chunk_variants"]
    distances = collision["levenshtein_analysis"]["distances"]

    plot_text_diff(
        variants,
        f"Text Difference Visualization Case {case_no}",
        f"text_diff_case{case_no}.png",
    )
    # Plot real LCP ratios computed from the variants. The previous code drew
    # the average Levenshtein distance repeated once per variant under an
    # "LCP Ratio" label.
    plot_lcp_ratios(
        _lcp_ratios(variants),
        f"LCP Ratio of Variants Case {case_no}",
        f"lcp_case{case_no}.png",
    )
    # The title carries the case number like the other two plots.
    plot_levenshtein_distances(
        distances,
        f"Levenshtein Distance Distribution Case {case_no}",
        f"levenshtein_case{case_no}.png",
    )

# Report once, after every case has been rendered.
print("Plots generated successfully!")