File size: 3,567 Bytes
f1f682e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from adjustText import adjust_text
from scipy.optimize import curve_fit

# Per-model benchmark scores for the reference models. Only these rows feed
# the curve fit; the lists are positionally aligned with 'ModelName'.
data = {
    'ModelName': [
        'Qwen2.5-Omni-3B',
        'MiniCPM-O-2.6',
        'Ming-lite-Omni-1.5',
        'Baichuan-Omni-1.5',
        'Qwen2.5-Omni-7B',
        'Qwen3-Omni-30B-A3B-Instruct',
        'Gemini-2.0-Flash',
        'Gemini-2.5-Flash',
        'Gemini-2.5-Pro',
    ],
    'Audio': [0.544, 0.565, 0.583, 0.541, 0.602, 0.794, 0.707, 0.795, 0.884],
    'Visual': [0.4267, 0.4227, 0.4628, 0.4466, 0.5068, 0.6329, 0.6276, 0.6954, 0.7867],
    'Omni': [0.278, 0.286, 0.289, 0.297, 0.326, 0.421, 0.449, 0.543, 0.709],
}

# Scores of the additional model. It is kept separate from `data`, so it does
# not contribute to the arrays used for fitting below.
new_model_data = {
    'ModelName': 'LongCat-Flash-Omni',
    'Audio': 0.802,
    'Visual': 0.6706,
    'Omni': 0.4990,
}

# Fit inputs: x is the product of the two uni-modal scores, y the omni score.
df = pd.DataFrame(data)
df['Audio_x_Visual'] = df['Audio'] * df['Visual']
x_data_fit = df['Audio_x_Visual'].to_numpy()
y_data_fit = df['Omni'].to_numpy()

def compositional_law(x, C, alpha, b):
    """Evaluate the offset power law ``C * x**alpha + b``.

    Args:
        x: Input value(s). The expression is applied as written, so any
            operand supporting ``**``, ``*`` and ``+`` is accepted.
        C: Multiplicative coefficient of the power term.
        alpha: Exponent applied to ``x``.
        b: Additive offset.

    Returns:
        The value of ``C * x**alpha + b`` for the given arguments.
    """
    power_term = x ** alpha
    return C * power_term + b

# Fit (C, alpha, b) of compositional_law to the reference models; curve_fit is
# called with its default settings (no initial guess, no bounds).
popt, pcov = curve_fit(compositional_law, x_data_fit, y_data_fit)
C_opt, alpha_opt, b_opt = popt

# Goodness of fit on the fitted points: R^2 = 1 - SS_res / SS_tot.
y_predicted = compositional_law(x_data_fit, *popt)
residuals = y_data_fit - y_predicted
deviations = y_data_fit - np.mean(y_data_fit)
ss_res = np.sum(residuals**2)
ss_tot = np.sum(deviations**2)
r_squared = 1 - ss_res / ss_tot


# Plotting table: the reference models followed by the additional model as the
# final row (ignore_index renumbers the rows consecutively).
new_model_df = pd.DataFrame([new_model_data])
df_plot = pd.concat([df, new_model_df], ignore_index=True)

# The appended row carries no product value yet, so recompute the column for
# every row before extracting the plotting arrays.
df_plot['Audio_x_Visual'] = df_plot['Audio'] * df_plot['Visual']
x_plot_all = df_plot['Audio_x_Visual'].to_numpy()
y_plot_all = df_plot['Omni'].to_numpy()

# --- Figure: fitted curve, reference models, and the additional model ---
plt.style.use('seaborn-v0_8-whitegrid')
fig, ax = plt.subplots(figsize=(10, 7))

# Evaluate the fitted law on a dense grid spanning every plotted point
# (including the additional model) so the curve covers the whole x-range.
x_smooth = np.linspace(x_plot_all.min(), x_plot_all.max(), 200)
y_smooth = compositional_law(x_smooth, C_opt, alpha_opt, b_opt)

ax.plot(x_smooth, y_smooth, color='mediumseagreen', linestyle='--', linewidth=2, label='Fitted Compositional Law')

# Models that were used for the fit.
ax.scatter(x_data_fit, y_data_fit, color='darkgreen', s=60, label='Observed Models', zorder=5)

# Take the additional model's name from new_model_data rather than repeating
# the literal, so the row lookup and the legend entry cannot drift apart.
new_model_name = new_model_data['ModelName']
new_model_point = df_plot[df_plot['ModelName'] == new_model_name]

ax.scatter(new_model_point['Audio_x_Visual'], new_model_point['Omni'],
           color='darkgreen', marker='*', s=200, label=new_model_name, zorder=6)

# Model names listed here get no text label. Left empty so every model is
# labelled; add names (e.g. the smaller open models) to declutter the plot.
models_to_hide = []
texts = [
    ax.text(x_pos, y_pos, model_name, fontsize=14)
    for x_pos, y_pos, model_name in zip(x_plot_all, y_plot_all, df_plot['ModelName'])
    if model_name not in models_to_hide
]

# Nudge the labels apart and connect each to its point with a thin line.
adjust_text(texts,
            arrowprops=dict(arrowstyle='-', color='gray', lw=0.5),
            ax=ax)

ax.set_xlabel('Uni-modal Scores (Audio x Visual)', fontsize=18, labelpad=15)
ax.set_ylabel('Omni-modal Score', fontsize=18, labelpad=15)

# Render the offset with an explicit sign so a negative fitted b reads
# "- 0.12" instead of "+ -0.12"; the text is unchanged for non-negative b.
offset_sign = '-' if b_opt < 0 else '+'
formula_text = (f'Fitted Law: $Omni = {C_opt:.2f} \\times (A \\times V)^{{{alpha_opt:.2f}}} {offset_sign} {abs(b_opt):.2f}$'
                f'\n$R^2 = {r_squared:.4f}$')
ax.text(0.05, 0.95, formula_text, transform=ax.transAxes, fontsize=16,
        verticalalignment='top', bbox=dict(boxstyle='round,pad=0.5', fc='aliceblue', alpha=0.8))

ax.tick_params(axis='both', which='major', labelsize=16)

ax.legend(
    loc='lower right',
    fontsize=16,
    frameon=True,
    facecolor='white',
    edgecolor='gray',
    framealpha=1.0,
    fancybox=True
)

# Create the output directory if needed. exist_ok=True replaces the separate
# exists() check, which was racy and compared against False explicitly.
output_dir = './eval_results'
os.makedirs(output_dir, exist_ok=True)

plt.tight_layout()

# Save the same figure as a vector (PDF) and a raster (PNG) file.
for ext in ('pdf', 'png'):
    plt.savefig(f'{output_dir}/compositional_law_plot.{ext}', dpi=300, bbox_inches='tight')

plt.show()