# code_and_model / eval_scripts / compositional_law.py
# Upload UNO Scorer (initial version) — commit f1f682e
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from adjustText import adjust_text
from scipy.optimize import curve_fit
# Benchmark scores for the models used to fit the law: per-model uni-modal
# accuracies (Audio, Visual) and the jointly-evaluated Omni accuracy.
data = {
    'ModelName': ['Qwen2.5-Omni-3B', 'MiniCPM-O-2.6', 'Ming-lite-Omni-1.5', 'Baichuan-Omni-1.5', 'Qwen2.5-Omni-7B', 'Qwen3-Omni-30B-A3B-Instruct', 'Gemini-2.0-Flash', 'Gemini-2.5-Flash', 'Gemini-2.5-Pro'],
    'Audio': [0.544, 0.565, 0.583, 0.541, 0.602, 0.794, 0.707, 0.795, 0.884],
    'Visual': [0.4267, 0.4227, 0.4628, 0.4466, 0.5068, 0.6329, 0.6276, 0.6954, 0.7867],
    'Omni': [0.278, 0.286, 0.289, 0.297, 0.326, 0.421, 0.449, 0.543, 0.709],
}
# Held-out model: drawn on the plot later but excluded from the fit.
new_model_data = {
    'ModelName': 'LongCat-Flash-Omni',
    'Audio': 0.802,
    'Visual': 0.6706,
    'Omni': 0.4990,
}
df = pd.DataFrame(data)
# Predictor for the law: the product of the two uni-modal scores.
df['Audio_x_Visual'] = df['Audio'].mul(df['Visual'])
x_data_fit = df['Audio_x_Visual'].to_numpy()
y_data_fit = df['Omni'].to_numpy()
def compositional_law(x, C, alpha, b):
    """Power law relating the uni-modal product to the omni score.

    Models Omni ≈ C * (Audio*Visual)**alpha + b, where b is an additive
    floor and alpha controls the curvature of the relationship.
    """
    scaled = x ** alpha
    return C * scaled + b
# Least-squares fit of the three law parameters to the observed models.
popt, pcov = curve_fit(compositional_law, x_data_fit, y_data_fit)
C_opt, alpha_opt, b_opt = popt
# Goodness of fit: coefficient of determination R^2 = 1 - SS_res/SS_tot.
y_predicted = compositional_law(x_data_fit, C_opt, alpha_opt, b_opt)
residuals = y_data_fit - y_predicted
ss_res = np.sum(residuals ** 2)
ss_tot = np.sum((y_data_fit - y_data_fit.mean()) ** 2)
r_squared = 1 - ss_res / ss_tot
# Append the held-out model so it appears on the plot, then recompute the
# predictor column for every row (including the new one).
new_model_df = pd.DataFrame([new_model_data])
df_plot = pd.concat([df, new_model_df], ignore_index=True)
df_plot['Audio_x_Visual'] = df_plot['Audio'].mul(df_plot['Visual'])
x_plot_all = df_plot['Audio_x_Visual'].to_numpy()
y_plot_all = df_plot['Omni'].to_numpy()
# Figure: fitted curve plus scatter of fitted models and the held-out star.
plt.style.use('seaborn-v0_8-whitegrid')
fig, ax = plt.subplots(figsize=(10, 7))
# Dense grid over the observed predictor range gives a smooth curve.
lo, hi = x_plot_all.min(), x_plot_all.max()
x_smooth = np.linspace(lo, hi, 200)
y_smooth = compositional_law(x_smooth, C_opt, alpha_opt, b_opt)
ax.plot(x_smooth, y_smooth,
        color='mediumseagreen', linestyle='--', linewidth=2,
        label='Fitted Compositional Law')
# Models that were used in the fit.
ax.scatter(x_data_fit, y_data_fit, color='darkgreen', s=60,
           label='Observed Models', zorder=5)
# Held-out model drawn as a star, on top of everything else.
new_model_point = df_plot[df_plot['ModelName'] == 'LongCat-Flash-Omni']
ax.scatter(new_model_point['Audio_x_Visual'], new_model_point['Omni'],
           color='darkgreen', marker='*', s=200,
           label='LongCat-Flash-Omni', zorder=6)
# Names to omit from labeling. An earlier revision listed the five smaller
# models here, but that assignment was immediately overwritten (dead code);
# the list is empty, so every model gets a label.
models_to_hide = []
texts = []
for i, model_name in enumerate(df_plot['ModelName']):
    if model_name not in models_to_hide:
        texts.append(ax.text(x_plot_all[i], y_plot_all[i], model_name, fontsize=14))
# Nudge the labels apart so they do not overlap the points or each other.
adjust_text(texts,
            arrowprops=dict(arrowstyle='-', color='gray', lw=0.5),
            ax=ax)
# Axis labels and tick styling.
ax.set_xlabel('Uni-modal Scores (Audio x Visual)', fontsize=18, labelpad=15)
ax.set_ylabel('Omni-modal Score', fontsize=18, labelpad=15)
# Annotate the fitted equation and R^2 in the top-left corner (axes coords).
formula_text = (
    f'Fitted Law: $Omni = {C_opt:.2f} \\times (A \\times V)^{{{alpha_opt:.2f}}} + {b_opt:.2f}$'
    f'\n$R^2 = {r_squared:.4f}$'
)
ax.text(0.05, 0.95, formula_text, transform=ax.transAxes, fontsize=16,
        verticalalignment='top',
        bbox=dict(boxstyle='round,pad=0.5', fc='aliceblue', alpha=0.8))
ax.tick_params(axis='both', which='major', labelsize=16)
# Opaque, framed legend in the lower-right corner.
legend_style = dict(
    loc='lower right',
    fontsize=16,
    frameon=True,
    facecolor='white',
    edgecolor='gray',
    framealpha=1.0,
    fancybox=True,
)
ax.legend(**legend_style)
# Ensure the output directory exists, then save vector (PDF) and raster (PNG)
# copies of the figure. exist_ok=True replaces the original
# `if os.path.exists(...) == False` check, which was both unidiomatic and a
# check-then-create race if the directory appeared between the two calls.
os.makedirs('./eval_results', exist_ok=True)
plt.tight_layout()
plt.savefig('./eval_results/compositional_law_plot.pdf', dpi=300, bbox_inches='tight')
plt.savefig('./eval_results/compositional_law_plot.png', dpi=300, bbox_inches='tight')
plt.show()