import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from adjustText import adjust_text
from scipy.optimize import curve_fit

# Per-model Audio, Visual and Omni scores; these rows are the ones the law is
# fitted on.
data = {
    'ModelName': [
        'Qwen2.5-Omni-3B',
        'MiniCPM-O-2.6',
        'Ming-lite-Omni-1.5',
        'Baichuan-Omni-1.5',
        'Qwen2.5-Omni-7B',
        'Qwen3-Omni-30B-A3B-Instruct',
        'Gemini-2.0-Flash',
        'Gemini-2.5-Flash',
        'Gemini-2.5-Pro',
    ],
    'Audio': [0.544, 0.565, 0.583, 0.541, 0.602, 0.794, 0.707, 0.795, 0.884],
    'Visual': [0.4267, 0.4227, 0.4628, 0.4466, 0.5068, 0.6329, 0.6276, 0.6954, 0.7867],
    'Omni': [0.278, 0.286, 0.289, 0.297, 0.326, 0.421, 0.449, 0.543, 0.709],
}

# Extra model that is only plotted: it is appended to the plotting frame below
# and never passed to curve_fit.
new_model_data = {
    'ModelName': 'LongCat-Flash-Omni',
    'Audio': 0.802,
    'Visual': 0.6706,
    'Omni': 0.4990,
}

# Fit inputs: x is the product of the two uni-modal scores, y the Omni score.
df = pd.DataFrame(data)
df['Audio_x_Visual'] = df['Audio'] * df['Visual']
x_data_fit = df['Audio_x_Visual'].to_numpy()
y_data_fit = df['Omni'].to_numpy()


def compositional_law(x, C, alpha, b):
    """Evaluate the power law ``C * x**alpha + b``.

    Args:
        x: Scalar or array of Audio-times-Visual products.
        C: Multiplicative coefficient.
        alpha: Exponent applied to ``x``.
        b: Additive offset.

    Returns:
        The law evaluated at ``x`` (same shape as ``x`` for array input).
    """
    return C * (x**alpha) + b


# Least-squares fit of the three parameters, using curve_fit's default
# starting point.
popt, pcov = curve_fit(compositional_law, x_data_fit, y_data_fit)
C_opt, alpha_opt, b_opt = popt

# Coefficient of determination of the fit on the fitted points.
y_predicted = compositional_law(x_data_fit, *popt)
residuals = y_data_fit - y_predicted
ss_res = np.sum(residuals**2)
ss_tot = np.sum((y_data_fit - y_data_fit.mean())**2)
r_squared = 1 - (ss_res / ss_tot)

# Plotting frame = fitted models + the extra model. The product column is
# recomputed for every row because the appended row has no value for it.
new_model_df = pd.DataFrame([new_model_data])
df_plot = pd.concat([df, new_model_df], ignore_index=True)
df_plot['Audio_x_Visual'] = df_plot['Audio'] * df_plot['Visual']
x_plot_all = df_plot['Audio_x_Visual'].to_numpy()
y_plot_all = df_plot['Omni'].to_numpy()

plt.style.use('seaborn-v0_8-whitegrid')
fig, ax = plt.subplots(figsize=(10, 7))

# Draw the fitted curve across the x-range of all plotted points, including
# the extra model.
x_smooth = np.linspace(x_plot_all.min(), x_plot_all.max(), 200)
y_smooth = compositional_law(x_smooth, C_opt, alpha_opt, b_opt)
ax.plot(
    x_smooth,
    y_smooth,
    color='mediumseagreen',
    linestyle='--',
    linewidth=2,
    label='Fitted Compositional Law',
)
ax.scatter(
    x_data_fit,
    y_data_fit,
    color='darkgreen',
    s=60,
    label='Observed Models',
    zorder=5,
)

# Row(s) of the extra model, selected by name for the dedicated marker.
is_new_model = df_plot['ModelName'] == new_model_data['ModelName']
new_model_point = df_plot[is_new_model]
# Highlight the extra model with a star drawn above the regular markers.
ax.scatter(
    new_model_point['Audio_x_Visual'],
    new_model_point['Omni'],
    color='darkgreen',
    marker='*',
    s=200,
    label='LongCat-Flash-Omni',
    zorder=6,
)

# Model names listed here are drawn without a text label; an empty list
# labels every point.
models_to_hide = []

texts = []
for x_pos, y_pos, model_name in zip(x_plot_all, y_plot_all, df_plot['ModelName']):
    if model_name in models_to_hide:
        continue
    texts.append(ax.text(x_pos, y_pos, model_name, fontsize=14))

# Let adjustText reposition the labels, linking each to its point with a thin
# gray line.
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray', lw=0.5), ax=ax)

ax.set_xlabel('Uni-modal Scores (Audio x Visual)', fontsize=18, labelpad=15)
ax.set_ylabel('Omni-modal Score', fontsize=18, labelpad=15)

# Pick the sign explicitly so a negative intercept renders as "- 0.12" rather
# than "+ -0.12"; the text is unchanged for a non-negative intercept.
intercept_sign = '-' if b_opt < 0 else '+'
formula_text = (
    f'Fitted Law: $Omni = {C_opt:.2f} \\times (A \\times V)^{{{alpha_opt:.2f}}} '
    f'{intercept_sign} {abs(b_opt):.2f}$'
    f'\n$R^2 = {r_squared:.4f}$'
)
ax.text(
    0.05,
    0.95,
    formula_text,
    transform=ax.transAxes,  # position in axes fractions, not data units
    fontsize=16,
    verticalalignment='top',
    bbox=dict(boxstyle='round,pad=0.5', fc='aliceblue', alpha=0.8),
)

ax.tick_params(axis='both', which='major', labelsize=16)
ax.legend(
    loc='lower right',
    fontsize=16,
    frameon=True,
    facecolor='white',
    edgecolor='gray',
    framealpha=1.0,
    fancybox=True,
)

# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.exists() test.
output_dir = './eval_results'
os.makedirs(output_dir, exist_ok=True)

plt.tight_layout()
for extension in ('pdf', 'png'):
    plt.savefig(
        f'{output_dir}/compositional_law_plot.{extension}',
        dpi=300,
        bbox_inches='tight',
    )
plt.show()