import gradio as gr
import pandas as pd
import json
import plotly.express as px
import requests
import os
from textblob.download_corpora import download_all
from highlight_util import highlight_adjectives
from send_file import send_to_backend
# Download the corpora TextBlob needs (only has to run once per environment;
# NOTE(review): runs at import time on every app start — consider guarding).
download_all()
def on_confirm(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Load the score table for the selected task/dataset/perspective and
    attach an "Analysis" column with the AI report (adjectives highlighted).

    Parameters mirror the Gradio controls:
        task_type_radio: task name, e.g. "Code Generation", "Api Recommendation".
        dataset_radio: dataset directory name under ./dataset/<task>/.
        num_parts_dropdown: number of partitions (subdirectory name).
        perspective_radio: perspective label; matched by substring
            ("Tokens"/"Lines"/"Complexity"/"Problem Types", plus "Recall"/"F1"
            for Api Recommendation).
        division_method_radio: "Equal Frequency Partitioning" -> QS, else EI.

    Returns:
        pandas.DataFrame with the scores plus the "Analysis" column.

    Raises:
        ValueError: when the perspective does not map to a known CSV
            (the original silently fell through and crashed with NameError).
    """
    num_parts = num_parts_dropdown
    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"
    base_path = f"./dataset/{task_type_radio}/{dataset_radio}"
    analysis_result, _ = load_analysis_report(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio)

    # Resolve the CSV path for the selected perspective.
    csv_path = None
    if task_type_radio == "Api Recommendation":
        # Api Recommendation results are split per metric (recall / f1).
        # BUG FIX: the original compared lowercase "f1" for "Lines" but
        # uppercase "F1" for "Tokens"; accept both spellings consistently.
        if "Recall" in perspective_radio:
            metric_dir = "recall"
        elif "F1" in perspective_radio or "f1" in perspective_radio:
            metric_dir = "f1"
        else:
            metric_dir = None
        if metric_dir is not None:
            if "Tokens" in perspective_radio:
                csv_path = f"{base_path}/{num_parts}/{method}/{metric_dir}/token_counts_{method}.csv"
            elif "Lines" in perspective_radio:
                csv_path = f"{base_path}/{num_parts}/{method}/{metric_dir}/line_counts_{method}.csv"
    elif task_type_radio in ("Code Completion", "Test Generation"):
        if "Tokens" in perspective_radio:
            csv_path = f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv"
        elif "Lines" in perspective_radio:
            csv_path = f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv"
    else:  # Code Generation
        if "Tokens" in perspective_radio:
            csv_path = f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv"
        elif "Lines" in perspective_radio:
            csv_path = f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv"
        elif "Complexity" in perspective_radio:
            csv_path = f"{base_path}/{num_parts}/{method}/CC_{method}.csv"
        elif "Problem Types" in perspective_radio:
            # Category results are not partitioned, so no num_parts/method dirs.
            csv_path = f"{base_path}/cata_result.csv"

    if csv_path is None:
        raise ValueError(f"Unsupported perspective {perspective_radio!r} for task {task_type_radio!r}")

    df = pd.read_csv(csv_path)
    # BUG FIX: only the Code Generation CSVs use a "Model" column; every other
    # task's CSVs use "Models" (see plot_visualization) — the original always
    # read df["Model"] and crashed for those tasks.
    model_col = "Model" if task_type_radio == "Code Generation" else "Models"
    # Attach the per-model AI analysis. Assumes analysis_result is a dict keyed
    # by model name; load_analysis_report returns an error *string* on failure
    # — TODO confirm upstream handling of that case.
    df["Analysis"] = df[model_col].map(
        lambda m: highlight_adjectives(analysis_result.get(m, "No analysis provided."))
    )
    return df
# 生成 CSS 样式
def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type, show_high, show_medium, show_low):
css = """
#dataframe th {
background-color: #f2f2f2
}
"""
colors = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]
categories = [line_counts, token_counts, cyclomatic_complexity]
category_index = 0
column_index = 1
for category in categories:
if category:
if show_high:
css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {colors[category_index]}; }}\n"
column_index += 1
if show_medium:
css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {colors[category_index]}; }}\n"
column_index += 1
if show_low:
css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {colors[category_index]}; }}\n"
column_index += 1
category_index += 1
# 为 Problem Type 相关的三个子列设置固定颜色
if problem_type:
problem_type_color = "#d4f0fc" # 你可以选择任何你喜欢的颜色
css += f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {problem_type_color}; }}\n"
css += f"#dataframe td:nth-child({column_index + 2}) {{ background-color: {problem_type_color}; }}\n"
css += f"#dataframe td:nth-child({column_index + 3}) {{ background-color: {problem_type_color}; }}\n"
# 隐藏 "data" 标识
css += """
.gradio-container .dataframe-container::before {
content: none !important;
}
"""
return css
# AI分析
def load_analysis_report(task_type_radio,dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
num_parts = num_parts_dropdown
method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"
# # 根据perspective确定文件路径
# if "Tokens" in perspective_radio:
# perspective = "token_counts"
# elif "Lines" in perspective_radio:
# perspective = "line_counts"
# elif "Complexity" in perspective_radio:
# perspective = "CC"
# else:
# perspective = "problem_type"
# base_path = f"./llm_insight/{task_type_radio}"
# if perspective == "problem_type":
# report_file = f"{base_path}/{dataset_radio}/{perspective}_report.json"
# recommendation_file = f"{base_path}/{dataset_radio}/{perspective}_recommendation.json"
# else:
# report_file = f"{base_path}/{dataset_radio}/{num_parts}/{method}/{perspective}_report.json"
# recommendation_file = f"{base_path}/{dataset_radio}/{num_parts}/{method}/{perspective}_recommendation.json"
base_path = f"./llm_insight/{task_type_radio}"
if task_type_radio=="Code Generation":
# 根据perspective确定文件路径
if "Tokens" in perspective_radio:
perspective = "token_counts"
elif "Lines" in perspective_radio:
perspective = "line_counts"
elif "Complexity" in perspective_radio:
perspective = "CC"
else:
perspective = "problem_type"
if perspective == "problem_type":
report_file = f"{base_path}/{dataset_radio}/{perspective}_report.json"
recommendation_file = f"{base_path}/{dataset_radio}/{perspective}_recommendation.json"
else:
report_file = f"{base_path}/{dataset_radio}/{num_parts}/{method}/{perspective}_report.json"
recommendation_file = f"{base_path}/{dataset_radio}/{num_parts}/{method}/{perspective}_recommendation.json"
else:
report_file = f"{base_path}/{dataset_radio}/report.json"
recommendation_file = f"{base_path}/{dataset_radio}/recommendation.json"
try:
with open(report_file, 'r', encoding='utf-8') as f:
analysis_result = json.load(f)
except Exception as e:
analysis_result = f"[Error] error load analysis report: {e}"
try:
with open(recommendation_file, 'r', encoding='utf-8') as f:
recommendation_result = json.load(f)
except Exception as e:
recommendation_result = f"[Error] error load model recommendation: {e}"
return (analysis_result,recommendation_result)
# 可视化
# def plot_visualization(task_type_radio,dataset_radio, perspective_radio, num_parts, plot_type):
# base_path = f"./dataset/{task_type_radio}/{dataset_radio}"
# if "Tokens" in perspective_radio:
# file_path = f'{base_path}/{num_parts}/QS/token_counts_QS.csv'
# elif "Lines" in perspective_radio:
# file_path = f'{base_path}/{num_parts}/QS/line_counts_QS.csv'
# elif "Complexity" in perspective_radio:
# file_path = f'{base_path}/{num_parts}/QS/CC_QS.csv'
# else: # Problem Types
# file_path = f'{base_path}/cata_result.csv'
# df = pd.read_csv(file_path)
# df.set_index('Model', inplace=True)
# df_transposed = df.T
# if plot_type == "Line Chart":
# fig = px.line(df_transposed,
# x=df_transposed.index,
# y=df_transposed.columns,
# title='Model Performance Across Different Subsets',
# labels={'value': 'Evaluation Score', 'index': 'Subsets'},
# color_discrete_sequence=px.colors.qualitative.Plotly)
# fig.update_traces(hovertemplate='%{y}')
# elif plot_type == "Radar Chart": # Radar Chart
# # 重新组织数据为雷达图所需格式
# radar_data = []
# for model in df.index:
# for subset, score in df.loc[model].items():
# radar_data.append({
# 'Model': model,
# 'Subset': subset,
# 'Score': score
# })
# radar_df = pd.DataFrame(radar_data)
# colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
# # 创建雷达图
# fig = px.line_polar(radar_df,
# r='Score',
# theta='Subset',
# color='Model',
# line_close=True,
# color_discrete_sequence=colors,
# title='Model Performance Radar Chart')
# # 自定义每个模型的线条样式
# for i, trace in enumerate(fig.data):
# trace.update(
# fill=None, # 移除填充
# line=dict(
# width=2,
# dash='solid' if i % 2 == 0 else 'dash', # 交替使用实线和虚线
# )
# )
# # 优化雷达图的显示
# fig.update_layout(
# polar=dict(
# radialaxis=dict(
# visible=True,
# range=[0, 100],
# showline=True,
# linewidth=1,
# gridcolor='lightgrey'
# ),
# angularaxis=dict(
# showline=True,
# linewidth=1,
# gridcolor='lightgrey'
# )
# ),
# showlegend=True,
# legend=dict(
# yanchor="middle", # 垂直居中
# y=0.5,
# xanchor="left",
# x=1.2, # 将图例移到雷达图右侧
# bgcolor="rgba(255, 255, 255, 0.8)", # 半透明白色背景
# bordercolor="lightgrey", # 添加边框
# borderwidth=1
# ),
# margin=dict(r=150), # 增加右侧边距,为图例留出空间
# paper_bgcolor='white'
# )
# else: # Heatmap
# # 创建热力图
# fig = px.imshow(df_transposed,
# labels=dict(x="Model", y="Subset", color="Score"),
# color_continuous_scale="RdYlBu_r", # 使用科研风格配色:红-黄-蓝
# aspect="auto", # 自动调整宽高比
# title="Model Performance Heatmap")
# # 优化热力图显示
# fig.update_layout(
# title=dict(
# text='Model Performance Distribution Across Subsets',
# x=0.5,
# y=0.95,
# xanchor='center',
# yanchor='top',
# font=dict(size=14)
# ),
# xaxis=dict(
# title="Model",
# tickangle=45, # 斜着显示模型名称
# tickfont=dict(size=10),
# side="bottom"
# ),
# yaxis=dict(
# title="Subset",
# tickfont=dict(size=10)
# ),
# coloraxis=dict(
# colorbar=dict(
# title="Score",
# titleside="right",
# tickfont=dict(size=10),
# titlefont=dict(size=12),
# len=0.9, # 色条长度
# )
# ),
# margin=dict(t=80, r=100, b=80, l=80), # 调整边距
# paper_bgcolor='white',
# plot_bgcolor='white'
# )
# # 添加具体数值标注
# annotations = []
# for i in range(len(df_transposed.index)):
# for j in range(len(df_transposed.columns)):
# annotations.append(
# dict(
# x=j,
# y=i,
# text=f"{df_transposed.iloc[i, j]:.1f}",
# showarrow=False,
# font=dict(size=9, color='black')
# )
# )
# fig.update_layout(annotations=annotations)
# return fig
def _load_perspective_df(task_type_radio, dataset_radio, num_parts, method, perspective_radio):
    """Read the score CSV matching the task/perspective selection.

    Mirrors the on-disk layout under ./dataset/<task>/<dataset>/.
    Api Recommendation results are additionally split per metric
    (recall / f1); category results live in a single cata_result.csv.

    Raises:
        ValueError: when the selection does not map to a known file.
    """
    base_path = f"./dataset/{task_type_radio}/{dataset_radio}"
    if task_type_radio == "Api Recommendation":
        # BUG FIX: the original compared lowercase "f1" for "Lines" but
        # uppercase "F1" for "Tokens"; accept both spellings consistently.
        if "Recall" in perspective_radio:
            metric_dir = "recall"
        elif "F1" in perspective_radio or "f1" in perspective_radio:
            metric_dir = "f1"
        else:
            metric_dir = None
        if metric_dir is not None:
            if "Tokens" in perspective_radio:
                return pd.read_csv(f"{base_path}/{num_parts}/{method}/{metric_dir}/token_counts_{method}.csv")
            if "Lines" in perspective_radio:
                return pd.read_csv(f"{base_path}/{num_parts}/{method}/{metric_dir}/line_counts_{method}.csv")
    elif task_type_radio in ("Code Completion", "Test Generation"):
        if "Tokens" in perspective_radio:
            return pd.read_csv(f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv")
        if "Lines" in perspective_radio:
            return pd.read_csv(f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv")
    else:  # Code Generation
        if "Tokens" in perspective_radio:
            return pd.read_csv(f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv")
        if "Lines" in perspective_radio:
            return pd.read_csv(f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv")
        if "Complexity" in perspective_radio:
            return pd.read_csv(f"{base_path}/{num_parts}/{method}/CC_{method}.csv")
        if "Problem Types" in perspective_radio:
            return pd.read_csv(f"{base_path}/cata_result.csv")
    raise ValueError(f"Unsupported perspective {perspective_radio!r} for task {task_type_radio!r}")


def plot_visualization(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio, plot_type):
    """Render model scores as a line chart, radar chart, or heatmap.

    Parameters mirror the Gradio controls; plot_type is one of
    "Line Chart", "Radar Chart", or "Heatmap".

    Returns:
        plotly.graph_objects.Figure.

    Raises:
        ValueError: on an unknown perspective or plot_type (the original
            left `fig` unbound and crashed with NameError).

    Cleanups vs. the original: the Radar Chart section was duplicated
    verbatim (building the same figure twice), the two identical Line Chart
    branches are merged (their `df_melted` was never used), leftover debug
    prints are removed, and the unused load_analysis_report() call is gone.
    """
    num_parts = num_parts_dropdown
    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"
    df = _load_perspective_df(task_type_radio, dataset_radio, num_parts, method, perspective_radio)

    # Code Generation CSVs use a "Model" column; every other task uses "Models".
    model_column_name = 'Model' if task_type_radio == "Code Generation" else 'Models'
    df.set_index(model_column_name, inplace=True)
    df_transposed = df.T  # rows = subsets, columns = models

    if plot_type == "Line Chart":
        fig = px.line(df_transposed,
                      x=df_transposed.index,
                      y=df_transposed.columns,
                      title='Model Performance Across Different Subsets',
                      labels={'value': 'Evaluation Score', 'index': 'Subsets'},
                      color_discrete_sequence=px.colors.qualitative.Plotly)
        fig.update_traces(hovertemplate='%{y}')
    elif plot_type == "Radar Chart":
        # Long-form records: one row per (model, subset) score.
        radar_rows = [
            {model_column_name: model, 'Subset': subset, 'Score': score}
            for model in df.index
            for subset, score in df.loc[model].items()
        ]
        radar_df = pd.DataFrame(radar_rows)
        palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
                   '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
        fig = px.line_polar(radar_df,
                            r='Score',
                            theta='Subset',
                            color=model_column_name,
                            line_close=True,
                            color_discrete_sequence=palette,
                            title='Model Performance Radar Chart')
        # Alternate solid/dashed lines so overlapping models stay distinguishable.
        for i, trace in enumerate(fig.data):
            trace.update(
                fill=None,  # no area fill
                line=dict(
                    width=2,
                    dash='solid' if i % 2 == 0 else 'dash',
                )
            )
        fig.update_layout(
            polar=dict(
                radialaxis=dict(
                    visible=True,
                    range=[0, 100],  # assumes scores are percentages — TODO confirm
                    showline=True,
                    linewidth=1,
                    gridcolor='lightgrey'
                ),
                angularaxis=dict(
                    showline=True,
                    linewidth=1,
                    gridcolor='lightgrey'
                )
            ),
            showlegend=True,
            legend=dict(
                yanchor="middle",
                y=0.5,
                xanchor="left",
                x=1.2,  # push the legend to the right of the polar plot
                bgcolor="rgba(255, 255, 255, 0.8)",
                bordercolor="lightgrey",
                borderwidth=1
            ),
            margin=dict(r=150),  # room for the legend on the right
            paper_bgcolor='white'
        )
    elif plot_type == "Heatmap":
        fig = px.imshow(df_transposed,
                        labels=dict(x=model_column_name, y="Subset", color="Score"),
                        color_continuous_scale="RdYlBu_r",
                        aspect="auto",
                        title="Model Performance Heatmap")
        fig.update_layout(
            title=dict(
                text='Model Performance Distribution Across Subsets',
                x=0.5,
                y=0.95,
                xanchor='center',
                yanchor='top',
                font=dict(size=14)
            ),
            xaxis=dict(
                title=model_column_name,
                tickangle=45,  # slant long model names
                tickfont=dict(size=10),
                side="bottom"
            ),
            yaxis=dict(
                title="Subset",
                tickfont=dict(size=10)
            ),
            coloraxis=dict(
                colorbar=dict(
                    title="Score",
                    titleside="right",
                    tickfont=dict(size=10),
                    titlefont=dict(size=12),
                    len=0.9,
                )
            ),
            margin=dict(t=80, r=100, b=80, l=80),
            paper_bgcolor='white',
            plot_bgcolor='white'
        )
        # Overlay each cell's numeric value for readability.
        annotations = [
            dict(
                x=j,
                y=i,
                text=f"{df_transposed.iloc[i, j]:.1f}",
                showarrow=False,
                font=dict(size=9, color='black')
            )
            for i in range(len(df_transposed.index))
            for j in range(len(df_transposed.columns))
        ]
        fig.update_layout(annotations=annotations)
    else:
        raise ValueError(f"Unknown plot type: {plot_type!r}")

    return fig
# 桑基图展示推荐模型
def plot_recommendation_sankey(task_type_radio,dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
import plotly.graph_objects as go
from plotly.colors import sample_colorscale
_, recommendation_result = load_analysis_report(task_type_radio,dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio)
# 定义节点层级和颜色方案
levels = ['Model Recommendation', 'Scenario', 'Model Family', 'Specific Model']
color_scale = "RdYlBu_r"
# 节点和连接数据
node_labels = [levels[0]] # 根节点
customdata = ["Root node"]
sources, targets, values = [], [], []
# 节点索引跟踪
node_indices = {levels[0]: 0}
current_idx = 1
# 处理推荐列表结构 {"场景1": [ {模型1:原因1}, {模型2:原因2} ], ...}
for scenario, model_dicts in recommendation_result.items():
# 添加场景节点
scenario_label = " ".join(scenario.split()[:3]) + ("..." if len(scenario.split()) > 3 else "")
node_labels.append(scenario_label)
customdata.append(scenario)
node_indices[f"scenario_{scenario}"] = current_idx
current_idx += 1
# 根节点 -> 场景节点连接
sources.append(0)
targets.append(node_indices[f"scenario_{scenario}"])
values.append(10)
# 处理模型列表 [ {模型1:原因1}, {模型2:原因2} ]
for model_dict in model_dicts:
for model, reason in model_dict.items():
# 提取模型系列 (如"GPT-4" -> "GPT")
family = model.split('-')[0].split('_')[0]
# 添加模型系列节点 (如果不存在)
if f"family_{family}" not in node_indices:
node_labels.append(family)
customdata.append(f"Model family: {family}")
node_indices[f"family_{family}"] = current_idx
current_idx += 1
# 场景 -> 模型系列连接
sources.append(node_indices[f"scenario_{scenario}"])
targets.append(node_indices[f"family_{family}"])
values.append(8)
# 添加具体模型节点 (如果不存在)
if f"model_{model}" not in node_indices:
node_labels.append(model)
customdata.append(f"{model}
{reason}")
node_indices[f"model_{model}"] = current_idx
current_idx += 1
# 模型系列 -> 具体模型连接
sources.append(node_indices[f"family_{family}"])
targets.append(node_indices[f"model_{model}"])
values.append(5)
# 生成颜色 (确保颜色数量匹配节点数量)
node_colors = ["#2c7bb6"] # 根节点颜色
node_colors += sample_colorscale(color_scale, [n/(len(node_labels)-1) for n in range(1, len(node_labels))])
# 创建桑基图
fig = go.Figure(go.Sankey(
arrangement="perpendicular",
node=dict(
pad=20,
thickness=15,
line=dict(color="rgba(0,0,0,0.3)", width=0.2),
label=node_labels,
color=node_colors,
hovertemplate='%{label}
Multi-view Leaderboard: Towards Evaluating the Code Intelligence of LLMs From Multiple Views