"""Gradio front-end for the Multi-view Code LLM Leaderboard.

The app lets the user pick a task type, dataset, number of subsets,
perspective and division method, then shows:

* a ranking table (scores per subset plus a highlighted textual analysis),
* a line / radar / heatmap visualization of the same scores,
* a Sankey diagram of the recommended models per scenario.

Score tables are read from ``./dataset/...`` and the analysis /
recommendation JSON files from ``./llm_insight/...``.
"""
import json
import os

import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
from plotly.colors import sample_colorscale
from textblob.download_corpora import download_all

from highlight_util import highlight_adjectives
from send_file import send_to_backend

# Download the data TextBlob needs (only has to succeed once per machine).
download_all()

TASK_CODE_GENERATION = "Code Generation"
TASK_CODE_COMPLETION = "Code Completion"
TASK_API_RECOMMENDATION = "Api Recommendation"
TASK_TEST_GENERATION = "Test Generation"

# Datasets offered for each task type.
DATASETS_BY_TASK = {
    TASK_CODE_GENERATION: ["HumanEval", "MBPP"],
    TASK_CODE_COMPLETION: ["ComplexCodeEval-Python", "ComplexCodeEval-Java"],
    TASK_API_RECOMMENDATION: ["ComplexCodeEval-Python", "ComplexCodeEval-Java"],
    TASK_TEST_GENERATION: ["ComplexCodeEval-Python", "ComplexCodeEval-Java"],
}


def _division_code(division_method_radio):
    """Return the directory/file suffix for the selected division method."""
    return "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"


def _score_csv_path(task_type_radio, dataset_radio, num_parts_dropdown,
                    perspective_radio, division_method_radio):
    """Return the score CSV path for the current selection, or ``None``.

    ``None`` means the perspective is not available for the task type
    (for example a stale "Complexity" perspective with "Code Completion").
    """
    perspective = perspective_radio or ""
    method = _division_code(division_method_radio)
    dataset_dir = f"./dataset/{task_type_radio}/{dataset_radio}"
    # int(): the slider may deliver 3.0, which must not end up in the path.
    split_dir = f"{dataset_dir}/{int(num_parts_dropdown)}/{method}"

    if "Tokens" in perspective:
        file_name = f"token_counts_{method}.csv"
    elif "Lines" in perspective:
        file_name = f"line_counts_{method}.csv"
    else:
        file_name = None

    if task_type_radio == TASK_API_RECOMMENDATION:
        # The metric is part of the perspective label; the labels spell it
        # both "F1" and "f1", so compare case-insensitively.
        lowered = perspective.lower()
        if "recall" in lowered:
            metric = "recall"
        elif "f1" in lowered:
            metric = "f1"
        else:
            metric = None
        if metric is None or file_name is None:
            return None
        return f"{split_dir}/{metric}/{file_name}"

    if file_name is not None:
        return f"{split_dir}/{file_name}"

    if task_type_radio in (TASK_CODE_COMPLETION, TASK_TEST_GENERATION):
        return None

    if "Complexity" in perspective:
        return f"{split_dir}/CC_{method}.csv"
    if "Problem Types" in perspective:
        return f"{dataset_dir}/cata_result.csv"
    return None


def _load_scores(task_type_radio, dataset_radio, num_parts_dropdown,
                 perspective_radio, division_method_radio):
    """Read the score table for the current selection.

    Raises:
        gr.Error: if the perspective does not apply to the task type.
    """
    csv_path = _score_csv_path(task_type_radio, dataset_radio, num_parts_dropdown,
                               perspective_radio, division_method_radio)
    if csv_path is None:
        raise gr.Error(
            f"Perspective '{perspective_radio}' is not available for task '{task_type_radio}'."
        )
    return pd.read_csv(csv_path)


def _model_column(df, task_type_radio):
    """Return the name of the column holding model names.

    "Code Generation" tables use ``Model`` and the other tasks ``Models``;
    fall back to whichever of the two is actually present.
    """
    preferred = "Model" if task_type_radio == TASK_CODE_GENERATION else "Models"
    for candidate in (preferred, "Model", "Models"):
        if candidate in df.columns:
            return candidate
    raise gr.Error("The score table has neither a 'Model' nor a 'Models' column.")


def on_confirm(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio,
               division_method_radio):
    """Build the ranking table for the current selection.

    Returns:
        The score DataFrame with an extra ``Analysis`` column holding the
        per-model analysis text, passed through ``highlight_adjectives``.
    """
    analysis_result, _ = load_analysis_report(
        task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio,
        division_method_radio,
    )
    df = _load_scores(task_type_radio, dataset_radio, num_parts_dropdown,
                      perspective_radio, division_method_radio)

    # load_analysis_report returns an error string when the report cannot be
    # read; in that case every model simply gets the default text.
    if not isinstance(analysis_result, dict):
        analysis_result = {}

    model_column = _model_column(df, task_type_radio)
    df["Analysis"] = df[model_column].map(
        lambda m: highlight_adjectives(analysis_result.get(m, "No analysis provided."))
    )
    return df


def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type,
                 show_high, show_medium, show_low):
    """Generate the CSS that colours the table columns per category.

    Each enabled category (line counts, token counts, cyclomatic complexity)
    takes one column per enabled level (high / medium / low), all in that
    category's colour. An enabled problem type always colours the next three
    columns. The first data column is the second table column.
    """
    css = """
    #dataframe th {
        background-color: #f2f2f2
    }
    """
    colors = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]
    next_child = 2  # nth-child is 1-based and column 1 holds the model name

    categories = (line_counts, token_counts, cyclomatic_complexity)
    for color, enabled in zip(colors, categories):
        if not enabled:
            continue
        for level_shown in (show_high, show_medium, show_low):
            if level_shown:
                css += f"#dataframe td:nth-child({next_child}) {{ background-color: {color}; }}\n"
                next_child += 1

    # The three problem-type sub-columns share one fixed colour.
    if problem_type:
        problem_type_color = "#d4f0fc"
        for offset in range(3):
            css += (
                f"#dataframe td:nth-child({next_child + offset}) "
                f"{{ background-color: {problem_type_color}; }}\n"
            )

    # Hide the "data" marker.
    css += """
    .gradio-container .dataframe-container::before {
        content: none !important;
    }
    """
    return css


def _read_json_or_error(path, error_prefix):
    """Load JSON from *path*; on failure return ``f"{error_prefix}: {exc}"``."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:  # missing/unreadable file or bad JSON
        return f"{error_prefix}: {e}"


def load_analysis_report(task_type_radio, dataset_radio, num_parts_dropdown,
                         perspective_radio, division_method_radio):
    """Load the analysis report and the model recommendation.

    Returns:
        ``(analysis_result, recommendation_result)``. Each element is the
        parsed JSON content, or a string starting with ``[Error]`` when the
        corresponding file could not be loaded.
    """
    method = _division_code(division_method_radio)
    base_path = f"./llm_insight/{task_type_radio}"

    if task_type_radio == TASK_CODE_GENERATION:
        # Pick the file prefix from the perspective.
        if "Tokens" in perspective_radio:
            perspective = "token_counts"
        elif "Lines" in perspective_radio:
            perspective = "line_counts"
        elif "Complexity" in perspective_radio:
            perspective = "CC"
        else:
            perspective = "problem_type"

        if perspective == "problem_type":
            # Problem types do not depend on the split.
            report_dir = f"{base_path}/{dataset_radio}"
        else:
            report_dir = f"{base_path}/{dataset_radio}/{int(num_parts_dropdown)}/{method}"
        report_file = f"{report_dir}/{perspective}_report.json"
        recommendation_file = f"{report_dir}/{perspective}_recommendation.json"
    else:
        report_file = f"{base_path}/{dataset_radio}/report.json"
        recommendation_file = f"{base_path}/{dataset_radio}/recommendation.json"

    analysis_result = _read_json_or_error(
        report_file, "[Error] error load analysis report")
    recommendation_result = _read_json_or_error(
        recommendation_file, "[Error] error load model recommendation")
    return (analysis_result, recommendation_result)


def _line_chart(df_transposed):
    """Line chart: one line per model across the subsets."""
    fig = px.line(
        df_transposed,
        x=df_transposed.index,
        y=df_transposed.columns,
        title='Model Performance Across Different Subsets',
        labels={'value': 'Evaluation Score', 'index': 'Subsets'},
        color_discrete_sequence=px.colors.qualitative.Plotly,
    )
    fig.update_traces(hovertemplate='%{y}')
    return fig


def _radar_chart(df, model_column_name):
    """Radar chart: one closed polygon per model, one axis per subset."""
    # px.line_polar wants long-form data: one row per (model, subset).
    radar_data = []
    for model, row in df.iterrows():
        for subset, score in row.items():
            radar_data.append({
                model_column_name: model,
                'Subset': subset,
                'Score': score,
            })
    radar_df = pd.DataFrame(radar_data)

    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    fig = px.line_polar(
        radar_df,
        r='Score',
        theta='Subset',
        color=model_column_name,
        line_close=True,
        color_discrete_sequence=colors,
        title='Model Performance Radar Chart',
    )

    # No fill, and alternate solid/dashed lines so overlapping models stay
    # distinguishable.
    for i, trace in enumerate(fig.data):
        trace.update(
            fill=None,
            line=dict(width=2, dash='solid' if i % 2 == 0 else 'dash'),
        )

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],
                showline=True,
                linewidth=1,
                gridcolor='lightgrey',
            ),
            angularaxis=dict(
                showline=True,
                linewidth=1,
                gridcolor='lightgrey',
            ),
        ),
        showlegend=True,
        legend=dict(
            yanchor="middle",
            y=0.5,
            xanchor="left",
            x=1.2,  # legend to the right of the chart
            bgcolor="rgba(255, 255, 255, 0.8)",
            bordercolor="lightgrey",
            borderwidth=1,
        ),
        margin=dict(r=150),  # leave room for the legend
        paper_bgcolor='white',
    )
    return fig


def _heatmap(df_transposed, model_column_name):
    """Heatmap of subsets (rows) by models (columns) with value labels."""
    fig = px.imshow(
        df_transposed,
        labels=dict(x=model_column_name, y="Subset", color="Score"),
        color_continuous_scale="RdYlBu_r",
        aspect="auto",
        title="Model Performance Heatmap",
    )
    fig.update_layout(
        title=dict(
            text='Model Performance Distribution Across Subsets',
            x=0.5,
            y=0.95,
            xanchor='center',
            yanchor='top',
            font=dict(size=14),
        ),
        xaxis=dict(
            title=model_column_name,
            tickangle=45,
            tickfont=dict(size=10),
            side="bottom",
        ),
        yaxis=dict(
            title="Subset",
            tickfont=dict(size=10),
        ),
        coloraxis=dict(
            colorbar=dict(
                # Nested title attributes: the flat `titleside`/`titlefont`
                # spellings are rejected by newer Plotly releases.
                title=dict(text="Score", side="right", font=dict(size=12)),
                tickfont=dict(size=10),
                len=0.9,
            )
        ),
        margin=dict(t=80, r=100, b=80, l=80),
        paper_bgcolor='white',
        plot_bgcolor='white',
    )

    # Print the value inside every cell.
    annotations = []
    for row_idx, row_values in enumerate(df_transposed.to_numpy()):
        for col_idx, value in enumerate(row_values):
            annotations.append(
                dict(
                    x=col_idx,
                    y=row_idx,
                    text=f"{value:.1f}",
                    showarrow=False,
                    font=dict(size=9, color='black'),
                )
            )
    fig.update_layout(annotations=annotations)
    return fig


def plot_visualization(task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio,
                       division_method_radio, plot_type):
    """Plot the score table for the current selection.

    Args:
        plot_type: "Line Chart", "Radar Chart" or "Heatmap".

    Returns:
        The Plotly figure.

    Raises:
        gr.Error: if the perspective does not apply to the task type or the
            plot type is unknown.
    """
    df = _load_scores(task_type_radio, dataset_radio, num_parts_dropdown,
                      perspective_radio, division_method_radio)
    model_column_name = _model_column(df, task_type_radio)
    df = df.set_index(model_column_name)
    df_transposed = df.T  # rows: subsets, columns: models

    if plot_type == "Line Chart":
        return _line_chart(df_transposed)
    if plot_type == "Radar Chart":
        return _radar_chart(df, model_column_name)
    if plot_type == "Heatmap":
        return _heatmap(df_transposed, model_column_name)
    raise gr.Error(f"Unknown plot type: {plot_type!r}")


def plot_recommendation_sankey(task_type_radio, dataset_radio, num_parts_dropdown,
                               perspective_radio, division_method_radio):
    """Show the recommended models as a Sankey diagram.

    The recommendation JSON is expected to look like
    ``{"scenario": [{model: reason}, ...], ...}``. The diagram has four
    levels: root -> scenario -> model family -> specific model.

    Raises:
        gr.Error: if the recommendation file could not be loaded.
    """
    _, recommendation_result = load_analysis_report(
        task_type_radio, dataset_radio, num_parts_dropdown, perspective_radio,
        division_method_radio,
    )
    if not isinstance(recommendation_result, dict):
        # load_analysis_report hands back its error message as a string.
        raise gr.Error(str(recommendation_result))

    root_label = 'Model Recommendation'
    color_scale = "RdYlBu_r"
    # Horizontal position of each level: root, scenario, family, model.
    level_x = (0, 0.33, 0.66, 1.0)

    node_labels = [root_label]
    node_x = [level_x[0]]
    customdata = ["Root node"]
    node_indices = {root_label: 0}
    sources, targets, values = [], [], []

    def add_node(key, label, hover, level):
        """Register a node and return its index."""
        node_indices[key] = len(node_labels)
        node_labels.append(label)
        customdata.append(hover)
        # Keep x aligned with the node's own index: nodes of different
        # levels are created interleaved, not level by level.
        node_x.append(level_x[level])
        return node_indices[key]

    def add_link(source, target, value):
        sources.append(source)
        targets.append(target)
        values.append(value)

    for scenario, model_dicts in recommendation_result.items():
        # Label the scenario with its first three words only.
        words = scenario.split()
        scenario_label = " ".join(words[:3]) + ("..." if len(words) > 3 else "")
        scenario_idx = add_node(f"scenario_{scenario}", scenario_label, scenario, 1)
        add_link(0, scenario_idx, 10)

        for model_dict in model_dicts:
            for model, reason in model_dict.items():
                # Model family, e.g. "GPT-4" -> "GPT".
                family = model.split('-')[0].split('_')[0]

                family_key = f"family_{family}"
                if family_key not in node_indices:
                    add_node(family_key, family, f"Model family: {family}", 2)
                add_link(scenario_idx, node_indices[family_key], 8)

                model_key = f"model_{model}"
                if model_key not in node_indices:
                    # <br> is the line break Plotly understands in hover text.
                    add_node(model_key, model, f"{model}<br>{reason}", 3)
                add_link(node_indices[family_key], node_indices[model_key], 5)

    # One colour per node: a fixed colour for the root, the rest sampled
    # evenly from the colour scale.
    node_colors = ["#2c7bb6"]
    node_colors += sample_colorscale(
        color_scale,
        [n / (len(node_labels) - 1) for n in range(1, len(node_labels))],
    )

    fig = go.Figure(go.Sankey(
        arrangement="perpendicular",
        node=dict(
            pad=20,
            thickness=15,
            line=dict(color="rgba(0,0,0,0.3)", width=0.2),
            label=node_labels,
            color=node_colors,
            hovertemplate='%{label}',
            x=node_x,
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values,
            color="rgba(180,180,180,0.4)",
            # A link shows the description of the node it points to.
            customdata=[customdata[t] for t in targets],
            hovertemplate='%{customdata}',
        ),
    ))
    fig.update_layout(
        title_text="Model Recommendation Flow",
        font_size=11,
        height=700,
        margin=dict(t=80, l=20, r=20, b=20),
    )
    return fig


### Gradio UI ###

# Custom CSS
custom_css = """
"""
SERVER_URL = "http://10.249.190.53:8000/upload"


def _dataset_choices_update(task):
    """Return a Radio update offering the datasets of *task*.

    The first dataset is selected so the radio never keeps a value that is
    no longer among its choices. Unknown tasks leave the radio untouched.
    """
    choices = DATASETS_BY_TASK.get(task)
    if not choices:
        return gr.update()
    return gr.update(choices=choices, value=choices[0])


def update_dataset(task):
    """Switch the dataset choices of the (currently disabled) upload tab."""
    return _dataset_choices_update(task)


def update_dataset_options(task_type):
    """Switch the dataset choices when the task type changes."""
    return _dataset_choices_update(task_type)


def update_perspective_options(task, dataset):
    """Switch the perspective choices when the dataset (or task) changes.

    The first perspective is selected so a stale value cannot survive.
    Combinations that match no rule leave the radio untouched.
    """
    if dataset == "MBPP":
        choices = [
            "I - Num of Tokens in Problem Desc",
            "III - Complexity of Reference Code",
            "IV - Problem Types",
        ]
    elif dataset == "HumanEval":
        choices = [
            "I - Num of Tokens in Problem Desc",
            "II - Num of Lines in Problem Desc",
            "III - Complexity of Reference Code",
            "IV - Problem Types",
        ]
    elif task == TASK_API_RECOMMENDATION:
        choices = [
            "I - Num of Tokens in Problem Desc(Eval Metric:Recall)",
            "II - Num of Tokens in Problem Desc(Eval Metric:F1)",
            "III - Num of Lines in Problem Desc(Eval Metric:Recall)",
            "IV - Num of Lines in Problem Desc(Eval Metric:f1)",
        ]
    elif task in (TASK_CODE_COMPLETION, TASK_TEST_GENERATION):
        choices = [
            "I - Num of Tokens in Problem Desc(Eval Metric:ES)",
            "II - Num of Lines in Problem Desc(Eval Metric:ES)",
        ]
    else:
        return gr.update()
    return gr.update(choices=choices, value=choices[0])


# Build the interface
with gr.Blocks(css=custom_css) as iface:
    gr.HTML("""

Multi-view Code LLM Leaderboard

Multi-view Leaderboard: Towards Evaluating the Code Intelligence of LLMs From Multiple Views

""")
    with gr.Row():
        # Configuration
        with gr.Column(scale=1):
            task_type_radio = gr.Radio(
                ["Code Generation", "Code Completion", "Api Recommendation", "Test Generation"],
                label="Select Task Type",
                value="Code Generation"
            )
            dataset_radio = gr.Radio(
                ["HumanEval", "MBPP", 'ComplexCodeEval'],
                label="Select a dataset",
                value="HumanEval"
            )
            num_parts_slider = gr.Slider(
                minimum=3,
                maximum=8,
                step=1,
                label="Choose the Number of Subsets",
                value=3
            )
            # A single radio replaces the former group of checkboxes.
            perspective_radio = gr.Radio(
                ["I - Num of Tokens in Problem Desc",
                 "II - Num of Lines in Problem Desc",
                 "III - Complexity of Reference Code",
                 "IV - Problem Types"],
                label="Choose Perspective",
                value="I - Num of Tokens in Problem Desc"
            )
            # One division-method radio shared by all perspectives.
            division_method_radio = gr.Radio(
                ["Equal Frequency Partitioning", "Equal Interval Partitioning"],
                label="Choose the Division Method",
                visible=True
            )
            confirm_btn = gr.Button("Confirm", variant="primary")

        # Main display
        with gr.Column(scale=2):
            with gr.Tabs():
                # Table
                with gr.TabItem("Ranking Table"):
                    data_table = gr.Dataframe(
                        headers=["Model", "Score", "Analysis"],
                        interactive=True,
                        datatype="html",  # cells are rendered as HTML
                        render=True,
                    )

                # Visualization
                with gr.TabItem("Visualization"):
                    plot_type = gr.Radio(
                        choices=["Line Chart", "Radar Chart", "Heatmap"],
                        label="Select Plot Type",
                        value="Line Chart"
                    )
                    chart = gr.Plot()

                # Model recommendation
                with gr.TabItem("Model selection suggestions"):
                    with gr.Column():
                        # gr.Markdown("🎯 Model Recommendation")
                        recommendation_plot = gr.Plot()

                # NOTE: the "Upload inference result" tab is disabled for now.
                # It is kept here, commented out, because `update_dataset`,
                # `send_to_backend` and `SERVER_URL` exist only for it.
                # with gr.TabItem("Upload inference result"):
                #     with gr.Column(scale=1):
                #         upload_file = gr.File(
                #             label="📤 上传JSON结果文件",
                #             type="filepath",
                #             file_types=[".json"],
                #             height=100
                #         )
                #         task_choice = gr.Radio(
                #             label="Select Evaluation Task",
                #             choices=["Code Generation", "Code Completion", "Api Recommendation", "Test Generation"],
                #             value="Code Generation"
                #         )
                #         dataset_choice = gr.Radio(
                #             ["HumanEval", "MBPP"],
                #             label="Select a dataset",
                #             value="HumanEval",
                #             interactive=True
                #         )
                #         task_choice.change(fn=update_dataset, inputs=task_choice, outputs=dataset_choice)
                #     with gr.Column(scale=2):
                #         status = gr.Textbox(
                #             label="📊 处理状态",
                #             interactive=False,
                #             lines=4,
                #             placeholder="等待文件上传..."
                #         )
                #         with gr.Row():
                #             submit_btn = gr.Button("🚀 提交到服务器", variant="primary")
                #             clear_btn = gr.Button("🧹 清除所有")
                #     submit_btn.click(
                #         fn=send_to_backend,
                #         inputs=[upload_file, task_choice, dataset_choice],
                #         outputs=status
                #     )
                #     clear_btn.click(
                #         fn=lambda: (None, "Code Generation", "HumanEval", "状态已重置"),
                #         inputs=None,
                #         outputs=[upload_file, task_choice, dataset_choice, status]
                #     )

    # Inputs shared by every data-loading callback.
    selection_inputs = [task_type_radio, dataset_radio, num_parts_slider,
                        perspective_radio, division_method_radio]

    # Dataset change -> refresh the perspectives.
    dataset_radio.change(
        fn=update_perspective_options,
        inputs=[task_type_radio, dataset_radio],
        outputs=perspective_radio
    )

    # Confirm -> table, then chart, then recommendation diagram.
    confirm_btn.click(
        fn=on_confirm,
        inputs=selection_inputs,
        outputs=data_table
    ).then(
        fn=plot_visualization,
        inputs=selection_inputs + [plot_type],
        outputs=chart
    ).then(
        fn=plot_recommendation_sankey,
        inputs=selection_inputs,
        outputs=[recommendation_plot]
    )

    plot_type.change(
        fn=plot_visualization,
        inputs=selection_inputs + [plot_type],
        outputs=chart
    )

    # Task change -> refresh the datasets, then the perspectives for the
    # newly selected dataset.
    task_type_radio.change(
        fn=update_dataset_options,
        inputs=task_type_radio,
        outputs=dataset_radio
    ).then(
        fn=update_perspective_options,
        inputs=[task_type_radio, dataset_radio],
        outputs=perspective_radio
    )

# Start the app
iface.launch()