Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import gradio as gr
|
| 3 |
+
|
| 4 |
+
def compare_csv_files(file1="fish-speech-1.5.csv", file2="fish-speech-1.4.csv"):
    """Compare per-sentence error rates stored in two result CSV files.

    Both files are inner-joined on their ``SourceText`` column; columns coming
    from ``file1`` get the suffix ``_1`` and columns from ``file2`` get ``_2``.
    For every joined row the differences ``WordErrorRate_1 - WordErrorRate_2``
    and ``CharacterErrorRate_1 - CharacterErrorRate_2`` are added.

    Args:
        file1: Path of the first CSV file (left side of the comparison).
        file2: Path of the second CSV file (right side of the comparison).

    Returns:
        An HTML table (``str``) with the side-by-side values and the two
        ``*_Diff`` columns, or a plain message string when a file cannot be
        read, a required column is missing, or the files share no
        ``SourceText`` value.
    """
    key = "SourceText"
    metrics = ["WordErrorRate", "CharacterErrorRate"]
    per_file = ["UUID", "WhisperText", *metrics]

    # Load and validate both inputs up front so that a bad file is reported
    # as a message instead of surfacing as an unhandled exception.
    frames = []
    for path in (file1, file2):
        try:
            frame = pd.read_csv(path)
        except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
            return f"无法读取文件:{path}"
        missing = [col for col in [key, *per_file] if col not in frame.columns]
        if missing:
            return f"文件 {path} 缺少必要的列:{', '.join(missing)}"
        frames.append(frame)

    # Inner join on SourceText; overlapping column names receive the suffixes.
    merged_df = pd.merge(frames[0], frames[1], on=key, suffixes=("_1", "_2"))

    if merged_df.empty:
        return "两个文件中没有相同的 SourceText,请检查数据。"

    # Difference is "first file minus second file" for each metric.
    for metric in metrics:
        merged_df[f"{metric}_Diff"] = merged_df[f"{metric}_1"] - merged_df[f"{metric}_2"]

    # Output order: key, all file-1 columns, all file-2 columns, then the diffs.
    columns = [
        key,
        *(f"{col}_1" for col in per_file),
        *(f"{col}_2" for col in per_file),
        *(f"{metric}_Diff" for metric in metrics),
    ]
    return merged_df[columns].to_html(index=False)
|
| 32 |
+
|
| 33 |
+
# Gradio interface: no inputs; the HTML returned by compare_csv_files is
# rendered as the single output.
demo = gr.Interface(
    fn=compare_csv_files,
    inputs=None,
    outputs="html",
    title="CSV 文件对比工具",
    description="自动加载目录下的 fish-speech-1.5.csv 和 fish-speech-1.4.csv,对比它们的 WordErrorRate 和 CharacterErrorRate 差异。",
)
demo.launch()
|