Spaces:
Sleeping
Sleeping
app.py
Browse files
app.py
CHANGED
|
@@ -293,14 +293,14 @@ def clean_dataset(file_path, question_column, model_choice, temperature, max_sam
|
|
| 293 |
try:
|
| 294 |
check_api_key()
|
| 295 |
except ValueError as e:
|
| 296 |
-
return str(e), None,
|
| 297 |
|
| 298 |
progress(0.05, desc="📁 读取数据文件...")
|
| 299 |
df = pd.read_parquet(file_path)
|
| 300 |
|
| 301 |
if question_column not in df.columns:
|
| 302 |
available_columns = ", ".join(df.columns.tolist())
|
| 303 |
-
return f"❌ 列名 '{question_column}' 不存在!\n可用列名: {available_columns}", None,
|
| 304 |
|
| 305 |
data_ori = df[question_column].tolist()[:int(max_samples)]
|
| 306 |
total = len(data_ori)
|
|
@@ -414,19 +414,14 @@ def clean_dataset(file_path, question_column, model_choice, temperature, max_sam
|
|
| 414 |
# 生成带颜色的对比HTML
|
| 415 |
preview_html = create_comparison_html(data_ori[:5], lst_final[:5])
|
| 416 |
|
| 417 |
-
preview_df = pd.DataFrame({
|
| 418 |
-
'原始问题': [str(x)[:100] for x in data_ori[:5]],
|
| 419 |
-
'清洗后问题': [str(x)[:100] for x in lst_final[:5]]
|
| 420 |
-
})
|
| 421 |
-
|
| 422 |
progress(1.0, desc="✅ 完成!")
|
| 423 |
|
| 424 |
-
return log_text, output_path,
|
| 425 |
|
| 426 |
except Exception as e:
|
| 427 |
import traceback
|
| 428 |
error_detail = traceback.format_exc()
|
| 429 |
-
return f"❌ 处理出错: {str(e)}\n\n详细错误:\n{error_detail}", None,
|
| 430 |
|
| 431 |
# ======================== 文本内容 ========================
|
| 432 |
ABOUT_TEXT = """
|
|
@@ -610,7 +605,7 @@ with demo:
|
|
| 610 |
|
| 611 |
download_file = gr.File(label="📥 下载去噪后的数据集")
|
| 612 |
|
| 613 |
-
#
|
| 614 |
gr.Markdown("### 🎨 清洗效果对比预览")
|
| 615 |
gr.Markdown("""
|
| 616 |
**颜色说明**:
|
|
@@ -619,17 +614,12 @@ with demo:
|
|
| 619 |
- ⚫ 黑色 = 未修改的正确部分
|
| 620 |
""")
|
| 621 |
|
| 622 |
-
colored_preview = gr.HTML(label="
|
| 623 |
-
|
| 624 |
-
preview_df = gr.Dataframe(
|
| 625 |
-
label="🔍 原始对比表格",
|
| 626 |
-
wrap=True
|
| 627 |
-
)
|
| 628 |
|
| 629 |
clean_btn.click(
|
| 630 |
fn=clean_dataset,
|
| 631 |
inputs=[file_input, question_column, model_choice, temperature, max_samples],
|
| 632 |
-
outputs=[output_text, download_file,
|
| 633 |
)
|
| 634 |
|
| 635 |
if __name__ == "__main__":
|
|
|
|
| 293 |
try:
|
| 294 |
check_api_key()
|
| 295 |
except ValueError as e:
|
| 296 |
+
return str(e), None, ""
|
| 297 |
|
| 298 |
progress(0.05, desc="📁 读取数据文件...")
|
| 299 |
df = pd.read_parquet(file_path)
|
| 300 |
|
| 301 |
if question_column not in df.columns:
|
| 302 |
available_columns = ", ".join(df.columns.tolist())
|
| 303 |
+
return f"❌ 列名 '{question_column}' 不存在!\n可用列名: {available_columns}", None, ""
|
| 304 |
|
| 305 |
data_ori = df[question_column].tolist()[:int(max_samples)]
|
| 306 |
total = len(data_ori)
|
|
|
|
| 414 |
# 生成带颜色的对比HTML
|
| 415 |
preview_html = create_comparison_html(data_ori[:5], lst_final[:5])
|
| 416 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
progress(1.0, desc="✅ 完成!")
|
| 418 |
|
| 419 |
+
return log_text, output_path, preview_html
|
| 420 |
|
| 421 |
except Exception as e:
|
| 422 |
import traceback
|
| 423 |
error_detail = traceback.format_exc()
|
| 424 |
+
return f"❌ 处理出错: {str(e)}\n\n详细错误:\n{error_detail}", None, ""
|
| 425 |
|
| 426 |
# ======================== 文本内容 ========================
|
| 427 |
ABOUT_TEXT = """
|
|
|
|
| 605 |
|
| 606 |
download_file = gr.File(label="📥 下载去噪后的数据集")
|
| 607 |
|
| 608 |
+
# 颜色对比预览区域
|
| 609 |
gr.Markdown("### 🎨 清洗效果对比预览")
|
| 610 |
gr.Markdown("""
|
| 611 |
**颜色说明**:
|
|
|
|
| 614 |
- ⚫ 黑色 = 未修改的正确部分
|
| 615 |
""")
|
| 616 |
|
| 617 |
+
colored_preview = gr.HTML(label="")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 618 |
|
| 619 |
clean_btn.click(
|
| 620 |
fn=clean_dataset,
|
| 621 |
inputs=[file_input, question_column, model_choice, temperature, max_samples],
|
| 622 |
+
outputs=[output_text, download_file, colored_preview]
|
| 623 |
)
|
| 624 |
|
| 625 |
if __name__ == "__main__":
|