File size: 2,403 Bytes
40bb14a
5e200b9
 
 
83afe02
5e200b9
 
 
 
83afe02
 
5e200b9
83afe02
5e200b9
 
 
83afe02
5e200b9
 
 
 
83afe02
 
5e200b9
 
 
 
 
83afe02
5e200b9
 
 
 
83afe02
 
5e200b9
83afe02
5e200b9
 
 
 
 
83afe02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7f08b1
40bb14a
f7f08b1
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gradio as gr
import pandas as pd
import re

# Check 1: every `$` token must have a space immediately before and after it.
def check_spacing_around_dollar(df):
    """Report cells whose ``$`` tokens are not set off by spaces.

    Every cell is converted with ``str()`` and scanned with a pattern that
    matches a ``$`` together with at most one adjacent whitespace character
    (or, failing that, a run of non-space characters ending in ``$``).  A
    match is reported unless the character directly before the match and the
    character directly after it are both a space.

    The start and the end of the text count as "no space there".  This is the
    result the original indexing gave whenever it did not misbehave; it only
    changes the two broken cases: a match at index 0 used to be compared
    against the *last* character of the text (negative index), and a match
    ending at the end of the text used to raise ``IndexError``.

    NOTE(review): the pattern itself consumes one neighbouring whitespace
    character, so the spaces tested here lie outside the match -- confirm
    this is the intended rule.

    Args:
        df: pandas DataFrame whose cells are checked.

    Returns:
        A list with one message per offending match (a cell with several
        offending matches appears several times).  The row number in the
        message is the DataFrame index label plus one.
    """
    # Compiled once instead of being looked up for every cell.
    pattern = re.compile(r'(\s?\$\S*?\s?|\s?\S*?\$)')
    errors = []
    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start, end = match.start(), match.end()
                # Bounds-safe neighbours: '' stands for "nothing there", which
                # never equals ' ' and never wraps around or raises.
                before = text[start - 1] if start > 0 else ''
                after = text[end] if end < len(text) else ''
                if not (before == ' ' and after == ' '):
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
    return errors

# Check 2: a `$...$` run without inner whitespace must be preceded by a space.
def check_spacing_between_dollars(df):
    """Report ``$...$`` runs that are not preceded by a space.

    Every cell is converted with ``str()`` and scanned for a ``$`` followed
    by one or more non-whitespace characters (as few as possible) that is
    directly followed by another ``$``.  A match is reported when neither the
    character before it nor the character after it is a space.

    Because of the lookahead, the character right after a match is always
    the closing ``$``, so the "after" test is always true and the outcome
    depends only on the character before the opening ``$``.  The start of the
    text counts as "no space there"; the original indexed ``text[-1]`` in
    that case and so judged the match by the last character of the cell.

    NOTE(review): the stated goal is spacing *between* ``$`` symbols, but the
    pattern only matches runs with no inner whitespace -- confirm the
    intended rule.

    Args:
        df: pandas DataFrame whose cells are checked.

    Returns:
        A list with one message per offending match.  The row number in the
        message is the DataFrame index label plus one.
    """
    # Compiled once instead of being looked up for every cell.
    pattern = re.compile(r'\$\S+?(?=\$)')
    errors = []
    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start = match.start()
                # '' stands for "nothing before the match"; avoids the
                # negative-index wraparound to the last character.
                before = text[start - 1] if start > 0 else ''
                # Always in range: the lookahead guarantees a `$` here.
                after = text[match.end()]
                if after != ' ' and before != ' ':
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
    return errors

# Check 3: every run of digits must sit directly between two `$` characters.
def check_numbers_surrounded_by_dollar(df):
    """Report digit runs that are not directly enclosed in ``$...$``.

    Every cell is converted with ``str()`` and scanned for word-bounded runs
    of digits.  A run is reported unless the character directly before it and
    the character directly after it are both ``$``.  A decimal such as
    ``12.5`` is seen as two separate runs (``12`` and ``5``).

    A run at the very start or end of the text has no neighbour on that side
    and is therefore reported.  The original indexed past the ends instead:
    ``"5$"`` passed because ``text[-1]`` wrapped around to the trailing
    ``$``, and ``"$5"`` raised ``IndexError``.

    Args:
        df: pandas DataFrame whose cells are checked.

    Returns:
        A list with one message per offending digit run.  The row number in
        the message is the DataFrame index label plus one.
    """
    # Compiled once instead of being looked up for every cell.
    pattern = re.compile(r'\b\d+\b')
    errors = []
    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start, end = match.start(), match.end()
                # '' stands for "no neighbour"; it never equals '$'.
                before = text[start - 1] if start > 0 else ''
                after = text[end] if end < len(text) else ''
                if not (before == '$' and after == '$'):
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
    return errors

# Load the uploaded file and run all three checks on it.
def process_file(file):
    """Read an uploaded CSV/XLSX file and return the results of every check.

    Args:
        file: The upload handed over by the Gradio ``File`` input: either an
            object exposing the temp-file path as ``.name`` or a plain path
            string.  ``None`` (nothing uploaded) is tolerated.

    Returns:
        A dict that is always JSON-serialisable.  For a supported file it
        maps each check's label to its list of error messages.  Otherwise it
        holds a single entry carrying the "only CSV and XLSX are supported"
        message.  The original returned that message as a bare string, which
        a JSON output component tries to parse as JSON text and fails on.
    """
    unsupported = {"錯誤": "只支持 CSV 和 XLSX 檔案"}

    # Accept both upload objects (``.name``) and plain path strings.
    path = file if isinstance(file, str) else getattr(file, "name", None)
    if path is None:
        return unsupported
    path = str(path)

    # Compare the extension case-insensitively so "DATA.CSV" is accepted too.
    lowered = path.lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith('.xlsx'):
        df = pd.read_excel(path)
    else:
        return unsupported

    # Run the checks in a fixed order; the keys are the labels shown to users.
    return {
        "第一個檢查": check_spacing_around_dollar(df),
        "第二個檢查": check_spacing_between_dollars(df),
        "第三個檢查": check_numbers_surrounded_by_dollar(df),
    }

# Gradio interface: one uploaded file in, a JSON report of the checks out.
_upload_input = gr.File(label="上傳 CSV 或 XLSX 檔案")
_report_output = gr.JSON(label="檢查結果")

iface = gr.Interface(
    fn=process_file,
    inputs=_upload_input,
    outputs=_report_output,
    title="校對系統",
    description="這個系統會檢查 CSV 或 XLSX 檔案中的格式錯誤,包括 $ 符號和數字的空格錯誤。",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()