Spaces:

ChingCL
/

20122013

Sleeping

App Files Files Community

ChingCL committed on Aug 16, 2024

Commit

5e200b9

verified ·

1 Parent(s): 4d3db91

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -1

app.py CHANGED Viewed

@@ -1,4 +1,90 @@
-import gradio as gr
 import pandas as pd
 import re
 import json

+import gradio as gr
+import pandas as pd
+import re
+import json
# Extract the text portion of a JSON-formatted cell.
def extract_text_from_json(text):
    """Return the text carried by a JSON-encoded cell, or the cell unchanged.

    The cell is parsed as JSON. When it decodes to an object, the first
    string found at ``question.content`` or at top-level ``content`` is
    returned. In every other case (not valid JSON, not an object, fields
    missing, or fields that are not strings) the original ``text`` is
    returned untouched, so callers always get a string back and can feed
    it straight into ``re``.

    Args:
        text: Cell contents as a string.

    Returns:
        The extracted string, or ``text`` itself when nothing usable is
        found.
    """
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        return text
    if not isinstance(data, dict):
        return text

    # "question" may be absent, a bare string, or an object without
    # "content"; indexing it blindly would raise KeyError/TypeError.
    question = data.get('question')
    if isinstance(question, dict) and isinstance(question.get('content'), str):
        return question['content']

    # Only hand back real strings: numbers, lists or null would break the
    # regex scans performed by the callers.
    content = data.get('content')
    if isinstance(content, str):
        return content
    return text
# Check 1: spaces before and after "$" tokens.
def check_spacing_around_dollar(df):
    """Report cells where a "$" token touches a neighbouring non-space character.

    Every cell is converted with ``str`` and passed through
    ``extract_text_from_json``. The text is then scanned for runs of
    non-whitespace characters that either start with ``$`` or end with
    ``$``. A match is reported when the character directly before it, or
    directly after it, exists and is anything other than an ASCII space.

    One entry is appended per offending match, so a single cell can show
    up more than once. The reported row number is the row's index label
    plus one.

    Args:
        df: DataFrame whose cells are inspected.

    Returns:
        A list of message strings, one per offending match.
    """
    dollar_token = re.compile(r'(\$\S+|\S+\$)')
    problems = []
    for row_label, record in df.iterrows():
        for column in df.columns:
            cell_text = extract_text_from_json(str(record[column]))
            length = len(cell_text)
            for hit in dollar_token.finditer(cell_text):
                begin, finish = hit.span()
                glued_before = begin > 0 and cell_text[begin - 1] != ' '
                glued_after = finish < length and cell_text[finish] != ' '
                if glued_before or glued_after:
                    problems.append(f"行 {row_label+1} 列 '{column}': '{cell_text}'")
    return problems
# Check 2: spacing in front of a "$...$" opener.
def check_spacing_between_dollars(df):
    """Report cells where a "$...$" opener is glued to the preceding character.

    Every cell is converted with ``str`` and passed through
    ``extract_text_from_json``. The text is scanned for a ``$`` followed by
    the shortest run of non-whitespace characters that is itself followed
    by another ``$``. A match is reported when the character directly in
    front of the opening ``$`` exists and is not an ASCII space.

    A match at the very start of the text has no preceding character and
    is never reported. One entry is appended per offending match; the
    reported row number is the row's index label plus one.

    Args:
        df: DataFrame whose cells are inspected.

    Returns:
        A list of message strings, one per offending match.
    """
    opener = re.compile(r'\$\S+?(?=\$)')
    errors = []
    for i, row in df.iterrows():
        for col in df.columns:
            text = extract_text_from_json(str(row[col]))
            for match in opener.finditer(text):
                start = match.start()
                # The lookahead guarantees text[match.end()] is "$", so a
                # "followed by a space" test can never succeed; only the
                # preceding character decides. start == 0 is excluded
                # explicitly because text[-1] would silently wrap around to
                # the last character of the cell.
                if start > 0 and text[start - 1] != ' ':
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
    return errors
# Check 3: numbers must sit directly between two "$" signs.
def check_numbers_surrounded_by_dollar(df):
    """Report cells containing a digit run that is not wrapped as ``$<digits>$``.

    Every cell is converted with ``str`` and passed through
    ``extract_text_from_json``. Each run of digits delimited by word
    boundaries is accepted only when the character immediately before it
    is ``$`` and the character immediately after it is ``$``; otherwise
    the cell is reported.

    One entry is appended per offending digit run, so a single cell can
    show up more than once. The reported row number is the row's index
    label plus one.

    Args:
        df: DataFrame whose cells are inspected.

    Returns:
        A list of message strings, one per offending digit run.
    """
    digit_run = re.compile(r'\b\d+\b')
    problems = []
    for row_label, record in df.iterrows():
        for column in df.columns:
            cell_text = extract_text_from_json(str(record[column]))
            length = len(cell_text)
            for hit in digit_run.finditer(cell_text):
                begin, finish = hit.span()
                opened = begin > 0 and cell_text[begin - 1] == '$'
                closed = finish < length and cell_text[finish] == '$'
                if opened and closed:
                    continue
                problems.append(f"行 {row_label+1} 列 '{column}': '{cell_text}'")
    return problems
# Load the uploaded file and run all three checks.
def process_file(file):
    """Read an uploaded CSV/XLSX file and run the three format checks on it.

    Args:
        file: The upload handed over by the UI. Either an object exposing
            the file path as ``.name``, a plain path string, or ``None``
            when nothing was uploaded.

    Returns:
        A dict mapping each check's label to its list of error messages,
        or the "unsupported file" message string when there is no file or
        its extension is neither ``.csv`` nor ``.xlsx``.
    """
    unsupported = "只支持 CSV 和 XLSX 檔案"
    # Submitting the form without an upload passes None; answer with the
    # usual message instead of failing on ``None.name``.
    if file is None:
        return unsupported

    # Accept both upload objects (path in ``.name``) and bare path strings.
    path = getattr(file, 'name', file)
    # Compare extensions case-insensitively so "DATA.CSV" is accepted too.
    lowered = str(path).lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith('.xlsx'):
        df = pd.read_excel(path)
    else:
        return unsupported

    # Run the checks.
    errors1 = check_spacing_around_dollar(df)
    errors2 = check_spacing_between_dollars(df)
    errors3 = check_numbers_surrounded_by_dollar(df)
    return {
        "第一個檢查": errors1,
        "第二個檢查": errors2,
        "第三個檢查": errors3,
    }
# Gradio interface: one file upload in, the JSON report of all checks out.
iface = gr.Interface(
    process_file,
    gr.File(label="上傳 CSV 或 XLSX 檔案"),
    gr.JSON(label="檢查結果"),
    title="校對系統",
    description="這個系統會檢查 CSV 或 XLSX 檔案中的格式錯誤，包括 $ 符號和數字的空格錯誤。",
)

# Start the web UI only when the module is executed as a script.
if __name__ == "__main__":
    iface.launch()
 import pandas as pd
 import re
 import json