import gradio as gr
import pandas as pd
import os
import tempfile
import chardet
def detect_encoding(file_path):
    """Detect the character encoding of a file.

    The whole file is read as raw bytes and handed to ``chardet.detect``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The value stored under the ``'encoding'`` key of chardet's result.
    """
    with open(file_path, 'rb') as raw_file:
        payload = raw_file.read()
    detection = chardet.detect(payload)
    return detection['encoding']
def _read_csv_any_encoding(path):
    """Read one CSV file into a DataFrame, trying several encodings in turn.

    The encoding reported by ``detect_encoding`` is tried first, followed by
    common Korean and Latin-1 fallbacks.

    Returns:
        The parsed DataFrame, or None when no candidate encoding could decode
        the file. Non-encoding errors from pandas propagate to the caller.
    """
    candidates = [detect_encoding(path), 'cp949', 'euc-kr', 'latin1', 'ISO-8859-1']
    for encoding in candidates:
        try:
            return pd.read_csv(path, encoding=encoding)
        except (UnicodeDecodeError, LookupError):
            # UnicodeDecodeError: the guess was wrong for this file.
            # LookupError: the detector named an encoding Python has no codec for.
            continue
    return None


def merge_csv_files(files):
    """Merge several uploaded CSV files into one CSV file.

    Args:
        files: Uploaded CSV files. Each item is either an object exposing its
            path through a ``name`` attribute or a plain path string. At most
            30 files are accepted.

    Returns:
        A ``(output_path, message)`` tuple. ``output_path`` is the path of a
        temporary CSV file holding the merged rows, or None on failure.
        ``message`` is a user-facing status text.
    """
    if not files:
        return None, "파일이 업로드되지 않았습니다."
    if len(files) > 30:
        return None, "최대 30개 파일까지만 병합 가능합니다."

    try:
        # Read every file into its own DataFrame.
        dataframes = []
        for file in files:
            # Accept both file-like upload objects and plain path strings.
            path = getattr(file, "name", file)
            df = _read_csv_any_encoding(path)
            if df is None:
                return None, f"파일 '{os.path.basename(path)}'의 인코딩을 결정할 수 없습니다."
            dataframes.append(df)

        if not dataframes:
            return None, "병합할 데이터가 없습니다."

        merged_df = pd.concat(dataframes, ignore_index=True)

        # Reserve a temp-file path, then write after the handle is closed so
        # the file is not opened twice at the same time.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as tmp:
            output_path = tmp.name

        # UTF-8 with BOM so that Excel recognises the encoding.
        merged_df.to_csv(output_path, index=False, encoding='utf-8-sig')
        return output_path, f"{len(files)}개 파일이 성공적으로 병합되었습니다. Excel에서 열 때 UTF-8 인코딩으로 열어주세요."
    except Exception as e:
        # UI boundary: report any failure as a status message instead of raising.
        return None, f"오류 발생: {str(e)}"
# Gradio interface setup.
# NOTE(review): the source had lost its indentation; the nesting below (one row
# holding an upload column and a controls column) is reconstructed — confirm.
with gr.Blocks(title="CSV 파일 병합기") as app:
    gr.Markdown("# CSV 파일 병합기")
    gr.Markdown("최대 30개의 CSV 파일을 하나로 병합합니다.")

    with gr.Row():
        with gr.Column():
            input_files = gr.File(
                file_count="multiple",
                label="CSV 파일 업로드 (최대 30개)",
            )
        with gr.Column():
            merge_button = gr.Button("파일 병합하기")
            output_file = gr.File(label="병합된 CSV")
            status = gr.Textbox(label="상태")

    # Clicking the button passes the uploaded files to merge_csv_files and
    # routes its two results to the output file and status widgets.
    merge_button.click(
        fn=merge_csv_files,
        inputs=[input_files],
        outputs=[output_file, status],
    )

# Launch the app only when the file is run as a script.
if __name__ == "__main__":
    app.launch()