# Source: Hugging Face Space "fantos/sum-csv" (Space status when captured: Sleeping).
# File size when captured: 3,251 bytes.

import gradio as gr
import pandas as pd
import os
import tempfile
import chardet

def detect_encoding(file_path):
    """Guess the text encoding of the file at ``file_path``.

    The entire file is read as raw bytes and handed to ``chardet.detect``;
    the ``'encoding'`` entry of the detection result is returned as-is.
    """
    with open(file_path, mode='rb') as raw_file:
        raw_bytes = raw_file.read()
    detection = chardet.detect(raw_bytes)
    return detection['encoding']

# Encodings tried, in order, when the detected encoding cannot decode a file.
# 'utf-8-sig' is a strict decoder, so it is safe to try first; 'latin1' maps
# every byte to a character and therefore never fails, so it must stay last.
_FALLBACK_ENCODINGS = ('utf-8-sig', 'cp949', 'euc-kr', 'latin1')


def _read_csv_with_fallback(path, detected_encoding):
    """Read ``path`` with the detected encoding, then with the fallbacks.

    Returns the parsed DataFrame, or ``None`` if no candidate encoding could
    decode the file. Errors other than decoding failures propagate.
    """
    import codecs  # local import so this block does not depend on file-level imports

    for enc in (detected_encoding, *_FALLBACK_ENCODINGS):
        if enc is not None:
            try:
                codecs.lookup(enc)
            except LookupError:
                # The detector named a codec this Python build does not
                # provide; skip it instead of aborting the whole merge.
                continue
        try:
            # encoding=None lets pandas use its default encoding.
            return pd.read_csv(path, encoding=enc)
        except UnicodeDecodeError:
            continue
    return None


def merge_csv_files(files):
    """Merge several uploaded CSV files into a single CSV file.

    Args:
        files: The uploaded CSV files. Each item is either an object whose
            ``name`` attribute holds the file path (as used by the Gradio
            upload component in this file) or a plain path string.

    Returns:
        A ``(output_path, status_message)`` tuple. ``output_path`` is the
        path of a temporary CSV file (UTF-8 with BOM) holding the rows of
        all inputs concatenated in upload order, or ``None`` when nothing
        was produced. ``status_message`` is a user-facing message.

    No exception escapes this function: any failure is reported through the
    status message so the UI can display it.
    """
    if not files:
        return None, "파일이 업로드되지 않았습니다."

    if len(files) > 30:
        return None, "최대 30개 파일까지만 병합 가능합니다."

    try:
        # Read every file into a DataFrame.
        dataframes = []
        for file in files:
            # Upload objects expose the path as ``.name``; a plain string has
            # no such attribute and is used as the path directly.
            path = getattr(file, "name", file)
            df = _read_csv_with_fallback(path, detect_encoding(path))
            if df is None:
                return None, f"파일 '{os.path.basename(path)}'의 인코딩을 결정할 수 없습니다."
            dataframes.append(df)

        # Defensive guard: pd.concat raises on an empty list.
        if not dataframes:
            return None, "병합할 데이터가 없습니다."

        merged_df = pd.concat(dataframes, ignore_index=True)

        # Reserve a temp-file name; the file must outlive this function so the
        # caller can serve it, hence delete=False.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as tmp:
            output_path = tmp.name

        try:
            # Save as UTF-8 with BOM so Excel recognises the encoding.
            merged_df.to_csv(output_path, index=False, encoding='utf-8-sig')
        except Exception:
            # Do not leave a half-written temp file behind.
            try:
                os.remove(output_path)
            except OSError:
                pass  # best-effort cleanup; the write error below is what matters
            raise

        return output_path, f"{len(files)}개 파일이 성공적으로 병합되었습니다. Excel에서 열 때 UTF-8 인코딩으로 열어주세요."

    except Exception as e:
        # Top-level UI boundary: turn any failure into a status message.
        return None, f"오류 발생: {str(e)}"

# Gradio interface setup: one row with two columns. The first column holds
# the upload widget; the second holds the merge button, the merged-file
# download and the status text box.
with gr.Blocks(title="CSV 파일 병합기") as app:
    gr.Markdown("# CSV 파일 병합기")
    gr.Markdown("최대 30개의 CSV 파일을 하나로 병합합니다.")
    
    with gr.Row():
        with gr.Column():
            # Multi-file upload; its value is passed to merge_csv_files as `files`.
            input_files = gr.File(
                file_count="multiple",
                label="CSV 파일 업로드 (최대 30개)"
            )
        
        with gr.Column():
            merge_button = gr.Button("파일 병합하기")
            output_file = gr.File(label="병합된 CSV")
            status = gr.Textbox(label="상태")
    
    # Clicking the button runs merge_csv_files; its (path, message) return
    # value fills the output file component and the status box, in that order.
    merge_button.click(
        fn=merge_csv_files,
        inputs=[input_files],
        outputs=[output_file, status]
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()