import pandas as pd
import re
from collections import Counter
import gradio as gr

# Matches every character that is not an ASCII letter, a digit, a Hangul
# syllable or whitespace; such characters are stripped before tokenising.
_NON_KEYWORD_CHARS = re.compile(r"[^a-zA-Z0-9가-힣\s]")

# Zero-based position of the spreadsheet column holding the text (column D).
_TEXT_COLUMN_INDEX = 3


def _cell_to_text(value):
    """Return a spreadsheet cell value as text suitable for tokenising."""
    if isinstance(value, str):
        return value
    # Whole numbers can arrive as floats (e.g. 5.0) when the column also has
    # blanks; render them without ".0" so stripping punctuation does not
    # turn them into "50".
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def process_excel(file):
    """Count keyword frequencies in column D of an Excel sheet.

    The sheet is read with pandas (openpyxl engine, default header handling).
    Every non-empty cell of the fourth column is converted to text, stripped
    of characters other than ASCII letters, digits, Hangul syllables and
    whitespace, and split on whitespace. The resulting keyword counts are
    written to ``result.xlsx`` with the columns "키워드" and "빈도", ordered
    by descending frequency (ties keep first-seen order).

    Args:
        file: Path or file-like object accepted by ``pandas.read_excel``.

    Returns:
        The path of the written workbook, ``"result.xlsx"``.

    Raises:
        IndexError: If the sheet has fewer than four columns.
    """
    # Read the Excel file.
    df = pd.read_excel(file, engine="openpyxl")

    # Fail with an explicit message instead of an opaque positional error.
    column_count = df.shape[1]
    if column_count <= _TEXT_COLUMN_INDEX:
        raise IndexError(
            f"expected at least {_TEXT_COLUMN_INDEX + 1} columns "
            f"(text is read from column D), but the sheet has {column_count}"
        )

    # Take the non-empty cells of column D.
    cells = df.iloc[:, _TEXT_COLUMN_INDEX].dropna().tolist()

    # Extract, clean and count keywords. Cells are converted to text first
    # because numeric or date cells would otherwise make the regex call fail.
    keyword_counts = Counter(
        word
        for cell in cells
        for word in _NON_KEYWORD_CHARS.sub("", _cell_to_text(cell)).split()
    )

    # most_common() sorts by descending count and keeps ties in first-seen
    # order, so the output is deterministic.
    result_df = pd.DataFrame(
        keyword_counts.most_common(), columns=["키워드", "빈도"]
    )

    # Save as an Excel file. The path is fixed and relative to the current
    # working directory, so each call overwrites the previous result.
    output_file = "result.xlsx"
    result_df.to_excel(output_file, index=False, engine="openpyxl")
    return output_file

# Gradio callback
def gradio_interface(file):
    """Analyse the uploaded Excel file and return the path of the result file.

    Delegates directly to ``process_excel`` and hands its return value back
    unchanged.
    """
    return process_excel(file)

# Gradio app definition: one file upload in, one downloadable file out.
# The Interface is built as a plain object rather than re-entered as a
# context manager.
app = gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(label="엑셀 파일 업로드 (.xlsx)"),
    outputs=gr.File(label="결과 파일 다운로드 (.xlsx)"),
    title="키워드 추출 및 빈도 분석",
)

# Start the server only when the file is executed as a script, so importing
# this module (e.g. to reuse process_excel) does not launch the app.
if __name__ == "__main__":
    app.launch()