|
|
import pandas as pd |
|
|
import re |
|
|
from collections import Counter |
|
|
import gradio as gr |
|
|
|
|
|
def process_excel(file, column=3, output_file="result.xlsx"):
    """Count keyword frequencies in one column of an Excel sheet.

    Reads ``file`` with ``pandas.read_excel``, takes the non-empty cells of
    the column at position ``column``, removes every character that is not
    an ASCII letter, a digit, a Hangul syllable or whitespace, splits what
    is left on whitespace and writes a two-column keyword/frequency table,
    ordered by descending frequency, to ``output_file``.

    Args:
        file: Path or file-like object accepted by ``pandas.read_excel``.
        column: Zero-based position of the column to analyse (default 3,
            i.e. the fourth column).
        output_file: Path of the workbook to write.

    Returns:
        ``output_file``, the path of the workbook that was written.

    Raises:
        IndexError: If the sheet has no column at position ``column``.
    """
    df = pd.read_excel(file, engine="openpyxl")

    n_columns = df.shape[1]
    if not -n_columns <= column < n_columns:
        raise IndexError(
            f"column position {column} is out of range for a sheet "
            f"with {n_columns} column(s)"
        )
    cells = df.iloc[:, column].dropna()

    # Keep ASCII letters, digits, Hangul syllables (가-힣) and whitespace;
    # everything else (punctuation, symbols, ...) is stripped before splitting.
    noise = re.compile(r"[^a-zA-Z0-9가-힣\s]")

    keyword_counts = Counter()
    for cell in cells:
        # Numeric columns that contain blanks are read back as floats;
        # turn 12.0 into 12 so the stripped "." does not yield "120".
        if isinstance(cell, float) and cell.is_integer():
            cell = int(cell)
        # str() lets numeric/date cells through instead of raising TypeError.
        keyword_counts.update(noise.sub("", str(cell)).split())

    # most_common() sorts by descending count and keeps first-seen order
    # among ties, so the output is deterministic.
    # Column headers: "키워드" = keyword, "빈도" = frequency.
    result_df = pd.DataFrame(
        keyword_counts.most_common(), columns=["키워드", "빈도"]
    )
    result_df.to_excel(output_file, index=False, engine="openpyxl")
    return output_file
|
|
|
|
|
|
|
|
def gradio_interface(file):
    """Gradio callback: analyse the uploaded workbook.

    Passes ``file`` straight to ``process_excel`` and returns whatever
    path that function reports for the generated result workbook.
    """
    return process_excel(file)
|
|
|
|
|
|
|
|
# Web UI: one Excel upload in, one downloadable result workbook out.
# The Interface is built first and launched afterwards, as in the Gradio
# documentation, rather than launching inside a ``with`` block.
app = gr.Interface(
    fn=gradio_interface,
    # Label: "Upload Excel file (.xlsx)"
    inputs=gr.File(label="엑셀 파일 업로드 (.xlsx)"),
    # Label: "Download result file (.xlsx)"
    outputs=gr.File(label="결과 파일 다운로드 (.xlsx)"),
    # Title: "Keyword extraction and frequency analysis"
    title="키워드 추출 및 빈도 분석",
)

app.launch()
|
|
|