import logging
import re
import tempfile
from collections import Counter

import gradio as gr
import mecab
import pandas as pd
| |
|
| | |
# Configure the root logger to let DEBUG-and-above records through
# (a no-op if the root logger already has handlers).
logging.basicConfig(level=logging.DEBUG)
# Logger named after this module; used for all debug tracing below.
logger = logging.getLogger(__name__)
| |
|
# Matches every character that is not a precomposed Hangul syllable
# (U+AC00 .. U+D7A3). The range is spelled with escapes so that it cannot
# be corrupted when this file is re-encoded.
_NON_HANGUL_RE = re.compile(r"[^\uAC00-\uD7A3]")

# Headers of the result table: "word" and "frequency" (in Korean).
_RESULT_COLUMNS = ["단어", "빈도수"]


def analyze_text(text: str):
    """Count how often each "NN"-tagged morpheme occurs in *text*.

    Every character other than a Hangul syllable (whitespace, digits,
    Latin letters, punctuation, ...) is stripped first. The remainder is
    tagged with MeCab and only tokens whose POS tag starts with ``"NN"``
    are counted (presumably the noun tags of the MeCab-ko tag set --
    confirm against the dictionary in use).

    Args:
        text: Raw user input. ``None`` is treated like an empty string.

    Returns:
        A ``(df, excel_path)`` tuple. ``df`` is a DataFrame with a word
        column and a frequency column, sorted by descending frequency
        (ties keep first-seen order). ``excel_path`` is the path of a
        temporary ``.xlsx`` export of ``df`` which is *not* deleted
        automatically, or ``""`` when the input contains no Hangul.
    """
    logger.debug("원본 텍스트: %s", text)

    # Keep Hangul only; this also removes all whitespace.
    filtered_text = _NON_HANGUL_RE.sub("", text or "")
    logger.debug("필터링된 텍스트 (한국어만, 공백 제거): %s", filtered_text)

    if not filtered_text:
        logger.debug("유효한 한국어 텍스트가 없음.")
        # Nothing to analyse: empty table and no file to offer.
        return pd.DataFrame(columns=_RESULT_COLUMNS), ""

    mecab_instance = mecab.MeCab()
    tokens = mecab_instance.pos(filtered_text)
    logger.debug("형태소 분석 결과: %s", tokens)

    freq = Counter()
    for word, pos in tokens:
        # Skip empty / whitespace-only surface forms.
        if not (word and word.strip()):
            continue
        # Only tags beginning with "NN" are counted.
        if not pos.startswith("NN"):
            continue
        freq[word] += 1
        logger.debug("단어: %s, 품사: %s, 현재 빈도: %d", word, pos, freq[word])

    # most_common() sorts by descending count and keeps first-seen order
    # among equal counts.
    sorted_freq = freq.most_common()
    logger.debug("내림차순 정렬된 단어 빈도: %s", sorted_freq)

    df = pd.DataFrame(sorted_freq, columns=_RESULT_COLUMNS)
    logger.debug("결과 DataFrame 생성됨, shape: %s", df.shape)

    # Reserve a unique path and close the handle right away so it cannot
    # leak if the export fails; delete=False keeps the file on disk for
    # the caller to serve.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        excel_path = temp_file.name
    df.to_excel(excel_path, index=False, engine="openpyxl")
    logger.debug("Excel 파일 생성됨: %s", excel_path)

    return df, excel_path
| |
|
| | |
# Gradio UI: a text box and a button on top, the frequency table and the
# Excel download below. Each widget sits in its own row.
with gr.Blocks() as demo:
    # Page title ("Morphological analysis space").
    gr.Markdown("# 형태소 분석 스페이스")

    with gr.Row():
        # Free-text input ("Text input" / "Enter the text to analyse.").
        text_input = gr.Textbox(
            label="텍스트 입력",
            lines=5,
            placeholder="분석할 텍스트를 입력하세요.",
        )

    with gr.Row():
        # Trigger button ("Run analysis").
        analyze_button = gr.Button("분석 실행")

    with gr.Row():
        # Result table ("Analysis result (word and frequency)").
        output_table = gr.Dataframe(label="분석 결과 (단어 및 빈도수)")

    with gr.Row():
        # Download slot for the generated workbook ("Excel download").
        output_file = gr.File(label="Excel 다운로드")

    # analyze_text returns (DataFrame, file path); the two values are
    # routed to the table and the file component in that order.
    analyze_button.click(
        fn=analyze_text,
        inputs=text_input,
        outputs=[output_table, output_file],
    )
| |
|
# Start the Gradio server only when the file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    demo.launch()
| |
|