import logging
import os
import re
import tempfile
from collections import Counter

import gradio as gr
import mecab  # python‑mecab‑ko 라이브러리 사용
import pandas as pd

# Logging setup for debugging: configure the root logger at DEBUG level at
# import time, then create this module's own logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Matches every character that is not a precomposed Hangul syllable.
_NON_HANGUL_RE = re.compile(r'[^가-힣]')


def _write_excel(df):
    """Write *df* to a new temporary ``.xlsx`` file and return its path.

    The file is created with ``delete=False`` so it outlives this call and can
    be offered for download; removing it afterwards is the caller's concern.

    Raises:
        Exception: whatever ``DataFrame.to_excel`` raises; the temporary file
            is removed (best effort) before the error propagates.
    """
    # Only reserve a unique path here and close our own handle right away, so
    # pandas is the sole writer and no file descriptor is left open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        excel_path = temp_file.name

    try:
        df.to_excel(excel_path, index=False, engine='openpyxl')
    except Exception:
        # Best-effort cleanup so a failed export does not leave an orphaned
        # temp file behind; the original error is always re-raised.
        try:
            os.remove(excel_path)
        except OSError:
            logger.debug("Could not remove temp file: %s", excel_path)
        raise
    return excel_path


def analyze_text(text: str):
    """Count noun frequencies in Korean text and export them to Excel.

    Every character other than a Hangul syllable is removed, the remainder is
    tagged with MeCab, and tokens whose part-of-speech tag starts with ``NN``
    are counted.

    Args:
        text: Raw input text. ``None`` is treated like an empty string.

    Returns:
        A ``(df, excel_path)`` tuple. ``df`` is a DataFrame with the columns
        ``"단어"`` (word) and ``"빈도수"`` (count), sorted by descending count
        with ties kept in first-seen order. ``excel_path`` is the path of a
        temporary ``.xlsx`` file holding the same table, or ``None`` when the
        input contains no Hangul (so that no file is offered for download).
    """
    logger.debug("원본 텍스트: %s", text)

    # 1. Keep Hangul syllables only (whitespace, Latin letters, digits and
    #    symbols are all dropped). `text or ''` guards against a None input.
    # NOTE(review): dropping whitespace joins neighbouring words before
    # tokenisation, which may change how MeCab segments them; kept because the
    # original behaviour is explicitly documented as intentional.
    filtered_text = _NON_HANGUL_RE.sub('', text or '')
    logger.debug("필터링된 텍스트 (한국어만, 공백 제거): %s", filtered_text)

    if not filtered_text:
        logger.debug("유효한 한국어 텍스트가 없음.")
        # No file was produced, so report "no file" as None rather than an
        # empty string that a consumer could mistake for a real path.
        return pd.DataFrame(columns=["단어", "빈도수"]), None

    # 2. Morphological analysis; only noun tags (NNG, NNP, ...) are counted.
    mecab_instance = mecab.MeCab()
    tokens = mecab_instance.pos(filtered_text)
    logger.debug("형태소 분석 결과: %s", tokens)

    freq = Counter()
    for word, pos in tokens:
        if not word or not word.strip():
            continue
        if not pos.startswith("NN"):
            continue
        freq[word] += 1
        logger.debug("단어: %s, 품사: %s, 현재 빈도: %d", word, pos, freq[word])

    # 3. Descending by count; equal counts stay in first-seen order.
    sorted_freq = freq.most_common()
    logger.debug("내림차순 정렬된 단어 빈도: %s", sorted_freq)

    # 4. Tabular result for display.
    df = pd.DataFrame(sorted_freq, columns=["단어", "빈도수"])
    logger.debug("결과 DataFrame 생성됨, shape: %s", df.shape)

    # 5. Same table as a downloadable Excel file.
    excel_path = _write_excel(df)
    logger.debug("Excel 파일 생성됨: %s", excel_path)

    return df, excel_path

# Gradio UI: a title followed by four rows (input box, run button, result
# table, Excel download), with the button wired to analyze_text.
with gr.Blocks() as demo:
    gr.Markdown("# 형태소 분석 스페이스")

    # Row 1: free-text input.
    with gr.Row():
        text_input = gr.Textbox(
            label="텍스트 입력",
            lines=5,
            placeholder="분석할 텍스트를 입력하세요.",
        )

    # Row 2: trigger.
    with gr.Row():
        analyze_button = gr.Button("분석 실행")

    # Row 3: word/frequency table.
    with gr.Row():
        output_table = gr.Dataframe(
            label="분석 결과 (단어 및 빈도수)",
        )

    # Row 4: generated Excel file.
    with gr.Row():
        output_file = gr.File(
            label="Excel 다운로드",
        )

    # analyze_text returns (table, file path), matching the two outputs.
    analyze_button.click(
        analyze_text,
        text_input,
        [output_table, output_file],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()