import gradio as gr
import requests
from lxml import html
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Seconds to wait on each HTTP request. Without an explicit timeout,
# `requests` may block forever on an unresponsive server.
_REQUEST_TIMEOUT = 10

# Maximum number of characters of prettified HTML kept per page for debugging.
_DEBUG_HTML_LIMIT = 5000


def _extract_text(tree, xpath, meta_property, not_found):
    """Return the text at *xpath*, else the matching meta content, else *not_found*.

    Args:
        tree: Parsed lxml document to query.
        xpath: XPath expected to select the element holding the text.
        meta_property: Value of the ``property`` attribute of the ``<meta>``
            tag used as a fallback (e.g. ``"og:title"``).
        not_found: String returned when neither lookup yields anything.

    Returns:
        The stripped text of the first XPath match, or the stripped
        ``content`` of the first matching meta tag, or *not_found*.
    """
    elements = tree.xpath(xpath)
    if elements:
        # Join the text of the element and all of its descendants.
        return ''.join(elements[0].itertext()).strip()

    meta_content = tree.xpath(f'//meta[@property="{meta_property}"]/@content')
    if meta_content:
        return meta_content[0].strip()

    return not_found


def scrape_naver_blog(url):
    """Fetch a blog page and extract the post title and body from its iframe.

    The function requests *url*, looks for an ``<iframe id="mainFrame">`` in
    the response, requests the iframe's ``src`` and extracts the title and
    body from that document using fixed XPaths, falling back to the
    ``og:title`` / ``og:description`` meta tags.

    Args:
        url: Address of the blog page. Leading/trailing whitespace is ignored.

    Returns:
        A ``(result, debug_info)`` tuple of strings. ``result`` is either the
        formatted title/body text or an error message. ``debug_info`` holds
        the first part of the prettified HTML of the pages fetched so far
        (empty when nothing was fetched).
    """
    # Reject empty input up front instead of surfacing an opaque
    # "Invalid URL" exception message from the HTTP library.
    if not url or not url.strip():
        return "Error: URL을 입력해주세요.", ""
    url = url.strip()

    # Kept outside the try block so that whatever was collected before a
    # failure can still be shown to the user.
    debug_info = ""

    try:
        # Request headers (the original note says Naver may check User-Agent).
        headers = {
            'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                           'AppleWebKit/537.36 (KHTML, like Gecko) '
                           'Chrome/58.0.3029.110 Safari/537.3')
        }

        # The context manager closes the session's connection pool on exit.
        with requests.Session() as session:
            # 1. Request the main page.
            response = session.get(url, headers=headers,
                                   timeout=_REQUEST_TIMEOUT)
            if response.status_code != 200:
                return f"Error: Unable to fetch the main page. Status code: {response.status_code}", ""

            # Parse once; the same soup serves the debug dump and the
            # iframe lookup.
            soup_main = BeautifulSoup(response.text, 'html.parser')
            debug_info = soup_main.prettify()[:_DEBUG_HTML_LIMIT]

            iframe = soup_main.find('iframe', id='mainFrame')
            if not iframe:
                return "Error: iframe을 찾을 수 없습니다.", debug_info

            iframe_src = iframe.get('src')
            if not iframe_src:
                return "Error: iframe의 src 속성을 찾을 수 없습니다.", debug_info

            # Resolve a relative iframe src against the page URL.
            iframe_url = urljoin(url, iframe_src)

            # 2. Request the iframe page.
            iframe_response = session.get(iframe_url, headers=headers,
                                          timeout=_REQUEST_TIMEOUT)

        if iframe_response.status_code != 200:
            return f"Error: Unable to fetch the iframe page. Status code: {iframe_response.status_code}", debug_info

        # Append the first part of the iframe HTML to the debug output.
        soup_debug_iframe = BeautifulSoup(iframe_response.text, 'html.parser')
        debug_info += ("\n\n=== iframe HTML 일부 ===\n"
                       + soup_debug_iframe.prettify()[:_DEBUG_HTML_LIMIT])

        # 3. Parse the iframe HTML with lxml (raw bytes are passed, as in
        # the original, so lxml handles the decoding).
        tree = html.fromstring(iframe_response.content)

        # User-supplied absolute XPaths for the title and the body.
        title_xpath = '/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div/div/div[10]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div/div[1]/div/div/div[2]/div/p'
        body_xpath = '/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div/div/div[10]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div/div[2]/div[1]/div/div/div/p[1]'

        title = _extract_text(tree, title_xpath, 'og:title',
                              "제목을 찾을 수 없습니다.")
        body = _extract_text(tree, body_xpath, 'og:description',
                             "내용을 찾을 수 없습니다.")

        # Output format.
        output_title = f"제목 :\n{title}"
        output_content = f"내용 :\n{body}"

        # Final debug text shown next to the result.
        final_debug_info = f"디버깅 정보 (메인 페이지 HTML 일부):\n{debug_info}"

        return f"{output_title}\n\n{output_content}", final_debug_info

    except Exception as e:
        # Top-level boundary for the UI: report the failure as text and
        # return any debug HTML gathered before it happened.
        return f"An error occurred: {str(e)}", debug_info

# Gradio interface setup (module level, outside the scraping function).
title = "네이버 블로그 스크래퍼"
description = "네이버 블로그 URL을 입력하면 제목과 내용을 스크래핑합니다."

# Components are created in the same order as before: the URL input first,
# then the two output boxes (result text, debug text).
_url_input = gr.Textbox(
    label="블로그 URL",
    placeholder="https://blog.naver.com/...",
    lines=2,
)
_output_boxes = [
    gr.Textbox(label="결과"),
    gr.Textbox(label="디버깅 정보"),
]

# scrape_naver_blog returns a 2-tuple, one value per output box.
iface = gr.Interface(
    fn=scrape_naver_blog,
    inputs=_url_input,
    outputs=_output_boxes,
    title=title,
    description=description,
    allow_flagging="never",
)

# Start the web UI as soon as the script is executed.
iface.launch()