import gradio as gr
import requests
from bs4 import BeautifulSoup
import random
import logging
from urllib.parse import quote

# Logging configuration for debugging
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

def scrape_naver_blog(keyword):
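    """Scrape the Naver blog search results page for `keyword` and return up to three blog links."""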
    logging.debug(f"์ž…๋ ฅ๋œ ๊ฒ€์ƒ‰์–ด: {keyword}")
    base_url = "https://search.naver.com/search.naver?ssc=tab.blog.all&sm=tab_jum&query="
    # URL-encode the keyword so spaces and non-ASCII characters are handled safely
    target_url = base_url + quote(keyword)
    logging.debug(f"Request URL: {target_url}")
    
    try:
        headers = {
            "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                           "AppleWebKit/537.36 (KHTML, like Gecko) "
                           "Chrome/90.0.4430.93 Safari/537.36")
        }
        response = requests.get(target_url, headers=headers, timeout=10)
        logging.debug(f"Response status code: {response.status_code}")
        if response.status_code != 200:
            logging.error("Failed to load the page.")
            return ("Failed to load the page.", "", "")
    except Exception as e:
        logging.exception("Exception occurred during the request:")
        return (f"Exception occurred during the request: {e}", "", "")

    soup = BeautifulSoup(response.text, "html.parser")
    
    # Extract Naver blog links (links containing "https://blog.naver.com")
    links = set()
    for a in soup.find_all("a"):
        # If the tag has a 'cru' attribute, use its value
        if a.has_attr("cru"):
            link = a.get("cru")
            if "blog.naver.com" in link:
                links.add(link)
        # Otherwise, if it has an href attribute that starts with the Naver blog URL, use it
        elif a.has_attr("href"):
            link = a.get("href")
            if link.startswith("https://blog.naver.com"):
                links.add(link)
    
    links = list(links)
    logging.debug(f"์ถ”์ถœ๋œ ์ „์ฒด ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ ์ˆ˜: {len(links)}")
    
    if not links:
        return ("๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ๋ฅผ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.", "", "")
    
    # Randomly select 3 of the extracted links (select all if fewer than 3)
    sample_size = 3 if len(links) >= 3 else len(links)
    random_links = random.sample(links, sample_size)
    logging.debug(f"๋žœ๋ค์œผ๋กœ ์„ ํƒ๋œ ๋งํฌ: {random_links}")
    
    # Pad with empty strings if fewer than 3 links were selected
    while len(random_links) < 3:
        random_links.append("")
    
    return tuple(random_links)

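# Gradio interface: one keyword textbox as input, three textboxes for the scraped links as output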
iface = gr.Interface(
    fn=scrape_naver_blog,
    inputs=gr.Textbox(label="Search keyword", placeholder="Enter a search keyword."),
    outputs=[
        gr.Textbox(label="Link 1"),
        gr.Textbox(label="Link 2"),
        gr.Textbox(label="Link 3")
    ],
    title="Naver Blog Link Scraping",
    description=("Enter a search keyword and press Run: the app scrapes Naver blog links "
                 "from the Naver search results page and shows 3 randomly selected links, "
                 "one per output box.")
)

if __name__ == "__main__":
    # Enable debug mode
    iface.launch(debug=True)