import gradio as gr
import requests
from bs4 import BeautifulSoup
import random
import logging
from urllib.parse import quote

# Logging configuration for debugging
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

def scrape_naver_blog(keyword):
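    """Scrape the Naver blog search results page for `keyword` and return up to three blog links."""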
    logging.debug(f"์ž…๋ ฅ๋œ ๊ฒ€์ƒ‰์–ด: {keyword}")
    base_url = "https://search.naver.com/search.naver?ssc=tab.blog.all&sm=tab_jum&query="
    # URL-encode the keyword so spaces and non-ASCII characters are handled safely
    target_url = base_url + quote(keyword)
    logging.debug(f"Request URL: {target_url}")
    
    try:
        headers = {
            "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                           "AppleWebKit/537.36 (KHTML, like Gecko) "
                           "Chrome/90.0.4430.93 Safari/537.36")
        }
        response = requests.get(target_url, headers=headers, timeout=10)
        logging.debug(f"Response status code: {response.status_code}")
        if response.status_code != 200:
            logging.error("Failed to load the page.")
            return ("Failed to load the page.", "", "")
    except Exception as e:
        logging.exception("Exception occurred during the request:")
        return (f"Exception occurred during the request: {e}", "", "")

    soup = BeautifulSoup(response.text, "html.parser")
    
    # Extract Naver blog links (links containing "https://blog.naver.com")
    links = set()
    for a in soup.find_all("a"):
        # If the tag has a 'cru' attribute, use its value
        if a.has_attr("cru"):
            link = a.get("cru")
            if "blog.naver.com" in link:
                links.add(link)
        # Otherwise, if it has an href attribute that starts with the Naver blog URL, use it
        elif a.has_attr("href"):
            link = a.get("href")
            if link.startswith("https://blog.naver.com"):
                links.add(link)
    
    links = list(links)
    logging.debug(f"์ถ”์ถœ๋œ ์ „์ฒด ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ ์ˆ˜: {len(links)}")
    
    if not links:
        return ("๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ๋ฅผ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.", "", "")
    
    # Randomly select 3 of the extracted links (select all if fewer than 3)
    sample_size = 3 if len(links) >= 3 else len(links)
    random_links = random.sample(links, sample_size)
    logging.debug(f"๋žœ๋ค์œผ๋กœ ์„ ํƒ๋œ ๋งํฌ: {random_links}")
    
    # Pad with empty strings if fewer than 3 links were selected
    while len(random_links) < 3:
        random_links.append("")
    
    return tuple(random_links)

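# Gradio interface: one keyword textbox as input, three textboxes for the scraped links as output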
iface = gr.Interface(
    fn=scrape_naver_blog,
    inputs=gr.Textbox(label="Search keyword", placeholder="Enter a search keyword."),
    outputs=[
        gr.Textbox(label="Link 1"),
        gr.Textbox(label="Link 2"),
        gr.Textbox(label="Link 3")
    ],
    title="Naver Blog Link Scraping",
    description=("Enter a search keyword and press Run: the app scrapes Naver blog links "
                 "from the Naver search results page and shows 3 randomly selected links, "
                 "one per output box.")
)

if __name__ == "__main__":
    # Enable debug mode
    iface.launch(debug=True)