import gradio as gr
import requests
from bs4 import BeautifulSoup
import random
import logging
# Logging configuration for debugging.
# Root logger is set to DEBUG so the logging.debug() traces emitted by
# scrape_naver_blog are shown; each record is prefixed with timestamp and level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)
def _collect_blog_links(soup):
    """Return the de-duplicated Naver blog URLs found in the anchors of *soup*.

    For each ``<a>`` tag the ``cru`` attribute takes precedence when present
    (kept if it contains "blog.naver.com"); otherwise ``href`` is kept only
    when it starts with "https://blog.naver.com".
    """
    links = set()
    for a in soup.find_all("a"):
        if a.has_attr("cru"):
            link = a.get("cru")
            if "blog.naver.com" in link:
                links.add(link)
        elif a.has_attr("href"):
            link = a.get("href")
            if link.startswith("https://blog.naver.com"):
                links.add(link)
    return list(links)


def scrape_naver_blog(keyword):
    """Search Naver's blog tab for *keyword* and return three random blog links.

    Args:
        keyword: Search term entered by the user. ``None`` is treated as an
            empty string.

    Returns:
        A 3-tuple of strings. On success it holds up to three randomly chosen
        blog URLs, padded with "" when fewer than three were found. On failure
        (request error, non-200 response, or no links) it is
        ``(message, "", "")``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import quote_plus

    if keyword is None:
        keyword = ""
    logging.debug(f"입력된 검색어: {keyword}")

    base_url = "https://search.naver.com/search.naver?ssc=tab.blog.all&sm=tab_jum&query="
    # Percent-encode the keyword: characters such as '&', '#', '+' or '%'
    # would otherwise be interpreted as URL syntax and corrupt the query.
    target_url = base_url + quote_plus(keyword)
    logging.debug(f"접속 URL: {target_url}")

    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/90.0.4430.93 Safari/537.36"
        )
    }
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(target_url, headers=headers, timeout=10)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any request failure
        # is logged and reported in the first output box instead of raising.
        logging.exception("요청 중 예외 발생:")
        return (f"요청 중 예외 발생: {e}", "", "")

    logging.debug(f"응답 상태 코드: {response.status_code}")
    if response.status_code != 200:
        logging.error("페이지를 불러오는데 실패하였습니다.")
        return ("페이지를 불러오는데 실패하였습니다.", "", "")

    soup = BeautifulSoup(response.text, "html.parser")
    links = _collect_blog_links(soup)
    logging.debug(f"추출된 전체 네이버 블로그 링크 수: {len(links)}")
    if not links:
        return ("네이버 블로그 링크를 찾지 못했습니다.", "", "")

    # Pick three links at random, or all of them when fewer than three exist.
    random_links = random.sample(links, min(3, len(links)))
    logging.debug(f"랜덤으로 선택된 링크: {random_links}")

    # Pad with empty strings so the result always fills the three output boxes.
    random_links += [""] * (3 - len(random_links))
    return tuple(random_links)
# Gradio UI: one search-term textbox in, three link textboxes out (one per
# element of the 3-tuple returned by scrape_naver_blog).
iface = gr.Interface(
    fn=scrape_naver_blog,
    inputs=gr.Textbox(label="검색어 입력", placeholder="검색어를 입력하세요."),
    outputs=[
        gr.Textbox(label="링크 1"),
        gr.Textbox(label="링크 2"),
        gr.Textbox(label="링크 3"),
    ],
    title="네이버 블로그 링크 스크래핑",
    description=(
        "검색어를 입력 후 실행 버튼을 누르면 네이버 검색 결과 페이지에서 "
        "네이버 블로그 링크를 스크래핑하여 랜덤으로 3개의 링크를 각각의 출력창에 표시합니다."
    ),
)
if __name__ == "__main__":
    # Enable debug mode when the file is run as a script.
    iface.launch(debug=True)