N_B2 / app.py
Kims12's picture
Update app.py
7c27cc0 verified
import logging
import random
from urllib.parse import quote_plus

import gradio as gr
import requests
from bs4 import BeautifulSoup
# Logging setup for debugging: emit DEBUG-and-above records with a timestamp,
# the level name, and the message.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
def scrape_naver_blog(keyword):
    """Search Naver's blog tab for *keyword* and return up to three blog links.

    The search-result page is fetched with a browser-like User-Agent, every
    ``<a>`` tag is scanned for a Naver blog URL, and up to three of the
    distinct URLs found are picked at random.

    Args:
        keyword: Search term entered by the user. Surrounding whitespace is
            ignored and the term is percent-encoded before it is placed in
            the query string.

    Returns:
        A 3-tuple of strings (one per output textbox). On success the items
        are blog URLs, padded with ``""`` when fewer than three were found.
        On failure (blank keyword, request error, non-200 status, or no blog
        links on the page) the first item is a message for the user and the
        other two items are ``""``.
    """
    logging.debug(f"์ž…๋ ฅ๋œ ๊ฒ€์ƒ‰์–ด: {keyword}")

    # A blank (or missing) keyword cannot produce a meaningful search, so
    # return early instead of issuing a request. The message is the same text
    # used as the placeholder of the keyword input box.
    query = "" if keyword is None else str(keyword).strip()
    if not query:
        return ("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”.", "", "")

    base_url = "https://search.naver.com/search.naver?ssc=tab.blog.all&sm=tab_jum&query="
    # Percent-encode the keyword so characters such as '&', '#', '+' or spaces
    # cannot truncate or alter the query string.
    target_url = base_url + quote_plus(query)
    logging.debug(f"์ ‘์† URL: {target_url}")

    headers = {
        "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/90.0.4430.93 Safari/537.36")
    }
    try:
        # Without a timeout requests waits indefinitely on a stalled server,
        # which would leave the UI call hanging.
        response = requests.get(target_url, headers=headers, timeout=10)
    except requests.RequestException as e:
        logging.exception("์š”์ฒญ ์ค‘ ์˜ˆ์™ธ ๋ฐœ์ƒ:")
        return (f"์š”์ฒญ ์ค‘ ์˜ˆ์™ธ ๋ฐœ์ƒ: {e}", "", "")

    logging.debug(f"์‘๋‹ต ์ƒํƒœ ์ฝ”๋“œ: {response.status_code}")
    if response.status_code != 200:
        logging.error("ํŽ˜์ด์ง€๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๋Š”๋ฐ ์‹คํŒจํ•˜์˜€์Šต๋‹ˆ๋‹ค.")
        return ("ํŽ˜์ด์ง€๋ฅผ ๋ถˆ๋Ÿฌ์˜ค๋Š”๋ฐ ์‹คํŒจํ•˜์˜€์Šต๋‹ˆ๋‹ค.", "", "")

    soup = BeautifulSoup(response.text, "html.parser")

    # Collect Naver blog links; a set removes duplicates.
    links = set()
    for anchor in soup.find_all("a"):
        if anchor.has_attr("cru"):
            # When a 'cru' attribute is present its value is used, and the
            # tag's href is not consulted even if 'cru' is not a blog URL.
            link = anchor.get("cru")
            is_blog_link = "blog.naver.com" in link
        elif anchor.has_attr("href"):
            # Otherwise fall back to href, accepted only when it starts with
            # the Naver blog origin.
            link = anchor.get("href")
            is_blog_link = link.startswith("https://blog.naver.com")
        else:
            continue
        if is_blog_link:
            links.add(link)

    links = list(links)
    logging.debug(f"์ถ”์ถœ๋œ ์ „์ฒด ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ ์ˆ˜: {len(links)}")
    if not links:
        return ("๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ๋ฅผ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.", "", "")

    # Pick three links at random (all of them when fewer than three exist).
    random_links = random.sample(links, min(3, len(links)))
    logging.debug(f"๋žœ๋ค์œผ๋กœ ์„ ํƒ๋œ ๋งํฌ: {random_links}")

    # Pad with empty strings so that exactly three values are returned.
    random_links.extend([""] * (3 - len(random_links)))
    return tuple(random_links)
# Gradio interface: one textbox takes the search keyword and three textboxes
# display the three values of the tuple returned by scrape_naver_blog.
iface = gr.Interface(
    fn=scrape_naver_blog,
    inputs=gr.Textbox(label="๊ฒ€์ƒ‰์–ด ์ž…๋ ฅ", placeholder="๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”."),
    outputs=[
        gr.Textbox(label="๋งํฌ 1"),
        gr.Textbox(label="๋งํฌ 2"),
        gr.Textbox(label="๋งํฌ 3")
    ],
    title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ ์Šคํฌ๋ž˜ํ•‘",
    description=("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅ ํ›„ ์‹คํ–‰ ๋ฒ„ํŠผ์„ ๋ˆ„๋ฅด๋ฉด ๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ํŽ˜์ด์ง€์—์„œ "
                 "๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ๋ฅผ ์Šคํฌ๋ž˜ํ•‘ํ•˜์—ฌ ๋žœ๋ค์œผ๋กœ 3๊ฐœ์˜ ๋งํฌ๋ฅผ ๊ฐ๊ฐ์˜ ์ถœ๋ ฅ์ฐฝ์— ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.")
)
if __name__ == "__main__":
    # Launch the interface with debug mode enabled (only when run as a script).
    iface.launch(debug=True)