bai_test_21 / app.py
gooookim's picture
Update app.py
1eb2185 verified
# app.py
# Naver News Search API -> Gradio chat-like UI (HF Spaces friendly)
#
# HF Spaces Secrets settings:
# NAVER_CLIENT_ID = the Client ID issued to you
# NAVER_CLIENT_SECRET = the Client Secret issued to you
import os
import html
import re
from datetime import datetime
from typing import Dict, Any, List, Tuple
import requests
import gradio as gr
# URL that naver_news_search() sends its GET request to.
NAVER_NEWS_ENDPOINT = "https://openapi.naver.com/v1/search/news.json"
def _get_env(name: str) -> str:
    """Return environment variable *name* with surrounding whitespace removed.

    An unset variable yields the empty string.
    """
    return os.environ.get(name, "").strip()
def _strip_tags(text: str) -> str:
    """Return *text* as plain text: markup tags removed, entities decoded.

    The title/description fields of Naver news results may contain
    <b>...</b> markup. Tags are removed first, then HTML entities are
    unescaped and surrounding whitespace is trimmed. Falsy input yields "".
    """
    if text:
        without_tags = re.sub(r"<[^>]+>", "", text)
        return html.unescape(without_tags).strip()
    return ""
def _format_pubdate(pub_date: str) -> str:
    """Reformat a timestamp such as "Mon, 19 Jan 2026 09:30:00 +0900".

    The result has the form "2026-01-19 09:30:00 +0900". Empty input gives
    ""; when the input cannot be parsed it is handed back unchanged.
    """
    if not pub_date:
        return ""
    source_format = "%a, %d %b %Y %H:%M:%S %z"
    target_format = "%Y-%m-%d %H:%M:%S %z"
    try:
        return datetime.strptime(pub_date, source_format).strftime(target_format)
    except Exception:
        return pub_date
def naver_news_search(
    query: str,
    display: int = 10,
    sort: str = "date",
    start: int = 1,
    timeout: int = 10,
) -> Dict[str, Any]:
    """Send one GET request to the Naver news endpoint and return its JSON body.

    Credentials are read from the NAVER_CLIENT_ID and NAVER_CLIENT_SECRET
    environment variables on every call.

    Args:
        query: search text, sent as the "query" parameter.
        display: sent as the "display" parameter after int() coercion.
        sort: sent unchanged as the "sort" parameter.
        start: sent as the "start" parameter after int() coercion.
        timeout: passed to requests.get as its timeout.

    Returns:
        The decoded JSON body of a 200 response.

    Raises:
        RuntimeError: if either credential is empty, or if the response status
            is not 200. In the latter case the message embeds the body, as
            decoded JSON when possible and as raw text otherwise.
    """
    auth_headers = {
        "X-Naver-Client-Id": _get_env("NAVER_CLIENT_ID"),
        "X-Naver-Client-Secret": _get_env("NAVER_CLIENT_SECRET"),
    }
    if not all(auth_headers.values()):
        raise RuntimeError(
            "ํ™˜๊ฒฝ๋ณ€์ˆ˜ NAVER_CLIENT_ID / NAVER_CLIENT_SECRET ์ด ์„ค์ •๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค. "
            "Hugging Face Spaces์˜ Secrets์— ์ถ”๊ฐ€ํ•ด ์ฃผ์„ธ์š”."
        )

    response = requests.get(
        NAVER_NEWS_ENDPOINT,
        headers=auth_headers,
        params={
            "query": query,
            "display": int(display),
            "start": int(start),
            "sort": sort,
        },
        timeout=timeout,
    )
    if response.status_code == 200:
        return response.json()

    # An error response may carry details in its body; include them in the
    # exception so they can be shown as-is.
    try:
        detail = response.json()
    except Exception:
        detail = {"text": response.text}
    raise RuntimeError(f"๋„ค์ด๋ฒ„ API ํ˜ธ์ถœ ์‹คํŒจ (HTTP {response.status_code}): {detail}")
def render_results(data: Dict[str, Any], max_items: int = 10) -> str:
    """Render a raw search response as Markdown: summary lines, then a numbered list.

    Args:
        data: response mapping; only its "items" and "total" keys are read.
            A missing or null "items" is treated as an empty list.
        max_items: upper bound on the number of items rendered.

    Returns:
        The Markdown text with surrounding whitespace stripped.
    """
    # `or []` also covers an explicit null, which would otherwise fail on slicing.
    items = (data.get("items") or [])[:max_items]
    total = data.get("total")

    lines: List[str] = []
    if total is not None:
        # The "," format spec only works on numbers; fall back to the plain
        # text of the value instead of raising when it is something else.
        try:
            total_text = f"{total:,}"
        except (TypeError, ValueError):
            total_text = str(total)
        lines.append(f"- ์ด ๊ฒ€์ƒ‰๊ฒฐ๊ณผ: {total_text}๊ฑด")
    lines.append(f"- ๋ฐ˜ํ™˜ ๊ฐœ์ˆ˜: {len(items)}๊ฑด")
    lines.append("")

    for i, it in enumerate(items, start=1):
        title = _strip_tags(it.get("title", ""))
        desc = _strip_tags(it.get("description", ""))
        link = it.get("link", "")
        origin = it.get("originallink", "")
        pub = _format_pubdate(it.get("pubDate", ""))

        lines.append(f"{i}. **{title}**")
        # Sub-items are emitted only for non-empty fields, using the same
        # literals and one-blank-line spacing as render_results_from_items.
        if pub:
            lines.append(f" - ๋ฐœํ–‰: {pub}")
        if origin:
            lines.append(f" - ์›๋ฌธ: {origin}")
        if link:
            lines.append(f" - ๋งํฌ: {link}")
        if desc:
            lines.append(f" - ์š”์•ฝ: {desc}")
        lines.append("")

    return "\n".join(lines).strip()
def dedup_items(all_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Drop duplicate news items, keeping the first occurrence in input order.

    An item is identified by its "originallink" when non-empty, otherwise by
    its "link", otherwise by the text "<tag-stripped title>|<pubDate>".
    Items that carry none of these fields cannot be identified and are skipped.

    Args:
        all_items: item mappings as returned by the search API.

    Returns:
        A new list holding the first item seen for each identity.
    """
    seen = set()
    unique: List[Dict[str, Any]] = []
    for item in all_items:
        key = (item.get("originallink") or "").strip() or (item.get("link") or "").strip()
        if not key:
            # The title is only needed for the fallback key, so it is cleaned
            # lazily here rather than for every item.
            raw_title = item.get("title") or ""
            title = _strip_tags(raw_title) if raw_title else ""
            pub = item.get("pubDate") or ""
            # Test the parts, not the joined key: "title|pub" always contains
            # the separator and is therefore never empty.
            if not (title or pub):
                continue
            key = f"{title}|{pub}"
        if key in seen:
            continue
        seen.add(key)
        unique.append(item)
    return unique
def aggregate_search(
    sentence: str,
    display: int,
    sort: str,
) -> Tuple[List[str], List[Dict[str, Any]], int]:
    """Call the search API with the user's sentence used verbatim as the query.

    Args:
        sentence: the text to search for.
        display: number of items to request and the cap on items returned.
        sort: sort order, forwarded unchanged to naver_news_search.

    Returns:
        A tuple (queries used, de-duplicated items, total), where total is the
        response's "total" field coerced to int (0 when missing or falsy).
    """
    # Coerce once so the API call and the final slice use the same integer;
    # slicing with a non-int (e.g. a float from a slider) would raise.
    limit = int(display)

    data = naver_news_search(query=sentence, display=limit, sort=sort, start=1)
    total = int(data.get("total", 0) or 0)
    # `or []` tolerates an explicit null "items" in the response.
    merged = dedup_items(list(data.get("items") or []))

    return [sentence], merged[:limit], total
def render_results_from_items(items: List[Dict[str, Any]]) -> str:
    """Render a list of news items as a numbered Markdown list.

    Each item becomes a bold, numbered title line followed by one sub-line
    per non-empty field (publication date, original link, link, summary) and
    a blank separator line. The joined text is returned stripped.
    """
    rendered: List[str] = [""]
    for number, entry in enumerate(items, start=1):
        heading = _strip_tags(entry.get("title", ""))
        summary = _strip_tags(entry.get("description", ""))
        url = entry.get("link", "")
        source_url = entry.get("originallink", "")
        published = _format_pubdate(entry.get("pubDate", ""))

        # (label, value) pairs in output order; empty values are left out.
        details = (
            (" - ๋ฐœํ–‰: ", published),
            (" - ์›๋ฌธ: ", source_url),
            (" - ๋งํฌ: ", url),
            (" - ์š”์•ฝ: ", summary),
        )
        rendered.append(f"{number}. **{heading}**")
        rendered.extend(f"{label}{value}" for label, value in details if value)
        rendered.append("")
    return "\n".join(rendered).strip()
def handle_search(
    user_query: str,
    chat_history: List[Dict[str, str]],
    display: int,
    sort: str,
) -> Tuple[List[Dict[str, str]], str]:
    """Run one search turn and append it to the chat transcript.

    Args:
        user_query: raw text from the query box; blank input is a no-op.
        chat_history: prior messages as {"role", "content"} dicts; None is
            treated as an empty history. The argument itself is not mutated.
        display: number of results to request (coerced with int()).
        sort: sort order forwarded to aggregate_search.

    Returns:
        A tuple (updated history, ""). The second element is always the empty
        string; the UI wires it to the query textbox.
    """
    # Copy so the caller's list is untouched and a None history cannot crash
    # the handler before the error boundary below is reached.
    history = list(chat_history or [])

    q = (user_query or "").strip()
    if not q:
        return history, ""

    history.append({"role": "user", "content": q})
    try:
        _, items, _ = aggregate_search(sentence=q, display=int(display), sort=sort)
        assistant_text = render_results_from_items(items).strip()
    except Exception as e:
        # UI boundary: report any failure inside the chat instead of raising.
        assistant_text = f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.\n\n- {e}"

    history.append({"role": "assistant", "content": assistant_text})
    return history, ""
# Build the Gradio UI and bind it to the module-level name `demo`.
# NOTE(review): indentation was lost in this copy of the file, so the nesting of
# the `with` contexts below cannot be read from the text - confirm the layout.
with gr.Blocks(title="Naver News Search (Chat UI)") as demo:
# Accordion created with open=False; the search-option widgets follow it.
with gr.Accordion("๊ฒ€์ƒ‰ ์˜ต์…ฅ - ๋‰ด์Šค ๊ฐœ์ˆ˜์™€ ์ •๋ ฌ ๋ฐฉ์‹์„ ์„ ํƒํ•˜์„ธ์š”.", open=False):
with gr.Row():
# Slider over 1..100 in steps of 1, default 30; passed to handle_search as `display`.
display = gr.Slider(
minimum=1, maximum=100, value=30, step=1, label="๋‰ด์Šค ๊ฐœ์ˆ˜"
)
# Dropdown of (label, value) pairs; the values "date" / "sim" are passed to handle_search as `sort`.
sort = gr.Dropdown(
choices=[("์ตœ์‹ ์ˆœ", "date"), ("์ •ํ™•๋„์ˆœ(์—ฐ๊ด€๋„์ˆœ)", "sim")],
value="date",
label="์ •๋ ฌ ๋ฐฉ์‹",
)
# Chat transcript; type="messages" matches the {"role", "content"} dicts built by handle_search.
chatbot = gr.Chatbot(
value=[],
label="NewsChat_v0.1",
type="messages",
height=600,
)
with gr.Row():
# Query textbox (scale=8) next to a column (scale=2) that precedes the two buttons.
query_in = gr.Textbox(
label="๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”.",
placeholder="(์˜ˆ: ํ–‰์•ˆ๋ถ€์˜ ์ธ๊ณต์ง€๋Šฅ ์‚ฌ์—…)",
scale=8,
)
with gr.Column(scale=2):
search_btn = gr.Button("๊ฒ€์ƒ‰", variant="primary")
clear_btn = gr.Button("๋Œ€ํ™” ์ง€์šฐ๊ธฐ", variant="secondary")
# Clicking the search button and submitting the textbox both call handle_search
# with the same inputs; its two return values go to the chatbot and the textbox.
search_btn.click(
fn=handle_search,
inputs=[query_in, chatbot, display, sort],
outputs=[chatbot, query_in],
api_name="search",
)
query_in.submit(
fn=handle_search,
inputs=[query_in, chatbot, display, sort],
outputs=[chatbot, query_in],
api_name="search_submit",
)
# Returns an empty list, which the clear button writes to the chatbot.
def clear_chat():
return []
clear_btn.click(fn=clear_chat, inputs=[], outputs=[chatbot], api_name="clear")
# Script entry point: launch the app on host 0.0.0.0, port 7860.
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860)