mbtibooktalk / server /data4lib.py
dongle0516's picture
feat: 정보나루(data4library.kr) OpenAPI RAG 통합
923e65e
"""μ •λ³΄λ‚˜λ£¨(data4library.kr) OpenAPI 연동 λͺ¨λ“ˆ.
λŒ€μΆœ 인기 λ„μ„œ 검색 κ²°κ³Όλ₯Ό RAG μ»¨ν…μŠ€νŠΈλ‘œ μ œκ³΅ν•˜μ—¬
μ‚¬μ„œ λ‹΅λ³€μ˜ 신뒰도λ₯Ό 높인닀.
ENV
---
DATA4LIB_API_KEY : μ •λ³΄λ‚˜λ£¨ OpenAPI 인증킀
"""
import os
import httpx
from datetime import datetime, timedelta
# Root of the data4library.kr OpenAPI.
BASE_URL = "https://data4library.kr/api"
# Authentication key read from the environment; empty when it is not set.
API_KEY = os.environ.get("DATA4LIB_API_KEY", "")
# Loan data is queried over the most recent N months.
_MONTHS_BACK = 12
# Generic library/request words that carry no topical meaning; they are
# excluded when the query is turned into a keyword search.
# NOTE(review): these literals were restored from mis-encoded (mojibake) text.
# "되는" could not be told apart from "또는" after the corruption - confirm
# against the intended list.
_STOPWORDS = {
    "책", "도서", "추천", "관련", "알려", "어떤", "좋은", "있는", "없는",
    "되는", "하는", "해요", "해서", "같은", "위한", "대한", "읽고", "읽을",
    "볼", "줘요", "주세요", "합니다", "드립니다", "때", "수", "것", "제",
}
def _date_range() -> tuple[str, str]:
    """Return the (start, end) dates of the loan-data window as YYYY-MM-DD.

    The window ends today (local time) and reaches back ``_MONTHS_BACK``
    months, with every month approximated as 30 days.
    """
    day_format = "%Y-%m-%d"
    window_end = datetime.today()
    window_start = window_end - timedelta(days=30 * _MONTHS_BACK)
    return window_start.strftime(day_format), window_end.strftime(day_format)
def _extract_keywords(query: str, max_words: int = 3) -> str:
    """Reduce a free-form query to a short keyword string for searching.

    The query is split on whitespace; tokens shorter than two characters and
    tokens listed in ``_STOPWORDS`` are dropped.

    Args:
        query: Raw user query.
        max_words: Maximum number of surviving tokens to keep.

    Returns:
        The first ``max_words`` surviving tokens joined by single spaces.
        When no token survives, the first 20 characters of the trimmed query
        are returned instead, which is ``""`` for a blank query.
    """
    meaningful = [
        token
        for token in query.split()
        if len(token) >= 2 and token not in _STOPWORDS
    ]
    if meaningful:
        return " ".join(meaningful[:max_words])
    # Trim the fallback so a whitespace-only query yields "" and the caller's
    # emptiness check can skip the request instead of searching for blanks.
    return query.strip()[:20].rstrip()
def _parse_loan_count(raw) -> int:
    """Coerce the API's ``loan_count`` value to an int, treating junk as 0."""
    try:
        return int(raw or 0)
    except (TypeError, ValueError):
        return 0


def _book_record(doc: dict) -> dict:
    """Map one ``doc`` object of the API response onto this module's book dict."""
    return {
        # ``or ""`` guards against an explicit null in the payload.
        "title": (doc.get("bookname") or "").strip(),
        "authors": doc.get("authors", ""),
        "publisher": doc.get("publisher", ""),
        "pub_year": doc.get("publication_year", ""),
        "isbn13": doc.get("isbn13", ""),
        "class_nm": doc.get("class_nm", ""),
        "loan_count": _parse_loan_count(doc.get("loan_count")),
        "book_url": doc.get("bookDtlUrl", ""),
        "source": "data4library",
    }


def search_books(query: str, top_k: int = 5) -> list[dict]:
    """Call the data4library.kr ``loanItemSrch`` endpoint for ``query``.

    This is a best-effort lookup: it never raises. An empty list is returned
    when ``top_k`` is not positive, when no API key is configured, when the
    query yields no keyword, when the response carries an ``error`` entry
    (for example before the API key is activated), or when the request or
    response handling fails.

    Args:
        query: Free-form user query; it is reduced to keywords first.
        top_k: Maximum number of books to return.

    Returns:
        Up to ``top_k`` book dicts sorted by ``loan_count`` in descending
        order. Each dict has the keys ``title``, ``authors``, ``publisher``,
        ``pub_year``, ``isbn13``, ``class_nm``, ``loan_count``, ``book_url``
        and ``source``.
    """
    # A non-positive limit can only produce an empty result; skip the request
    # (a negative value would also mis-slice the final list).
    if top_k <= 0:
        return []
    if not API_KEY:
        return []

    # Strip here as well so a whitespace-only keyword never reaches the API.
    keyword = _extract_keywords(query).strip()
    if not keyword:
        return []

    start_dt, end_dt = _date_range()
    params = {
        "authKey": API_KEY,
        "keyword": keyword,
        "startDt": start_dt,
        "endDt": end_dt,
        # Ask for twice as many rows as needed; presumably headroom for
        # entries dropped below - TODO confirm.
        "pageSize": top_k * 2,
        "format": "json",
    }
    try:
        resp = httpx.get(f"{BASE_URL}/loanItemSrch", params=params, timeout=5.0)
        resp.raise_for_status()
        body = resp.json().get("response", {})
        # Error payloads, including the one sent before activation.
        if "error" in body:
            return []
        books = []
        for item in body.get("docs", []):
            record = _book_record(item.get("doc") or {})
            # Entries without a title are useless as recommendations.
            if record["title"]:
                books.append(record)
    except Exception as e:
        # Deliberate catch-all: this lookup is optional context and must not
        # break the caller. httpx status errors embed the request URL, query
        # string included, so mask the key before the message is printed.
        reason = str(e).replace(API_KEY, "***")
        print(f"[data4lib] 검색 실패 (무시): {reason}")
        return []

    # Most-borrowed books first.
    books.sort(key=lambda book: book["loan_count"], reverse=True)
    return books[:top_k]
def format_for_rag(books: list[dict]) -> str:
    """Render search results as a text block to be used as RAG context.

    Args:
        books: Book dicts with the keys ``title``, ``authors``, ``publisher``,
            ``pub_year``, ``class_nm`` and ``loan_count``. Missing or empty
            fields are shown as placeholders instead of raising.

    Returns:
        ``""`` for an empty list. Otherwise a header line, one numbered line
        per book and a closing source note, joined by newlines.
    """
    if not books:
        return ""

    # Header: "[Recommended books based on actual Library Information Naru
    # loan data]".
    lines = ["[정보나루 실제 대출 데이터 기반 추천 도서]"]
    for rank, book in enumerate(books, 1):
        # '저자 미상' = "author unknown", '분류' = "classification",
        # '대출 N회' = "borrowed N times".
        lines.append(
            f"{rank}. 『{book.get('title', '')}』"
            f" / {book.get('authors') or '저자 미상'}"
            f" / {book.get('publisher') or '-'}"
            f" ({book.get('pub_year') or '-'})"
            f" — 분류: {book.get('class_nm') or '-'}"
            f", 대출 {book.get('loan_count', 0)}회"
        )
    # Footer: "The list above is based on actual library loan statistics from
    # Library Information Naru."
    lines.append("※ 위 목록은 정보나루 도서관 실제 대출 통계 기반입니다.")
    return "\n".join(lines)