Spaces:
Sleeping
Sleeping
File size: 3,829 Bytes
923e65e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 | """Integration module for the data4library.kr ("Library Information Naru") OpenAPI.
Supplies search results for popular loaned books as RAG context
to make the librarian's answers more reliable.
ENV
---
DATA4LIB_API_KEY : authentication key for the data4library OpenAPI
"""
import os
import httpx
from datetime import datetime, timedelta
# API key read from the environment; when it is empty, search_books() returns
# an empty list without calling the API.
API_KEY = os.getenv("DATA4LIB_API_KEY", "")
# Base URL that endpoint names (e.g. "loanItemSrch") are appended to.
BASE_URL = "https://data4library.kr/api"
# Look-back window, in months, for the loan data that is queried
_MONTHS_BACK = 12
# Common library-related words — excluded when building the search keyword
# NOTE(review): the literals below look mis-encoded (UTF-8 Korean text rendered
# through a Greek code page) and the first entry is split across two lines —
# confirm against the original file before relying on them.
_STOPWORDS = {
"μ±
", "λμ", "μΆμ²", "κ΄λ ¨", "μλ €", "μ΄λ€", "μ’μ", "μλ", "μλ",
"λλ", "νλ", "ν΄μ", "ν΄μ", "κ°μ", "μν", "λν", "μ½κ³ ", "μ½μ",
"λ³Ό", "μ€μ", "μ£ΌμΈμ", "ν©λλ€", "λ립λλ€", "λ", "μ", "κ²", "μ ",
}
def _date_range() -> tuple[str, str]:
end = datetime.today()
start = end - timedelta(days=_MONTHS_BACK * 30)
return start.strftime("%Y-%m-%d"), end.strftime("%Y-%m-%d")
def _extract_keywords(query: str, max_words: int = 3) -> str:
"""μ§μμμ μλ―Έ μλ λ¨μ΄λ§ μΆμΆνμ¬ κ²μ ν€μλλ‘ λ°ν."""
words = [w for w in query.split() if len(w) >= 2 and w not in _STOPWORDS]
return " ".join(words[:max_words]) if words else query[:20]
def search_books(query: str, top_k: int = 5) -> list[dict]:
    """Search the data4library loan-item API and return the most-borrowed matches.

    A keyword derived from ``query`` is sent to the ``loanItemSrch`` endpoint
    together with the date window from ``_date_range()``. Twice ``top_k``
    results are requested, ranked locally by loan count (descending) and
    trimmed to ``top_k``.

    This is a best-effort lookup: it returns an empty list instead of raising
    when ``top_k`` is not positive, no API key is configured, no keyword can
    be extracted, the API reports an error, or the request/parsing fails.

    Args:
        query: Free-text user question.
        top_k: Maximum number of books to return.

    Returns:
        A list of dicts with the keys ``title``, ``authors``, ``publisher``,
        ``pub_year``, ``isbn13``, ``class_nm``, ``loan_count`` (int),
        ``book_url`` and ``source``.
    """
    # A non-positive limit can never yield results; bail out before any
    # request is made (a negative value would also slice from the wrong end).
    if top_k <= 0 or not API_KEY:
        return []
    keyword = _extract_keywords(query)
    if not keyword:
        return []
    start_dt, end_dt = _date_range()
    params = {
        "authKey": API_KEY,
        "keyword": keyword,
        "startDt": start_dt,
        "endDt": end_dt,
        # Over-fetch so the local ranking has more candidates than it returns.
        "pageSize": top_k * 2,
        "format": "json",
    }
    try:
        resp = httpx.get(f"{BASE_URL}/loanItemSrch", params=params, timeout=5.0)
        resp.raise_for_status()
        data = resp.json()
        payload = data.get("response") or {}
        # The API signals "not activated yet" / failures via an "error" key.
        if "error" in payload:
            return []
        books = _parse_loan_docs(payload.get("docs") or [])
        # Most-borrowed first
        books.sort(key=lambda b: b["loan_count"], reverse=True)
        return books[:top_k]
    except Exception as e:
        # Deliberate best-effort boundary: any failure degrades to "no results".
        print(f"[data4lib] κ²μ μ€ν¨ (무μ): {e}")
        return []


def _parse_loan_docs(docs: list) -> list[dict]:
    """Convert raw ``docs`` entries into book dicts, skipping malformed ones.

    Each entry is expected to be ``{"doc": {...}}``. Entries that are not
    shaped like that, or that have no title, are skipped; a missing or
    non-numeric ``loan_count`` is treated as 0, so one bad record does not
    discard the whole result set.
    """
    books = []
    for item in docs:
        doc = item.get("doc") if isinstance(item, dict) else None
        if not isinstance(doc, dict):
            continue
        title = str(doc.get("bookname") or "").strip()
        if not title:
            continue
        try:
            loan_count = int(doc.get("loan_count") or 0)
        except (TypeError, ValueError):
            loan_count = 0
        books.append({
            "title": title,
            "authors": doc.get("authors", ""),
            "publisher": doc.get("publisher", ""),
            "pub_year": doc.get("publication_year", ""),
            "isbn13": doc.get("isbn13", ""),
            "class_nm": doc.get("class_nm", ""),
            "loan_count": loan_count,
            "book_url": doc.get("bookDtlUrl", ""),
            "source": "data4library",
        })
    return books
def format_for_rag(books: list[dict]) -> str:
    """Render search results as a RAG context string.

    Returns an empty string when ``books`` is empty. Otherwise the result is
    a header line, one numbered line per book (numbering starts at 1) and a
    closing note, joined with newlines.

    Each book dict must contain the keys ``title``, ``authors``,
    ``publisher``, ``pub_year``, ``class_nm`` and ``loan_count``; they are
    read with direct indexing, so a missing key raises ``KeyError``.
    """
    # NOTE(review): the string literals in this function look mis-encoded
    # (presumably Korean text rendered through a Greek code page), and the
    # closing-note literal is split across two lines — confirm against the
    # original file.
    if not books:
        return ""
    # Header line of the context block
    lines = ["[μ 보λ루 μ€μ λμΆ λ°μ΄ν° κΈ°λ° μΆμ² λμ]"]
    for i, b in enumerate(books, 1):
        # Falsy authors / publisher / pub_year / class_nm values are replaced
        # by a placeholder via ``or``.
        line = (
            f"{i}. γ{b['title']}γ"
            f" / {b['authors'] or 'μ μ λ―Έμ'}"
            f" / {b['publisher'] or '-'}"
            f" ({b['pub_year'] or '-'})"
            f" β λΆλ₯: {b['class_nm'] or '-'}"
            f", λμΆ {b['loan_count']}ν"
        )
        lines.append(line)
    # Closing note appended after the book list
    lines.append("β» μ λͺ©λ‘μ μ 보λ루 λμκ΄ μ€μ λμΆ ν΅κ³ κΈ°λ°μ
λλ€.")
    return "\n".join(lines)
|