Spaces:
Running
Running
| """Integration module for the data4library.kr ("Jeongbo Naru") OpenAPI. | |
| Supplies search results for popular loaned books as RAG context | |
| to make the librarian's answers more reliable. | |
| ENV | |
| --- | |
| DATA4LIB_API_KEY : authentication key for the data4library OpenAPI | |
| """ | |
| import os | |
| import httpx | |
| from datetime import datetime, timedelta | |
| # data4library OpenAPI credential, read from the environment once at import time; "" when the variable is unset. | |
| API_KEY = os.getenv("DATA4LIB_API_KEY", "") | |
| # Root URL that endpoint paths (e.g. /loanItemSrch) are appended to. | |
| BASE_URL = "https://data4library.kr/api" | |
| # Use loan data from the most recent N months. | |
| _MONTHS_BACK = 12 | |
| # Common library-domain words that are excluded when building the data4library search keyword. | |
| # NOTE(review): these literals look encoding-damaged (mojibake), so several entries now collide and | |
| # will not match real Korean input - restore them from the original UTF-8 source before relying on them. | |
| _STOPWORDS = { | |
| "μ± ", "λμ", "μΆμ²", "κ΄λ ¨", "μλ €", "μ΄λ€", "μ’μ", "μλ", "μλ", | |
| "λλ", "νλ", "ν΄μ", "ν΄μ", "κ°μ", "μν", "λν", "μ½κ³ ", "μ½μ", | |
| "λ³Ό", "μ€μ", "μ£ΌμΈμ", "ν©λλ€", "λ립λλ€", "λ", "μ", "κ²", "μ ", | |
| } | |
def _date_range() -> tuple[str, str]:
    """Return the (start, end) dates of the loan-data window as YYYY-MM-DD strings.

    The window ends today (local time) and reaches back ``_MONTHS_BACK``
    months, with every month counted as 30 days.
    """
    date_format = "%Y-%m-%d"
    today = datetime.today()
    window = timedelta(days=_MONTHS_BACK * 30)
    return (today - window).strftime(date_format), today.strftime(date_format)
def _extract_keywords(query: str, max_words: int = 3) -> str:
    """Extract the meaningful words of a question and return them as a search keyword.

    The query is split on whitespace; tokens shorter than two characters or
    listed in ``_STOPWORDS`` are dropped, and at most ``max_words`` of the
    remaining tokens are joined with single spaces.

    When no token survives the filter, the first 20 characters of the
    whitespace-trimmed query are returned instead.  Trimming matters: a blank
    or whitespace-only query must yield ``""`` so that callers testing the
    result for emptiness do not send a blank keyword to the API.
    """
    # Length is checked first so the stop-word lookup only runs for real candidates.
    words = [w for w in query.split() if len(w) >= 2 and w not in _STOPWORDS]
    if words:
        return " ".join(words[:max_words])
    # Fallback: nothing meaningful found, use a trimmed prefix of the raw query.
    return query.strip()[:20].rstrip()
def _parse_loan_doc(doc) -> "dict | None":
    """Normalise one raw ``doc`` record of the API response into a book dict.

    Returns ``None`` when the record is not a mapping or has no usable title.
    A missing or non-numeric ``loan_count`` is treated as 0 so that a single
    malformed record cannot discard the whole result set.
    """
    if not isinstance(doc, dict):
        return None
    title = str(doc.get("bookname") or "").strip()
    if not title:
        return None
    try:
        loan_count = int(doc.get("loan_count") or 0)
    except (TypeError, ValueError):
        loan_count = 0
    return {
        "title": title,
        "authors": doc.get("authors", ""),
        "publisher": doc.get("publisher", ""),
        "pub_year": doc.get("publication_year", ""),
        "isbn13": doc.get("isbn13", ""),
        "class_nm": doc.get("class_nm", ""),
        "loan_count": loan_count,
        "book_url": doc.get("bookDtlUrl", ""),
        "source": "data4library",
    }


def search_books(query: str, top_k: int = 5) -> list[dict]:
    """Call the data4library loan-item search API for books matching a question.

    Args:
        query: Free-text user question; it is reduced to a keyword with
            ``_extract_keywords`` before being sent.
        top_k: Maximum number of books to return.  Twice as many are
            requested so that ranking by loan count has candidates to drop.

    Returns:
        Up to ``top_k`` book dicts (``title``, ``authors``, ``publisher``,
        ``pub_year``, ``isbn13``, ``class_nm``, ``loan_count``, ``book_url``,
        ``source``) ordered by loan count, highest first.  An empty list is
        returned when no API key is configured, ``top_k`` is not positive, no
        keyword can be derived, the API reports an error, or the request or
        parsing fails (graceful fallback - this function does not raise for
        network or payload problems).
    """
    if not API_KEY or top_k <= 0:
        return []

    # Strip so a whitespace-only keyword is treated as "no keyword".
    keyword = _extract_keywords(query).strip()
    if not keyword:
        return []

    start_dt, end_dt = _date_range()
    params = {
        "authKey": API_KEY,
        "keyword": keyword,
        "startDt": start_dt,
        "endDt": end_dt,
        "pageSize": top_k * 2,
        "format": "json",
    }
    try:
        resp = httpx.get(f"{BASE_URL}/loanItemSrch", params=params, timeout=5.0)
        resp.raise_for_status()
        data = resp.json()

        # A not-yet-activated key or a failed request is reported inside "response".
        response = data.get("response") if isinstance(data, dict) else None
        if not isinstance(response, dict) or "error" in response:
            return []

        books = []
        for item in response.get("docs") or []:
            book = _parse_loan_doc(item.get("doc") if isinstance(item, dict) else None)
            if book is not None:
                books.append(book)

        # Most-borrowed books first.
        books.sort(key=lambda b: b["loan_count"], reverse=True)
        return books[:top_k]
    except Exception as e:
        # Deliberate best-effort boundary: any failure degrades to "no results".
        # HTTP status errors embed the request URL, whose query string carries
        # authKey, so the key is masked before logging.  API_KEY is non-empty
        # here (checked above), which keeps str.replace well-defined.
        detail = str(e).replace(API_KEY, "***")
        print(f"[data4lib] κ²μ μ€ν¨ (무μ): {detail}")
        return []
def format_for_rag(books: list[dict]) -> str:
    """Render search results as a text block for use as RAG context.

    Returns ``""`` when ``books`` is empty.  Otherwise the result is a header
    line, one numbered line per book (title, authors, publisher, publication
    year, classification and loan count, with a placeholder substituted for
    any empty authors/publisher/year/classification), and a closing note, all
    joined with newlines.
    """
    if not books:
        return ""

    def render(rank: int, book: dict) -> str:
        # One numbered entry; falsy optional fields fall back to a placeholder.
        return (
            f"{rank}. γ{book['title']}γ"
            f" / {book['authors'] or 'μ μ λ―Έμ'}"
            f" / {book['publisher'] or '-'}"
            f" ({book['pub_year'] or '-'})"
            f" β λΆλ₯: {book['class_nm'] or '-'}"
            f", λμΆ {book['loan_count']}ν"
        )

    header = "[μ 보λ루 μ€μ λμΆ λ°μ΄ν° κΈ°λ° μΆμ² λμ]"
    footer = "β» μ λͺ©λ‘μ μ 보λ루 λμκ΄ μ€μ λμΆ ν΅κ³ κΈ°λ°μ λλ€."
    entries = [render(rank, book) for rank, book in enumerate(books, 1)]
    return "\n".join([header, *entries, footer])