File size: 3,829 Bytes
923e65e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""μ •λ³΄λ‚˜λ£¨(data4library.kr) OpenAPI 연동 λͺ¨λ“ˆ.

λŒ€μΆœ 인기 λ„μ„œ 검색 κ²°κ³Όλ₯Ό RAG μ»¨ν…μŠ€νŠΈλ‘œ μ œκ³΅ν•˜μ—¬
μ‚¬μ„œ λ‹΅λ³€μ˜ 신뒰도λ₯Ό 높인닀.

ENV
---
DATA4LIB_API_KEY : μ •λ³΄λ‚˜λ£¨ OpenAPI 인증킀
"""

import os
import httpx
from datetime import datetime, timedelta

# OpenAPI authentication key, read from the environment once at import time.
# An empty value makes search_books() return no results.
API_KEY  = os.getenv("DATA4LIB_API_KEY", "")
BASE_URL = "https://data4library.kr/api"

# Loan statistics are requested for the most recent N months.
_MONTHS_BACK = 12

# Generic library/request words that are removed from a query before it is
# used as a search keyword. The literals must be real Hangul so that they can
# match words of a Korean query (they were previously stored mis-encoded).
_STOPWORDS = {
    "책", "도서", "추천", "관련", "알려", "어떤", "좋은", "있는", "없는",
    "또는", "하는", "해요", "해서", "같은", "위한", "대한", "읽고", "읽을",
    "볼", "줘요", "주세요", "합니다", "드립니다", "때", "수", "것", "제",
}


def _date_range() -> tuple[str, str]:
    """Return the (start, end) dates of the loan-data window as ``YYYY-MM-DD``.

    The window ends today (local time) and reaches back ``_MONTHS_BACK``
    months, with every month counted as 30 days.
    """
    fmt = "%Y-%m-%d"
    window = timedelta(days=_MONTHS_BACK * 30)
    today = datetime.today()
    return (today - window).strftime(fmt), today.strftime(fmt)


def _extract_keywords(query: str, max_words: int = 3) -> str:
    """Reduce a free-text query to a short keyword string for the search API.

    The query is split on whitespace; words shorter than two characters or
    listed in ``_STOPWORDS`` are dropped, and the first ``max_words``
    remaining words are joined with single spaces.

    When no word survives the filter, the first 20 characters of the
    whitespace-stripped query are returned instead. A blank or
    whitespace-only query therefore yields ``""``, which lets callers detect
    "nothing to search for" with a simple truthiness check.
    """
    words = [w for w in query.split() if len(w) >= 2 and w not in _STOPWORDS]
    if words:
        return " ".join(words[:max_words])
    # Fallback: strip first so pure whitespace is not passed on as a keyword,
    # then trim again in case the 20-character cut ends on a space.
    return query.strip()[:20].rstrip()


def search_books(query: str, top_k: int = 5) -> list[dict]:
    """Query the data4library ``loanItemSrch`` endpoint for books matching *query*.

    The query is reduced to keywords with ``_extract_keywords`` and searched
    within the date window returned by ``_date_range``.

    Returns at most ``top_k`` dicts, sorted by ``loan_count`` descending, with
    the keys ``title``, ``authors``, ``publisher``, ``pub_year``, ``isbn13``,
    ``class_nm``, ``loan_count`` (int), ``book_url`` and ``source``.

    This is a best-effort lookup and never raises. An empty list is returned
    when no API key is configured, when ``top_k`` is not positive, when no
    keyword can be extracted, when the API answers with an ``error`` payload
    (e.g. before the key is activated), or when the request or parsing fails.
    """
    if not API_KEY or top_k <= 0:
        return []

    keyword = _extract_keywords(query)
    if not keyword:
        return []

    start_dt, end_dt = _date_range()
    params = {
        "authKey":  API_KEY,
        "keyword":  keyword,
        "startDt":  start_dt,
        "endDt":    end_dt,
        # Ask for twice as many rows as needed; presumably this leaves headroom
        # for rows dropped below (no title) before the final cut to top_k.
        "pageSize": top_k * 2,
        "format":   "json",
    }

    try:
        resp = httpx.get(f"{BASE_URL}/loanItemSrch", params=params, timeout=5.0)
        resp.raise_for_status()
        payload = resp.json()

        # The API reports problems (including a not-yet-activated key) inside
        # the body under response.error.
        response = payload.get("response") or {}
        if "error" in response:
            return []

        books = []
        for item in response.get("docs") or []:
            # "or" fallbacks tolerate explicit nulls, so one malformed record
            # does not throw away every other result.
            doc = item.get("doc") or {}
            title = (doc.get("bookname") or "").strip()
            if not title:
                continue
            try:
                loan_count = int(doc.get("loan_count") or 0)
            except (TypeError, ValueError):
                loan_count = 0
            books.append({
                "title":       title,
                "authors":     doc.get("authors", ""),
                "publisher":   doc.get("publisher", ""),
                "pub_year":    doc.get("publication_year", ""),
                "isbn13":      doc.get("isbn13", ""),
                "class_nm":    doc.get("class_nm", ""),
                "loan_count":  loan_count,
                "book_url":    doc.get("bookDtlUrl", ""),
                "source":      "data4library",
            })

        # Most-borrowed books first.
        books.sort(key=lambda x: x["loan_count"], reverse=True)
        return books[:top_k]

    except Exception as e:
        # Deliberate catch-all: this lookup is optional context, so any failure
        # degrades to "no results". The error text may embed the request URL,
        # whose query string carries authKey, so redact the key before printing.
        # The message reads "search failed (ignored)".
        detail = str(e).replace(API_KEY, "***")
        print(f"[data4lib] 검색 실패 (무시): {detail}")
        return []


def format_for_rag(books: list[dict]) -> str:
    """Render search results as a plain-text block for use as RAG context.

    *books* is a list of dicts with the keys ``title``, ``authors``,
    ``publisher``, ``pub_year``, ``class_nm`` and ``loan_count``. Missing or
    empty values are replaced with a placeholder instead of raising.

    Returns ``""`` for an empty list. Otherwise returns a newline-joined
    string: a header line, one numbered line per book, and a closing note
    about the data source. The emitted text is Korean.
    """
    if not books:
        return ""
    # Header: "[Recommended books based on actual Jeongbo Naru loan data]"
    lines = ["[정보나루 실제 대출 데이터 기반 추천 도서]"]
    for i, b in enumerate(books, 1):
        # "저자 미상" = author unknown, "분류" = classification,
        # "대출 N회" = borrowed N times.
        line = (
            f"{i}. 『{b.get('title') or '-'}』"
            f" / {b.get('authors') or '저자 미상'}"
            f" / {b.get('publisher') or '-'}"
            f" ({b.get('pub_year') or '-'})"
            f" — 분류: {b.get('class_nm') or '-'}"
            f", 대출 {b.get('loan_count') or 0}회"
        )
        lines.append(line)
    # Footer: "The list above is based on actual Jeongbo Naru library loan
    # statistics."
    lines.append("※ 위 목록은 정보나루 도서관 실제 대출 통계 기반입니다.")
    return "\n".join(lines)