Update app.py
Browse files
app.py
CHANGED
|
@@ -874,10 +874,86 @@ def build_vectorstore(docs, force_rebuild=False):
|
|
| 874 |
|
| 875 |
|
| 876 |
# ===== TOOL: SEARCH PRIMO =====
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 877 |
async def tool_search_primo(query, limit=5, peer_reviewed=False, open_access=False, year_from=None, year_to=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 878 |
api_key = os.environ.get("PRIMO_API_KEY")
|
| 879 |
if not api_key: return {"error": "PRIMO_API_KEY not configured", "results": []}
|
| 880 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 881 |
vid = "971KUOSTAR_INST:KU"
|
| 882 |
facets = ""
|
| 883 |
if peer_reviewed: facets += "&qInclude=facet_tlevel,exact,peer_reviewed"
|
|
@@ -888,7 +964,7 @@ async def tool_search_primo(query, limit=5, peer_reviewed=False, open_access=Fal
|
|
| 888 |
facets += f"&multiFacets=facet_searchcreationdate,include,{yf}%7C,%7C{yt}"
|
| 889 |
|
| 890 |
base = "https://api-eu.hosted.exlibrisgroup.com/primo/v1/search"
|
| 891 |
-
qs = f"?vid={vid}&tab=Everything&scope=MyInst_and_CI&
|
| 892 |
|
| 893 |
async with httpx.AsyncClient(timeout=15) as client:
|
| 894 |
for region in ["api-eu", "api-na", "api-ap"]:
|
|
|
|
| 874 |
|
| 875 |
|
| 876 |
# ===== TOOL: SEARCH PRIMO =====
|
| 877 |
+
def _boolean_to_primo_params(boolean_query: str) -> str:
    """
    Convert a Boolean search string to PRIMO multi-query parameter format.

    Example:
        ("artificial intelligence" OR "machine learning") AND ("cancer diagnosis")
        → query=any,contains,artificial intelligence OR machine learning,AND
          &query=any,contains,cancer diagnosis

    Each top-level AND group (an `` AND `` that is outside parentheses and
    outside a double-quoted phrase) becomes a separate ``query=`` parameter.
    Every emitted parameter except the last carries a ``,AND`` suffix.

    Args:
        boolean_query: Boolean expression using AND / OR, parentheses and
            optional double-quoted phrases.

    Returns:
        A query-string fragment of one or more ``query=any,contains,...``
        parameters joined with ``&``. Reserved characters in the terms are
        percent-encoded; spaces are left as-is (same as the previous
        behaviour). If no usable term remains after cleaning, the whole
        stripped input is percent-encoded into a single ``query=`` parameter.
    """
    from urllib.parse import quote

    cleaned = boolean_query.strip()

    # Clean each group: remove the enclosing parentheses, drop double quotes
    # and normalise the spacing around OR. Empty groups are discarded *before*
    # suffixes are assigned so a trailing empty group cannot leave a dangling
    # ",AND" on the final parameter.
    terms = []
    for group in _split_top_level_and(cleaned):
        term = _strip_matched_outer_parens(group)
        term = term.replace('"', '')
        term = re.sub(r'\s+OR\s+', ' OR ', term).strip()
        if term:
            terms.append(term)

    if not terms:
        return f"query=any,contains,{quote(cleaned)}"

    # NOTE(review): the Ex Libris Primo Search REST API documents the search
    # parameter as `q` with `;`-separated clauses — confirm that the endpoint
    # used by the caller accepts repeated `query=` parameters.
    last = len(terms) - 1
    return '&'.join(
        f"query=any,contains,{quote(term, safe=' ')}{',AND' if idx < last else ''}"
        for idx, term in enumerate(terms)
    )


def _split_top_level_and(expression: str) -> list:
    """Split *expression* on `` AND `` found outside parentheses and quotes.

    The operator is matched case-insensitively and must be surrounded by
    single spaces. Double quotes are only honoured when they are balanced;
    with an odd number of quotes they are treated as ordinary characters.
    Empty pieces are dropped.
    """
    honor_quotes = expression.count('"') % 2 == 0
    groups = []
    depth = 0
    in_quotes = False
    start = 0  # index where the current group begins
    for pos, char in enumerate(expression):
        if honor_quotes and char == '"':
            in_quotes = not in_quotes
        elif in_quotes:
            continue
        elif char == '(':
            depth += 1
        elif char == ')':
            # Clamp so a stray ")" cannot disable splitting for the rest.
            depth = max(depth - 1, 0)
        elif (
            char == ' '
            and depth == 0
            and pos - start >= 4
            and expression[pos - 4:pos].upper() == ' AND'
        ):
            piece = expression[start:pos - 4].strip()
            if piece:
                groups.append(piece)
            start = pos + 1
    tail = expression[start:].strip()
    if tail:
        groups.append(tail)
    return groups


def _strip_matched_outer_parens(group: str) -> str:
    """Remove one pair of enclosing parentheses if they match each other.

    ``(a OR b)`` becomes ``a OR b``, while ``(a) OR (b)`` is returned
    unchanged because its first "(" is closed before the end of the text.
    """
    text = group.strip()
    if not (text.startswith('(') and text.endswith(')')):
        return text
    honor_quotes = text.count('"') % 2 == 0
    depth = 0
    in_quotes = False
    for pos, char in enumerate(text):
        if honor_quotes and char == '"':
            in_quotes = not in_quotes
        elif in_quotes:
            continue
        elif char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                # The opening parenthesis at index 0 closes here.
                if pos == len(text) - 1:
                    return text[1:-1].strip()
                return text
    return text
|
| 937 |
+
|
| 938 |
+
|
| 939 |
async def tool_search_primo(query, limit=5, peer_reviewed=False, open_access=False, year_from=None, year_to=None):
|
| 940 |
+
"""
|
| 941 |
+
query can be either:
|
| 942 |
+
- a Boolean string: ("AI" OR "ML") AND ("cancer") — converted to multi-query PRIMO format
|
| 943 |
+
- a plain keyword string: "machine learning cancer" — sent as single query
|
| 944 |
+
"""
|
| 945 |
api_key = os.environ.get("PRIMO_API_KEY")
|
| 946 |
if not api_key: return {"error": "PRIMO_API_KEY not configured", "results": []}
|
| 947 |
|
| 948 |
+
# Build PRIMO query params — multi-query format preserves all concepts
|
| 949 |
+
if re.search(r'\b(AND|OR)\b', query) and '(' in query:
|
| 950 |
+
# Boolean string — convert to multi-query format
|
| 951 |
+
query_params = _boolean_to_primo_params(query)
|
| 952 |
+
else:
|
| 953 |
+
# Plain keywords — single query, URL encoded
|
| 954 |
+
from urllib.parse import quote
|
| 955 |
+
query_params = f"query=any,contains,{quote(query.strip())}"
|
| 956 |
+
|
| 957 |
vid = "971KUOSTAR_INST:KU"
|
| 958 |
facets = ""
|
| 959 |
if peer_reviewed: facets += "&qInclude=facet_tlevel,exact,peer_reviewed"
|
|
|
|
| 964 |
facets += f"&multiFacets=facet_searchcreationdate,include,{yf}%7C,%7C{yt}"
|
| 965 |
|
| 966 |
base = "https://api-eu.hosted.exlibrisgroup.com/primo/v1/search"
|
| 967 |
+
qs = f"?vid={vid}&tab=Everything&scope=MyInst_and_CI&{query_params}&lang=en&sort=rank&limit={limit}&offset=0&mode=advanced&apikey={api_key}{facets}"
|
| 968 |
|
| 969 |
async with httpx.AsyncClient(timeout=15) as client:
|
| 970 |
for region in ["api-eu", "api-na", "api-ap"]:
|