nikeshn committed on
Commit
d0069f8
·
verified ·
1 Parent(s): 3c9f801

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -1
app.py CHANGED
@@ -874,10 +874,86 @@ def build_vectorstore(docs, force_rebuild=False):
874
 
875
 
876
  # ===== TOOL: SEARCH PRIMO =====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
877
  async def tool_search_primo(query, limit=5, peer_reviewed=False, open_access=False, year_from=None, year_to=None):
 
 
 
 
 
878
  api_key = os.environ.get("PRIMO_API_KEY")
879
  if not api_key: return {"error": "PRIMO_API_KEY not configured", "results": []}
880
 
 
 
 
 
 
 
 
 
 
881
  vid = "971KUOSTAR_INST:KU"
882
  facets = ""
883
  if peer_reviewed: facets += "&qInclude=facet_tlevel,exact,peer_reviewed"
@@ -888,7 +964,7 @@ async def tool_search_primo(query, limit=5, peer_reviewed=False, open_access=Fal
888
  facets += f"&multiFacets=facet_searchcreationdate,include,{yf}%7C,%7C{yt}"
889
 
890
  base = "https://api-eu.hosted.exlibrisgroup.com/primo/v1/search"
891
- qs = f"?vid={vid}&tab=Everything&scope=MyInst_and_CI&q=any,contains,{query}&lang=en&sort=rank&limit={limit}&offset=0&apikey={api_key}{facets}"
892
 
893
  async with httpx.AsyncClient(timeout=15) as client:
894
  for region in ["api-eu", "api-na", "api-ap"]:
 
874
 
875
 
876
  # ===== TOOL: SEARCH PRIMO =====
877
+ def _boolean_to_primo_params(boolean_query: str) -> str:
878
+ """
879
+ Convert Boolean string to PRIMO multi-query parameter format.
880
+ ("artificial intelligence" OR "machine learning") AND ("cancer diagnosis")
881
+ → query=any,contains,artificial intelligence OR machine learning,AND
882
+ &query=any,contains,cancer diagnosis
883
+ Each top-level AND group becomes a separate query= parameter.
884
+ """
885
+ from urllib.parse import quote
886
+
887
+ # Strip outer parens groups split by top-level AND
888
+ # First flatten quotes and clean
889
+ cleaned = boolean_query.strip()
890
+
891
+ # Split on top-level AND (not inside parentheses)
892
+ groups = []
893
+ depth = 0
894
+ current = []
895
+ i = 0
896
+ tokens = re.split(r'(\(|\)|\bAND\b|\bOR\b)', cleaned)
897
+ # Simpler approach: split on AND at depth 0
898
+ chunk = ""
899
+ depth = 0
900
+ for char in cleaned:
901
+ if char == '(':
902
+ depth += 1
903
+ chunk += char
904
+ elif char == ')':
905
+ depth -= 1
906
+ chunk += char
907
+ else:
908
+ chunk += char
909
+ # Check for AND at depth 0
910
+ if depth == 0 and chunk.upper().endswith(' AND '):
911
+ groups.append(chunk[:-5].strip())
912
+ chunk = ""
913
+ if chunk.strip():
914
+ groups.append(chunk.strip())
915
+
916
+ if not groups:
917
+ groups = [cleaned]
918
+
919
+ # Clean each group: remove outer parens, strip quotes, normalise OR
920
+ primo_params = []
921
+ for i, group in enumerate(groups):
922
+ # Remove outer parentheses
923
+ g = group.strip()
924
+ if g.startswith('(') and g.endswith(')'):
925
+ g = g[1:-1].strip()
926
+ # Remove double quotes (PRIMO doesn't need them in query= param)
927
+ g = g.replace('"', '')
928
+ # Normalise spacing around OR
929
+ g = re.sub(r'\s+OR\s+', ' OR ', g).strip()
930
+ if not g:
931
+ continue
932
+ # All except the last get ,AND suffix
933
+ suffix = ',AND' if i < len(groups) - 1 else ''
934
+ primo_params.append(f"query=any,contains,{quote(g, safe=' OR')}{suffix}")
935
+
936
+ return '&'.join(primo_params) if primo_params else f"query=any,contains,{quote(cleaned)}"
937
+
938
+
939
  async def tool_search_primo(query, limit=5, peer_reviewed=False, open_access=False, year_from=None, year_to=None):
940
+ """
941
+ query can be either:
942
+ - a Boolean string: ("AI" OR "ML") AND ("cancer") — converted to multi-query PRIMO format
943
+ - a plain keyword string: "machine learning cancer" — sent as single query
944
+ """
945
  api_key = os.environ.get("PRIMO_API_KEY")
946
  if not api_key: return {"error": "PRIMO_API_KEY not configured", "results": []}
947
 
948
+ # Build PRIMO query params — multi-query format preserves all concepts
949
+ if re.search(r'\b(AND|OR)\b', query) and '(' in query:
950
+ # Boolean string — convert to multi-query format
951
+ query_params = _boolean_to_primo_params(query)
952
+ else:
953
+ # Plain keywords — single query, URL encoded
954
+ from urllib.parse import quote
955
+ query_params = f"query=any,contains,{quote(query.strip())}"
956
+
957
  vid = "971KUOSTAR_INST:KU"
958
  facets = ""
959
  if peer_reviewed: facets += "&qInclude=facet_tlevel,exact,peer_reviewed"
 
964
  facets += f"&multiFacets=facet_searchcreationdate,include,{yf}%7C,%7C{yt}"
965
 
966
  base = "https://api-eu.hosted.exlibrisgroup.com/primo/v1/search"
967
+ qs = f"?vid={vid}&tab=Everything&scope=MyInst_and_CI&{query_params}&lang=en&sort=rank&limit={limit}&offset=0&mode=advanced&apikey={api_key}{facets}"
968
 
969
  async with httpx.AsyncClient(timeout=15) as client:
970
  for region in ["api-eu", "api-na", "api-ap"]: