nikeshn committed on
Commit
0e4a811
·
verified ·
1 Parent(s): d0069f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -27
app.py CHANGED
@@ -58,7 +58,7 @@ MEDICAL_KEYWORDS = [
58
  ]
59
 
60
  CURRENT_INFO_RE = re.compile(r"\b(current|currently|latest|today|recent|recently|now|this week|this month|news|update|updated|president|prime minister|ceo|minister|king|ruler|who won)\b", re.IGNORECASE)
61
- RESEARCH_CUE_RE = re.compile(r"\b(find|search|look for|show me|get me|give me|locate|recommend|suggest|articles?|papers?|books?|journals?|studies|literature|research|systematic review|evidence|sources|database|databases|peer reviewed|open access)\b", re.IGNORECASE)
62
  LIBRARY_CUE_RE = re.compile(r"\b(library|librarian|borrow|loan|renew|fine|study room|room booking|reserve a room|account|my library|interlibrary|ill|khazna|orcid|open access|apc|refworks|libkey|hours|location|contact|visitor|alumni|database access|off campus|remote access)\b", re.IGNORECASE)
63
  MEDICAL_SEARCH_RE = re.compile(r"\b(pubmed|embase|cinahl|clinicalkey|cochrane|uptodate|medline|systematic review|clinical trial|biomedical literature|medical literature)\b", re.IGNORECASE)
64
  SOCIAL_RE = re.compile(r"^(hi|hello|hey|good morning|good afternoon|good evening|how are you|how old are you|who are you|what are you|are you a bot|are you a robot|what can you do|thanks|thank you|ok|okay|bye|goodbye|lol|haha|hehe|you are silly|are you silly|are you dumb|stupid bot|dumb bot|idiot bot|joke|tell me a joke)\b", re.IGNORECASE)
@@ -876,28 +876,26 @@ def build_vectorstore(docs, force_rebuild=False):
876
  # ===== TOOL: SEARCH PRIMO =====
877
  def _boolean_to_primo_params(boolean_query: str) -> str:
878
  """
879
- Convert Boolean string to PRIMO multi-query parameter format.
880
- ("artificial intelligence" OR "machine learning") AND ("cancer diagnosis")
881
- → query=any,contains,artificial intelligence OR machine learning,AND
882
- &query=any,contains,cancer diagnosis
883
- Each top-level AND group becomes a separate query= parameter.
 
 
884
  """
885
  from urllib.parse import quote
886
 
887
- # Strip outer parens groups split by top-level AND
888
- # First flatten quotes and clean
889
- cleaned = boolean_query.strip()
 
890
 
891
- # Split on top-level AND (not inside parentheses)
892
  groups = []
893
  depth = 0
894
- current = []
895
- i = 0
896
- tokens = re.split(r'(\(|\)|\bAND\b|\bOR\b)', cleaned)
897
- # Simpler approach: split on AND at depth 0
898
  chunk = ""
899
- depth = 0
900
- for char in cleaned:
901
  if char == '(':
902
  depth += 1
903
  chunk += char
@@ -906,34 +904,33 @@ def _boolean_to_primo_params(boolean_query: str) -> str:
906
  chunk += char
907
  else:
908
  chunk += char
909
- # Check for AND at depth 0
910
  if depth == 0 and chunk.upper().endswith(' AND '):
911
  groups.append(chunk[:-5].strip())
912
  chunk = ""
913
  if chunk.strip():
914
  groups.append(chunk.strip())
915
-
916
  if not groups:
917
- groups = [cleaned]
 
918
 
919
- # Clean each group: remove outer parens, strip quotes, normalise OR
920
  primo_params = []
921
  for i, group in enumerate(groups):
922
- # Remove outer parentheses
923
  g = group.strip()
 
924
  if g.startswith('(') and g.endswith(')'):
925
  g = g[1:-1].strip()
926
- # Remove double quotes (PRIMO doesn't need them in query= param)
927
  g = g.replace('"', '')
928
- # Normalise spacing around OR
929
  g = re.sub(r'\s+OR\s+', ' OR ', g).strip()
930
  if not g:
931
  continue
932
- # All except the last get ,AND suffix
 
933
  suffix = ',AND' if i < len(groups) - 1 else ''
934
- primo_params.append(f"query=any,contains,{quote(g, safe=' OR')}{suffix}")
935
 
936
- return '&'.join(primo_params) if primo_params else f"query=any,contains,{quote(cleaned)}"
937
 
938
 
939
  async def tool_search_primo(query, limit=5, peer_reviewed=False, open_access=False, year_from=None, year_to=None):
@@ -1577,7 +1574,22 @@ def _looks_research_question(question: str) -> bool:
1577
  q = question.lower()
1578
  if RESEARCH_CUE_RE.search(q):
1579
  return True
1580
- return bool(re.search(r'\bimpact of\b|\beffects? of\b|\bcauses? of\b|\brelationship between\b|\bliterature on\b', q))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1581
 
1582
  def _looks_medical_search(question: str) -> bool:
1583
  q = question.lower()
@@ -1904,6 +1916,11 @@ RULES:
1904
  3. "I am fed up with searching" alone (no specific topic/database) = social (expressing frustration).
1905
  4. "I need help finding articles on diabetes" = search_medical.
1906
  5. KU = Khalifa University. Do NOT confuse with Kuwait University.
 
 
 
 
 
1907
 
1908
  For "social" intent ONLY: also include "casual_answer" — a warm 1-3 sentence LibBee response
1909
  (friendly librarian tone, no markdown, no bullet points). Offer to help with library services.
 
58
  ]
59
 
60
  CURRENT_INFO_RE = re.compile(r"\b(current|currently|latest|today|recent|recently|now|this week|this month|news|update|updated|president|prime minister|ceo|minister|king|ruler|who won)\b", re.IGNORECASE)
61
+ RESEARCH_CUE_RE = re.compile(r"\b(find|search|look for|show me|get me|give me|locate|recommend|suggest|articles?|papers?|books?|journals?|studies|literature|research|systematic review|evidence|sources|database|databases|peer reviewed|open access|advances?|approach(es)?|method(s|ology)?|technique[s]?|application[s]?|framework[s]?|model[s]?|algorithm[s]?|publication[s]?|recent work|current state|state of the art)\b", re.IGNORECASE)
62
  LIBRARY_CUE_RE = re.compile(r"\b(library|librarian|borrow|loan|renew|fine|study room|room booking|reserve a room|account|my library|interlibrary|ill|khazna|orcid|open access|apc|refworks|libkey|hours|location|contact|visitor|alumni|database access|off campus|remote access)\b", re.IGNORECASE)
63
  MEDICAL_SEARCH_RE = re.compile(r"\b(pubmed|embase|cinahl|clinicalkey|cochrane|uptodate|medline|systematic review|clinical trial|biomedical literature|medical literature)\b", re.IGNORECASE)
64
  SOCIAL_RE = re.compile(r"^(hi|hello|hey|good morning|good afternoon|good evening|how are you|how old are you|who are you|what are you|are you a bot|are you a robot|what can you do|thanks|thank you|ok|okay|bye|goodbye|lol|haha|hehe|you are silly|are you silly|are you dumb|stupid bot|dumb bot|idiot bot|joke|tell me a joke)\b", re.IGNORECASE)
 
876
  # ===== TOOL: SEARCH PRIMO =====
877
  def _boolean_to_primo_params(boolean_query: str) -> str:
878
  """
879
+ Convert Boolean string to PRIMO REST API multi-query format.
880
+ PRIMO API uses q= (not query=) and requires %20 encoded spaces.
881
+
882
+ ("AI" OR "ML") AND ("cancer") AND ("deep learning")
883
+ → q=any,contains,AI%20OR%20ML,AND&q=any,contains,cancer,AND&q=any,contains,deep%20learning
884
+
885
+ Single concept: q=any,contains,machine%20learning
886
  """
887
  from urllib.parse import quote
888
 
889
+ # No parenthesised AND expression (needs both AND and "("): treat the input as a simple single query
890
+ if not (re.search(r'\bAND\b', boolean_query) and '(' in boolean_query):
891
+ clean = boolean_query.replace('"', '').strip()
892
+ return f"q=any,contains,{quote(clean)}"
893
 
894
+ # Split on top-level AND
895
  groups = []
896
  depth = 0
 
 
 
 
897
  chunk = ""
898
+ for char in boolean_query:
 
899
  if char == '(':
900
  depth += 1
901
  chunk += char
 
904
  chunk += char
905
  else:
906
  chunk += char
 
907
  if depth == 0 and chunk.upper().endswith(' AND '):
908
  groups.append(chunk[:-5].strip())
909
  chunk = ""
910
  if chunk.strip():
911
  groups.append(chunk.strip())
 
912
  if not groups:
913
+ clean = boolean_query.replace('"', '').strip()
914
+ return f"q=any,contains,{quote(clean)}"
915
 
 
916
  primo_params = []
917
  for i, group in enumerate(groups):
 
918
  g = group.strip()
919
+ # Remove outer parentheses
920
  if g.startswith('(') and g.endswith(')'):
921
  g = g[1:-1].strip()
922
+ # Remove double quotes
923
  g = g.replace('"', '')
924
+ # Normalise OR spacing
925
  g = re.sub(r'\s+OR\s+', ' OR ', g).strip()
926
  if not g:
927
  continue
928
+ # URL-encode properly: spaces become %20
929
+ encoded = quote(g)
930
  suffix = ',AND' if i < len(groups) - 1 else ''
931
+ primo_params.append(f"q=any,contains,{encoded}{suffix}")
932
 
933
+ return '&'.join(primo_params) if primo_params else f"q=any,contains,{quote(boolean_query.replace(chr(34), '').strip())}"
934
 
935
 
936
  async def tool_search_primo(query, limit=5, peer_reviewed=False, open_access=False, year_from=None, year_to=None):
 
1574
  q = question.lower()
1575
  if RESEARCH_CUE_RE.search(q):
1576
  return True
1577
+ # Natural academic questions that don't use explicit search trigger words
1578
+ # e.g. "What are the recent advances in AI for cancer diagnosis?"
1579
+ # e.g. "How effective are mRNA vaccines in treating lung cancer?"
1580
+ # e.g. "What is the impact of climate change on water security in the UAE?"
1581
+ academic_patterns = re.compile(
1582
+ r"\b(impact of|effect[s]? of|role of|use of|application of|advance[s] in|"
1583
+ r"development[s] in|progress in|challenge[s] of|benefit[s] of|"
1584
+ r"causes? of|relationship between|comparison of|effectiveness of|"
1585
+ r"how (effective|efficient|useful|does|do|can|is|are)|"
1586
+ r"what (are the|is the) (recent|latest|current|new|emerging)|"
1587
+ r"published in the last|in the last \d+ years?|"
1588
+ r"systematic review|meta.?analysis|clinical trial[s]?|"
1589
+ r"literature on|survey of|overview of|state of the art)\b",
1590
+ re.IGNORECASE
1591
+ )
1592
+ return bool(academic_patterns.search(q))
1593
 
1594
  def _looks_medical_search(question: str) -> bool:
1595
  q = question.lower()
 
1916
  3. "I am fed up with searching" alone (no specific topic/database) = social (expressing frustration).
1917
  4. "I need help finding articles on diabetes" = search_medical.
1918
  5. KU = Khalifa University. Do NOT confuse with Kuwait University.
1919
+ 6. ANY question asking about advances, approaches, methods, effectiveness, impact, or relationship
1920
+ on a specific academic/scientific topic = search_academic or search_medical. NOT library_info.
1921
+ e.g. "What are the recent advances in AI for predictive maintenance?" = search_academic
1922
+ e.g. "How effective are mRNA vaccines for cancer treatment?" = search_medical
1923
+ e.g. "What is the impact of climate change on food security?" = search_academic
1924
 
1925
  For "social" intent ONLY: also include "casual_answer" — a warm 1-3 sentence LibBee response
1926
  (friendly librarian tone, no markdown, no bullet points). Offer to help with library services.