Update app.py
Browse files
app.py
CHANGED
|
@@ -58,7 +58,7 @@ MEDICAL_KEYWORDS = [
|
|
| 58 |
]
|
| 59 |
|
| 60 |
CURRENT_INFO_RE = re.compile(r"\b(current|currently|latest|today|recent|recently|now|this week|this month|news|update|updated|president|prime minister|ceo|minister|king|ruler|who won)\b", re.IGNORECASE)
|
| 61 |
-
RESEARCH_CUE_RE = re.compile(r"\b(find|search|look for|show me|get me|give me|locate|recommend|suggest|articles?|papers?|books?|journals?|studies|literature|research|systematic review|evidence|sources|database|databases|peer reviewed|open access)\b", re.IGNORECASE)
|
| 62 |
LIBRARY_CUE_RE = re.compile(r"\b(library|librarian|borrow|loan|renew|fine|study room|room booking|reserve a room|account|my library|interlibrary|ill|khazna|orcid|open access|apc|refworks|libkey|hours|location|contact|visitor|alumni|database access|off campus|remote access)\b", re.IGNORECASE)
|
| 63 |
MEDICAL_SEARCH_RE = re.compile(r"\b(pubmed|embase|cinahl|clinicalkey|cochrane|uptodate|medline|systematic review|clinical trial|biomedical literature|medical literature)\b", re.IGNORECASE)
|
| 64 |
SOCIAL_RE = re.compile(r"^(hi|hello|hey|good morning|good afternoon|good evening|how are you|how old are you|who are you|what are you|are you a bot|are you a robot|what can you do|thanks|thank you|ok|okay|bye|goodbye|lol|haha|hehe|you are silly|are you silly|are you dumb|stupid bot|dumb bot|idiot bot|joke|tell me a joke)\b", re.IGNORECASE)
|
|
@@ -876,28 +876,26 @@ def build_vectorstore(docs, force_rebuild=False):
|
|
| 876 |
# ===== TOOL: SEARCH PRIMO =====
|
| 877 |
def _boolean_to_primo_params(boolean_query: str) -> str:
|
| 878 |
"""
|
| 879 |
-
Convert Boolean string to PRIMO multi-query
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
|
|
|
|
|
|
| 884 |
"""
|
| 885 |
from urllib.parse import quote
|
| 886 |
|
| 887 |
-
#
|
| 888 |
-
|
| 889 |
-
|
|
|
|
| 890 |
|
| 891 |
-
# Split on top-level AND
|
| 892 |
groups = []
|
| 893 |
depth = 0
|
| 894 |
-
current = []
|
| 895 |
-
i = 0
|
| 896 |
-
tokens = re.split(r'(\(|\)|\bAND\b|\bOR\b)', cleaned)
|
| 897 |
-
# Simpler approach: split on AND at depth 0
|
| 898 |
chunk = ""
|
| 899 |
-
|
| 900 |
-
for char in cleaned:
|
| 901 |
if char == '(':
|
| 902 |
depth += 1
|
| 903 |
chunk += char
|
|
@@ -906,34 +904,33 @@ def _boolean_to_primo_params(boolean_query: str) -> str:
|
|
| 906 |
chunk += char
|
| 907 |
else:
|
| 908 |
chunk += char
|
| 909 |
-
# Check for AND at depth 0
|
| 910 |
if depth == 0 and chunk.upper().endswith(' AND '):
|
| 911 |
groups.append(chunk[:-5].strip())
|
| 912 |
chunk = ""
|
| 913 |
if chunk.strip():
|
| 914 |
groups.append(chunk.strip())
|
| 915 |
-
|
| 916 |
if not groups:
|
| 917 |
-
|
|
|
|
| 918 |
|
| 919 |
-
# Clean each group: remove outer parens, strip quotes, normalise OR
|
| 920 |
primo_params = []
|
| 921 |
for i, group in enumerate(groups):
|
| 922 |
-
# Remove outer parentheses
|
| 923 |
g = group.strip()
|
|
|
|
| 924 |
if g.startswith('(') and g.endswith(')'):
|
| 925 |
g = g[1:-1].strip()
|
| 926 |
-
# Remove double quotes
|
| 927 |
g = g.replace('"', '')
|
| 928 |
-
# Normalise
|
| 929 |
g = re.sub(r'\s+OR\s+', ' OR ', g).strip()
|
| 930 |
if not g:
|
| 931 |
continue
|
| 932 |
-
#
|
|
|
|
| 933 |
suffix = ',AND' if i < len(groups) - 1 else ''
|
| 934 |
-
primo_params.append(f"
|
| 935 |
|
| 936 |
-
return '&'.join(primo_params) if primo_params else f"
|
| 937 |
|
| 938 |
|
| 939 |
async def tool_search_primo(query, limit=5, peer_reviewed=False, open_access=False, year_from=None, year_to=None):
|
|
@@ -1577,7 +1574,22 @@ def _looks_research_question(question: str) -> bool:
|
|
| 1577 |
q = question.lower()
|
| 1578 |
if RESEARCH_CUE_RE.search(q):
|
| 1579 |
return True
|
| 1580 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1581 |
|
| 1582 |
def _looks_medical_search(question: str) -> bool:
|
| 1583 |
q = question.lower()
|
|
@@ -1904,6 +1916,11 @@ RULES:
|
|
| 1904 |
3. "I am fed up with searching" alone (no specific topic/database) = social (expressing frustration).
|
| 1905 |
4. "I need help finding articles on diabetes" = search_medical.
|
| 1906 |
5. KU = Khalifa University. Do NOT confuse with Kuwait University.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1907 |
|
| 1908 |
For "social" intent ONLY: also include "casual_answer" — a warm 1-3 sentence LibBee response
|
| 1909 |
(friendly librarian tone, no markdown, no bullet points). Offer to help with library services.
|
|
|
|
| 58 |
]
|
| 59 |
|
| 60 |
CURRENT_INFO_RE = re.compile(r"\b(current|currently|latest|today|recent|recently|now|this week|this month|news|update|updated|president|prime minister|ceo|minister|king|ruler|who won)\b", re.IGNORECASE)
|
| 61 |
+
RESEARCH_CUE_RE = re.compile(r"\b(find|search|look for|show me|get me|give me|locate|recommend|suggest|articles?|papers?|books?|journals?|studies|literature|research|systematic review|evidence|sources|database|databases|peer reviewed|open access|advances?|approach(es)?|method(s|ology)?|technique[s]?|application[s]?|framework[s]?|model[s]?|algorithm[s]?|publication[s]?|recent work|current state|state of the art)\b", re.IGNORECASE)
|
| 62 |
LIBRARY_CUE_RE = re.compile(r"\b(library|librarian|borrow|loan|renew|fine|study room|room booking|reserve a room|account|my library|interlibrary|ill|khazna|orcid|open access|apc|refworks|libkey|hours|location|contact|visitor|alumni|database access|off campus|remote access)\b", re.IGNORECASE)
|
| 63 |
MEDICAL_SEARCH_RE = re.compile(r"\b(pubmed|embase|cinahl|clinicalkey|cochrane|uptodate|medline|systematic review|clinical trial|biomedical literature|medical literature)\b", re.IGNORECASE)
|
| 64 |
SOCIAL_RE = re.compile(r"^(hi|hello|hey|good morning|good afternoon|good evening|how are you|how old are you|who are you|what are you|are you a bot|are you a robot|what can you do|thanks|thank you|ok|okay|bye|goodbye|lol|haha|hehe|you are silly|are you silly|are you dumb|stupid bot|dumb bot|idiot bot|joke|tell me a joke)\b", re.IGNORECASE)
|
|
|
|
| 876 |
# ===== TOOL: SEARCH PRIMO =====
|
| 877 |
def _boolean_to_primo_params(boolean_query: str) -> str:
|
| 878 |
"""
|
| 879 |
+
Convert Boolean string to PRIMO REST API multi-query format.
|
| 880 |
+
The PRIMO API uses q= (not query=) and requires spaces to be encoded as %20.
|
| 881 |
+
|
| 882 |
+
("AI" OR "ML") AND ("cancer") AND ("deep learning")
|
| 883 |
+
→ q=any,contains,AI%20OR%20ML,AND&q=any,contains,cancer,AND&q=any,contains,deep%20learning
|
| 884 |
+
|
| 885 |
+
Single concept: q=any,contains,machine%20learning
|
| 886 |
"""
|
| 887 |
from urllib.parse import quote
|
| 888 |
|
| 889 |
+
# No uppercase AND together with parenthesised groups (this includes OR-only queries) — send the input as a simple single query
|
| 890 |
+
if not (re.search(r'\bAND\b', boolean_query) and '(' in boolean_query):
|
| 891 |
+
clean = boolean_query.replace('"', '').strip()
|
| 892 |
+
return f"q=any,contains,{quote(clean)}"
|
| 893 |
|
| 894 |
+
# Split on top-level AND
|
| 895 |
groups = []
|
| 896 |
depth = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 897 |
chunk = ""
|
| 898 |
+
for char in boolean_query:
|
|
|
|
| 899 |
if char == '(':
|
| 900 |
depth += 1
|
| 901 |
chunk += char
|
|
|
|
| 904 |
chunk += char
|
| 905 |
else:
|
| 906 |
chunk += char
|
|
|
|
| 907 |
if depth == 0 and chunk.upper().endswith(' AND '):
|
| 908 |
groups.append(chunk[:-5].strip())
|
| 909 |
chunk = ""
|
| 910 |
if chunk.strip():
|
| 911 |
groups.append(chunk.strip())
|
|
|
|
| 912 |
if not groups:
|
| 913 |
+
clean = boolean_query.replace('"', '').strip()
|
| 914 |
+
return f"q=any,contains,{quote(clean)}"
|
| 915 |
|
|
|
|
| 916 |
primo_params = []
|
| 917 |
for i, group in enumerate(groups):
|
|
|
|
| 918 |
g = group.strip()
|
| 919 |
+
# Remove outer parentheses
|
| 920 |
if g.startswith('(') and g.endswith(')'):
|
| 921 |
g = g[1:-1].strip()
|
| 922 |
+
# Remove double quotes
|
| 923 |
g = g.replace('"', '')
|
| 924 |
+
# Normalise OR spacing
|
| 925 |
g = re.sub(r'\s+OR\s+', ' OR ', g).strip()
|
| 926 |
if not g:
|
| 927 |
continue
|
| 928 |
+
# URL encode properly — spaces become %20
|
| 929 |
+
encoded = quote(g)
|
| 930 |
suffix = ',AND' if i < len(groups) - 1 else ''
|
| 931 |
+
primo_params.append(f"q=any,contains,{encoded}{suffix}")
|
| 932 |
|
| 933 |
+
return '&'.join(primo_params) if primo_params else f"q=any,contains,{quote(boolean_query.replace(chr(34), '').strip())}"
|
| 934 |
|
| 935 |
|
| 936 |
async def tool_search_primo(query, limit=5, peer_reviewed=False, open_access=False, year_from=None, year_to=None):
|
|
|
|
| 1574 |
q = question.lower()
|
| 1575 |
if RESEARCH_CUE_RE.search(q):
|
| 1576 |
return True
|
| 1577 |
+
# Natural academic questions that don't use explicit search trigger words
|
| 1578 |
+
# e.g. "What are the recent advances in AI for cancer diagnosis?"
|
| 1579 |
+
# e.g. "How effective are mRNA vaccines in treating lung cancer?"
|
| 1580 |
+
# e.g. "What is the impact of climate change on water security in the UAE?"
|
| 1581 |
+
academic_patterns = re.compile(
|
| 1582 |
+
r"\b(impact of|effect[s]? of|role of|use of|application of|advance[s] in|"
|
| 1583 |
+
r"development[s] in|progress in|challenge[s] of|benefit[s] of|"
|
| 1584 |
+
r"causes? of|relationship between|comparison of|effectiveness of|"
|
| 1585 |
+
r"how (effective|efficient|useful|does|do|can|is|are)|"
|
| 1586 |
+
r"what (are the|is the) (recent|latest|current|new|emerging)|"
|
| 1587 |
+
r"published in the last|in the last \d+ years?|"
|
| 1588 |
+
r"systematic review|meta.?analysis|clinical trial[s]?|"
|
| 1589 |
+
r"literature on|survey of|overview of|state of the art)\b",
|
| 1590 |
+
re.IGNORECASE
|
| 1591 |
+
)
|
| 1592 |
+
return bool(academic_patterns.search(q))
|
| 1593 |
|
| 1594 |
def _looks_medical_search(question: str) -> bool:
|
| 1595 |
q = question.lower()
|
|
|
|
| 1916 |
3. "I am fed up with searching" alone (no specific topic/database) = social (expressing frustration).
|
| 1917 |
4. "I need help finding articles on diabetes" = search_medical.
|
| 1918 |
5. KU = Khalifa University. Do NOT confuse with Kuwait University.
|
| 1919 |
+
6. ANY question asking about advances, approaches, methods, effectiveness, impact, or relationship
|
| 1920 |
+
on a specific academic/scientific topic = search_academic or search_medical. NOT library_info.
|
| 1921 |
+
e.g. "What are the recent advances in AI for predictive maintenance?" = search_academic
|
| 1922 |
+
e.g. "How effective are mRNA vaccines for cancer treatment?" = search_medical
|
| 1923 |
+
e.g. "What is the impact of climate change on food security?" = search_academic
|
| 1924 |
|
| 1925 |
For "social" intent ONLY: also include "casual_answer" — a warm 1-3 sentence LibBee response
|
| 1926 |
(friendly librarian tone, no markdown, no bullet points). Offer to help with library services.
|