Update app.py
Browse files
app.py
CHANGED
|
@@ -61,6 +61,31 @@ CURRENT_INFO_RE = re.compile(r"\b(current|currently|latest|today|recent|recently
|
|
| 61 |
RESEARCH_CUE_RE = re.compile(r"\b(find|search|look for|show me|get me|give me|locate|recommend|suggest|articles?|papers?|books?|journals?|studies|literature|research|systematic review|evidence|sources|database|databases|peer reviewed|open access)\b", re.IGNORECASE)
|
| 62 |
LIBRARY_CUE_RE = re.compile(r"\b(library|librarian|borrow|loan|renew|fine|study room|room booking|reserve a room|account|my library|interlibrary|ill|khazna|orcid|open access|apc|refworks|libkey|hours|location|contact|visitor|alumni|database access|off campus|remote access)\b", re.IGNORECASE)
|
| 63 |
MEDICAL_SEARCH_RE = re.compile(r"\b(pubmed|embase|cinahl|clinicalkey|cochrane|uptodate|medline|systematic review|clinical trial|biomedical literature|medical literature)\b", re.IGNORECASE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
# ===== GLOBALS =====
|
| 66 |
vectorstore = None
|
|
@@ -119,15 +144,6 @@ def set_config(key, value):
|
|
| 119 |
conn.close()
|
| 120 |
|
| 121 |
|
| 122 |
-
def get_behavior_instructions():
    """Build the behaviour/system instructions used to steer the assistant.

    Reads the ``bot_personality`` and ``custom_instructions`` config entries
    through ``get_config`` (each with ``""`` as the fallback argument) and
    strips surrounding whitespace from both.

    Returns:
        * both texts joined under an "Additional instructions:" heading when
          both are non-empty;
        * whichever one is non-empty when only one is set;
        * a built-in default persona when neither is set.
    """
    # NOTE(review): assumes get_config returns a str (the stored value or the
    # supplied default); a None return would fail on .strip() -- confirm.
    personality = get_config("bot_personality", "").strip()
    custom = get_config("custom_instructions", "").strip()

    if not (personality and custom):
        # At most one of the two is set: use it, else the default persona.
        return personality or custom or "You are a helpful, friendly, and knowledgeable library assistant at Khalifa University, Abu Dhabi, UAE. KU = Khalifa University, NOT Kuwait University. Be concise and include relevant URLs when useful."

    return personality + "\n\nAdditional instructions:\n" + custom
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
# ===== ADMIN AUTH =====
|
| 132 |
# ADMIN_PASSWORD must be set as HF Space Secret — no insecure fallback
|
| 133 |
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "")
|
|
@@ -416,111 +432,11 @@ async def tool_library_info(question, history=None, model="gpt"):
|
|
| 416 |
history_text = "\n".join(f"{'User' if m['role']=='user' else 'Assistant'}: {m['content']}" for m in history[-3:])
|
| 417 |
base_query = f"{history_text}\n{question}"
|
| 418 |
|
| 419 |
-
# ── Semantic query expansion ──
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
"who is the librarian": "library staff contacts librarian",
|
| 425 |
-
"subject librarian": "Research and Access Services Librarian Nikesh Narayanan",
|
| 426 |
-
"medical librarian": "Jason Fetty Medical Librarian PubMed Embase CINAHL UpToDate",
|
| 427 |
-
"systems librarian": "Walter Brian Hall Digital Technology Services Librarian website systems technology",
|
| 428 |
-
"website issue": "Walter Brian Hall website systems technology",
|
| 429 |
-
"technology issue": "Walter Brian Hall website systems technology",
|
| 430 |
-
"acquisitions librarian": "Alia Al-Harrasi acquisitions collection development request title",
|
| 431 |
-
"public services": "Muna Ahmad Al Blooshi public services circulation general library services",
|
| 432 |
-
"e-resources librarian": "Rani Anand e-resources databases access problems vendor issues",
|
| 433 |
-
"database access": "Rani Anand e-resources databases access problems",
|
| 434 |
-
"open access": "Nikesh Narayanan open access APC scholarly communication",
|
| 435 |
-
"orcid": "Nikesh Narayanan ORCID researcher identifier profile",
|
| 436 |
-
"research impact": "Nikesh Narayanan research impact SciVal Scopus bibliometrics",
|
| 437 |
-
"library director": "Dr Abdulla Al Hefeiti Assistant Provost Libraries",
|
| 438 |
-
"ebook": "ebook central proquest download ebooks",
|
| 439 |
-
"ebooks": "ebook central proquest download ebooks",
|
| 440 |
-
"how to borrow": "borrowing loan period renew circulation",
|
| 441 |
-
"borrow books": "borrowing loan period renew circulation",
|
| 442 |
-
"renew book": "renew borrowing circulation My Library Account",
|
| 443 |
-
"renew loan": "renew borrowing circulation My Library Account",
|
| 444 |
-
"hold item": "request hold checked out item My Library Account",
|
| 445 |
-
"reserve item": "request hold checked out item My Library Account",
|
| 446 |
-
"access from home": "remote access off campus proxy no VPN",
|
| 447 |
-
"off campus": "remote access off campus proxy no VPN",
|
| 448 |
-
"remote access": "remote access off campus proxy no VPN",
|
| 449 |
-
"cite": "RefWorks citation reference management bibliography",
|
| 450 |
-
"citation": "RefWorks citation reference management bibliography",
|
| 451 |
-
"reference manager": "RefWorks citation bibliography",
|
| 452 |
-
"researcher id": "ORCID researcher identifier profile",
|
| 453 |
-
"impact factor": "journal citation reports JCR Scopus CiteScore SciVal",
|
| 454 |
-
"systematic review": "Cochrane Embase CINAHL PICO PubMed evidence based medicine",
|
| 455 |
-
"evidence based medicine": "Cochrane PubMed Embase CINAHL UpToDate",
|
| 456 |
-
"ai tools": "LeapSpace Scopus AI ScienceDirect AI EBSCO Research AI PRIMO AI",
|
| 457 |
-
"inter library loan": "interlibrary loan ILL document delivery article request",
|
| 458 |
-
"interlibrary loan": "interlibrary loan ILL document delivery article request",
|
| 459 |
-
"ill": "interlibrary loan ILL document delivery article request",
|
| 460 |
-
"document delivery": "interlibrary loan ILL document delivery article request",
|
| 461 |
-
"borrow from another library": "interlibrary loan ILL document delivery",
|
| 462 |
-
"request from another library": "interlibrary loan ILL document delivery",
|
| 463 |
-
"article not available": "full text article not available interlibrary loan LibKey Nomad",
|
| 464 |
-
"can't get article": "full text article not available interlibrary loan LibKey Nomad",
|
| 465 |
-
"cannot get article": "full text article not available interlibrary loan LibKey Nomad",
|
| 466 |
-
"don't get article": "full text article not available interlibrary loan LibKey Nomad",
|
| 467 |
-
"full text unavailable": "full text not available interlibrary loan LibKey Nomad",
|
| 468 |
-
"no full text": "full text not available interlibrary loan LibKey Nomad",
|
| 469 |
-
"full text": "full text LibKey Nomad article access PDF download",
|
| 470 |
-
"pdf": "full text LibKey Nomad article access PDF download",
|
| 471 |
-
"get article": "full text article access LibKey Nomad interlibrary loan",
|
| 472 |
-
"catalog": "PRIMO library catalog discovery holdings publication finder",
|
| 473 |
-
"library catalog": "PRIMO library catalog discovery holdings publication finder",
|
| 474 |
-
"discovery": "PRIMO library catalog discovery holdings publication finder",
|
| 475 |
-
"holdings": "PRIMO library catalog discovery holdings publication finder",
|
| 476 |
-
"does the library have": "PRIMO library catalog holdings publication finder",
|
| 477 |
-
"find journal": "PRIMO publication finder journal access holdings",
|
| 478 |
-
"publication finder": "PRIMO publication finder journal access holdings",
|
| 479 |
-
"circulation": "borrowing renew return hold loan period My Library Account",
|
| 480 |
-
"due date": "borrowing renew due date My Library Account",
|
| 481 |
-
"reserve room": "study room reserve rooms booking",
|
| 482 |
-
"book room": "study room reserve rooms booking",
|
| 483 |
-
"study room": "study room reserve rooms booking",
|
| 484 |
-
"ask librarian": "Ask a Librarian research help reference consultation",
|
| 485 |
-
"research help": "Ask a Librarian research consultation reference help",
|
| 486 |
-
"consultation": "Ask a Librarian research consultation reference help",
|
| 487 |
-
"peer reviewed": "peer reviewed scholarly journal article database filters",
|
| 488 |
-
"scholarly article": "peer reviewed scholarly journal article database filters",
|
| 489 |
-
"abstract": "article abstract citation database search results",
|
| 490 |
-
"doi": "DOI article identifier full text citation",
|
| 491 |
-
"call number": "PRIMO call number browse book location",
|
| 492 |
-
"course reserve": "reserve course materials reserve collection",
|
| 493 |
-
"visitor": "visitors external visitors access library",
|
| 494 |
-
"alumni": "alumni services library access alumni",
|
| 495 |
-
"civil engineering": "ASCE Library Knovel ScienceDirect Scopus ASTM Compass",
|
| 496 |
-
"mechanical engineering": "ASME Digital Library Knovel ScienceDirect ASTM Compass",
|
| 497 |
-
"electrical engineering": "IEEE Xplore INSPEC ScienceDirect",
|
| 498 |
-
"computer science": "ACM Digital Library IEEE Xplore arXiv ScienceDirect",
|
| 499 |
-
"artificial intelligence": "ACM Digital Library IEEE Xplore arXiv ScienceDirect Scopus",
|
| 500 |
-
"medicine": "PubMed Embase Cochrane UpToDate CINAHL",
|
| 501 |
-
"nursing": "CINAHL PubMed nursing allied health",
|
| 502 |
-
"chemistry": "ACS SciFindern RSC Journals Reaxys",
|
| 503 |
-
"physics": "APS Journals AIP IOPScience",
|
| 504 |
-
"business": "Business Source Complete Emerald ProQuest",
|
| 505 |
-
"dissertations": "ProQuest Dissertations Khazna theses",
|
| 506 |
-
"theses": "ProQuest Dissertations Khazna theses",
|
| 507 |
-
"standards": "ASTM Compass IEEE standards ASME ASCE",
|
| 508 |
-
}
|
| 509 |
-
question_lower = question.lower()
|
| 510 |
-
expanded_query = base_query
|
| 511 |
-
matched_expansions = []
|
| 512 |
-
for term, expansion in SYNONYMS.items():
|
| 513 |
-
if term in question_lower:
|
| 514 |
-
matched_expansions.append(expansion)
|
| 515 |
-
if matched_expansions:
|
| 516 |
-
unique_expansions = []
|
| 517 |
-
seen_exp = set()
|
| 518 |
-
for exp in matched_expansions:
|
| 519 |
-
if exp not in seen_exp:
|
| 520 |
-
seen_exp.add(exp)
|
| 521 |
-
unique_expansions.append(exp)
|
| 522 |
-
expanded_query = f"{base_query} {' '.join(unique_expansions[:4])}"
|
| 523 |
-
print(f"Query expanded with {len(unique_expansions[:4])} glossary hints")
|
| 524 |
|
| 525 |
# ── FAISS scored search ──
|
| 526 |
docs_with_scores = vectorstore.similarity_search_with_score(expanded_query, k=TOP_K)
|
|
@@ -548,10 +464,7 @@ async def tool_library_info(question, history=None, model="gpt"):
|
|
| 548 |
start your answer with: "Did you mean [exact title from context]? If so, here is the information:"
|
| 549 |
then give the answer.""" if moderate_match else ""
|
| 550 |
|
| 551 |
-
|
| 552 |
-
prompt = f"""{behavior}
|
| 553 |
-
|
| 554 |
-
You are the Khalifa University Library AI Assistant in Abu Dhabi, UAE.
|
| 555 |
KU means Khalifa University, NOT Kuwait University.
|
| 556 |
|
| 557 |
RULES — follow exactly:
|
|
@@ -827,6 +740,31 @@ def _looks_medical_search(question: str) -> bool:
|
|
| 827 |
return False
|
| 828 |
return bool(MEDICAL_SEARCH_RE.search(q) or _looks_research_question(q))
|
| 829 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 830 |
@app.post("/correct")
|
| 831 |
async def correct_query(req: CorrectRequest):
|
| 832 |
"""
|
|
@@ -853,12 +791,38 @@ class GeneralRequest(BaseModel):
|
|
| 853 |
|
| 854 |
async def _answer_general(question: str, history=None) -> dict:
|
| 855 |
"""Answer general knowledge questions using live web search when available."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 856 |
try:
|
| 857 |
import openai
|
| 858 |
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
|
| 859 |
|
| 860 |
-
|
| 861 |
-
messages = [{"role": "system", "content": behavior}]
|
| 862 |
if history:
|
| 863 |
for m in history[-5:]:
|
| 864 |
role = m.get("role", "user")
|
|
@@ -893,7 +857,7 @@ async def _answer_general(question: str, history=None) -> dict:
|
|
| 893 |
print(f"Web search failed, falling back to plain GPT: {e}")
|
| 894 |
try:
|
| 895 |
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.3, max_tokens=500)
|
| 896 |
-
msgs = [
|
| 897 |
for m in (history or [])[-5:]:
|
| 898 |
if m.get("role") in ("user", "assistant"):
|
| 899 |
msgs.append({"role": m["role"], "content": m.get("content", "")})
|
|
@@ -1001,50 +965,83 @@ async def agent_query(req: AgentRequest):
|
|
| 1001 |
history = [{"role": m.role, "content": m.content} for m in req.history] if req.history else []
|
| 1002 |
use_claude = req.model == "claude" and bool(os.environ.get("ANTHROPIC_API_KEY"))
|
| 1003 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1004 |
intent = "general"
|
| 1005 |
natural_query = question
|
| 1006 |
database_query = question
|
| 1007 |
search_plan = None
|
| 1008 |
|
| 1009 |
try:
|
| 1010 |
-
|
| 1011 |
-
|
| 1012 |
-
|
| 1013 |
-
intent = "
|
| 1014 |
-
elif _looks_medical_search(question):
|
| 1015 |
-
intent = "search_medical"
|
| 1016 |
-
elif _looks_research_question(question):
|
| 1017 |
-
intent = "search_academic"
|
| 1018 |
-
else:
|
| 1019 |
-
classifier_prompt = f"""You are routing a question for a university library assistant.
|
| 1020 |
-
|
| 1021 |
-
Return ONLY valid JSON with one intent: library_info, search_academic, search_medical, general, or general_recent.
|
| 1022 |
-
|
| 1023 |
-
Rules:
|
| 1024 |
-
- library_info: KU Library services, librarians, borrowing, rooms, accounts, policies, databases access
|
| 1025 |
-
- search_medical: user wants medical/clinical literature, articles, books, evidence, or databases
|
| 1026 |
-
- search_academic: user wants literature, books, articles, papers, evidence, or academic search on any non-medical topic
|
| 1027 |
-
- general_recent: the answer depends on current or recent information, news, or current office-holders
|
| 1028 |
-
- general: simple general-knowledge or conversational questions not asking for academic search
|
| 1029 |
-
|
| 1030 |
-
Question: "{question}"
|
| 1031 |
-
|
| 1032 |
-
Return JSON like: {{"intent":"general_recent"}}"""
|
| 1033 |
-
if use_claude:
|
| 1034 |
-
from langchain_anthropic import ChatAnthropic
|
| 1035 |
-
clf_llm = ChatAnthropic(model="claude-haiku-4-5-20251001", temperature=0, max_tokens=120)
|
| 1036 |
-
else:
|
| 1037 |
-
clf_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, max_tokens=120)
|
| 1038 |
-
clf_resp = clf_llm.invoke(classifier_prompt)
|
| 1039 |
-
clf = _extract_json_object(clf_resp.content.strip())
|
| 1040 |
-
intent = clf.get("intent", "general")
|
| 1041 |
-
|
| 1042 |
if intent in ("search_academic", "search_medical"):
|
| 1043 |
search_plan = await _build_search_plan(question)
|
| 1044 |
natural_query = search_plan["natural"]
|
| 1045 |
database_query = search_plan["database_query"] or search_plan["corrected"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1046 |
except Exception:
|
| 1047 |
-
if
|
|
|
|
|
|
|
| 1048 |
intent = "search_medical" if _looks_medical_search(question) else "search_academic"
|
| 1049 |
search_plan = await _build_search_plan(question)
|
| 1050 |
natural_query = search_plan["natural"]
|
|
@@ -1114,7 +1111,10 @@ Return JSON like: {{"intent":"general_recent"}}"""
|
|
| 1114 |
context_parts.append(f"Natural query for AI tools: {natural_query}")
|
| 1115 |
context_parts.append(f"Database query for PRIMO/PubMed: {database_query}")
|
| 1116 |
|
| 1117 |
-
|
|
|
|
|
|
|
|
|
|
| 1118 |
Be concise (3-5 sentences).
|
| 1119 |
- For library_info, answer from the library context and include URLs when useful.
|
| 1120 |
- For search intents, briefly summarise the search direction and mention the top 2-3 relevant results if present.
|
|
@@ -1368,8 +1368,6 @@ function st(el,t){{document.querySelectorAll('.tab').forEach(e=>e.classList.remo
|
|
| 1368 |
|
| 1369 |
// Fetch analytics from Cloudflare D1
|
| 1370 |
fetch(W+'/analytics').then(r=>r.json()).then(d=>{{
|
| 1371 |
-
const helpful = (d.feedback_summary||[]).find(x=>x.feedback==='up')?.c || 0;
|
| 1372 |
-
const notHelpful = (d.feedback_summary||[]).find(x=>x.feedback==='down')?.c || 0;
|
| 1373 |
const el=document.getElementById('t-analytics');
|
| 1374 |
el.innerHTML=`
|
| 1375 |
<div class="grid">
|
|
@@ -1378,44 +1376,20 @@ fetch(W+'/analytics').then(r=>r.json()).then(d=>{{
|
|
| 1378 |
<div class="stat"><div class="n">${{d.week}}</div><div class="l">This Week</div></div>
|
| 1379 |
<div class="stat"><div class="n">${{(d.avg_time||0).toFixed(1)}}s</div><div class="l">Avg Time</div></div>
|
| 1380 |
<div class="stat"><div class="n">${{d.errors}}</div><div class="l">Errors</div></div>
|
| 1381 |
-
<div class="stat"><div class="n">${{helpful}}</div><div class="l">👍 Helpful</div></div>
|
| 1382 |
-
<div class="stat"><div class="n">${{notHelpful}}</div><div class="l">👎 Not Helpful</div></div>
|
| 1383 |
-
</div>
|
| 1384 |
-
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:12px">
|
| 1385 |
-
<a class="btn bp" href="${{W}}/analytics/export?kind=queries" target="_blank">⬇️ Download Queries CSV</a>
|
| 1386 |
-
<a class="btn bp" href="${{W}}/analytics/export?kind=feedback" target="_blank">⬇️ Download Feedback CSV</a>
|
| 1387 |
-
<a class="btn bp" href="${{W}}/analytics/export?kind=clicks" target="_blank">⬇️ Download Clicks CSV</a>
|
| 1388 |
-
<button class="btn br" onclick="deleteOldData()">🗑️ Delete Old Data</button>
|
| 1389 |
</div>
|
| 1390 |
<div class="two">
|
| 1391 |
-
<div class="card"><h2>Tool Usage</h2><table><tr><th>Tool</th><th>Count</th></tr>${{
|
| 1392 |
-
<div class="card"><h2>Model Usage</h2><table><tr><th>Model</th><th>Count</th></tr>${{
|
| 1393 |
</div>
|
| 1394 |
<div class="two">
|
| 1395 |
<div class="card"><h2>Hourly</h2><canvas id="hc"></canvas></div>
|
| 1396 |
<div class="card"><h2>Daily (14d)</h2><canvas id="dc"></canvas></div>
|
| 1397 |
</div>
|
| 1398 |
-
<div class="
|
| 1399 |
-
|
| 1400 |
-
|
| 1401 |
-
</div>
|
| 1402 |
-
<div class="two">
|
| 1403 |
-
<div class="card"><h2>Top Clicks</h2><table><tr><th>Label</th><th>Type</th><th>Count</th></tr>${{(d.top_clicks||[]).map(c=>`<tr><td title="${{c.label||''}}">${{(c.label||'').substring(0,55)}}</td><td>${{c.target_type}}</td><td>${{c.c}}</td></tr>`).join('') || '<tr><td colspan="3">No clicks yet</td></tr>'}}</table></div>
|
| 1404 |
-
<div class="card"><h2>Popular (Top 20)</h2><table><tr><th>Question</th><th>Count</th></tr>${{(d.popular||[]).map(p=>`<tr><td>${{(p.question||'').substring(0,70)}}</td><td>${{p.c}}</td></tr>`).join('')}}</table></div>
|
| 1405 |
-
</div>`;
|
| 1406 |
-
if((d.hourly||[]).length)new Chart(document.getElementById('hc'),{{type:'bar',data:{{labels:d.hourly.map(h=>h.hour+':00'),datasets:[{{label:'Q',data:d.hourly.map(h=>h.c),backgroundColor:'#003366'}}]}},options:{{responsive:true,plugins:{{legend:{{display:false}}}}}}}});
|
| 1407 |
-
if((d.daily||[]).length)new Chart(document.getElementById('dc'),{{type:'line',data:{{labels:d.daily.map(x=>(x.day||'').slice(5)),datasets:[{{label:'Q',data:d.daily.map(x=>x.c),borderColor:'#003366',backgroundColor:'rgba(0,51,102,0.1)',fill:true,tension:.3}}]}},options:{{responsive:true,plugins:{{legend:{{display:false}}}}}}}});
|
| 1408 |
}}).catch(e=>{{document.getElementById('t-analytics').innerHTML='<div class="card" style="color:#dc2626">Failed to load analytics: '+e.message+'<br>Make sure D1 is initialized: <a href="'+W+'/analytics/init" target="_blank">Click here to init DB</a></div>';}});
|
| 1409 |
|
| 1410 |
-
async function deleteOldData(){{
|
| 1411 |
-
const days = prompt('Delete records older than how many days?', '90');
|
| 1412 |
-
if(!days) return;
|
| 1413 |
-
const r = await fetch(W+'/analytics/delete-old', {{method:'POST', headers:{{'Content-Type':'application/json'}}, body:JSON.stringify({{days}})}});
|
| 1414 |
-
const d = await r.json();
|
| 1415 |
-
alert(d.status==='ok' ? `Deleted data older than ${{d.deleted_older_than_days}} days.` : (d.error?.message || 'Delete failed'));
|
| 1416 |
-
location.reload();
|
| 1417 |
-
}}
|
| 1418 |
-
|
| 1419 |
// Fetch recent queries
|
| 1420 |
fetch(W+'/analytics/recent').then(r=>r.json()).then(d=>{{
|
| 1421 |
const el=document.getElementById('t-queries');
|
|
@@ -1615,4 +1589,4 @@ async def clear_logs():
|
|
| 1615 |
conn.execute("DELETE FROM queries")
|
| 1616 |
conn.commit()
|
| 1617 |
conn.close()
|
| 1618 |
-
return {"status": "ok", "message": "All logs cleared"}
|
|
|
|
| 61 |
RESEARCH_CUE_RE = re.compile(r"\b(find|search|look for|show me|get me|give me|locate|recommend|suggest|articles?|papers?|books?|journals?|studies|literature|research|systematic review|evidence|sources|database|databases|peer reviewed|open access)\b", re.IGNORECASE)
|
| 62 |
LIBRARY_CUE_RE = re.compile(r"\b(library|librarian|borrow|loan|renew|fine|study room|room booking|reserve a room|account|my library|interlibrary|ill|khazna|orcid|open access|apc|refworks|libkey|hours|location|contact|visitor|alumni|database access|off campus|remote access)\b", re.IGNORECASE)
|
| 63 |
MEDICAL_SEARCH_RE = re.compile(r"\b(pubmed|embase|cinahl|clinicalkey|cochrane|uptodate|medline|systematic review|clinical trial|biomedical literature|medical literature)\b", re.IGNORECASE)
|
| 64 |
+
# ----- Routing patterns for greetings, hours and general-KU questions -----
# The alternations are assembled from tuples so that a phrase shared by two
# patterns is written only once. The compiled pattern text is unchanged.

# Openers that count as a greeting on their own.
_GREETING_WORDS = (
    "hi", "hello", "hey",
    "good morning", "good afternoon", "good evening",
)

# Other small-talk openers (thanks, farewells, laughter, teasing, jokes).
_SMALL_TALK_PHRASES = (
    "how are you", "thanks", "thank you", "ok", "okay", "bye", "goodbye",
    "lol", "haha", "hehe",
    "you are silly", "are you silly", "are you dumb", "stupid bot",
    "joke", "tell me a joke",
)

# Message *starts with* a greeting or small-talk phrase (anchored at the start only).
SOCIAL_RE = re.compile(
    r"^(" + "|".join(_GREETING_WORDS + _SMALL_TALK_PHRASES) + r")\b",
    re.IGNORECASE,
)

# Message is *nothing but* a greeting, optionally followed by "!", "." or whitespace.
PURE_GREETING_RE = re.compile(
    r"^(" + "|".join(_GREETING_WORDS) + r")[!.\s]*$",
    re.IGNORECASE,
)

# Phrases (some containing ".*" wildcards) that indicate an opening-hours question.
_HOURS_PHRASES = (
    "library hours", "hours", "opening hours", "closing hours",
    "open today", "closed today", "open now",
    "when .*open", "when .*close",
    "what time .*open", "what time .*close",
    "opening time", "closing time",
    "is .*library open", "is .*campus library open",
    "habshan .*open", "san .*open", "main campus .*open",
    "ramadan hours", "weekend hours", "friday hours", "saturday hours",
)
HOURS_RE = re.compile(r"\b(" + "|".join(_HOURS_PHRASES) + r")\b", re.IGNORECASE)

# A university mention and a non-library topic, in either order.
_KU_NAME_GROUP = r"\b(khalifa university|ku)\b"
_KU_TOPIC_GROUP = r"\b(" + "|".join((
    "admission", "admissions", "program", "programs", "degree", "degrees",
    "college", "colleges", "school", "schools", "tuition", "fees",
    "scholarship", "scholarships", "hostel", "housing", "transport",
    "ranking", "rankings", "president", "vice president", "chancellor",
    "application", "apply", "registrar", "academic calendar",
    "semester dates", "campus map",
)) + r")\b"
KU_GENERAL_RE = re.compile(
    _KU_NAME_GROUP + ".*" + _KU_TOPIC_GROUP
    + "|"
    + _KU_TOPIC_GROUP + ".*" + _KU_NAME_GROUP,
    re.IGNORECASE,
)

# Redirect targets used in the canned answers.
LIBRARY_HOURS_URL = "https://library.ku.ac.ae/hours"
KU_MAIN_URL = "https://www.ku.ac.ae/"
|
| 70 |
+
|
| 71 |
+
# Grounding key -> space-separated retrieval hints (service names, tools and
# staff names). The hints for the selected keys are appended to the
# vector-search query built in tool_library_info.
GROUNDED_LIBRARY_MAP = dict(
    # --- Getting hold of material ---
    ill="interlibrary loan ILL document delivery full text unavailable article not available borrow from another library",
    fulltext="full text libkey nomad article access pdf unavailable interlibrary loan",
    primo="PRIMO discovery catalog holdings publication finder library has this journal books articles",
    circulation="borrowing circulation renew loan period due date hold request fines my library account",
    # --- Help desks and staff contacts ---
    research_help="Ask a Librarian research consultation reference help subject guides research skills",
    orcid_oa="ORCID open access APC publishing research impact Scopus SciVal bibliometrics Nikesh Narayanan",
    database_access="database access e-resources remote access off campus login vendor issue Rani Anand",
    medical_help="medical librarian Jason Fetty PubMed Embase CINAHL Cochrane UpToDate systematic review",
    systems_help="Walter Brian Hall systems website technology digital services library systems",
    acquisitions="Alia Al-Harrasi acquisitions request title suggest a book collection development",
    # --- Resource types and subject databases ---
    standards="ASTM Compass IEEE standards ASME ASCE engineering standards",
    theses="ProQuest Dissertations theses Khazna institutional repository dissertations theses",
    metrics="impact factor journal citation reports JCR CiteScore Scopus SciVal metrics",
    chemistry="ACS SciFindern RSC Reaxys chemistry database",
    physics="APS AIP IOPScience physics database",
    engineering="IEEE Xplore ACM ASCE ASME Knovel engineering databases",
)
|
| 89 |
|
| 90 |
# ===== GLOBALS =====
|
| 91 |
vectorstore = None
|
|
|
|
| 144 |
conn.close()
|
| 145 |
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
# ===== ADMIN AUTH =====
|
| 148 |
# ADMIN_PASSWORD must be set as HF Space Secret — no insecure fallback
|
| 149 |
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "")
|
|
|
|
| 432 |
history_text = "\n".join(f"{'User' if m['role']=='user' else 'Assistant'}: {m['content']}" for m in history[-3:])
|
| 433 |
base_query = f"{history_text}\n{question}"
|
| 434 |
|
| 435 |
+
# ── Semantic query expansion via lightweight LLM interpreter + grounded institutional map ──
|
| 436 |
+
interp = await _interpret_semantics(question, history or [])
|
| 437 |
+
grounding_text = " ".join(GROUNDED_LIBRARY_MAP.get(k, "") for k in interp.get("grounding_keys", []))
|
| 438 |
+
canonical_text = " ".join(interp.get("canonical_terms", []))
|
| 439 |
+
expanded_query = " ".join(x for x in [base_query, canonical_text, grounding_text] if x).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
|
| 441 |
# ── FAISS scored search ──
|
| 442 |
docs_with_scores = vectorstore.similarity_search_with_score(expanded_query, k=TOP_K)
|
|
|
|
| 464 |
start your answer with: "Did you mean [exact title from context]? If so, here is the information:"
|
| 465 |
then give the answer.""" if moderate_match else ""
|
| 466 |
|
| 467 |
+
prompt = f"""You are the Khalifa University Library AI Assistant in Abu Dhabi, UAE.
|
|
|
|
|
|
|
|
|
|
| 468 |
KU means Khalifa University, NOT Kuwait University.
|
| 469 |
|
| 470 |
RULES — follow exactly:
|
|
|
|
| 740 |
return False
|
| 741 |
return bool(MEDICAL_SEARCH_RE.search(q) or _looks_research_question(q))
|
| 742 |
|
| 743 |
+
|
| 744 |
+
def _looks_library_hours_question(question: str) -> bool:
    """Return True when *question* asks about opening or closing times.

    The decision rests on ``HOURS_RE`` alone, applied to the stripped,
    lower-cased question.

    An earlier version also required one of the substrings ``library``,
    ``campus``, ``habshan``, ``san``, ``main campus``, ``hours``, ``open`` or
    ``close`` to be present. Every ``HOURS_RE`` alternative already contains
    one of those substrings except ``closing time`` ("closing" does not
    contain "close"). The extra check therefore only rejected a bare
    "what is the closing time?" while accepting "what is the opening time?".
    Dropping it makes the two symmetric and changes no other result.

    Args:
        question: Raw user question.

    Returns:
        ``True`` if the question matches an hours-related phrase.
    """
    normalized = question.strip().lower()
    return bool(HOURS_RE.search(normalized))
|
| 747 |
+
|
| 748 |
+
def _looks_nonlibrary_ku_question(question: str) -> bool:
    """Return True for general university questions that are not about the library.

    A question qualifies when it matches ``KU_GENERAL_RE`` and contains none
    of the library markers below. The markers are plain substring checks on
    the stripped, lower-cased text, so e.g. "booking" counts as containing
    "book".

    Args:
        question: Raw user question.

    Returns:
        ``True`` if the question should be redirected to the main KU site.
    """
    lowered = question.strip().lower()
    library_markers = ("library", "librarian", "primo", "database", "book", "article")
    mentions_library = any(marker in lowered for marker in library_markers)
    # Short-circuits: the regex is only evaluated when no library marker is present.
    return not mentions_library and bool(KU_GENERAL_RE.search(question))
|
| 753 |
+
|
| 754 |
+
def _hours_redirect_answer() -> str:
    """Return the canned HTML answer pointing to the Library Hours page.

    The link target and link text are both ``LIBRARY_HOURS_URL``; the link
    opens in a new tab.
    """
    hours_link = f"<a href=\"{LIBRARY_HOURS_URL}\" target=\"_blank\">{LIBRARY_HOURS_URL}</a>"
    sentences = (
        f"For the most accurate opening and closing times, please check the official Library Hours page: {hours_link}.",
        "Hours may vary by campus and during exams, holidays, and Ramadan.",
    )
    return " ".join(sentences)
|
| 760 |
+
|
| 761 |
+
def _ku_general_redirect_answer() -> str:
    """Return the canned HTML answer redirecting to the main KU website.

    Introduces the assistant as the library assistant, then links to
    ``KU_MAIN_URL`` (opened in a new tab) for non-library questions.
    """
    site_link = f"<a href=\"{KU_MAIN_URL}\" target=\"_blank\">{KU_MAIN_URL}</a>"
    intro = "I’m <strong>LibBee</strong>, the Khalifa University <strong>Library</strong> AI Assistant, so I’m best for library resources, services, databases, research help, and staff contacts."
    redirect = f"For general Khalifa University questions, please visit the main KU website: {site_link}."
    closing = "A broader university chatbot is available there for non-library questions."
    return f"{intro}<br><br>{redirect} {closing}"
|
| 767 |
+
|
| 768 |
@app.post("/correct")
|
| 769 |
async def correct_query(req: CorrectRequest):
|
| 770 |
"""
|
|
|
|
| 791 |
|
| 792 |
async def _answer_general(question: str, history=None) -> dict:
|
| 793 |
"""Answer general knowledge questions using live web search when available."""
|
| 794 |
+
if _looks_library_hours_question(question):
|
| 795 |
+
return {"answer": _hours_redirect_answer(), "sources": [], "model": "library-hours-redirect"}
|
| 796 |
+
if _looks_nonlibrary_ku_question(question):
|
| 797 |
+
return {"answer": _ku_general_redirect_answer(), "sources": [], "model": "ku-general-redirect"}
|
| 798 |
+
if _looks_social_or_greeting(question):
|
| 799 |
+
if PURE_GREETING_RE.match(question.strip()):
|
| 800 |
+
guided = (
|
| 801 |
+
"Hi! I’m <strong>LibBee</strong>, the Khalifa University Library AI Assistant.<br><br>"
|
| 802 |
+
"I can help you with articles and books, databases, full text, Interlibrary Loan (ILL), "
|
| 803 |
+
"library services, staff contacts, ORCID, RefWorks, and Open Access.<br><br>"
|
| 804 |
+
"Are you looking for one of these? You can also type your question directly."
|
| 805 |
+
)
|
| 806 |
+
return {"answer": guided, "sources": [], "model": "libbee-greeting"}
|
| 807 |
+
try:
|
| 808 |
+
behavior = get_behavior_instructions()
|
| 809 |
+
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.4, max_tokens=220)
|
| 810 |
+
msgs = []
|
| 811 |
+
if behavior:
|
| 812 |
+
msgs.append({"role": "system", "content": behavior + "\nIf the user greets you, jokes, or says something mildly mocking, reply politely, warmly, and briefly. Do not be defensive. Keep it light and helpful."})
|
| 813 |
+
for m in (history or [])[-4:]:
|
| 814 |
+
if m.get("role") in ("user", "assistant"):
|
| 815 |
+
msgs.append({"role": m["role"], "content": m.get("content", "")})
|
| 816 |
+
msgs.append({"role": "user", "content": question})
|
| 817 |
+
response = llm.invoke(msgs)
|
| 818 |
+
return {"answer": response.content.strip(), "sources": [], "model": "gpt-4o-mini-social"}
|
| 819 |
+
except Exception:
|
| 820 |
+
return {"answer": "Hi! I’m <strong>LibBee</strong>, the Khalifa University Library AI Assistant. I can help with articles, books, databases, full text, ILL, library services, and staff contacts. What would you like help with?", "sources": [], "model": "fallback-social"}
|
| 821 |
try:
|
| 822 |
import openai
|
| 823 |
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
|
| 824 |
|
| 825 |
+
messages = []
|
|
|
|
| 826 |
if history:
|
| 827 |
for m in history[-5:]:
|
| 828 |
role = m.get("role", "user")
|
|
|
|
| 857 |
print(f"Web search failed, falling back to plain GPT: {e}")
|
| 858 |
try:
|
| 859 |
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.3, max_tokens=500)
|
| 860 |
+
msgs = []
|
| 861 |
for m in (history or [])[-5:]:
|
| 862 |
if m.get("role") in ("user", "assistant"):
|
| 863 |
msgs.append({"role": m["role"], "content": m.get("content", "")})
|
|
|
|
| 965 |
history = [{"role": m.role, "content": m.content} for m in req.history] if req.history else []
|
| 966 |
use_claude = req.model == "claude" and bool(os.environ.get("ANTHROPIC_API_KEY"))
|
| 967 |
|
| 968 |
+
# ---- Early greeting handling ----
|
| 969 |
+
if PURE_GREETING_RE.match(question):
|
| 970 |
+
answer = (
|
| 971 |
+
"Hi! I’m <strong>LibBee</strong>, the Khalifa University Library AI Assistant.<br><br>"
|
| 972 |
+
"I can help you find articles and books, search databases, access full text, request Interlibrary Loan (ILL), "
|
| 973 |
+
"and answer questions about library services and staff.<br><br>"
|
| 974 |
+
"Are you looking for one of these? You can also type your question directly."
|
| 975 |
+
)
|
| 976 |
+
elapsed = time.time() - start
|
| 977 |
+
return {
|
| 978 |
+
"answer": answer,
|
| 979 |
+
"intent": "social_greeting",
|
| 980 |
+
"tools_used": [],
|
| 981 |
+
"search_results": [],
|
| 982 |
+
"sources": [],
|
| 983 |
+
"model_used": req.model,
|
| 984 |
+
"response_time": round(elapsed, 2),
|
| 985 |
+
"corrected_query": question,
|
| 986 |
+
"natural_query": question,
|
| 987 |
+
"database_query": question,
|
| 988 |
+
"original_question": question,
|
| 989 |
+
"is_follow_up": False,
|
| 990 |
+
"source_mode": "social",
|
| 991 |
+
}
|
| 992 |
+
|
| 993 |
+
if _looks_library_hours_question(question):
|
| 994 |
+
elapsed = time.time() - start
|
| 995 |
+
return {
|
| 996 |
+
"answer": _hours_redirect_answer(),
|
| 997 |
+
"intent": "library_info",
|
| 998 |
+
"tools_used": ["hours_redirect"],
|
| 999 |
+
"search_results": [],
|
| 1000 |
+
"sources": [{"title": "Library Hours", "source": LIBRARY_HOURS_URL}],
|
| 1001 |
+
"model_used": req.model,
|
| 1002 |
+
"response_time": round(elapsed, 2),
|
| 1003 |
+
"corrected_query": question,
|
| 1004 |
+
"natural_query": question,
|
| 1005 |
+
"database_query": question,
|
| 1006 |
+
}
|
| 1007 |
+
|
| 1008 |
+
if _looks_nonlibrary_ku_question(question):
|
| 1009 |
+
elapsed = time.time() - start
|
| 1010 |
+
return {
|
| 1011 |
+
"answer": _ku_general_redirect_answer(),
|
| 1012 |
+
"intent": "general",
|
| 1013 |
+
"tools_used": ["ku_general_redirect"],
|
| 1014 |
+
"search_results": [],
|
| 1015 |
+
"sources": [],
|
| 1016 |
+
"model_used": req.model,
|
| 1017 |
+
"response_time": round(elapsed, 2),
|
| 1018 |
+
"corrected_query": question,
|
| 1019 |
+
"natural_query": question,
|
| 1020 |
+
"database_query": question,
|
| 1021 |
+
}
|
| 1022 |
+
|
| 1023 |
intent = "general"
|
| 1024 |
natural_query = question
|
| 1025 |
database_query = question
|
| 1026 |
search_plan = None
|
| 1027 |
|
| 1028 |
try:
|
| 1029 |
+
interp = await _interpret_semantics(question, history)
|
| 1030 |
+
intent = interp.get("intent_hint", "general")
|
| 1031 |
+
if interp.get("social"):
|
| 1032 |
+
intent = "general"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1033 |
if intent in ("search_academic", "search_medical"):
|
| 1034 |
search_plan = await _build_search_plan(question)
|
| 1035 |
natural_query = search_plan["natural"]
|
| 1036 |
database_query = search_plan["database_query"] or search_plan["corrected"]
|
| 1037 |
+
elif intent == "library_info" and interp.get("grounding_keys"):
|
| 1038 |
+
# strengthen RAG using interpreted canonical library terms
|
| 1039 |
+
resolved = " ".join([question] + interp.get("canonical_terms", []) + [GROUNDED_LIBRARY_MAP.get(k, "") for k in interp.get("grounding_keys", []) if GROUNDED_LIBRARY_MAP.get(k)])
|
| 1040 |
+
question = re.sub(r"\s+", " ", resolved).strip()
|
| 1041 |
except Exception:
|
| 1042 |
+
if _looks_social_or_greeting(question):
|
| 1043 |
+
intent = "general"
|
| 1044 |
+
elif _looks_research_question(question) or _looks_medical_search(question):
|
| 1045 |
intent = "search_medical" if _looks_medical_search(question) else "search_academic"
|
| 1046 |
search_plan = await _build_search_plan(question)
|
| 1047 |
natural_query = search_plan["natural"]
|
|
|
|
| 1111 |
context_parts.append(f"Natural query for AI tools: {natural_query}")
|
| 1112 |
context_parts.append(f"Database query for PRIMO/PubMed: {database_query}")
|
| 1113 |
|
| 1114 |
+
behavior = get_behavior_instructions()
|
| 1115 |
+
synthesis_prompt = f"""{behavior}
|
| 1116 |
+
|
| 1117 |
+
You are the Khalifa University Library AI Assistant (Abu Dhabi, UAE). KU = Khalifa University.
|
| 1118 |
Be concise (3-5 sentences).
|
| 1119 |
- For library_info, answer from the library context and include URLs when useful.
|
| 1120 |
- For search intents, briefly summarise the search direction and mention the top 2-3 relevant results if present.
|
|
|
|
| 1368 |
|
| 1369 |
// Fetch analytics from Cloudflare D1
|
| 1370 |
fetch(W+'/analytics').then(r=>r.json()).then(d=>{{
|
|
|
|
|
|
|
| 1371 |
const el=document.getElementById('t-analytics');
|
| 1372 |
el.innerHTML=`
|
| 1373 |
<div class="grid">
|
|
|
|
| 1376 |
<div class="stat"><div class="n">${{d.week}}</div><div class="l">This Week</div></div>
|
| 1377 |
<div class="stat"><div class="n">${{(d.avg_time||0).toFixed(1)}}s</div><div class="l">Avg Time</div></div>
|
| 1378 |
<div class="stat"><div class="n">${{d.errors}}</div><div class="l">Errors</div></div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1379 |
</div>
|
| 1380 |
<div class="two">
|
| 1381 |
+
<div class="card"><h2>Tool Usage</h2><table><tr><th>Tool</th><th>Count</th></tr>${{d.tools.map(t=>`<tr><td>${{t.tool_used}}</td><td>${{t.c}}</td></tr>`).join('')}}</table></div>
|
| 1382 |
+
<div class="card"><h2>Model Usage</h2><table><tr><th>Model</th><th>Count</th></tr>${{d.models.map(m=>`<tr><td>${{m.model}}</td><td>${{m.c}}</td></tr>`).join('')}}</table></div>
|
| 1383 |
</div>
|
| 1384 |
<div class="two">
|
| 1385 |
<div class="card"><h2>Hourly</h2><canvas id="hc"></canvas></div>
|
| 1386 |
<div class="card"><h2>Daily (14d)</h2><canvas id="dc"></canvas></div>
|
| 1387 |
</div>
|
| 1388 |
+
<div class="card"><h2>Popular (Top 20)</h2><table><tr><th>Question</th><th>Count</th></tr>${{d.popular.map(p=>`<tr><td>${{(p.question||'').substring(0,70)}}</td><td>${{p.c}}</td></tr>`).join('')}}</table></div>`;
|
| 1389 |
+
if(d.hourly.length)new Chart(document.getElementById('hc'),{{type:'bar',data:{{labels:d.hourly.map(h=>h.hour+':00'),datasets:[{{label:'Q',data:d.hourly.map(h=>h.c),backgroundColor:'#003366'}}]}},options:{{responsive:true,plugins:{{legend:{{display:false}}}}}}}});
|
| 1390 |
+
if(d.daily.length)new Chart(document.getElementById('dc'),{{type:'line',data:{{labels:d.daily.map(x=>(x.day||'').slice(5)),datasets:[{{label:'Q',data:d.daily.map(x=>x.c),borderColor:'#003366',backgroundColor:'rgba(0,51,102,0.1)',fill:true,tension:.3}}]}},options:{{responsive:true,plugins:{{legend:{{display:false}}}}}}}});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1391 |
}}).catch(e=>{{document.getElementById('t-analytics').innerHTML='<div class="card" style="color:#dc2626">Failed to load analytics: '+e.message+'<br>Make sure D1 is initialized: <a href="'+W+'/analytics/init" target="_blank">Click here to init DB</a></div>';}});
|
| 1392 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1393 |
// Fetch recent queries
|
| 1394 |
fetch(W+'/analytics/recent').then(r=>r.json()).then(d=>{{
|
| 1395 |
const el=document.getElementById('t-queries');
|
|
|
|
| 1589 |
conn.execute("DELETE FROM queries")
|
| 1590 |
conn.commit()
|
| 1591 |
conn.close()
|
| 1592 |
+
return {"status": "ok", "message": "All logs cleared"}
|