nikeshn committed on
Commit
61c709c
·
verified ·
1 Parent(s): bf3042b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -187
app.py CHANGED
@@ -61,6 +61,31 @@ CURRENT_INFO_RE = re.compile(r"\b(current|currently|latest|today|recent|recently
61
  RESEARCH_CUE_RE = re.compile(r"\b(find|search|look for|show me|get me|give me|locate|recommend|suggest|articles?|papers?|books?|journals?|studies|literature|research|systematic review|evidence|sources|database|databases|peer reviewed|open access)\b", re.IGNORECASE)
62
  LIBRARY_CUE_RE = re.compile(r"\b(library|librarian|borrow|loan|renew|fine|study room|room booking|reserve a room|account|my library|interlibrary|ill|khazna|orcid|open access|apc|refworks|libkey|hours|location|contact|visitor|alumni|database access|off campus|remote access)\b", re.IGNORECASE)
63
  MEDICAL_SEARCH_RE = re.compile(r"\b(pubmed|embase|cinahl|clinicalkey|cochrane|uptodate|medline|systematic review|clinical trial|biomedical literature|medical literature)\b", re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  # ===== GLOBALS =====
66
  vectorstore = None
@@ -119,15 +144,6 @@ def set_config(key, value):
119
  conn.close()
120
 
121
 
122
def get_behavior_instructions():
    """Assemble the behaviour/system instructions from stored configuration.

    Reads the "bot_personality" and "custom_instructions" config values
    (each defaulting to an empty string) and strips surrounding whitespace.

    Returns:
        - both values, personality first, separated by an
          "Additional instructions:" heading when both are non-empty;
        - whichever single value is non-empty when only one is set;
        - a built-in default persona when neither is set.
    """
    personality = get_config("bot_personality", "").strip()
    custom = get_config("custom_instructions", "").strip()

    # Keep only the settings that actually contain text, in their fixed order.
    configured = [part for part in (personality, custom) if part]
    if not configured:
        return "You are a helpful, friendly, and knowledgeable library assistant at Khalifa University, Abu Dhabi, UAE. KU = Khalifa University, NOT Kuwait University. Be concise and include relevant URLs when useful."

    # With a single entry join() returns it unchanged; with two it inserts
    # the heading between them.
    return "\n\nAdditional instructions:\n".join(configured)
128
-
129
-
130
-
131
  # ===== ADMIN AUTH =====
132
  # ADMIN_PASSWORD must be set as HF Space Secret — no insecure fallback
133
  ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "")
@@ -416,111 +432,11 @@ async def tool_library_info(question, history=None, model="gpt"):
416
  history_text = "\n".join(f"{'User' if m['role']=='user' else 'Assistant'}: {m['content']}" for m in history[-3:])
417
  base_query = f"{history_text}\n{question}"
418
 
419
- # ── Semantic query expansion ──
420
- # Common synonym mappings for library staff/service terms
421
- # This improves FAISS retrieval when users use informal terms
422
- SYNONYMS = {
423
- "research librarian": "Research and Access Services Librarian Nikesh Narayanan research support",
424
- "who is the librarian": "library staff contacts librarian",
425
- "subject librarian": "Research and Access Services Librarian Nikesh Narayanan",
426
- "medical librarian": "Jason Fetty Medical Librarian PubMed Embase CINAHL UpToDate",
427
- "systems librarian": "Walter Brian Hall Digital Technology Services Librarian website systems technology",
428
- "website issue": "Walter Brian Hall website systems technology",
429
- "technology issue": "Walter Brian Hall website systems technology",
430
- "acquisitions librarian": "Alia Al-Harrasi acquisitions collection development request title",
431
- "public services": "Muna Ahmad Al Blooshi public services circulation general library services",
432
- "e-resources librarian": "Rani Anand e-resources databases access problems vendor issues",
433
- "database access": "Rani Anand e-resources databases access problems",
434
- "open access": "Nikesh Narayanan open access APC scholarly communication",
435
- "orcid": "Nikesh Narayanan ORCID researcher identifier profile",
436
- "research impact": "Nikesh Narayanan research impact SciVal Scopus bibliometrics",
437
- "library director": "Dr Abdulla Al Hefeiti Assistant Provost Libraries",
438
- "ebook": "ebook central proquest download ebooks",
439
- "ebooks": "ebook central proquest download ebooks",
440
- "how to borrow": "borrowing loan period renew circulation",
441
- "borrow books": "borrowing loan period renew circulation",
442
- "renew book": "renew borrowing circulation My Library Account",
443
- "renew loan": "renew borrowing circulation My Library Account",
444
- "hold item": "request hold checked out item My Library Account",
445
- "reserve item": "request hold checked out item My Library Account",
446
- "access from home": "remote access off campus proxy no VPN",
447
- "off campus": "remote access off campus proxy no VPN",
448
- "remote access": "remote access off campus proxy no VPN",
449
- "cite": "RefWorks citation reference management bibliography",
450
- "citation": "RefWorks citation reference management bibliography",
451
- "reference manager": "RefWorks citation bibliography",
452
- "researcher id": "ORCID researcher identifier profile",
453
- "impact factor": "journal citation reports JCR Scopus CiteScore SciVal",
454
- "systematic review": "Cochrane Embase CINAHL PICO PubMed evidence based medicine",
455
- "evidence based medicine": "Cochrane PubMed Embase CINAHL UpToDate",
456
- "ai tools": "LeapSpace Scopus AI ScienceDirect AI EBSCO Research AI PRIMO AI",
457
- "inter library loan": "interlibrary loan ILL document delivery article request",
458
- "interlibrary loan": "interlibrary loan ILL document delivery article request",
459
- "ill": "interlibrary loan ILL document delivery article request",
460
- "document delivery": "interlibrary loan ILL document delivery article request",
461
- "borrow from another library": "interlibrary loan ILL document delivery",
462
- "request from another library": "interlibrary loan ILL document delivery",
463
- "article not available": "full text article not available interlibrary loan LibKey Nomad",
464
- "can't get article": "full text article not available interlibrary loan LibKey Nomad",
465
- "cannot get article": "full text article not available interlibrary loan LibKey Nomad",
466
- "don't get article": "full text article not available interlibrary loan LibKey Nomad",
467
- "full text unavailable": "full text not available interlibrary loan LibKey Nomad",
468
- "no full text": "full text not available interlibrary loan LibKey Nomad",
469
- "full text": "full text LibKey Nomad article access PDF download",
470
- "pdf": "full text LibKey Nomad article access PDF download",
471
- "get article": "full text article access LibKey Nomad interlibrary loan",
472
- "catalog": "PRIMO library catalog discovery holdings publication finder",
473
- "library catalog": "PRIMO library catalog discovery holdings publication finder",
474
- "discovery": "PRIMO library catalog discovery holdings publication finder",
475
- "holdings": "PRIMO library catalog discovery holdings publication finder",
476
- "does the library have": "PRIMO library catalog holdings publication finder",
477
- "find journal": "PRIMO publication finder journal access holdings",
478
- "publication finder": "PRIMO publication finder journal access holdings",
479
- "circulation": "borrowing renew return hold loan period My Library Account",
480
- "due date": "borrowing renew due date My Library Account",
481
- "reserve room": "study room reserve rooms booking",
482
- "book room": "study room reserve rooms booking",
483
- "study room": "study room reserve rooms booking",
484
- "ask librarian": "Ask a Librarian research help reference consultation",
485
- "research help": "Ask a Librarian research consultation reference help",
486
- "consultation": "Ask a Librarian research consultation reference help",
487
- "peer reviewed": "peer reviewed scholarly journal article database filters",
488
- "scholarly article": "peer reviewed scholarly journal article database filters",
489
- "abstract": "article abstract citation database search results",
490
- "doi": "DOI article identifier full text citation",
491
- "call number": "PRIMO call number browse book location",
492
- "course reserve": "reserve course materials reserve collection",
493
- "visitor": "visitors external visitors access library",
494
- "alumni": "alumni services library access alumni",
495
- "civil engineering": "ASCE Library Knovel ScienceDirect Scopus ASTM Compass",
496
- "mechanical engineering": "ASME Digital Library Knovel ScienceDirect ASTM Compass",
497
- "electrical engineering": "IEEE Xplore INSPEC ScienceDirect",
498
- "computer science": "ACM Digital Library IEEE Xplore arXiv ScienceDirect",
499
- "artificial intelligence": "ACM Digital Library IEEE Xplore arXiv ScienceDirect Scopus",
500
- "medicine": "PubMed Embase Cochrane UpToDate CINAHL",
501
- "nursing": "CINAHL PubMed nursing allied health",
502
- "chemistry": "ACS SciFindern RSC Journals Reaxys",
503
- "physics": "APS Journals AIP IOPScience",
504
- "business": "Business Source Complete Emerald ProQuest",
505
- "dissertations": "ProQuest Dissertations Khazna theses",
506
- "theses": "ProQuest Dissertations Khazna theses",
507
- "standards": "ASTM Compass IEEE standards ASME ASCE",
508
- }
509
- question_lower = question.lower()
510
- expanded_query = base_query
511
- matched_expansions = []
512
- for term, expansion in SYNONYMS.items():
513
- if term in question_lower:
514
- matched_expansions.append(expansion)
515
- if matched_expansions:
516
- unique_expansions = []
517
- seen_exp = set()
518
- for exp in matched_expansions:
519
- if exp not in seen_exp:
520
- seen_exp.add(exp)
521
- unique_expansions.append(exp)
522
- expanded_query = f"{base_query} {' '.join(unique_expansions[:4])}"
523
- print(f"Query expanded with {len(unique_expansions[:4])} glossary hints")
524
 
525
  # ── FAISS scored search ──
526
  docs_with_scores = vectorstore.similarity_search_with_score(expanded_query, k=TOP_K)
@@ -548,10 +464,7 @@ async def tool_library_info(question, history=None, model="gpt"):
548
  start your answer with: "Did you mean [exact title from context]? If so, here is the information:"
549
  then give the answer.""" if moderate_match else ""
550
 
551
- behavior = get_behavior_instructions()
552
- prompt = f"""{behavior}
553
-
554
- You are the Khalifa University Library AI Assistant in Abu Dhabi, UAE.
555
  KU means Khalifa University, NOT Kuwait University.
556
 
557
  RULES — follow exactly:
@@ -827,6 +740,31 @@ def _looks_medical_search(question: str) -> bool:
827
  return False
828
  return bool(MEDICAL_SEARCH_RE.search(q) or _looks_research_question(q))
829
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
830
  @app.post("/correct")
831
  async def correct_query(req: CorrectRequest):
832
  """
@@ -853,12 +791,38 @@ class GeneralRequest(BaseModel):
853
 
854
  async def _answer_general(question: str, history=None) -> dict:
855
  """Answer general knowledge questions using live web search when available."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
856
  try:
857
  import openai
858
  client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
859
 
860
- behavior = get_behavior_instructions()
861
- messages = [{"role": "system", "content": behavior}]
862
  if history:
863
  for m in history[-5:]:
864
  role = m.get("role", "user")
@@ -893,7 +857,7 @@ async def _answer_general(question: str, history=None) -> dict:
893
  print(f"Web search failed, falling back to plain GPT: {e}")
894
  try:
895
  llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.3, max_tokens=500)
896
- msgs = [{"role": "system", "content": get_behavior_instructions()}]
897
  for m in (history or [])[-5:]:
898
  if m.get("role") in ("user", "assistant"):
899
  msgs.append({"role": m["role"], "content": m.get("content", "")})
@@ -1001,50 +965,83 @@ async def agent_query(req: AgentRequest):
1001
  history = [{"role": m.role, "content": m.content} for m in req.history] if req.history else []
1002
  use_claude = req.model == "claude" and bool(os.environ.get("ANTHROPIC_API_KEY"))
1003
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1004
  intent = "general"
1005
  natural_query = question
1006
  database_query = question
1007
  search_plan = None
1008
 
1009
  try:
1010
- if _looks_library_question(question):
1011
- intent = "library_info"
1012
- elif _looks_current_question(question):
1013
- intent = "general_recent"
1014
- elif _looks_medical_search(question):
1015
- intent = "search_medical"
1016
- elif _looks_research_question(question):
1017
- intent = "search_academic"
1018
- else:
1019
- classifier_prompt = f"""You are routing a question for a university library assistant.
1020
-
1021
- Return ONLY valid JSON with one intent: library_info, search_academic, search_medical, general, or general_recent.
1022
-
1023
- Rules:
1024
- - library_info: KU Library services, librarians, borrowing, rooms, accounts, policies, databases access
1025
- - search_medical: user wants medical/clinical literature, articles, books, evidence, or databases
1026
- - search_academic: user wants literature, books, articles, papers, evidence, or academic search on any non-medical topic
1027
- - general_recent: the answer depends on current or recent information, news, or current office-holders
1028
- - general: simple general-knowledge or conversational questions not asking for academic search
1029
-
1030
- Question: "{question}"
1031
-
1032
- Return JSON like: {{"intent":"general_recent"}}"""
1033
- if use_claude:
1034
- from langchain_anthropic import ChatAnthropic
1035
- clf_llm = ChatAnthropic(model="claude-haiku-4-5-20251001", temperature=0, max_tokens=120)
1036
- else:
1037
- clf_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, max_tokens=120)
1038
- clf_resp = clf_llm.invoke(classifier_prompt)
1039
- clf = _extract_json_object(clf_resp.content.strip())
1040
- intent = clf.get("intent", "general")
1041
-
1042
  if intent in ("search_academic", "search_medical"):
1043
  search_plan = await _build_search_plan(question)
1044
  natural_query = search_plan["natural"]
1045
  database_query = search_plan["database_query"] or search_plan["corrected"]
 
 
 
 
1046
  except Exception:
1047
- if _looks_research_question(question) or _looks_medical_search(question):
 
 
1048
  intent = "search_medical" if _looks_medical_search(question) else "search_academic"
1049
  search_plan = await _build_search_plan(question)
1050
  natural_query = search_plan["natural"]
@@ -1114,7 +1111,10 @@ Return JSON like: {{"intent":"general_recent"}}"""
1114
  context_parts.append(f"Natural query for AI tools: {natural_query}")
1115
  context_parts.append(f"Database query for PRIMO/PubMed: {database_query}")
1116
 
1117
- synthesis_prompt = f"""You are the Khalifa University Library AI Assistant (Abu Dhabi, UAE). KU = Khalifa University.
 
 
 
1118
  Be concise (3-5 sentences).
1119
  - For library_info, answer from the library context and include URLs when useful.
1120
  - For search intents, briefly summarise the search direction and mention the top 2-3 relevant results if present.
@@ -1368,8 +1368,6 @@ function st(el,t){{document.querySelectorAll('.tab').forEach(e=>e.classList.remo
1368
 
1369
  // Fetch analytics from Cloudflare D1
1370
  fetch(W+'/analytics').then(r=>r.json()).then(d=>{{
1371
- const helpful = (d.feedback_summary||[]).find(x=>x.feedback==='up')?.c || 0;
1372
- const notHelpful = (d.feedback_summary||[]).find(x=>x.feedback==='down')?.c || 0;
1373
  const el=document.getElementById('t-analytics');
1374
  el.innerHTML=`
1375
  <div class="grid">
@@ -1378,44 +1376,20 @@ fetch(W+'/analytics').then(r=>r.json()).then(d=>{{
1378
  <div class="stat"><div class="n">${{d.week}}</div><div class="l">This Week</div></div>
1379
  <div class="stat"><div class="n">${{(d.avg_time||0).toFixed(1)}}s</div><div class="l">Avg Time</div></div>
1380
  <div class="stat"><div class="n">${{d.errors}}</div><div class="l">Errors</div></div>
1381
- <div class="stat"><div class="n">${{helpful}}</div><div class="l">👍 Helpful</div></div>
1382
- <div class="stat"><div class="n">${{notHelpful}}</div><div class="l">👎 Not Helpful</div></div>
1383
- </div>
1384
- <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:12px">
1385
- <a class="btn bp" href="${{W}}/analytics/export?kind=queries" target="_blank">⬇️ Download Queries CSV</a>
1386
- <a class="btn bp" href="${{W}}/analytics/export?kind=feedback" target="_blank">⬇️ Download Feedback CSV</a>
1387
- <a class="btn bp" href="${{W}}/analytics/export?kind=clicks" target="_blank">⬇️ Download Clicks CSV</a>
1388
- <button class="btn br" onclick="deleteOldData()">🗑️ Delete Old Data</button>
1389
  </div>
1390
  <div class="two">
1391
- <div class="card"><h2>Tool Usage</h2><table><tr><th>Tool</th><th>Count</th></tr>${{(d.tools||[]).map(t=>`<tr><td>${{t.tool_used}}</td><td>${{t.c}}</td></tr>`).join('')}}</table></div>
1392
- <div class="card"><h2>Model Usage</h2><table><tr><th>Model</th><th>Count</th></tr>${{(d.models||[]).map(m=>`<tr><td>${{m.model}}</td><td>${{m.c}}</td></tr>`).join('')}}</table></div>
1393
  </div>
1394
  <div class="two">
1395
  <div class="card"><h2>Hourly</h2><canvas id="hc"></canvas></div>
1396
  <div class="card"><h2>Daily (14d)</h2><canvas id="dc"></canvas></div>
1397
  </div>
1398
- <div class="two">
1399
- <div class="card"><h2>Recent Feedback</h2><table><tr><th>Time</th><th>Question</th><th>Feedback</th></tr>${{(d.recent_feedback||[]).slice(0,15).map(f=>`<tr><td>${{(f.timestamp||'').substring(0,19)}}</td><td title="${{f.question||''}}">${{(f.question||'').substring(0,55)}}</td><td>${{f.feedback==='up'?'👍':'👎'}}</td></tr>`).join('') || '<tr><td colspan="3">No feedback yet</td></tr>'}}</table></div>
1400
- <div class="card"><h2>Top Weak Questions</h2><table><tr><th>Question</th><th>👎</th></tr>${{(d.weak_questions||[]).map(q=>`<tr><td title="${{q.question||''}}">${{(q.question||'').substring(0,70)}}</td><td>${{q.c}}</td></tr>`).join('') || '<tr><td colspan="2">No weak questions yet</td></tr>'}}</table></div>
1401
- </div>
1402
- <div class="two">
1403
- <div class="card"><h2>Top Clicks</h2><table><tr><th>Label</th><th>Type</th><th>Count</th></tr>${{(d.top_clicks||[]).map(c=>`<tr><td title="${{c.label||''}}">${{(c.label||'').substring(0,55)}}</td><td>${{c.target_type}}</td><td>${{c.c}}</td></tr>`).join('') || '<tr><td colspan="3">No clicks yet</td></tr>'}}</table></div>
1404
- <div class="card"><h2>Popular (Top 20)</h2><table><tr><th>Question</th><th>Count</th></tr>${{(d.popular||[]).map(p=>`<tr><td>${{(p.question||'').substring(0,70)}}</td><td>${{p.c}}</td></tr>`).join('')}}</table></div>
1405
- </div>`;
1406
- if((d.hourly||[]).length)new Chart(document.getElementById('hc'),{{type:'bar',data:{{labels:d.hourly.map(h=>h.hour+':00'),datasets:[{{label:'Q',data:d.hourly.map(h=>h.c),backgroundColor:'#003366'}}]}},options:{{responsive:true,plugins:{{legend:{{display:false}}}}}}}});
1407
- if((d.daily||[]).length)new Chart(document.getElementById('dc'),{{type:'line',data:{{labels:d.daily.map(x=>(x.day||'').slice(5)),datasets:[{{label:'Q',data:d.daily.map(x=>x.c),borderColor:'#003366',backgroundColor:'rgba(0,51,102,0.1)',fill:true,tension:.3}}]}},options:{{responsive:true,plugins:{{legend:{{display:false}}}}}}}});
1408
  }}).catch(e=>{{document.getElementById('t-analytics').innerHTML='<div class="card" style="color:#dc2626">Failed to load analytics: '+e.message+'<br>Make sure D1 is initialized: <a href="'+W+'/analytics/init" target="_blank">Click here to init DB</a></div>';}});
1409
 
1410
- async function deleteOldData(){{
1411
- const days = prompt('Delete records older than how many days?', '90');
1412
- if(!days) return;
1413
- const r = await fetch(W+'/analytics/delete-old', {{method:'POST', headers:{{'Content-Type':'application/json'}}, body:JSON.stringify({{days}})}});
1414
- const d = await r.json();
1415
- alert(d.status==='ok' ? `Deleted data older than ${{d.deleted_older_than_days}} days.` : (d.error?.message || 'Delete failed'));
1416
- location.reload();
1417
- }}
1418
-
1419
  // Fetch recent queries
1420
  fetch(W+'/analytics/recent').then(r=>r.json()).then(d=>{{
1421
  const el=document.getElementById('t-queries');
@@ -1615,4 +1589,4 @@ async def clear_logs():
1615
  conn.execute("DELETE FROM queries")
1616
  conn.commit()
1617
  conn.close()
1618
- return {"status": "ok", "message": "All logs cleared"}
 
61
  RESEARCH_CUE_RE = re.compile(r"\b(find|search|look for|show me|get me|give me|locate|recommend|suggest|articles?|papers?|books?|journals?|studies|literature|research|systematic review|evidence|sources|database|databases|peer reviewed|open access)\b", re.IGNORECASE)
62
  LIBRARY_CUE_RE = re.compile(r"\b(library|librarian|borrow|loan|renew|fine|study room|room booking|reserve a room|account|my library|interlibrary|ill|khazna|orcid|open access|apc|refworks|libkey|hours|location|contact|visitor|alumni|database access|off campus|remote access)\b", re.IGNORECASE)
63
  MEDICAL_SEARCH_RE = re.compile(r"\b(pubmed|embase|cinahl|clinicalkey|cochrane|uptodate|medline|systematic review|clinical trial|biomedical literature|medical literature)\b", re.IGNORECASE)
64
+ SOCIAL_RE = re.compile(r"^(hi|hello|hey|good morning|good afternoon|good evening|how are you|thanks|thank you|ok|okay|bye|goodbye|lol|haha|hehe|you are silly|are you silly|are you dumb|stupid bot|joke|tell me a joke)\b", re.IGNORECASE)
65
+ PURE_GREETING_RE = re.compile(r"^(hi|hello|hey|good morning|good afternoon|good evening)[!.\s]*$", re.IGNORECASE)
66
+ HOURS_RE = re.compile(r"\b(library hours|hours|opening hours|closing hours|open today|closed today|open now|when .*open|when .*close|what time .*open|what time .*close|opening time|closing time|is .*library open|is .*campus library open|habshan .*open|san .*open|main campus .*open|ramadan hours|weekend hours|friday hours|saturday hours)\b", re.IGNORECASE)
67
+ KU_GENERAL_RE = re.compile(r"\b(khalifa university|ku)\b.*\b(admission|admissions|program|programs|degree|degrees|college|colleges|school|schools|tuition|fees|scholarship|scholarships|hostel|housing|transport|ranking|rankings|president|vice president|chancellor|application|apply|registrar|academic calendar|semester dates|campus map)\b|\b(admission|admissions|program|programs|degree|degrees|college|colleges|school|schools|tuition|fees|scholarship|scholarships|hostel|housing|transport|ranking|rankings|president|vice president|chancellor|application|apply|registrar|academic calendar|semester dates|campus map)\b.*\b(khalifa university|ku)\b", re.IGNORECASE)
68
+ LIBRARY_HOURS_URL = "https://library.ku.ac.ae/hours"
69
+ KU_MAIN_URL = "https://www.ku.ac.ae/"
70
+
71
+ GROUNDED_LIBRARY_MAP = {
72
+ "ill": "interlibrary loan ILL document delivery full text unavailable article not available borrow from another library",
73
+ "fulltext": "full text libkey nomad article access pdf unavailable interlibrary loan",
74
+ "primo": "PRIMO discovery catalog holdings publication finder library has this journal books articles",
75
+ "circulation": "borrowing circulation renew loan period due date hold request fines my library account",
76
+ "research_help": "Ask a Librarian research consultation reference help subject guides research skills",
77
+ "orcid_oa": "ORCID open access APC publishing research impact Scopus SciVal bibliometrics Nikesh Narayanan",
78
+ "database_access": "database access e-resources remote access off campus login vendor issue Rani Anand",
79
+ "medical_help": "medical librarian Jason Fetty PubMed Embase CINAHL Cochrane UpToDate systematic review",
80
+ "systems_help": "Walter Brian Hall systems website technology digital services library systems",
81
+ "acquisitions": "Alia Al-Harrasi acquisitions request title suggest a book collection development",
82
+ "standards": "ASTM Compass IEEE standards ASME ASCE engineering standards",
83
+ "theses": "ProQuest Dissertations theses Khazna institutional repository dissertations theses",
84
+ "metrics": "impact factor journal citation reports JCR CiteScore Scopus SciVal metrics",
85
+ "chemistry": "ACS SciFindern RSC Reaxys chemistry database",
86
+ "physics": "APS AIP IOPScience physics database",
87
+ "engineering": "IEEE Xplore ACM ASCE ASME Knovel engineering databases",
88
+ }
89
 
90
  # ===== GLOBALS =====
91
  vectorstore = None
 
144
  conn.close()
145
 
146
 
 
 
 
 
 
 
 
 
 
147
  # ===== ADMIN AUTH =====
148
  # ADMIN_PASSWORD must be set as HF Space Secret — no insecure fallback
149
  ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "")
 
432
  history_text = "\n".join(f"{'User' if m['role']=='user' else 'Assistant'}: {m['content']}" for m in history[-3:])
433
  base_query = f"{history_text}\n{question}"
434
 
435
+ # ── Semantic query expansion via lightweight LLM interpreter + grounded institutional map ──
436
+ interp = await _interpret_semantics(question, history or [])
437
+ grounding_text = " ".join(GROUNDED_LIBRARY_MAP.get(k, "") for k in interp.get("grounding_keys", []))
438
+ canonical_text = " ".join(interp.get("canonical_terms", []))
439
+ expanded_query = " ".join(x for x in [base_query, canonical_text, grounding_text] if x).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
 
441
  # ── FAISS scored search ──
442
  docs_with_scores = vectorstore.similarity_search_with_score(expanded_query, k=TOP_K)
 
464
  start your answer with: "Did you mean [exact title from context]? If so, here is the information:"
465
  then give the answer.""" if moderate_match else ""
466
 
467
+ prompt = f"""You are the Khalifa University Library AI Assistant in Abu Dhabi, UAE.
 
 
 
468
  KU means Khalifa University, NOT Kuwait University.
469
 
470
  RULES — follow exactly:
 
740
  return False
741
  return bool(MEDICAL_SEARCH_RE.search(q) or _looks_research_question(q))
742
 
743
+
744
def _looks_library_hours_question(question: str) -> bool:
    """Return True when the question looks like a library opening-hours query.

    Both conditions must hold for the stripped, lower-cased question:
    HOURS_RE finds a match, and at least one of the cue substrings below
    occurs somewhere in the text.
    """
    text = question.strip().lower()
    if not HOURS_RE.search(text):
        return False

    # These are plain substring tests, not word-boundary matches, so a short
    # cue such as 'san' also hits when it appears inside a longer word.
    cues = (
        'library',
        'campus',
        'habshan',
        'san',
        'main campus',
        'hours',
        'open',
        'close',
    )
    return any(cue in text for cue in cues)
747
+
748
def _looks_nonlibrary_ku_question(question: str) -> bool:
    """Return True for general Khalifa University questions unrelated to the library.

    A question that mentions any library-related marker (as a plain
    substring of the stripped, lower-cased text) is never treated as a
    general KU question. Otherwise the result is whether KU_GENERAL_RE
    matches anywhere in the question.
    """
    lowered = question.strip().lower()
    library_markers = ('library', 'librarian', 'primo', 'database', 'book', 'article')
    if any(marker in lowered for marker in library_markers):
        return False

    # The pattern is searched against the caller's original text rather than
    # the lower-cased copy; KU_GENERAL_RE is compiled with re.IGNORECASE.
    return KU_GENERAL_RE.search(question) is not None
753
+
754
def _hours_redirect_answer() -> str:
    """Return the canned HTML answer that points users to the Library Hours page.

    The message embeds LIBRARY_HOURS_URL both as the link target and as the
    visible link text, followed by a note that hours can vary.
    """
    hours_link = f'<a href="{LIBRARY_HOURS_URL}" target="_blank">{LIBRARY_HOURS_URL}</a>'
    lead_in = "For the most accurate opening and closing times, please check the official Library Hours page: "
    caveat = "Hours may vary by campus and during exams, holidays, and Ramadan."
    return lead_in + hours_link + ". " + caveat
760
+
761
def _ku_general_redirect_answer() -> str:
    """Return the canned HTML answer redirecting non-library KU questions.

    The message introduces the assistant as the library assistant, then
    links to KU_MAIN_URL (used as both link target and visible text) for
    general university questions.
    """
    introduction = (
        "I’m <strong>LibBee</strong>, the Khalifa University <strong>Library</strong> AI Assistant, "
        "so I’m best for library resources, services, databases, research help, and staff contacts.<br><br>"
    )
    site_link = f'<a href="{KU_MAIN_URL}" target="_blank">{KU_MAIN_URL}</a>'
    return "".join([
        introduction,
        "For general Khalifa University questions, please visit the main KU website: ",
        site_link,
        ". ",
        "A broader university chatbot is available there for non-library questions.",
    ])
767
+
768
  @app.post("/correct")
769
  async def correct_query(req: CorrectRequest):
770
  """
 
791
 
792
  async def _answer_general(question: str, history=None) -> dict:
793
  """Answer general knowledge questions using live web search when available."""
794
+ if _looks_library_hours_question(question):
795
+ return {"answer": _hours_redirect_answer(), "sources": [], "model": "library-hours-redirect"}
796
+ if _looks_nonlibrary_ku_question(question):
797
+ return {"answer": _ku_general_redirect_answer(), "sources": [], "model": "ku-general-redirect"}
798
+ if _looks_social_or_greeting(question):
799
+ if PURE_GREETING_RE.match(question.strip()):
800
+ guided = (
801
+ "Hi! I’m <strong>LibBee</strong>, the Khalifa University Library AI Assistant.<br><br>"
802
+ "I can help you with articles and books, databases, full text, Interlibrary Loan (ILL), "
803
+ "library services, staff contacts, ORCID, RefWorks, and Open Access.<br><br>"
804
+ "Are you looking for one of these? You can also type your question directly."
805
+ )
806
+ return {"answer": guided, "sources": [], "model": "libbee-greeting"}
807
+ try:
808
+ behavior = get_behavior_instructions()
809
+ llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.4, max_tokens=220)
810
+ msgs = []
811
+ if behavior:
812
+ msgs.append({"role": "system", "content": behavior + "\nIf the user greets you, jokes, or says something mildly mocking, reply politely, warmly, and briefly. Do not be defensive. Keep it light and helpful."})
813
+ for m in (history or [])[-4:]:
814
+ if m.get("role") in ("user", "assistant"):
815
+ msgs.append({"role": m["role"], "content": m.get("content", "")})
816
+ msgs.append({"role": "user", "content": question})
817
+ response = llm.invoke(msgs)
818
+ return {"answer": response.content.strip(), "sources": [], "model": "gpt-4o-mini-social"}
819
+ except Exception:
820
+ return {"answer": "Hi! I’m <strong>LibBee</strong>, the Khalifa University Library AI Assistant. I can help with articles, books, databases, full text, ILL, library services, and staff contacts. What would you like help with?", "sources": [], "model": "fallback-social"}
821
  try:
822
  import openai
823
  client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
824
 
825
+ messages = []
 
826
  if history:
827
  for m in history[-5:]:
828
  role = m.get("role", "user")
 
857
  print(f"Web search failed, falling back to plain GPT: {e}")
858
  try:
859
  llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.3, max_tokens=500)
860
+ msgs = []
861
  for m in (history or [])[-5:]:
862
  if m.get("role") in ("user", "assistant"):
863
  msgs.append({"role": m["role"], "content": m.get("content", "")})
 
965
  history = [{"role": m.role, "content": m.content} for m in req.history] if req.history else []
966
  use_claude = req.model == "claude" and bool(os.environ.get("ANTHROPIC_API_KEY"))
967
 
968
+ # ---- Early greeting handling ----
969
+ if PURE_GREETING_RE.match(question):
970
+ answer = (
971
+ "Hi! I’m <strong>LibBee</strong>, the Khalifa University Library AI Assistant.<br><br>"
972
+ "I can help you find articles and books, search databases, access full text, request Interlibrary Loan (ILL), "
973
+ "and answer questions about library services and staff.<br><br>"
974
+ "Are you looking for one of these? You can also type your question directly."
975
+ )
976
+ elapsed = time.time() - start
977
+ return {
978
+ "answer": answer,
979
+ "intent": "social_greeting",
980
+ "tools_used": [],
981
+ "search_results": [],
982
+ "sources": [],
983
+ "model_used": req.model,
984
+ "response_time": round(elapsed, 2),
985
+ "corrected_query": question,
986
+ "natural_query": question,
987
+ "database_query": question,
988
+ "original_question": question,
989
+ "is_follow_up": False,
990
+ "source_mode": "social",
991
+ }
992
+
993
+ if _looks_library_hours_question(question):
994
+ elapsed = time.time() - start
995
+ return {
996
+ "answer": _hours_redirect_answer(),
997
+ "intent": "library_info",
998
+ "tools_used": ["hours_redirect"],
999
+ "search_results": [],
1000
+ "sources": [{"title": "Library Hours", "source": LIBRARY_HOURS_URL}],
1001
+ "model_used": req.model,
1002
+ "response_time": round(elapsed, 2),
1003
+ "corrected_query": question,
1004
+ "natural_query": question,
1005
+ "database_query": question,
1006
+ }
1007
+
1008
+ if _looks_nonlibrary_ku_question(question):
1009
+ elapsed = time.time() - start
1010
+ return {
1011
+ "answer": _ku_general_redirect_answer(),
1012
+ "intent": "general",
1013
+ "tools_used": ["ku_general_redirect"],
1014
+ "search_results": [],
1015
+ "sources": [],
1016
+ "model_used": req.model,
1017
+ "response_time": round(elapsed, 2),
1018
+ "corrected_query": question,
1019
+ "natural_query": question,
1020
+ "database_query": question,
1021
+ }
1022
+
1023
  intent = "general"
1024
  natural_query = question
1025
  database_query = question
1026
  search_plan = None
1027
 
1028
  try:
1029
+ interp = await _interpret_semantics(question, history)
1030
+ intent = interp.get("intent_hint", "general")
1031
+ if interp.get("social"):
1032
+ intent = "general"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1033
  if intent in ("search_academic", "search_medical"):
1034
  search_plan = await _build_search_plan(question)
1035
  natural_query = search_plan["natural"]
1036
  database_query = search_plan["database_query"] or search_plan["corrected"]
1037
+ elif intent == "library_info" and interp.get("grounding_keys"):
1038
+ # strengthen RAG using interpreted canonical library terms
1039
+ resolved = " ".join([question] + interp.get("canonical_terms", []) + [GROUNDED_LIBRARY_MAP.get(k, "") for k in interp.get("grounding_keys", []) if GROUNDED_LIBRARY_MAP.get(k)])
1040
+ question = re.sub(r"\s+", " ", resolved).strip()
1041
  except Exception:
1042
+ if _looks_social_or_greeting(question):
1043
+ intent = "general"
1044
+ elif _looks_research_question(question) or _looks_medical_search(question):
1045
  intent = "search_medical" if _looks_medical_search(question) else "search_academic"
1046
  search_plan = await _build_search_plan(question)
1047
  natural_query = search_plan["natural"]
 
1111
  context_parts.append(f"Natural query for AI tools: {natural_query}")
1112
  context_parts.append(f"Database query for PRIMO/PubMed: {database_query}")
1113
 
1114
+ behavior = get_behavior_instructions()
1115
+ synthesis_prompt = f"""{behavior}
1116
+
1117
+ You are the Khalifa University Library AI Assistant (Abu Dhabi, UAE). KU = Khalifa University.
1118
  Be concise (3-5 sentences).
1119
  - For library_info, answer from the library context and include URLs when useful.
1120
  - For search intents, briefly summarise the search direction and mention the top 2-3 relevant results if present.
 
1368
 
1369
  // Fetch analytics from Cloudflare D1
1370
  fetch(W+'/analytics').then(r=>r.json()).then(d=>{{
 
 
1371
  const el=document.getElementById('t-analytics');
1372
  el.innerHTML=`
1373
  <div class="grid">
 
1376
  <div class="stat"><div class="n">${{d.week}}</div><div class="l">This Week</div></div>
1377
  <div class="stat"><div class="n">${{(d.avg_time||0).toFixed(1)}}s</div><div class="l">Avg Time</div></div>
1378
  <div class="stat"><div class="n">${{d.errors}}</div><div class="l">Errors</div></div>
 
 
 
 
 
 
 
 
1379
  </div>
1380
  <div class="two">
1381
+ <div class="card"><h2>Tool Usage</h2><table><tr><th>Tool</th><th>Count</th></tr>${{d.tools.map(t=>`<tr><td>${{t.tool_used}}</td><td>${{t.c}}</td></tr>`).join('')}}</table></div>
1382
+ <div class="card"><h2>Model Usage</h2><table><tr><th>Model</th><th>Count</th></tr>${{d.models.map(m=>`<tr><td>${{m.model}}</td><td>${{m.c}}</td></tr>`).join('')}}</table></div>
1383
  </div>
1384
  <div class="two">
1385
  <div class="card"><h2>Hourly</h2><canvas id="hc"></canvas></div>
1386
  <div class="card"><h2>Daily (14d)</h2><canvas id="dc"></canvas></div>
1387
  </div>
1388
+ <div class="card"><h2>Popular (Top 20)</h2><table><tr><th>Question</th><th>Count</th></tr>${{d.popular.map(p=>`<tr><td>${{(p.question||'').substring(0,70)}}</td><td>${{p.c}}</td></tr>`).join('')}}</table></div>`;
1389
+ if(d.hourly.length)new Chart(document.getElementById('hc'),{{type:'bar',data:{{labels:d.hourly.map(h=>h.hour+':00'),datasets:[{{label:'Q',data:d.hourly.map(h=>h.c),backgroundColor:'#003366'}}]}},options:{{responsive:true,plugins:{{legend:{{display:false}}}}}}}});
1390
+ if(d.daily.length)new Chart(document.getElementById('dc'),{{type:'line',data:{{labels:d.daily.map(x=>(x.day||'').slice(5)),datasets:[{{label:'Q',data:d.daily.map(x=>x.c),borderColor:'#003366',backgroundColor:'rgba(0,51,102,0.1)',fill:true,tension:.3}}]}},options:{{responsive:true,plugins:{{legend:{{display:false}}}}}}}});
 
 
 
 
 
 
 
1391
  }}).catch(e=>{{document.getElementById('t-analytics').innerHTML='<div class="card" style="color:#dc2626">Failed to load analytics: '+e.message+'<br>Make sure D1 is initialized: <a href="'+W+'/analytics/init" target="_blank">Click here to init DB</a></div>';}});
1392
 
 
 
 
 
 
 
 
 
 
1393
  // Fetch recent queries
1394
  fetch(W+'/analytics/recent').then(r=>r.json()).then(d=>{{
1395
  const el=document.getElementById('t-queries');
 
1589
  conn.execute("DELETE FROM queries")
1590
  conn.commit()
1591
  conn.close()
1592
+ return {"status": "ok", "message": "All logs cleared"}