nikeshn committed on
Commit
a8a6287
·
verified ·
1 Parent(s): f8ccb74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -14
app.py CHANGED
@@ -1838,6 +1838,15 @@ async def agent_query(req: AgentRequest):
1838
  rag = await tool_library_info(enriched_question, history[-5:] if history else None, model=req.model)
1839
  tools_used = ["get_library_info"]
1840
 
 
 
 
 
 
 
 
 
 
1841
  if rag.get("has_answer") and rag.get("answer"):
1842
  # Good RAG answer β€” synthesise and return
1843
  context_parts = [f"Library Knowledge Base:\n{rag['answer']}"]
@@ -1845,7 +1854,8 @@ async def agent_query(req: AgentRequest):
1845
  synthesis_prompt = (
1846
  f"{behavior}\n\n"
1847
  "You are the Khalifa University Library AI Assistant (Abu Dhabi, UAE). KU = Khalifa University.\n"
1848
- "Answer in 2-4 sentences. Include URLs from the context when relevant.\n\n"
 
1849
  f"Context:\n{chr(10).join(context_parts)}\n\n"
1850
  f"Question: {question}\nAnswer:"
1851
  )
@@ -1860,26 +1870,140 @@ async def agent_query(req: AgentRequest):
1860
  answer = rag["answer"]
1861
  elapsed = time.time() - start
1862
  return _make_agent_response(
1863
- answer=answer, intent="library_info", tools_used=tools_used,
1864
  search_results=[], sources=rag.get("sources", []),
1865
  model=req.model, elapsed=elapsed, question=question,
1866
  )
1867
  else:
1868
- # RAG found nothing β€” fall back to general LLM with library persona
1869
- general = await _answer_general(question, history)
1870
- gen_answer = general.get("answer", "").strip()
1871
- # Safety: if general also failed, give a redirect
1872
- if not gen_answer:
1873
- gen_answer = (
1874
- "I'm not sure I have the exact answer for that. "
1875
- "Please try <a href=\"https://library.ku.ac.ae/AskUs\" target=\"_blank\">Ask a Librarian</a> "
1876
- "for personalised help from the KU library team."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1877
  )
 
 
 
1878
  elapsed = time.time() - start
1879
  return _make_agent_response(
1880
- answer=gen_answer, intent="library_info",
1881
- tools_used=tools_used + ["general_fallback"],
1882
- search_results=[], sources=general.get("sources", []),
1883
  model=req.model, elapsed=elapsed, question=question,
1884
  )
1885
 
 
1838
  rag = await tool_library_info(enriched_question, history[-5:] if history else None, model=req.model)
1839
  tools_used = ["get_library_info"]
1840
 
1841
+ # Ask a Librarian footer β€” appended to ALL library_info answers
1842
+ ASK_LIB = (
1843
+ '<br><br><span style="font-size:.82rem;color:#6b7280">'
1844
+ 'πŸ’¬ Need more help? '
1845
+ '<a href="https://library.ku.ac.ae/AskUs" target="_blank" style="color:#003366;font-weight:600">Ask a Librarian</a>'
1846
+ ' β€” or browse <a href="https://library.ku.ac.ae/lib" target="_blank" style="color:#003366">library.ku.ac.ae</a>.'
1847
+ '</span>'
1848
+ )
1849
+
1850
  if rag.get("has_answer") and rag.get("answer"):
1851
  # Good RAG answer β€” synthesise and return
1852
  context_parts = [f"Library Knowledge Base:\n{rag['answer']}"]
 
1854
  synthesis_prompt = (
1855
  f"{behavior}\n\n"
1856
  "You are the Khalifa University Library AI Assistant (Abu Dhabi, UAE). KU = Khalifa University.\n"
1857
+ "Answer in 2-4 sentences. Include URLs from the context when relevant.\n"
1858
+ "Answer ONLY from the provided context. Do not add information not present in the context.\n\n"
1859
  f"Context:\n{chr(10).join(context_parts)}\n\n"
1860
  f"Question: {question}\nAnswer:"
1861
  )
 
1870
  answer = rag["answer"]
1871
  elapsed = time.time() - start
1872
  return _make_agent_response(
1873
+ answer=answer + ASK_LIB, intent="library_info", tools_used=tools_used,
1874
  search_results=[], sources=rag.get("sources", []),
1875
  model=req.model, elapsed=elapsed, question=question,
1876
  )
1877
  else:
1878
+ # ── RAG miss β€” three-tier fallback for library questions ──
1879
+ #
1880
+ # Tier 1: Strict KU-library-only LLM prompt (no hallucination risk β€”
1881
+ # LLM is told to say it doesn't know rather than invent).
1882
+ # Tier 2: If tier 1 is vague/empty β†’ web search scoped to library.ku.ac.ae
1883
+ # so all facts come from the real KU library website.
1884
+ # Tier 3: Always append "Ask a Librarian" + library homepage link.
1885
+ #
1886
+ # General web search (_answer_general) is NOT used here β€” that's only
1887
+ # for intent==general/general_recent where unrestricted search is correct.
1888
+
1889
+ ASK_LIB = (
1890
+ '<br><br><span style="font-size:.82rem;color:#6b7280">'
1891
+ 'πŸ’¬ Need more help? '
1892
+ '<a href="https://library.ku.ac.ae/AskUs" target="_blank" style="color:#003366;font-weight:600">Ask a Librarian</a>'
1893
+ ' β€” or browse <a href="https://library.ku.ac.ae/lib" target="_blank" style="color:#003366">library.ku.ac.ae</a>.'
1894
+ '</span>'
1895
+ )
1896
+
1897
+ # ── Tier 1: Strict KU-only LLM ──
1898
+ ku_only_prompt = (
1899
+ "You are LibBee, the Khalifa University Library AI Assistant (Abu Dhabi, UAE). "
1900
+ "KU = Khalifa University.\n\n"
1901
+ "STRICT RULES:\n"
1902
+ "1. Answer ONLY using your knowledge of Khalifa University Library services, "
1903
+ "databases, policies, staff, and resources.\n"
1904
+ "2. Be concise (2-4 sentences). Include real KU library URLs when relevant "
1905
+ "(e.g. https://library.ku.ac.ae/AskUs, https://library.ku.ac.ae/ill/, "
1906
+ "https://library.ku.ac.ae/eresources, https://library.ku.ac.ae/oa/).\n"
1907
+ "3. If you are not confident about a specific detail (phone number, exact policy, "
1908
+ "specific database URL), do NOT invent it β€” instead say you are not certain and "
1909
+ "direct the user to Ask a Librarian or the library website.\n"
1910
+ "4. Never make up staff names, contact details, or database names.\n"
1911
+ "5. Do NOT use general web knowledge or facts unrelated to KU Library.\n\n"
1912
+ f"Question: {question}\nAnswer:"
1913
+ )
1914
+ tier1_answer = ""
1915
+ try:
1916
+ if use_claude:
1917
+ from langchain_anthropic import ChatAnthropic
1918
+ t1_llm = ChatAnthropic(model="claude-haiku-4-5-20251001", temperature=0.1, max_tokens=400)
1919
+ else:
1920
+ t1_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1, max_tokens=400)
1921
+ t1_response = t1_llm.invoke([
1922
+ {"role": "system", "content": ku_only_prompt},
1923
+ {"role": "user", "content": question},
1924
+ ])
1925
+ tier1_answer = t1_response.content.strip()
1926
+ except Exception as t1_err:
1927
+ print(f"[LibBee tier1 LLM error] {t1_err}")
1928
+
1929
+ # ── Tier 2: If tier 1 is vague, search library.ku.ac.ae ──
1930
+ # Detect vague answers: short, uncertain, or explicitly admitting no knowledge
1931
+ vague_phrases = [
1932
+ "i'm not sure", "i am not sure", "i don't have", "i do not have",
1933
+ "not certain", "cannot confirm", "not available", "unable to find",
1934
+ "please contact", "you may want to", "i would recommend checking",
1935
+ "i don't have specific", "i lack specific",
1936
+ ]
1937
+ is_vague = (
1938
+ not tier1_answer
1939
+ or len(tier1_answer.split()) < 15
1940
+ or any(p in tier1_answer.lower().replace("\u2019", "'") for p in vague_phrases)
1941
+ )
1942
+
1943
+ tier2_answer = ""
1944
+ tier2_sources = []
1945
+ if is_vague:
1946
+ try:
1947
+ import openai as _openai
1948
+ _client = _openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
1949
+ # Scope web search to KU library domain only
1950
+ scoped_query = f"site:library.ku.ac.ae {question}"
1951
+ _resp = _client.responses.create(
1952
+ model="gpt-4o-mini",
1953
+ tools=[{"type": "web_search_preview"}],
1954
+ input=[{
1955
+ "role": "system",
1956
+ "content": (
1957
+ "You are LibBee, the Khalifa University Library AI Assistant. "
1958
+ "Search the KU library website (library.ku.ac.ae) only. "
1959
+ "Answer in 2-4 sentences using only what you find on the KU library website. "
1960
+ "Always include the source URL. "
1961
+ "If nothing relevant is found on library.ku.ac.ae, say so briefly."
1962
+ )
1963
+ }, {"role": "user", "content": scoped_query}],
1964
+ )
1965
+ for block in _resp.output:
1966
+ if hasattr(block, "content"):
1967
+ for item in block.content:
1968
+ if hasattr(item, "text"):
1969
+ tier2_answer += item.text
1970
+ if hasattr(item, "annotations"):
1971
+ for ann in item.annotations:
1972
+ if hasattr(ann, "url") and hasattr(ann, "title"):
1973
+ if "library.ku.ac.ae" in getattr(ann, "url", ""):
1974
+ tier2_sources.append({"url": ann.url, "title": ann.title})
1975
+ if not tier2_answer:
1976
+ tier2_answer = getattr(_resp, "output_text", "") or ""
1977
+ tier2_answer = tier2_answer.strip()
1978
+ except Exception as t2_err:
1979
+ print(f"[LibBee tier2 web search error] {t2_err}")
1980
+
1981
+ # ── Combine best answer + Ask a Librarian footer ──
1982
+ if tier2_answer and len(tier2_answer.split()) >= 10:
1983
+ # Web search found something useful
1984
+ final_answer = tier2_answer + ASK_LIB
1985
+ final_sources = tier2_sources
1986
+ final_tools = tools_used + ["ku_web_search"]
1987
+ elif tier1_answer and len(tier1_answer.split()) >= 10:
1988
+ # Tier 1 LLM gave a decent answer
1989
+ final_answer = tier1_answer + ASK_LIB
1990
+ final_sources = []
1991
+ final_tools = tools_used + ["ku_only_llm"]
1992
+ else:
1993
+ # Both failed β€” clean redirect only
1994
+ final_answer = (
1995
+ "I don't have that specific information in my knowledge base right now. "
1996
+ "The KU Library team will be able to help directly."
1997
+ + ASK_LIB
1998
  )
1999
+ final_sources = []
2000
+ final_tools = tools_used + ["redirected"]
2001
+
2002
  elapsed = time.time() - start
2003
  return _make_agent_response(
2004
+ answer=final_answer, intent="library_info",
2005
+ tools_used=final_tools,
2006
+ search_results=[], sources=final_sources,
2007
  model=req.model, elapsed=elapsed, question=question,
2008
  )
2009