Spaces:

nikeshn
/

kulibrary

Running

App Files Community

nikeshn committed on 13 days ago

Commit

3c9f801

verified ·

1 Parent(s): c2be7f0

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -187

app.py CHANGED Viewed

@@ -1,24 +1,18 @@
 """
 Khalifa University Library AI Agent
-MCP-style tool-calling backend with RAG, PRIMO, PubMed, Google Scholar, Consensus, EBSCO EDS
 Tools:
   - search_primo: Search KU Library catalog
   - search_pubmed: Search biomedical literature
   - search_scholar: Search Google Scholar
   - search_consensus: Search Consensus (research papers)
-  - search_eds: Search EBSCO Discovery Service
   - get_library_info: RAG from KU library knowledge base
 Environment variables (HF Space Secrets):
   OPENAI_API_KEY      — required (embeddings + ChatGPT)
   ANTHROPIC_API_KEY   — optional (Claude answers)
   PRIMO_API_KEY       — required (PRIMO search)
-  EDS_USER_ID         — required (EBSCO EDS API)
-  EDS_PASSWORD        — required (EBSCO EDS API)
-  EDS_PROFILE         — optional (default: edsapi)
-  EDS_INTERFACE       — optional (default: wsapi)
-  EDS_ORG             — optional (default: ns174973)
 """
 import os
@@ -1011,182 +1005,6 @@ async def tool_search_pubmed(query, limit=5):
     except Exception as e:
         return {"error": f"PubMed: {str(e)}", "results": [], "source": "PubMed"}
-# ===== TOOL: SEARCH EDS (EBSCO Discovery Service) =====
-async def tool_search_eds(query: str, limit: int = 5) -> dict:
-    """
-    Search EBSCO EDS API.
-    Auth flow: UIDAuth → CreateSession → Search → EndSession
-    Credentials from HF Space secrets: EDS_USER_ID, EDS_PASSWORD,
-    EDS_PROFILE, EDS_INTERFACE, EDS_ORG
-    """
-    user_id   = os.environ.get("EDS_USER_ID")
-    password  = os.environ.get("EDS_PASSWORD")
-    profile   = os.environ.get("EDS_PROFILE",   "edsapi")
-    interface = os.environ.get("EDS_INTERFACE",  "wsapi")
-    org       = os.environ.get("EDS_ORG",        "ns174973")
-    if not user_id or not password:
-        return {"error": "EDS credentials not configured", "results": [], "source": "EBSCO EDS"}
-    auth_url    = "https://eds-api.ebscohost.com/authservice/rest/UIDAuth"
-    session_url = "https://eds-api.ebscohost.com/edsapi/rest/CreateSession"
-    search_url  = "https://eds-api.ebscohost.com/edsapi/rest/Search"
-    end_url     = "https://eds-api.ebscohost.com/edsapi/rest/EndSession"
-    headers_base = {"Content-Type": "application/json", "Accept": "application/json"}
-    try:
-        async with httpx.AsyncClient(timeout=20) as client:
-            # ── Step 1: Authentication token ──
-            auth_r = await client.post(auth_url, json={
-                "UserId": user_id,
-                "Password": password,
-                "InterfaceId": interface,
-            }, headers=headers_base)
-            if auth_r.status_code != 200:
-                return {"error": f"EDS auth {auth_r.status_code}: {auth_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
-            auth_token = auth_r.json().get("AuthToken")
-            if not auth_token:
-                return {"error": f"EDS: no AuthToken. Response: {auth_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
-            # ── Step 2: Session token ──
-            sess_headers = {**headers_base, "x-authenticationToken": auth_token}
-            sess_r = await client.post(session_url, json={
-                "Profile": profile,
-                "Guest":   "n",
-                "Org":     org,
-            }, headers=sess_headers)
-            if sess_r.status_code != 200:
-                return {"error": f"EDS session {sess_r.status_code}: {sess_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
-            session_token = sess_r.json().get("SessionToken")
-            if not session_token:
-                return {"error": f"EDS: no SessionToken. Response: {sess_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
-            # ── Step 3: Search ──
-            search_headers = {
-                **headers_base,
-                "x-authenticationToken": auth_token,
-                "x-sessionToken":        session_token,
-            }
-            # EDS Search uses POST with JSON body — NOT GET with query params
-            search_body = {
-                "SearchCriteria": {
-                    "Queries": [{"BooleanOperator": "AND", "Term": query}],
-                    "SearchMode": "all",
-                    "IncludeFacets": "n",
-                    "Sort": "relevance",
-                },
-                "RetrievalCriteria": {
-                    "InclSourcesTitle": "y",
-                    "ReturnedResultsField": {
-                        "MaxResultsToReturn": limit,
-                        "PageNumber": 1,
-                    },
-                    "Highlight": "n",
-                },
-                "Actions": None,
-            }
-            search_r = await client.post(search_url, json=search_body, headers=search_headers)
-            # Log non-200 for debugging
-            if search_r.status_code != 200:
-                error_body = search_r.text[:400]
-                return {"error": f"EDS search {search_r.status_code}: {error_body}", "results": [], "source": "EBSCO EDS"}
-            results = []
-            total   = 0
-            if search_r.status_code == 200:
-                data       = search_r.json()
-                search_res = data.get("SearchResult", {})
-                statistics = search_res.get("Statistics", {})
-                total      = statistics.get("TotalHits", 0)
-                records    = search_res.get("Data", {}).get("Records", []) or []
-                for rec in records[:limit]:
-                    # Title
-                    title_items = rec.get("RecordInfo", {}).get("BibRecord", {}).get("BibEntity", {}).get("Titles", [])
-                    title = next((t.get("TitleFull", "") for t in title_items if t.get("Type") == "main"), "") or \
-                            (title_items[0].get("TitleFull", "") if title_items else "Untitled")
-                    # Authors
-                    contributors = rec.get("RecordInfo", {}).get("BibRecord", {}).get("BibRelationships", {}) \
-                                      .get("HasContributorRelationships", [])
-                    authors = []
-                    for c in contributors[:3]:
-                        parts = c.get("PersonEntity", {}).get("Name", {})
-                        name  = parts.get("NameFull") or \
-                                f"{parts.get('NameLast','')}, {parts.get('NameFirst','')}".strip(", ")
-                        if name:
-                            authors.append(name)
-                    if len(contributors) > 3:
-                        authors.append("et al.")
-                    creator = "; ".join(authors) if authors else "Unknown"
-                    # Date / source / volume / issue
-                    pub_info = rec.get("RecordInfo", {}).get("BibRecord", {}).get("BibRelationships", {}) \
-                                  .get("IsPartOfRelationships", [])
-                    date = volume = issue = source_title = ""
-                    if pub_info:
-                        pub = pub_info[0]
-                        bib_entity = pub.get("BibEntity", {})
-                        dates = bib_entity.get("Dates", [])
-                        if dates:
-                            d = dates[0]
-                            date = d.get("Y", "") + ("-" + d.get("M", "") if d.get("M") else "")
-                        nums = bib_entity.get("Numbering", [])
-                        for n in nums:
-                            if n.get("Type") == "volume":  volume = n.get("Value", "")
-                            if n.get("Type") == "issue":   issue  = n.get("Value", "")
-                        titles = bib_entity.get("Titles", [])
-                        source_title = titles[0].get("TitleFull", "") if titles else ""
-                    # Abstract
-                    abstract = ""
-                    items = rec.get("Items", [])
-                    for item in items:
-                        if item.get("Name") == "Abstract":
-                            abstract = re.sub(r"<[^>]+>", "", item.get("Data", ""))[:400]
-                            break
-                    # Persistent link
-                    plink = rec.get("PLink", "")
-                    # DOI from custom links
-                    doi = None
-                    for link in rec.get("CustomLinks", []):
-                        url_val = link.get("Url", "")
-                        if "doi.org" in url_val:
-                            doi = url_val.split("doi.org/")[-1]
-                            break
-                    results.append({
-                        "title":       title,
-                        "creator":     creator,
-                        "date":        date,
-                        "source":      source_title,
-                        "volume":      volume,
-                        "issue":       issue,
-                        "description": abstract,
-                        "doi":         doi,
-                        "link":        plink,
-                        "type":        "Journal Article",
-                        "_source":     "EBSCO EDS",
-                    })
-            # ── Step 4: End session (cleanup) ──
-            try:
-                await client.get(end_url, headers=search_headers)
-            except Exception:
-                pass  # non-critical
-            return {"total": total, "results": results, "source": "EBSCO EDS"}
-    except Exception as e:
-        return {"error": f"EDS: {str(e)}", "results": [], "source": "EBSCO EDS"}
 # ===== TOOL: SEARCH CONSENSUS (via Semantic Scholar with consensus framing) =====
 async def tool_search_consensus(query, limit=5):
     """
@@ -1416,7 +1234,7 @@ class ChatMessage(BaseModel):
 class SearchRequest(BaseModel):
     query: str
-    source: str = "primo"  # primo, pubmed, scholar, consensus, eds, all
     model: str = "gpt"
     limit: int = 5
     peer_reviewed: bool = False
@@ -1441,7 +1259,7 @@ def health():
     return {
         "status": "ok",
         "vectorstore_ready": vectorstore is not None,
-        "tools": ["search_primo", "search_pubmed", "search_scholar", "search_consensus", "search_eds", "get_library_info"],
         "endpoints": ["/rag", "/search", "/agent", "/general", "/config", "/year"],
         "models": {
             "gpt": bool(os.environ.get("OPENAI_API_KEY")),
@@ -1893,8 +1711,6 @@ async def search(req: SearchRequest):
             result = await tool_search_scholar(req.query, req.limit)
         elif source == "consensus":
             result = await tool_search_consensus(req.query, req.limit)
-        elif source == "eds":
-            result = await tool_search_eds(req.query, req.limit)
         elif source == "all":
             import asyncio
             tasks = [
@@ -3047,3 +2863,4 @@ async def clear_logs():
     conn.commit()
     conn.close()
     return {"status": "ok", "message": "All logs cleared"}

 """
 Khalifa University Library AI Agent
+MCP-style tool-calling backend with RAG, PRIMO, PubMed, Google Scholar, Consensus
 Tools:
   - search_primo: Search KU Library catalog
   - search_pubmed: Search biomedical literature
   - search_scholar: Search Google Scholar
   - search_consensus: Search Consensus (research papers)
   - get_library_info: RAG from KU library knowledge base
 Environment variables (HF Space Secrets):
   OPENAI_API_KEY      — required (embeddings + ChatGPT)
   ANTHROPIC_API_KEY   — optional (Claude answers)
   PRIMO_API_KEY       — required (PRIMO search)
 """
 import os
     except Exception as e:
         return {"error": f"PubMed: {str(e)}", "results": [], "source": "PubMed"}
 # ===== TOOL: SEARCH CONSENSUS (via Semantic Scholar with consensus framing) =====
 async def tool_search_consensus(query, limit=5):
     """
 class SearchRequest(BaseModel):
     query: str
+    source: str = "primo"  # primo, pubmed, scholar, consensus, all
     model: str = "gpt"
     limit: int = 5
     peer_reviewed: bool = False
     return {
         "status": "ok",
         "vectorstore_ready": vectorstore is not None,
+        "tools": ["search_primo", "search_pubmed", "search_scholar", "search_consensus", "get_library_info"],
         "endpoints": ["/rag", "/search", "/agent", "/general", "/config", "/year"],
         "models": {
             "gpt": bool(os.environ.get("OPENAI_API_KEY")),
             result = await tool_search_scholar(req.query, req.limit)
         elif source == "consensus":
             result = await tool_search_consensus(req.query, req.limit)
         elif source == "all":
             import asyncio
             tasks = [
     conn.commit()
     conn.close()
     return {"status": "ok", "message": "All logs cleared"}