Update app.py
Browse files
app.py
CHANGED
|
@@ -1,24 +1,18 @@
|
|
| 1 |
"""
|
| 2 |
Khalifa University Library AI Agent
|
| 3 |
-
MCP-style tool-calling backend with RAG, PRIMO, PubMed, Google Scholar, Consensus
|
| 4 |
|
| 5 |
Tools:
|
| 6 |
- search_primo: Search KU Library catalog
|
| 7 |
- search_pubmed: Search biomedical literature
|
| 8 |
- search_scholar: Search Google Scholar
|
| 9 |
- search_consensus: Search Consensus (research papers)
|
| 10 |
-
- search_eds: Search EBSCO Discovery Service
|
| 11 |
- get_library_info: RAG from KU library knowledge base
|
| 12 |
|
| 13 |
Environment variables (HF Space Secrets):
|
| 14 |
OPENAI_API_KEY — required (embeddings + ChatGPT)
|
| 15 |
ANTHROPIC_API_KEY — optional (Claude answers)
|
| 16 |
PRIMO_API_KEY — required (PRIMO search)
|
| 17 |
-
EDS_USER_ID — required (EBSCO EDS API)
|
| 18 |
-
EDS_PASSWORD — required (EBSCO EDS API)
|
| 19 |
-
EDS_PROFILE — optional (default: edsapi)
|
| 20 |
-
EDS_INTERFACE — optional (default: wsapi)
|
| 21 |
-
EDS_ORG — optional (default: ns174973)
|
| 22 |
"""
|
| 23 |
|
| 24 |
import os
|
|
@@ -1011,182 +1005,6 @@ async def tool_search_pubmed(query, limit=5):
|
|
| 1011 |
except Exception as e:
|
| 1012 |
return {"error": f"PubMed: {str(e)}", "results": [], "source": "PubMed"}
|
| 1013 |
|
| 1014 |
-
|
| 1015 |
-
# ===== TOOL: SEARCH EDS (EBSCO Discovery Service) =====
|
| 1016 |
-
async def tool_search_eds(query: str, limit: int = 5) -> dict:
|
| 1017 |
-
"""
|
| 1018 |
-
Search EBSCO EDS API.
|
| 1019 |
-
Auth flow: UIDAuth → CreateSession → Search → EndSession
|
| 1020 |
-
Credentials from HF Space secrets: EDS_USER_ID, EDS_PASSWORD,
|
| 1021 |
-
EDS_PROFILE, EDS_INTERFACE, EDS_ORG
|
| 1022 |
-
"""
|
| 1023 |
-
user_id = os.environ.get("EDS_USER_ID")
|
| 1024 |
-
password = os.environ.get("EDS_PASSWORD")
|
| 1025 |
-
profile = os.environ.get("EDS_PROFILE", "edsapi")
|
| 1026 |
-
interface = os.environ.get("EDS_INTERFACE", "wsapi")
|
| 1027 |
-
org = os.environ.get("EDS_ORG", "ns174973")
|
| 1028 |
-
|
| 1029 |
-
if not user_id or not password:
|
| 1030 |
-
return {"error": "EDS credentials not configured", "results": [], "source": "EBSCO EDS"}
|
| 1031 |
-
|
| 1032 |
-
auth_url = "https://eds-api.ebscohost.com/authservice/rest/UIDAuth"
|
| 1033 |
-
session_url = "https://eds-api.ebscohost.com/edsapi/rest/CreateSession"
|
| 1034 |
-
search_url = "https://eds-api.ebscohost.com/edsapi/rest/Search"
|
| 1035 |
-
end_url = "https://eds-api.ebscohost.com/edsapi/rest/EndSession"
|
| 1036 |
-
|
| 1037 |
-
headers_base = {"Content-Type": "application/json", "Accept": "application/json"}
|
| 1038 |
-
|
| 1039 |
-
try:
|
| 1040 |
-
async with httpx.AsyncClient(timeout=20) as client:
|
| 1041 |
-
|
| 1042 |
-
# ── Step 1: Authentication token ──
|
| 1043 |
-
auth_r = await client.post(auth_url, json={
|
| 1044 |
-
"UserId": user_id,
|
| 1045 |
-
"Password": password,
|
| 1046 |
-
"InterfaceId": interface,
|
| 1047 |
-
}, headers=headers_base)
|
| 1048 |
-
if auth_r.status_code != 200:
|
| 1049 |
-
return {"error": f"EDS auth {auth_r.status_code}: {auth_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
|
| 1050 |
-
auth_token = auth_r.json().get("AuthToken")
|
| 1051 |
-
if not auth_token:
|
| 1052 |
-
return {"error": f"EDS: no AuthToken. Response: {auth_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
|
| 1053 |
-
|
| 1054 |
-
# ── Step 2: Session token ──
|
| 1055 |
-
sess_headers = {**headers_base, "x-authenticationToken": auth_token}
|
| 1056 |
-
sess_r = await client.post(session_url, json={
|
| 1057 |
-
"Profile": profile,
|
| 1058 |
-
"Guest": "n",
|
| 1059 |
-
"Org": org,
|
| 1060 |
-
}, headers=sess_headers)
|
| 1061 |
-
if sess_r.status_code != 200:
|
| 1062 |
-
return {"error": f"EDS session {sess_r.status_code}: {sess_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
|
| 1063 |
-
session_token = sess_r.json().get("SessionToken")
|
| 1064 |
-
if not session_token:
|
| 1065 |
-
return {"error": f"EDS: no SessionToken. Response: {sess_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
|
| 1066 |
-
|
| 1067 |
-
# ── Step 3: Search ──
|
| 1068 |
-
search_headers = {
|
| 1069 |
-
**headers_base,
|
| 1070 |
-
"x-authenticationToken": auth_token,
|
| 1071 |
-
"x-sessionToken": session_token,
|
| 1072 |
-
}
|
| 1073 |
-
# EDS Search uses POST with JSON body — NOT GET with query params
|
| 1074 |
-
search_body = {
|
| 1075 |
-
"SearchCriteria": {
|
| 1076 |
-
"Queries": [{"BooleanOperator": "AND", "Term": query}],
|
| 1077 |
-
"SearchMode": "all",
|
| 1078 |
-
"IncludeFacets": "n",
|
| 1079 |
-
"Sort": "relevance",
|
| 1080 |
-
},
|
| 1081 |
-
"RetrievalCriteria": {
|
| 1082 |
-
"InclSourcesTitle": "y",
|
| 1083 |
-
"ReturnedResultsField": {
|
| 1084 |
-
"MaxResultsToReturn": limit,
|
| 1085 |
-
"PageNumber": 1,
|
| 1086 |
-
},
|
| 1087 |
-
"Highlight": "n",
|
| 1088 |
-
},
|
| 1089 |
-
"Actions": None,
|
| 1090 |
-
}
|
| 1091 |
-
search_r = await client.post(search_url, json=search_body, headers=search_headers)
|
| 1092 |
-
|
| 1093 |
-
# Log non-200 for debugging
|
| 1094 |
-
if search_r.status_code != 200:
|
| 1095 |
-
error_body = search_r.text[:400]
|
| 1096 |
-
return {"error": f"EDS search {search_r.status_code}: {error_body}", "results": [], "source": "EBSCO EDS"}
|
| 1097 |
-
|
| 1098 |
-
results = []
|
| 1099 |
-
total = 0
|
| 1100 |
-
if search_r.status_code == 200:
|
| 1101 |
-
data = search_r.json()
|
| 1102 |
-
search_res = data.get("SearchResult", {})
|
| 1103 |
-
statistics = search_res.get("Statistics", {})
|
| 1104 |
-
total = statistics.get("TotalHits", 0)
|
| 1105 |
-
records = search_res.get("Data", {}).get("Records", []) or []
|
| 1106 |
-
|
| 1107 |
-
for rec in records[:limit]:
|
| 1108 |
-
# Title
|
| 1109 |
-
title_items = rec.get("RecordInfo", {}).get("BibRecord", {}).get("BibEntity", {}).get("Titles", [])
|
| 1110 |
-
title = next((t.get("TitleFull", "") for t in title_items if t.get("Type") == "main"), "") or \
|
| 1111 |
-
(title_items[0].get("TitleFull", "") if title_items else "Untitled")
|
| 1112 |
-
|
| 1113 |
-
# Authors
|
| 1114 |
-
contributors = rec.get("RecordInfo", {}).get("BibRecord", {}).get("BibRelationships", {}) \
|
| 1115 |
-
.get("HasContributorRelationships", [])
|
| 1116 |
-
authors = []
|
| 1117 |
-
for c in contributors[:3]:
|
| 1118 |
-
parts = c.get("PersonEntity", {}).get("Name", {})
|
| 1119 |
-
name = parts.get("NameFull") or \
|
| 1120 |
-
f"{parts.get('NameLast','')}, {parts.get('NameFirst','')}".strip(", ")
|
| 1121 |
-
if name:
|
| 1122 |
-
authors.append(name)
|
| 1123 |
-
if len(contributors) > 3:
|
| 1124 |
-
authors.append("et al.")
|
| 1125 |
-
creator = "; ".join(authors) if authors else "Unknown"
|
| 1126 |
-
|
| 1127 |
-
# Date / source / volume / issue
|
| 1128 |
-
pub_info = rec.get("RecordInfo", {}).get("BibRecord", {}).get("BibRelationships", {}) \
|
| 1129 |
-
.get("IsPartOfRelationships", [])
|
| 1130 |
-
date = volume = issue = source_title = ""
|
| 1131 |
-
if pub_info:
|
| 1132 |
-
pub = pub_info[0]
|
| 1133 |
-
bib_entity = pub.get("BibEntity", {})
|
| 1134 |
-
dates = bib_entity.get("Dates", [])
|
| 1135 |
-
if dates:
|
| 1136 |
-
d = dates[0]
|
| 1137 |
-
date = d.get("Y", "") + ("-" + d.get("M", "") if d.get("M") else "")
|
| 1138 |
-
nums = bib_entity.get("Numbering", [])
|
| 1139 |
-
for n in nums:
|
| 1140 |
-
if n.get("Type") == "volume": volume = n.get("Value", "")
|
| 1141 |
-
if n.get("Type") == "issue": issue = n.get("Value", "")
|
| 1142 |
-
titles = bib_entity.get("Titles", [])
|
| 1143 |
-
source_title = titles[0].get("TitleFull", "") if titles else ""
|
| 1144 |
-
|
| 1145 |
-
# Abstract
|
| 1146 |
-
abstract = ""
|
| 1147 |
-
items = rec.get("Items", [])
|
| 1148 |
-
for item in items:
|
| 1149 |
-
if item.get("Name") == "Abstract":
|
| 1150 |
-
abstract = re.sub(r"<[^>]+>", "", item.get("Data", ""))[:400]
|
| 1151 |
-
break
|
| 1152 |
-
|
| 1153 |
-
# Persistent link
|
| 1154 |
-
plink = rec.get("PLink", "")
|
| 1155 |
-
|
| 1156 |
-
# DOI from custom links
|
| 1157 |
-
doi = None
|
| 1158 |
-
for link in rec.get("CustomLinks", []):
|
| 1159 |
-
url_val = link.get("Url", "")
|
| 1160 |
-
if "doi.org" in url_val:
|
| 1161 |
-
doi = url_val.split("doi.org/")[-1]
|
| 1162 |
-
break
|
| 1163 |
-
|
| 1164 |
-
results.append({
|
| 1165 |
-
"title": title,
|
| 1166 |
-
"creator": creator,
|
| 1167 |
-
"date": date,
|
| 1168 |
-
"source": source_title,
|
| 1169 |
-
"volume": volume,
|
| 1170 |
-
"issue": issue,
|
| 1171 |
-
"description": abstract,
|
| 1172 |
-
"doi": doi,
|
| 1173 |
-
"link": plink,
|
| 1174 |
-
"type": "Journal Article",
|
| 1175 |
-
"_source": "EBSCO EDS",
|
| 1176 |
-
})
|
| 1177 |
-
|
| 1178 |
-
# ── Step 4: End session (cleanup) ──
|
| 1179 |
-
try:
|
| 1180 |
-
await client.get(end_url, headers=search_headers)
|
| 1181 |
-
except Exception:
|
| 1182 |
-
pass # non-critical
|
| 1183 |
-
|
| 1184 |
-
return {"total": total, "results": results, "source": "EBSCO EDS"}
|
| 1185 |
-
|
| 1186 |
-
except Exception as e:
|
| 1187 |
-
return {"error": f"EDS: {str(e)}", "results": [], "source": "EBSCO EDS"}
|
| 1188 |
-
|
| 1189 |
-
|
| 1190 |
# ===== TOOL: SEARCH CONSENSUS (via Semantic Scholar with consensus framing) =====
|
| 1191 |
async def tool_search_consensus(query, limit=5):
|
| 1192 |
"""
|
|
@@ -1416,7 +1234,7 @@ class ChatMessage(BaseModel):
|
|
| 1416 |
|
| 1417 |
class SearchRequest(BaseModel):
|
| 1418 |
query: str
|
| 1419 |
-
source: str = "primo" # primo, pubmed, scholar, consensus, eds, all
|
| 1420 |
model: str = "gpt"
|
| 1421 |
limit: int = 5
|
| 1422 |
peer_reviewed: bool = False
|
|
@@ -1441,7 +1259,7 @@ def health():
|
|
| 1441 |
return {
|
| 1442 |
"status": "ok",
|
| 1443 |
"vectorstore_ready": vectorstore is not None,
|
| 1444 |
-
"tools": ["search_primo", "search_pubmed", "search_scholar", "search_consensus", "
|
| 1445 |
"endpoints": ["/rag", "/search", "/agent", "/general", "/config", "/year"],
|
| 1446 |
"models": {
|
| 1447 |
"gpt": bool(os.environ.get("OPENAI_API_KEY")),
|
|
@@ -1893,8 +1711,6 @@ async def search(req: SearchRequest):
|
|
| 1893 |
result = await tool_search_scholar(req.query, req.limit)
|
| 1894 |
elif source == "consensus":
|
| 1895 |
result = await tool_search_consensus(req.query, req.limit)
|
| 1896 |
-
elif source == "eds":
|
| 1897 |
-
result = await tool_search_eds(req.query, req.limit)
|
| 1898 |
elif source == "all":
|
| 1899 |
import asyncio
|
| 1900 |
tasks = [
|
|
@@ -3047,3 +2863,4 @@ async def clear_logs():
|
|
| 3047 |
conn.commit()
|
| 3048 |
conn.close()
|
| 3049 |
return {"status": "ok", "message": "All logs cleared"}
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
Khalifa University Library AI Agent
|
| 3 |
+
MCP-style tool-calling backend with RAG, PRIMO, PubMed, Google Scholar, Consensus
|
| 4 |
|
| 5 |
Tools:
|
| 6 |
- search_primo: Search KU Library catalog
|
| 7 |
- search_pubmed: Search biomedical literature
|
| 8 |
- search_scholar: Search Google Scholar
|
| 9 |
- search_consensus: Search Consensus (research papers)
|
|
|
|
| 10 |
- get_library_info: RAG from KU library knowledge base
|
| 11 |
|
| 12 |
Environment variables (HF Space Secrets):
|
| 13 |
OPENAI_API_KEY — required (embeddings + ChatGPT)
|
| 14 |
ANTHROPIC_API_KEY — optional (Claude answers)
|
| 15 |
PRIMO_API_KEY — required (PRIMO search)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
"""
|
| 17 |
|
| 18 |
import os
|
|
|
|
| 1005 |
except Exception as e:
|
| 1006 |
return {"error": f"PubMed: {str(e)}", "results": [], "source": "PubMed"}
|
| 1007 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1008 |
# ===== TOOL: SEARCH CONSENSUS (via Semantic Scholar with consensus framing) =====
|
| 1009 |
async def tool_search_consensus(query, limit=5):
|
| 1010 |
"""
|
|
|
|
| 1234 |
|
| 1235 |
class SearchRequest(BaseModel):
|
| 1236 |
query: str
|
| 1237 |
+
source: str = "primo" # primo, pubmed, scholar, consensus, all
|
| 1238 |
model: str = "gpt"
|
| 1239 |
limit: int = 5
|
| 1240 |
peer_reviewed: bool = False
|
|
|
|
| 1259 |
return {
|
| 1260 |
"status": "ok",
|
| 1261 |
"vectorstore_ready": vectorstore is not None,
|
| 1262 |
+
"tools": ["search_primo", "search_pubmed", "search_scholar", "search_consensus", "get_library_info"],
|
| 1263 |
"endpoints": ["/rag", "/search", "/agent", "/general", "/config", "/year"],
|
| 1264 |
"models": {
|
| 1265 |
"gpt": bool(os.environ.get("OPENAI_API_KEY")),
|
|
|
|
| 1711 |
result = await tool_search_scholar(req.query, req.limit)
|
| 1712 |
elif source == "consensus":
|
| 1713 |
result = await tool_search_consensus(req.query, req.limit)
|
|
|
|
|
|
|
| 1714 |
elif source == "all":
|
| 1715 |
import asyncio
|
| 1716 |
tasks = [
|
|
|
|
| 2863 |
conn.commit()
|
| 2864 |
conn.close()
|
| 2865 |
return {"status": "ok", "message": "All logs cleared"}
|
| 2866 |
+
|