nikeshn committed on
Commit
3c9f801
·
verified ·
1 Parent(s): c2be7f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -187
app.py CHANGED
@@ -1,24 +1,18 @@
1
  """
2
  Khalifa University Library AI Agent
3
- MCP-style tool-calling backend with RAG, PRIMO, PubMed, Google Scholar, Consensus, EBSCO EDS
4
 
5
  Tools:
6
  - search_primo: Search KU Library catalog
7
  - search_pubmed: Search biomedical literature
8
  - search_scholar: Search Google Scholar
9
  - search_consensus: Search Consensus (research papers)
10
- - search_eds: Search EBSCO Discovery Service
11
  - get_library_info: RAG from KU library knowledge base
12
 
13
  Environment variables (HF Space Secrets):
14
  OPENAI_API_KEY — required (embeddings + ChatGPT)
15
  ANTHROPIC_API_KEY — optional (Claude answers)
16
  PRIMO_API_KEY — required (PRIMO search)
17
- EDS_USER_ID — required (EBSCO EDS API)
18
- EDS_PASSWORD — required (EBSCO EDS API)
19
- EDS_PROFILE — optional (default: edsapi)
20
- EDS_INTERFACE — optional (default: wsapi)
21
- EDS_ORG — optional (default: ns174973)
22
  """
23
 
24
  import os
@@ -1011,182 +1005,6 @@ async def tool_search_pubmed(query, limit=5):
1011
  except Exception as e:
1012
  return {"error": f"PubMed: {str(e)}", "results": [], "source": "PubMed"}
1013
 
1014
-
1015
- # ===== TOOL: SEARCH EDS (EBSCO Discovery Service) =====
1016
- async def tool_search_eds(query: str, limit: int = 5) -> dict:
1017
- """
1018
- Search EBSCO EDS API.
1019
- Auth flow: UIDAuth β†’ CreateSession β†’ Search β†’ EndSession
1020
- Credentials from HF Space secrets: EDS_USER_ID, EDS_PASSWORD,
1021
- EDS_PROFILE, EDS_INTERFACE, EDS_ORG
1022
- """
1023
- user_id = os.environ.get("EDS_USER_ID")
1024
- password = os.environ.get("EDS_PASSWORD")
1025
- profile = os.environ.get("EDS_PROFILE", "edsapi")
1026
- interface = os.environ.get("EDS_INTERFACE", "wsapi")
1027
- org = os.environ.get("EDS_ORG", "ns174973")
1028
-
1029
- if not user_id or not password:
1030
- return {"error": "EDS credentials not configured", "results": [], "source": "EBSCO EDS"}
1031
-
1032
- auth_url = "https://eds-api.ebscohost.com/authservice/rest/UIDAuth"
1033
- session_url = "https://eds-api.ebscohost.com/edsapi/rest/CreateSession"
1034
- search_url = "https://eds-api.ebscohost.com/edsapi/rest/Search"
1035
- end_url = "https://eds-api.ebscohost.com/edsapi/rest/EndSession"
1036
-
1037
- headers_base = {"Content-Type": "application/json", "Accept": "application/json"}
1038
-
1039
- try:
1040
- async with httpx.AsyncClient(timeout=20) as client:
1041
-
1042
- # ── Step 1: Authentication token ──
1043
- auth_r = await client.post(auth_url, json={
1044
- "UserId": user_id,
1045
- "Password": password,
1046
- "InterfaceId": interface,
1047
- }, headers=headers_base)
1048
- if auth_r.status_code != 200:
1049
- return {"error": f"EDS auth {auth_r.status_code}: {auth_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
1050
- auth_token = auth_r.json().get("AuthToken")
1051
- if not auth_token:
1052
- return {"error": f"EDS: no AuthToken. Response: {auth_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
1053
-
1054
- # ── Step 2: Session token ──
1055
- sess_headers = {**headers_base, "x-authenticationToken": auth_token}
1056
- sess_r = await client.post(session_url, json={
1057
- "Profile": profile,
1058
- "Guest": "n",
1059
- "Org": org,
1060
- }, headers=sess_headers)
1061
- if sess_r.status_code != 200:
1062
- return {"error": f"EDS session {sess_r.status_code}: {sess_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
1063
- session_token = sess_r.json().get("SessionToken")
1064
- if not session_token:
1065
- return {"error": f"EDS: no SessionToken. Response: {sess_r.text[:300]}", "results": [], "source": "EBSCO EDS"}
1066
-
1067
- # ── Step 3: Search ──
1068
- search_headers = {
1069
- **headers_base,
1070
- "x-authenticationToken": auth_token,
1071
- "x-sessionToken": session_token,
1072
- }
1073
- # EDS Search uses POST with JSON body β€” NOT GET with query params
1074
- search_body = {
1075
- "SearchCriteria": {
1076
- "Queries": [{"BooleanOperator": "AND", "Term": query}],
1077
- "SearchMode": "all",
1078
- "IncludeFacets": "n",
1079
- "Sort": "relevance",
1080
- },
1081
- "RetrievalCriteria": {
1082
- "InclSourcesTitle": "y",
1083
- "ReturnedResultsField": {
1084
- "MaxResultsToReturn": limit,
1085
- "PageNumber": 1,
1086
- },
1087
- "Highlight": "n",
1088
- },
1089
- "Actions": None,
1090
- }
1091
- search_r = await client.post(search_url, json=search_body, headers=search_headers)
1092
-
1093
- # Log non-200 for debugging
1094
- if search_r.status_code != 200:
1095
- error_body = search_r.text[:400]
1096
- return {"error": f"EDS search {search_r.status_code}: {error_body}", "results": [], "source": "EBSCO EDS"}
1097
-
1098
- results = []
1099
- total = 0
1100
- if search_r.status_code == 200:
1101
- data = search_r.json()
1102
- search_res = data.get("SearchResult", {})
1103
- statistics = search_res.get("Statistics", {})
1104
- total = statistics.get("TotalHits", 0)
1105
- records = search_res.get("Data", {}).get("Records", []) or []
1106
-
1107
- for rec in records[:limit]:
1108
- # Title
1109
- title_items = rec.get("RecordInfo", {}).get("BibRecord", {}).get("BibEntity", {}).get("Titles", [])
1110
- title = next((t.get("TitleFull", "") for t in title_items if t.get("Type") == "main"), "") or \
1111
- (title_items[0].get("TitleFull", "") if title_items else "Untitled")
1112
-
1113
- # Authors
1114
- contributors = rec.get("RecordInfo", {}).get("BibRecord", {}).get("BibRelationships", {}) \
1115
- .get("HasContributorRelationships", [])
1116
- authors = []
1117
- for c in contributors[:3]:
1118
- parts = c.get("PersonEntity", {}).get("Name", {})
1119
- name = parts.get("NameFull") or \
1120
- f"{parts.get('NameLast','')}, {parts.get('NameFirst','')}".strip(", ")
1121
- if name:
1122
- authors.append(name)
1123
- if len(contributors) > 3:
1124
- authors.append("et al.")
1125
- creator = "; ".join(authors) if authors else "Unknown"
1126
-
1127
- # Date / source / volume / issue
1128
- pub_info = rec.get("RecordInfo", {}).get("BibRecord", {}).get("BibRelationships", {}) \
1129
- .get("IsPartOfRelationships", [])
1130
- date = volume = issue = source_title = ""
1131
- if pub_info:
1132
- pub = pub_info[0]
1133
- bib_entity = pub.get("BibEntity", {})
1134
- dates = bib_entity.get("Dates", [])
1135
- if dates:
1136
- d = dates[0]
1137
- date = d.get("Y", "") + ("-" + d.get("M", "") if d.get("M") else "")
1138
- nums = bib_entity.get("Numbering", [])
1139
- for n in nums:
1140
- if n.get("Type") == "volume": volume = n.get("Value", "")
1141
- if n.get("Type") == "issue": issue = n.get("Value", "")
1142
- titles = bib_entity.get("Titles", [])
1143
- source_title = titles[0].get("TitleFull", "") if titles else ""
1144
-
1145
- # Abstract
1146
- abstract = ""
1147
- items = rec.get("Items", [])
1148
- for item in items:
1149
- if item.get("Name") == "Abstract":
1150
- abstract = re.sub(r"<[^>]+>", "", item.get("Data", ""))[:400]
1151
- break
1152
-
1153
- # Persistent link
1154
- plink = rec.get("PLink", "")
1155
-
1156
- # DOI from custom links
1157
- doi = None
1158
- for link in rec.get("CustomLinks", []):
1159
- url_val = link.get("Url", "")
1160
- if "doi.org" in url_val:
1161
- doi = url_val.split("doi.org/")[-1]
1162
- break
1163
-
1164
- results.append({
1165
- "title": title,
1166
- "creator": creator,
1167
- "date": date,
1168
- "source": source_title,
1169
- "volume": volume,
1170
- "issue": issue,
1171
- "description": abstract,
1172
- "doi": doi,
1173
- "link": plink,
1174
- "type": "Journal Article",
1175
- "_source": "EBSCO EDS",
1176
- })
1177
-
1178
- # ── Step 4: End session (cleanup) ──
1179
- try:
1180
- await client.get(end_url, headers=search_headers)
1181
- except Exception:
1182
- pass # non-critical
1183
-
1184
- return {"total": total, "results": results, "source": "EBSCO EDS"}
1185
-
1186
- except Exception as e:
1187
- return {"error": f"EDS: {str(e)}", "results": [], "source": "EBSCO EDS"}
1188
-
1189
-
1190
  # ===== TOOL: SEARCH CONSENSUS (via Semantic Scholar with consensus framing) =====
1191
  async def tool_search_consensus(query, limit=5):
1192
  """
@@ -1416,7 +1234,7 @@ class ChatMessage(BaseModel):
1416
 
1417
  class SearchRequest(BaseModel):
1418
  query: str
1419
- source: str = "primo" # primo, pubmed, scholar, consensus, eds, all
1420
  model: str = "gpt"
1421
  limit: int = 5
1422
  peer_reviewed: bool = False
@@ -1441,7 +1259,7 @@ def health():
1441
  return {
1442
  "status": "ok",
1443
  "vectorstore_ready": vectorstore is not None,
1444
- "tools": ["search_primo", "search_pubmed", "search_scholar", "search_consensus", "search_eds", "get_library_info"],
1445
  "endpoints": ["/rag", "/search", "/agent", "/general", "/config", "/year"],
1446
  "models": {
1447
  "gpt": bool(os.environ.get("OPENAI_API_KEY")),
@@ -1893,8 +1711,6 @@ async def search(req: SearchRequest):
1893
  result = await tool_search_scholar(req.query, req.limit)
1894
  elif source == "consensus":
1895
  result = await tool_search_consensus(req.query, req.limit)
1896
- elif source == "eds":
1897
- result = await tool_search_eds(req.query, req.limit)
1898
  elif source == "all":
1899
  import asyncio
1900
  tasks = [
@@ -3047,3 +2863,4 @@ async def clear_logs():
3047
  conn.commit()
3048
  conn.close()
3049
  return {"status": "ok", "message": "All logs cleared"}
 
 
1
  """
2
  Khalifa University Library AI Agent
3
+ MCP-style tool-calling backend with RAG, PRIMO, PubMed, Google Scholar, Consensus
4
 
5
  Tools:
6
  - search_primo: Search KU Library catalog
7
  - search_pubmed: Search biomedical literature
8
  - search_scholar: Search Google Scholar
9
  - search_consensus: Search Consensus (research papers)
 
10
  - get_library_info: RAG from KU library knowledge base
11
 
12
  Environment variables (HF Space Secrets):
13
  OPENAI_API_KEY — required (embeddings + ChatGPT)
14
  ANTHROPIC_API_KEY — optional (Claude answers)
15
  PRIMO_API_KEY — required (PRIMO search)
 
 
 
 
 
16
  """
17
 
18
  import os
 
1005
  except Exception as e:
1006
  return {"error": f"PubMed: {str(e)}", "results": [], "source": "PubMed"}
1007
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1008
  # ===== TOOL: SEARCH CONSENSUS (via Semantic Scholar with consensus framing) =====
1009
  async def tool_search_consensus(query, limit=5):
1010
  """
 
1234
 
1235
  class SearchRequest(BaseModel):
1236
  query: str
1237
+ source: str = "primo" # primo, pubmed, scholar, consensus, all
1238
  model: str = "gpt"
1239
  limit: int = 5
1240
  peer_reviewed: bool = False
 
1259
  return {
1260
  "status": "ok",
1261
  "vectorstore_ready": vectorstore is not None,
1262
+ "tools": ["search_primo", "search_pubmed", "search_scholar", "search_consensus", "get_library_info"],
1263
  "endpoints": ["/rag", "/search", "/agent", "/general", "/config", "/year"],
1264
  "models": {
1265
  "gpt": bool(os.environ.get("OPENAI_API_KEY")),
 
1711
  result = await tool_search_scholar(req.query, req.limit)
1712
  elif source == "consensus":
1713
  result = await tool_search_consensus(req.query, req.limit)
 
 
1714
  elif source == "all":
1715
  import asyncio
1716
  tasks = [
 
2863
  conn.commit()
2864
  conn.close()
2865
  return {"status": "ok", "message": "All logs cleared"}
2866
+