Codemaster67 committed on
Commit
6733661
·
verified ·
1 Parent(s): 4b3377d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +191 -0
app.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from fastmcp import FastMCP
4
+
5
# --- CONFIG ---
# Base endpoints of the external services this module talks to.
SERP_URL = "https://serpapi.com/search"
SEMANTIC_SCHOLAR_URL = "https://api.semanticscholar.org/graph/v1"
OPEN_ALEX_URL = "https://api.openalex.org"

# API keys come from environment variables (Hugging Face Secrets).
# Each one is None when its variable is not set.
SERP_API_KEY = os.environ.get("SERP_API_KEY")
JINA_API_KEY = os.environ.get("JINA_API_KEY")
OPEN_ALEX_API_KEY = os.environ.get("OPEN_ALEX_API_KEY")
14
+
15
+ # --- HELPER ---
16
def reconstruct_abstract(abstract_inverted_index) -> str:
    """Reconstruct abstract text from OpenAlex's inverted index format.

    Args:
        abstract_inverted_index: Mapping of word -> list of positions at
            which that word occurs, or a falsy value when there is no
            abstract.

    Returns:
        The words joined by single spaces in ascending position order.
        "Abstract not available." when the index is missing, empty, or
        contains no positions. "Abstract reconstruction failed." when the
        index does not have the expected mapping-of-lists shape.
    """
    if not abstract_inverted_index:
        return "Abstract not available."
    try:
        # Invert word -> positions into position -> word.
        word_at = {
            position: word
            for word, positions in abstract_inverted_index.items()
            for position in positions
        }
        if not word_at:
            # Words were listed but none had a position: nothing to emit.
            return "Abstract not available."
        return " ".join(word_at[position] for position in sorted(word_at))
    except (AttributeError, TypeError):
        # AttributeError: not a mapping. TypeError: positions not iterable,
        # unhashable/unsortable positions, or non-string words.
        return "Abstract reconstruction failed."
28
+
29
def _normalize_openalex_work(work: dict) -> dict:
    """Convert one OpenAlex work record into the paper dict shape used by this module."""
    # A JSON null for a nested object makes `.get(key, {})` return None, so
    # fall back with `or` before drilling into it.
    oa_url = (work.get("open_access") or {}).get("oa_url")
    return {
        "paperId": work.get("id"),
        "title": work.get("title"),
        "authors": [
            {"name": (authorship.get("author") or {}).get("display_name")}
            for authorship in work.get("authorships") or []
        ],
        "year": work.get("publication_year"),
        "citationCount": work.get("cited_by_count"),
        "url": work.get("doi"),
        "openAccessPdf": {"url": oa_url} if oa_url else None,
        "abstract": reconstruct_abstract(work.get("abstract_inverted_index")),
        "externalIds": work.get("ids") or {},
        "source": "openalex",
    }


def _openalex_search(query: str, limit: int) -> list:
    """Internal helper: search OpenAlex and return normalized paper list.

    Args:
        query: Free-text search string sent as the `search` parameter.
        limit: Number of results requested via `per_page`.

    Returns:
        One normalized dict per entry of the response's `results` list
        (see `_normalize_openalex_work`), each tagged with
        `"source": "openalex"`.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx response (via `raise_for_status`). Callers are expected
            to handle this.
    """
    oa_params = {"search": query, "per_page": limit}
    # The key header is sent only when a key is configured.
    headers = {"api-key": OPEN_ALEX_API_KEY} if OPEN_ALEX_API_KEY else {}
    res = requests.get(f"{OPEN_ALEX_URL}/works", params=oa_params, headers=headers, timeout=10)
    res.raise_for_status()
    return [_normalize_openalex_work(work) for work in res.json().get("results") or []]
52
+
53
# The FastMCP server instance; every @mcp.tool() function below is registered on it.
mcp = FastMCP("ResearchAgent")
54
+
55
+ # --- 1. CONSOLIDATED SEARCH (Web & YouTube) ---
56
@mcp.tool()
def search_web(query: str, required_links: int = 10):
    """General search for websites, articles, and YouTube videos.

    Args:
        query: Search string passed to the SerpAPI Google engine.
        required_links: Number of results wanted; capped at 20.

    Returns:
        A list of at most `required_links` dicts with `title`, `link` and
        `snippet` keys, or `{"error": "..."}` if the API key is missing or
        any request fails.
    """
    if not SERP_API_KEY:
        # Fail fast with a clear message instead of sending a keyless request.
        return {"error": "Search failed: SERP_API_KEY is not configured."}

    required_links = min(required_links, 20)
    results = []
    start = 0

    while len(results) < required_links:
        params = {
            "engine": "google",
            "q": query,
            "api_key": SERP_API_KEY,
            "start": start,
        }
        try:
            # The timeout keeps a stalled connection from hanging the tool call.
            res = requests.get(SERP_URL, params=params, timeout=10)
            res.raise_for_status()
            organic = res.json().get("organic_results", [])
        except Exception as e:
            return {"error": f"Search failed: {e}"}

        if not organic:
            # No more pages: return whatever has been collected so far.
            break

        results.extend(
            {
                "title": item.get("title"),
                "link": item.get("link"),
                "snippet": item.get("snippet"),
            }
            for item in organic
        )
        start += 10  # advance the result offset to the next page

    return results[:required_links]
89
+
90
+ # --- 2. WEB CONTENT READER ---
91
@mcp.tool()
def fetch_web_content(url: str) -> str:
    """Extracts Markdown text from a URL. Does NOT work for YouTube links."""
    # YouTube links are rejected up front with an explanatory message.
    if any(host in url for host in ("youtube.com", "youtu.be")):
        return "Error: This tool cannot read YouTube videos. Please use a YouTube Transcript tool or summarize based on search snippets."

    # Attach the bearer token only when a Jina key is configured.
    auth_headers = {}
    if JINA_API_KEY:
        auth_headers["Authorization"] = f"Bearer {JINA_API_KEY}"

    try:
        # The target URL is appended to the r.jina.ai reader endpoint.
        page = requests.get(f"https://r.jina.ai/{url}", headers=auth_headers, timeout=15)
        page.raise_for_status()
        return page.text
    except Exception as err:
        # Any failure is reported as text rather than raised to the caller.
        return f"Error accessing page: {err}"
106
+
107
+ # --- 3. ACADEMIC ENGINE ---
108
def _academic_search(query: str, limit: int = 5):
    """Search Semantic Scholar, falling back to OpenAlex.

    Kept as a plain function so other tools can reuse it. Depending on the
    FastMCP version, `@mcp.tool()` may replace the decorated function with a
    tool object that cannot be called directly, so tools must not call each
    other through their decorated names.

    Returns:
        A list of paper dicts on success (Semantic Scholar records, or
        normalized OpenAlex records when Semantic Scholar fails or returns
        nothing), or an error string when both providers fail.
    """
    search_url = f"{SEMANTIC_SCHOLAR_URL}/paper/search"
    params = {
        "query": query,
        "limit": limit,
        "fields": "paperId,title,authors,year,citationCount,url,openAccessPdf,abstract,externalIds",
    }
    try:
        res = requests.get(search_url, params=params, timeout=10)
        res.raise_for_status()
        data = res.json().get("data", [])
        if data:
            return data
        # An empty result set also falls through to the OpenAlex fallback.
    except Exception as e:
        print(f"[academic_research] Semantic Scholar failed: {e}. Falling back to OpenAlex...")

    try:
        return _openalex_search(query, limit)
    except Exception as e:
        return f"Academic search failed (both Semantic Scholar and OpenAlex): {e}"


@mcp.tool()
def academic_research(query: str, limit: int = 5):
    """Finds research papers, citation counts, and direct PDF links.

    Args:
        query: Title or keywords to search for.
        limit: Maximum number of papers to request.

    Returns:
        A list of paper dicts, or an error string if every provider failed.
    """
    return _academic_search(query, limit)


# --- 4. GET PAPER ID ---
@mcp.tool()
def get_paper_id(query: str):
    """Search for a paper by title/keywords and return all available IDs.

    Args:
        query: Title or keywords identifying the paper.

    Returns:
        A dict with `title`, `paperId`, `doi`, `openalex`, `arxiv` and
        `source` for the top search hit, or an explanatory string when
        nothing was found or the search failed.
    """
    # Call the undecorated helper, not the `academic_research` tool.
    results = _academic_search(query, limit=1)
    if isinstance(results, list) and results:
        paper = results[0]
        # `externalIds` may be present but null; treat that as "no ids".
        ext_ids = paper.get("externalIds") or {}
        paper_id = paper.get("paperId", "")
        return {
            "title": paper.get("title"),
            "paperId": paper_id,
            # Both key spellings are checked because the two providers differ.
            "doi": ext_ids.get("DOI") or ext_ids.get("doi"),
            "openalex": ext_ids.get("openalex") or (paper_id if "openalex.org" in str(paper_id) else None),
            "arxiv": ext_ids.get("ArXiv") or ext_ids.get("arxiv"),
            # OpenAlex records carry an explicit "source"; others default here.
            "source": paper.get("source", "semantic_scholar"),
        }
    return "No paper found or an error occurred during ID lookup."
149
+
150
+ # --- 5. FIND RELATED PAPERS ---
151
@mcp.tool()
def find_related_papers(paper_id: str, limit: int = 5):
    """Finds similar or recommended papers based on a Paper ID.

    Args:
        paper_id: A Semantic Scholar paper id, an OpenAlex work URL
            (contains "openalex.org"), or a DOI (bare "10...." or a
            doi.org URL).
        limit: Maximum number of related papers to request.

    Returns:
        A list of paper dicts (`paperId`, `title`, `authors`, `year`,
        `citationCount`, `url`), or an explanatory string when no provider
        could serve the request.
    """
    is_openalex_id = "openalex.org" in paper_id

    if not is_openalex_id:
        # The Recommendations API has its own root; it is not served under
        # the graph/v1 base used for paper search.
        rec_url = f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}"
        params = {"limit": limit, "fields": "paperId,title,authors,year,citationCount,url"}
        try:
            res = requests.get(rec_url, params=params, timeout=10)
            res.raise_for_status()
            return res.json().get("recommendedPapers", [])
        except Exception as e:
            print(f"[find_related_papers] Semantic Scholar failed: {e}. Falling back to OpenAlex...")

    # Build the OpenAlex filter from whichever identifier form was given.
    if is_openalex_id:
        oa_filter = f"related_to:{paper_id}"
    elif paper_id.startswith("10.") or "doi.org" in paper_id:
        doi = paper_id
        for prefix in (
            "https://doi.org/",
            "http://doi.org/",
            "https://dx.doi.org/",
            "http://dx.doi.org/",
        ):
            doi = doi.replace(prefix, "")
        oa_filter = f"related_to:doi:{doi}"
    else:
        return "Could not find related papers: provide an OpenAlex ID or DOI for the OpenAlex fallback."

    try:
        oa_url = f"{OPEN_ALEX_URL}/works"
        oa_params = {"filter": oa_filter, "per_page": limit}
        headers = {"api-key": OPEN_ALEX_API_KEY} if OPEN_ALEX_API_KEY else {}
        res = requests.get(oa_url, params=oa_params, headers=headers, timeout=10)
        res.raise_for_status()
        results = res.json().get("results") or []
        return [
            {
                "paperId": r.get("id"),
                "title": r.get("title"),
                # `author` may be an explicit null; guard before `.get`.
                "authors": [
                    {"name": (a.get("author") or {}).get("display_name")}
                    for a in r.get("authorships") or []
                ],
                "year": r.get("publication_year"),
                "citationCount": r.get("cited_by_count"),
                "url": r.get("doi"),
            }
            for r in results
        ]
    except Exception as e:
        return f"Could not find related papers: {e}"
189
+
190
if __name__ == "__main__":
    # When run as a script, serve the tools over SSE on all interfaces, port 7860.
    mcp.run(
        transport="sse",
        host="0.0.0.0",
        port=7860,
    )