leonsimon23 committed on
Commit
2f1d60e
·
verified ·
1 Parent(s): b76dc9d

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +449 -0
Dockerfile ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Start from the official slim Python 3.11 image.
FROM python:3.11-slim

# System packages: git (clone the project), curl (uv installer and the
# container health check), sed (build-time patch of http_wrapper.py).
# DEBIAN_FRONTEND is set for this command only so that it does not persist
# into the environment of the running container.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive \
    apt-get install -y --no-install-recommends git curl sed && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install uv via its install script, then put /root/.local/bin on PATH so the
# following steps can invoke `uv`.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:${PATH}"

# Working directory for everything below.
WORKDIR /app

# Clone the project sources into the working directory.
RUN git clone https://github.com/openags/paper-search-mcp.git .

# Create a virtual environment and put its bin directory first on PATH so
# `python` and installed console scripts resolve to the venv.
RUN uv venv .venv
ENV PATH="/app/.venv/bin:${PATH}"

# Install the project itself (editable).
RUN uv pip install -e .

# Install the extra web dependencies used by the HTTP wrapper.
RUN uv pip install fastapi uvicorn python-multipart
30
+
31
+ # 创建一个优化的HTTP API包装器
32
+ COPY <<'EOF' /app/http_wrapper.py
33
+ #!/usr/bin/env python3
34
+
35
+ import asyncio
36
+ import json
37
+ import logging
38
+ import traceback
39
+ import os
40
+ from contextlib import asynccontextmanager
41
+ from typing import Any, Dict, Optional, List
42
+
43
+ import uvicorn
44
+ from fastapi import FastAPI, HTTPException
45
+ from fastapi.responses import HTMLResponse, JSONResponse
46
+ from pydantic import BaseModel
47
+
48
+ # 设置日志
49
+ logging.basicConfig(level=logging.INFO)
50
+ logger = logging.getLogger(__name__)
51
+
52
# Request models.
# SearchRequest: a query string plus a result cap.
# NOTE(review): no endpoint shown in this file takes this model; the routes
# use GenericSearchRequest instead — confirm whether it is still needed.
class SearchRequest(BaseModel):
    query: str  # free-text search query
    max_results: int = 10  # maximum number of results; defaults to 10
56
+
57
# DownloadRequest: identifies a single paper by id.
# NOTE(review): no endpoint shown in this file takes this model; the
# /download route uses GenericDownloadRequest — confirm whether it is needed.
class DownloadRequest(BaseModel):
    paper_id: str  # identifier of the paper to download
59
+
60
# Module-level registries, populated in place by explore_project_structure().
# available_functions: maps a name ("search_<platform>" or
# "<platform>_searcher") to the discovered callable or searcher instance.
available_functions = {}
# project_info: diagnostic data gathered during discovery (import flag and
# dir() listings of the inspected modules).
project_info = {}
63
+
64
async def explore_project_structure():
    """Discover the search entry points offered by ``paper_search_mcp``.

    The module-level ``available_functions`` and ``project_info`` dicts are
    filled in place:

    * each known ``search_*`` attribute present on ``paper_search_mcp.server``
      is registered under its own name;
    * the ``dir()`` listing of every inspected module is recorded in
      ``project_info`` for diagnostics;
    * for each known platform submodule, every public attribute whose name
      ends in ``Searcher`` and is callable is called with no arguments, and
      the returned object is registered as ``"<submodule>_searcher"`` (a later
      match in the same submodule replaces an earlier one).

    Import and instantiation failures are logged instead of propagated, so a
    partially working installation still yields whatever could be loaded.
    """
    global available_functions, project_info

    try:
        # The top-level package must import; otherwise nothing else is tried.
        import paper_search_mcp
        project_info['main_module'] = True
        logger.info("主模块导入成功")

        # Server module: pick up module-level search functions by name.
        try:
            import paper_search_mcp.server as server_module
            server_attrs = dir(server_module)
            project_info['server_module'] = server_attrs
            logger.info(f"服务器模块属性: {server_attrs}")

            expected_names = (
                'search_arxiv', 'search_pubmed', 'search_biorxiv',
                'search_crossref', 'search_semantic', 'search_google_scholar',
                'search_iacr', 'search_medrxiv',
            )
            for name in expected_names:
                if not hasattr(server_module, name):
                    continue
                available_functions[name] = getattr(server_module, name)
                logger.info(f"找到搜索函数: {name}")

        except Exception as e:
            logger.error(f"服务器模块导入失败: {e}")

        # academic_platforms package: only its attribute listing is recorded.
        try:
            import paper_search_mcp.academic_platforms as platforms
            platform_attrs = dir(platforms)
            project_info['platforms_module'] = platform_attrs
            logger.info(f"学术平台模块属性: {platform_attrs}")

        except Exception as e:
            logger.error(f"学术平台模块导入失败: {e}")

        # Platform submodules: instantiate whatever *Searcher each one exposes.
        for submodule in ('arxiv', 'pubmed', 'biorxiv', 'crossref', 'semantic'):
            try:
                module = __import__(
                    f'paper_search_mcp.academic_platforms.{submodule}',
                    fromlist=[submodule],
                )
                module_attrs = dir(module)
                project_info[f'{submodule}_module'] = module_attrs
                logger.info(f"{submodule}模块属性: {module_attrs}")

                for attr_name in module_attrs:
                    # Skip private names and anything not named *Searcher.
                    if attr_name.startswith('_') or not attr_name.endswith('Searcher'):
                        continue
                    candidate = getattr(module, attr_name)
                    if not callable(candidate):
                        continue
                    try:
                        available_functions[f'{submodule}_searcher'] = candidate()
                        logger.info(f"创建{submodule}搜索器实例: {attr_name}")
                    except Exception as e:
                        logger.warning(f"无法创建{submodule}搜索器实例: {e}")

            except Exception as e:
                logger.warning(f"子模块 {submodule} 导入失败: {e}")

        logger.info(f"总共找到 {len(available_functions)} 个可用函数")

    except Exception as e:
        logger.error(f"项目探索失败: {e}")
        logger.error(traceback.format_exc())
133
+
134
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler: run discovery at startup, log at shutdown.

    Startup (before ``yield``) runs ``explore_project_structure()`` and logs
    the startup banner; shutdown (after ``yield``) only logs a message.

    The banner lines are logged here because FastAPI does not invoke
    ``@app.on_event("startup")`` handlers when the application is constructed
    with a ``lifespan`` argument, so a separate startup hook would stay silent.
    """
    logger.info("应用启动中...")
    await explore_project_structure()
    logger.info("🚀 Paper Search MCP Server started successfully!")
    logger.info(f"📡 Server running on http://0.0.0.0:{os.getenv('PORT', '7860')}")
    logger.info("应用启动完成!")
    yield
    logger.info("应用关闭中...")
142
+
143
# Create the FastAPI application. Startup/shutdown work is delegated to the
# `lifespan` context manager; title, description and version are passed as
# application metadata.
app = FastAPI(
    title="Paper Search MCP Server",
    description="HTTP wrapper for Paper Search MCP Server with dynamic function discovery",
    version="1.0.0",
    lifespan=lifespan
)
150
+
151
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the HTML landing page.

    The page shows how many search functions were discovered, lists their
    names as JSON, and describes the available API endpoints.

    The former "project info" section has been removed from the template: it
    was wrapped in an HTML comment, but because the template is an f-string
    the data was still serialized on every request and sent in the page
    source. The same data remains available from ``GET /functions``.
    """
    function_count = len(available_functions)
    function_names_json = json.dumps(list(available_functions.keys()), indent=2)

    # Literal CSS braces are doubled because this is an f-string.
    return f"""
<!DOCTYPE html>
<html>
<head>
    <title>Paper Search MCP Server</title>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <style>
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            margin: 0;
            padding: 20px;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh;
            color: white;
        }}
        .container {{
            max-width: 1200px;
            margin: 0 auto;
            background: rgba(255,255,255,0.1);
            border-radius: 20px;
            padding: 30px;
            backdrop-filter: blur(10px);
            box-shadow: 0 8px 32px rgba(0,0,0,0.3);
        }}
        .header {{
            text-align: center;
            margin-bottom: 30px;
        }}
        .header h1 {{
            font-size: 2.5em;
            margin-bottom: 10px;
            background: linear-gradient(45deg, #fff, #f0f0f0);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
        }}
        .info {{
            background: rgba(255,255,255,0.2);
            padding: 20px;
            margin: 20px 0;
            border-radius: 15px;
            border: 1px solid rgba(255,255,255,0.3);
        }}
        .endpoint {{
            background: rgba(255,255,255,0.15);
            padding: 15px;
            margin: 15px 0;
            border-radius: 10px;
            border-left: 4px solid #4CAF50;
            transition: all 0.3s ease;
        }}
        .endpoint:hover {{
            background: rgba(255,255,255,0.25);
            transform: translateX(5px);
        }}
        .method {{
            color: #4CAF50;
            font-weight: bold;
            padding: 4px 8px;
            background: rgba(76,175,80,0.2);
            border-radius: 4px;
            margin-right: 10px;
        }}
        .method.post {{ color: #FF9800; background: rgba(255,152,0,0.2); }}
        pre {{
            background: rgba(0,0,0,0.3);
            padding: 15px;
            border-radius: 8px;
            overflow-x: auto;
            font-family: 'Monaco', 'Menlo', monospace;
            border: 1px solid rgba(255,255,255,0.2);
        }}
        code {{
            background: rgba(255,255,255,0.2);
            padding: 2px 6px;
            border-radius: 4px;
            font-family: 'Monaco', 'Menlo', monospace;
        }}
        .status {{
            display: inline-block;
            padding: 5px 15px;
            background: #4CAF50;
            color: white;
            border-radius: 20px;
            font-size: 0.9em;
            margin-left: 10px;
        }}
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>📚 Paper Search MCP Server</h1>
            <p>学术论文搜索与下载服务 <span class="status">🟢 Running</span></p>
        </div>

        <div class="info">
            <h3>🔍 发现的搜索功能 ({function_count} 个):</h3>
            <pre>{function_names_json}</pre>
        </div>

        <h2>🛠️ 可用API端点:</h2>

        <div class="endpoint">
            <span class="method">GET</span> <code>/health</code> - 健康检查与状态信息
        </div>

        <div class="endpoint">
            <span class="method">GET</span> <code>/functions</code> - 列出所有发现的功能
        </div>

        <div class="endpoint">
            <span class="method post">POST</span> <code>/search</code> - 通用搜索接口
            <br><small>📝 示例: {{"platform": "arxiv", "query": "machine learning", "max_results": 10}}</small>
        </div>

        <div class="endpoint">
            <span class="method post">POST</span> <code>/download</code> - 论文下载接口
            <br><small>📝 示例: {{"platform": "arxiv", "paper_id": "2301.12345"}}</small>
        </div>

        <div class="endpoint">
            <span class="method">GET</span> <code>/docs</code> - 📖 API文档 (Swagger UI)
        </div>

        <div class="endpoint">
            <span class="method">GET</span> <code>/redoc</code> - 📖 API文档 (ReDoc)
        </div>
    </div>
</body>
</html>
"""
292
+
293
@app.get("/health")
async def health():
    """Report liveness plus the names and count of the discovered functions."""
    names = list(available_functions)
    return dict(
        status="healthy",
        message="Paper Search MCP Server is running",
        functions_loaded=len(names),
        available_functions=names,
        uptime="running",
    )
302
+
303
@app.get("/functions")
async def list_functions():
    """Return the discovered function names, the diagnostics dict and a count."""
    names = list(available_functions)
    payload = {"available_functions": names}
    payload["project_info"] = project_info
    payload["total_functions"] = len(names)
    return payload
310
+
311
# Request body for POST /search.
class GenericSearchRequest(BaseModel):
    platform: str  # platform key used to look up "search_<platform>" etc.
    query: str  # search text passed to the search callable
    max_results: int = 10  # result cap passed to the search callable; defaults to 10
315
+
316
async def _call_search(target, query, max_results):
    """Invoke a discovered search callable, whether it is sync or async.

    Coroutine functions are awaited directly. Anything else is treated as a
    blocking callable and run in a worker thread so the event loop stays
    responsive; if such a call nevertheless hands back an awaitable, that is
    awaited as well.
    """
    import inspect  # local import keeps this block self-contained

    if inspect.iscoroutinefunction(target):
        return await target(query, max_results)

    outcome = await asyncio.to_thread(target, query, max_results)
    if inspect.isawaitable(outcome):
        outcome = await outcome
    return outcome


@app.post("/search")
async def generic_search(request: GenericSearchRequest):
    """Search one platform through whichever discovered entry point matches.

    Lookup order in ``available_functions``: ``search_<platform>``,
    ``<platform>_search``, ``<platform>_searcher``. If the match has a
    ``search`` attribute (a searcher instance) that method is called,
    otherwise the match itself is called, with ``(query, max_results)``.

    Returns:
        On success, a dict with ``platform``, ``function_used``, ``query``,
        ``results`` and ``count``. On an unknown platform or a failed call, a
        dict with an ``error`` key and diagnostic fields (HTTP status stays
        200, as before).

    Raises:
        HTTPException: status 500 for any unexpected error outside the search
            call itself.
    """
    try:
        possible_function_names = [
            f"search_{request.platform}",
            f"{request.platform}_search",
            f"{request.platform}_searcher",
        ]

        # Decide on key presence, not on the truthiness of the stored object.
        used_function_name = next(
            (name for name in possible_function_names if name in available_functions),
            None,
        )
        if used_function_name is None:
            platforms = (
                name.replace('search_', '').replace('_searcher', '')
                for name in available_functions
            )
            return {
                "error": f"No search function found for platform: {request.platform}",
                # A platform can be registered both as a function and as a
                # searcher; list it once, keeping first-seen order.
                "available_platforms": list(dict.fromkeys(platforms)),
                "searched_for": possible_function_names,
            }

        search_function = available_functions[used_function_name]
        if hasattr(search_function, 'search'):
            target, failure_label = search_function.search, "搜索器实例调用失败"
        else:
            target, failure_label = search_function, "函数直接调用失败"

        result = None
        error_msgs = []
        try:
            result = await _call_search(target, request.query, request.max_results)
        except Exception as call_error:
            logger.error(f"搜索函数调用错误: {call_error}")
            error_msgs.append(f"{failure_label}: {str(call_error)}")

        if result is None:
            return {
                "error": "搜索调用失败",
                "function_used": used_function_name,
                "error_messages": error_msgs,
            }

        return {
            "platform": request.platform,
            "function_used": used_function_name,
            "query": request.query,
            "results": result,
            "count": len(result) if isinstance(result, (list, tuple)) else 1,
        }

    except Exception as e:
        logger.error(f"通用搜索错误: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
385
+
386
# Request body for POST /download.
class GenericDownloadRequest(BaseModel):
    platform: str  # platform the paper belongs to
    paper_id: str  # identifier of the paper on that platform
389
+
390
@app.post("/download")
async def generic_download(request: GenericDownloadRequest):
    """Placeholder endpoint: echo the request with a not-implemented notice."""
    response = {"message": "Download functionality is not implemented yet"}
    response["platform"] = request.platform
    response["paper_id"] = request.paper_id
    return response
397
+
398
# Log startup messages.
# NOTE(review): `app` is constructed with lifespan=...; according to the
# FastAPI lifespan documentation, on_event("startup") handlers are not invoked
# in that case, so this hook probably never runs — confirm, and consider
# emitting these messages from the lifespan handler instead.
@app.on_event("startup")
async def startup_event():
    """Log a startup banner including the port read from the PORT env var."""
    logger.info("🚀 Paper Search MCP Server started successfully!")
    logger.info(f"📡 Server running on http://0.0.0.0:{os.getenv('PORT', '7860')}")
403
+
404
if __name__ == "__main__":
    # Bind on all interfaces; the PORT environment variable, when set,
    # overrides the default port 7860.
    host = "0.0.0.0"
    port = int(os.getenv("PORT", "7860"))

    logger.info(f"🌟 Starting server on {host}:{port}")

    server_options = dict(host=host, port=port, log_level="info", access_log=True)
    uvicorn.run(app, **server_options)
418
+ EOF
419
+
420
# ==============================================================================
# BEGIN PATCH: fix the async call error in http_wrapper.py
# ------------------------------------------------------------------------------
# Reason: the script as written uses asyncio.to_thread to call a function
#         that is itself asynchronous. That returns an un-awaited coroutine
#         object, which then triggers a FastAPI serialization error.
# Fix:    use `sed` at image build time to edit the script in place, replacing
#         the two `asyncio.to_thread` calls with direct `await` calls.
# Note:   sed leaves the file unchanged (and still succeeds) when the exact
#         text is not present.
# ==============================================================================
RUN \
    sed -i 's/result = await asyncio.to_thread(search_function.search, request.query, request.max_results)/result = await search_function.search(request.query, request.max_results)/g' /app/http_wrapper.py && \
    sed -i 's/result = await asyncio.to_thread(search_function, request.query, request.max_results)/result = await search_function(request.query, request.max_results)/g' /app/http_wrapper.py

# ==============================================================================
# END PATCH
# ==============================================================================
435
+
436
# Runtime environment variables. PORT is read by http_wrapper.py when it
# starts the server and can be overridden when the container is run.
ENV SEMANTIC_SCHOLAR_API_KEY=""
ENV PYTHONPATH=/app
ENV PORT=7860

# Declare the listening port (Hugging Face Spaces uses 7860 by default).
EXPOSE 7860

# Health check. The probe follows the PORT variable (falling back to 7860) so
# that it keeps hitting the right port when PORT is overridden at run time;
# the shell-form CMD lets /bin/sh expand the variable inside the container.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl -f "http://localhost:${PORT:-7860}/health" || exit 1

# Start the HTTP wrapper.
CMD ["python", "/app/http_wrapper.py"]