Spaces:
Sleeping
Sleeping
# Use the official Python base image
FROM python:3.11-slim

# Install system dependencies
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
    apt-get install -y --no-install-recommends git curl sed && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install uv.
# The installer is saved to a file before being executed instead of being
# piped straight into `sh`: with `curl ... | sh` the exit status of the
# pipeline is that of `sh`, so a failed download would not fail this step.
RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
    sh /tmp/uv-install.sh && \
    rm -f /tmp/uv-install.sh
ENV PATH="/root/.local/bin:${PATH}"

# Set the working directory
WORKDIR /app

# Clone the project source code
RUN git clone https://github.com/openags/paper-search-mcp.git .

# Create the virtual environment and put it first on PATH
RUN uv venv .venv
ENV PATH="/app/.venv/bin:${PATH}"

# Install the project's own dependencies
RUN uv pip install -e .

# Install the extra web dependencies
RUN uv pip install fastapi uvicorn python-multipart

# Create the HTTP API wrapper (heredoc COPY requires BuildKit)
COPY <<'EOF' /app/http_wrapper.py
| #!/usr/bin/env python3 | |
| import asyncio | |
| import json | |
| import logging | |
| import traceback | |
| import os | |
| from contextlib import asynccontextmanager | |
| from typing import Any, Dict, Optional, List | |
| import uvicorn | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.responses import HTMLResponse, JSONResponse | |
| from pydantic import BaseModel | |
# Logging: module-scoped logger plus INFO-level root configuration.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# Request models
class SearchRequest(BaseModel):
    """Request body holding a search query and a result limit (default 10)."""

    query: str
    max_results: int = 10
class DownloadRequest(BaseModel):
    """Request body identifying a single paper by its id."""

    paper_id: str
# Module-level registries, filled in place by explore_project_structure().
# available_functions: name -> discovered search callable or searcher instance.
# project_info: diagnostic data (dir() listings) about the modules that imported.
available_functions: Dict[str, Any] = {}
project_info: Dict[str, Any] = {}
async def explore_project_structure():
    """Probe ``paper_search_mcp`` and register the search entry points it exposes.

    Two module-level dicts are filled in place:

    * ``project_info`` receives a ``main_module`` flag plus the ``dir()``
      listing of every module that imported successfully.
    * ``available_functions`` receives the known ``search_*`` attributes found
      on ``paper_search_mcp.server`` and, per platform submodule, an instance
      of each public ``*Searcher`` attribute stored under
      ``"<platform>_searcher"`` (a later match overwrites an earlier one).

    Every failure is logged and swallowed; nothing is raised to the caller.
    """
    global available_functions, project_info

    known_search_names = (
        'search_arxiv', 'search_pubmed', 'search_biorxiv',
        'search_crossref', 'search_semantic', 'search_google_scholar',
        'search_iacr', 'search_medrxiv',
    )
    platform_names = ('arxiv', 'pubmed', 'biorxiv', 'crossref', 'semantic')

    try:
        # Importing the top-level package is itself the first probe.
        import paper_search_mcp

        project_info['main_module'] = True
        logger.info("主模块导入成功")

        # Step 1: search functions exposed directly by the server module.
        try:
            import paper_search_mcp.server as server_module

            server_attrs = dir(server_module)
            project_info['server_module'] = server_attrs
            logger.info(f"服务器模块属性: {server_attrs}")

            for name in known_search_names:
                if not hasattr(server_module, name):
                    continue
                available_functions[name] = getattr(server_module, name)
                logger.info(f"找到搜索函数: {name}")
        except Exception as server_error:
            logger.error(f"服务器模块导入失败: {server_error}")

        # Step 2: record what the academic_platforms package contains.
        try:
            import paper_search_mcp.academic_platforms as platforms

            platform_attrs = dir(platforms)
            project_info['platforms_module'] = platform_attrs
            logger.info(f"学术平台模块属性: {platform_attrs}")
        except Exception as platforms_error:
            logger.error(f"学术平台模块导入失败: {platforms_error}")

        # Step 3: instantiate the searcher classes of each platform submodule.
        for submodule in platform_names:
            try:
                module = __import__(
                    f'paper_search_mcp.academic_platforms.{submodule}',
                    fromlist=[submodule],
                )
                module_attrs = dir(module)
                project_info[f'{submodule}_module'] = module_attrs
                logger.info(f"{submodule}模块属性: {module_attrs}")

                for attr_name in module_attrs:
                    if attr_name.startswith('_') or not attr_name.endswith('Searcher'):
                        continue
                    candidate = getattr(module, attr_name)
                    if not callable(candidate):
                        continue
                    try:
                        # Searchers are constructed with no arguments.
                        available_functions[f'{submodule}_searcher'] = candidate()
                        logger.info(f"创建{submodule}搜索器实例: {attr_name}")
                    except Exception as init_error:
                        logger.warning(f"无法创建{submodule}搜索器实例: {init_error}")
            except Exception as import_error:
                logger.warning(f"子模块 {submodule} 导入失败: {import_error}")

        logger.info(f"总共找到 {len(available_functions)} 个可用函数")
    except Exception as fatal_error:
        logger.error(f"项目探索失败: {fatal_error}")
        logger.error(traceback.format_exc())
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: discover search functions before serving requests.

    Code before ``yield`` runs once at startup and code after it runs once at
    shutdown. The ``asynccontextmanager`` decorator is required because the
    ``lifespan=`` argument of ``FastAPI`` expects an async context manager,
    not a bare async generator function.
    """
    logger.info("应用启动中...")
    await explore_project_structure()
    logger.info("应用启动完成!")
    yield
    logger.info("应用关闭中...")
# Create the FastAPI application; startup/shutdown work is done by `lifespan`.
app = FastAPI(
    lifespan=lifespan,
    title="Paper Search MCP Server",
    version="1.0.0",
    description="HTTP wrapper for Paper Search MCP Server with dynamic function discovery",
)
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the HTML landing page.

    The page lists the names currently held in ``available_functions`` and the
    API endpoints it advertises. The handler is registered on ``/`` with
    ``HTMLResponse`` so the returned string is sent as HTML rather than being
    JSON-encoded.

    Note: the "project info" section is wrapped in an HTML comment, but it is
    still interpolated by the f-string, so ``project_info`` is serialized and
    sent in the page source on every request.
    """
    return f"""
<!DOCTYPE html>
<html>
<head>
<title>Paper Search MCP Server</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
margin: 0;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
color: white;
}}
.container {{
max-width: 1200px;
margin: 0 auto;
background: rgba(255,255,255,0.1);
border-radius: 20px;
padding: 30px;
backdrop-filter: blur(10px);
box-shadow: 0 8px 32px rgba(0,0,0,0.3);
}}
.header {{
text-align: center;
margin-bottom: 30px;
}}
.header h1 {{
font-size: 2.5em;
margin-bottom: 10px;
background: linear-gradient(45deg, #fff, #f0f0f0);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}}
.info {{
background: rgba(255,255,255,0.2);
padding: 20px;
margin: 20px 0;
border-radius: 15px;
border: 1px solid rgba(255,255,255,0.3);
}}
.endpoint {{
background: rgba(255,255,255,0.15);
padding: 15px;
margin: 15px 0;
border-radius: 10px;
border-left: 4px solid #4CAF50;
transition: all 0.3s ease;
}}
.endpoint:hover {{
background: rgba(255,255,255,0.25);
transform: translateX(5px);
}}
.method {{
color: #4CAF50;
font-weight: bold;
padding: 4px 8px;
background: rgba(76,175,80,0.2);
border-radius: 4px;
margin-right: 10px;
}}
.method.post {{ color: #FF9800; background: rgba(255,152,0,0.2); }}
pre {{
background: rgba(0,0,0,0.3);
padding: 15px;
border-radius: 8px;
overflow-x: auto;
font-family: 'Monaco', 'Menlo', monospace;
border: 1px solid rgba(255,255,255,0.2);
}}
code {{
background: rgba(255,255,255,0.2);
padding: 2px 6px;
border-radius: 4px;
font-family: 'Monaco', 'Menlo', monospace;
}}
.status {{
display: inline-block;
padding: 5px 15px;
background: #4CAF50;
color: white;
border-radius: 20px;
font-size: 0.9em;
margin-left: 10px;
}}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>📚 Paper Search MCP Server</h1>
<p>学术论文搜索与下载服务 <span class="status">🟢 Running</span></p>
</div>
<div class="info">
<h3>🔍 发现的搜索功能 ({len(available_functions)} 个):</h3>
<pre>{json.dumps(list(available_functions.keys()), indent=2)}</pre>
</div>
<!--
<div class="info">
<h3>📋 项目信息:</h3>
<pre>{json.dumps(project_info, indent=2, default=str)}</pre>
</div>
-->
<h2>🛠️ 可用API端点:</h2>
<div class="endpoint">
<span class="method">GET</span> <code>/health</code> - 健康检查与状态信息
</div>
<div class="endpoint">
<span class="method">GET</span> <code>/functions</code> - 列出所有发现的功能
</div>
<div class="endpoint">
<span class="method post">POST</span> <code>/search</code> - 通用搜索接口
<br><small>📝 示例: {{"platform": "arxiv", "query": "machine learning", "max_results": 10}}</small>
</div>
<div class="endpoint">
<span class="method post">POST</span> <code>/download</code> - 论文下载接口
<br><small>📝 示例: {{"platform": "arxiv", "paper_id": "2301.12345"}}</small>
</div>
<div class="endpoint">
<span class="method">GET</span> <code>/docs</code> - 📖 API文档 (Swagger UI)
</div>
<div class="endpoint">
<span class="method">GET</span> <code>/redoc</code> - 📖 API文档 (ReDoc)
</div>
</div>
</body>
</html>
"""
@app.get("/health")
async def health():
    """Report liveness and the currently registered search functions.

    Registered on ``/health``, the path the landing page advertises. Returns a
    JSON object with a fixed status/message, the number of entries in
    ``available_functions`` and their names.
    """
    return {
        "status": "healthy",
        "message": "Paper Search MCP Server is running",
        "functions_loaded": len(available_functions),
        "available_functions": list(available_functions.keys()),
        "uptime": "running"
    }
@app.get("/functions")
async def list_functions():
    """List every discovered search function together with discovery details.

    Registered on ``/functions``, the path the landing page advertises. The
    response carries the names in ``available_functions``, the raw
    ``project_info`` dict and the total count.
    """
    return {
        "available_functions": list(available_functions.keys()),
        "project_info": project_info,
        "total_functions": len(available_functions)
    }
class GenericSearchRequest(BaseModel):
    """Body of ``POST /search``: target platform, query text and result limit."""

    platform: str
    query: str
    max_results: int = 10
async def _call_search(target, query, max_results):
    """Call ``target(query, max_results)`` whether it is sync or async.

    Coroutine functions are awaited directly on the event loop. Anything else
    is run in a worker thread so blocking work does not stall the loop; if
    that call still hands back an awaitable (e.g. a sync wrapper around an
    async function), it is awaited before returning.
    """
    import inspect  # local import: only this helper needs it

    if inspect.iscoroutinefunction(target):
        return await target(query, max_results)

    outcome = await asyncio.to_thread(target, query, max_results)
    if inspect.isawaitable(outcome):
        outcome = await outcome
    return outcome


@app.post("/search")
async def generic_search(request: GenericSearchRequest):
    """Run a search on the requested platform using a discovered callable.

    The platform name is mapped to an entry of ``available_functions`` by
    trying ``search_<platform>``, ``<platform>_search`` and
    ``<platform>_searcher`` in that order. An entry with a ``search``
    attribute is treated as a searcher instance and its ``search`` method is
    called; any other entry is called directly.

    Returns:
        On success, a dict with the platform, the registry name used, the
        query, the results and a count. When no entry matches or the call
        fails, a dict with an ``error`` key describing the problem.

    Raises:
        HTTPException: status 500 for any unexpected error outside the
            search call itself.
    """
    try:
        possible_function_names = [
            f"search_{request.platform}",
            f"{request.platform}_search",
            f"{request.platform}_searcher",
        ]
        used_function_name = next(
            (name for name in possible_function_names if name in available_functions),
            None,
        )

        # Compare against None so a registered-but-falsy object is still used.
        if used_function_name is None:
            return {
                "error": f"No search function found for platform: {request.platform}",
                "available_platforms": [
                    name.replace('search_', '').replace('_searcher', '')
                    for name in available_functions
                ],
                "searched_for": possible_function_names,
            }

        search_function = available_functions[used_function_name]

        try:
            # Searcher instances expose .search(); plain functions are called as-is.
            if hasattr(search_function, 'search'):
                target = search_function.search
                failure_label = "搜索器实例调用失败"
            else:
                target = search_function
                failure_label = "函数直接调用失败"

            result = None
            error_msgs = []
            try:
                result = await _call_search(target, request.query, request.max_results)
            except Exception as call_error:
                error_msgs.append(f"{failure_label}: {str(call_error)}")

            if result is None:
                return {
                    "error": "搜索调用失败",
                    "function_used": used_function_name,
                    "error_messages": error_msgs,
                }

            return {
                "platform": request.platform,
                "function_used": used_function_name,
                "query": request.query,
                "results": result,
                "count": len(result) if isinstance(result, (list, tuple)) else 1,
            }
        except Exception as e:
            logger.error(f"搜索函数调用错误: {e}")
            return {
                "error": f"Function call failed: {str(e)}",
                "function_used": used_function_name,
            }
    except Exception as e:
        logger.error(f"通用搜索错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
class GenericDownloadRequest(BaseModel):
    """Body of ``POST /download``: target platform and paper identifier."""

    platform: str
    paper_id: str
@app.post("/download")
async def generic_download(request: GenericDownloadRequest):
    """Placeholder for paper downloads; echoes the request back.

    Registered on ``/download``, the path the landing page advertises. No
    download is performed: the response only states that the feature is not
    implemented and repeats the requested platform and paper id.
    """
    return {
        "message": "Download functionality is not implemented yet",
        "platform": request.platform,
        "paper_id": request.paper_id
    }
# Startup banner
async def startup_event():
    """Log a startup banner with the port taken from ``PORT`` (default 7860).

    NOTE(review): nothing in the visible source registers or calls this
    coroutine, so the banner is only logged if a caller invokes it.
    """
    port_text = os.getenv('PORT', '7860')
    logger.info("🚀 Paper Search MCP Server started successfully!")
    logger.info(f"📡 Server running on http://0.0.0.0:{port_text}")
if __name__ == "__main__":
    # Bind on all interfaces; the port comes from the PORT environment
    # variable and falls back to 7860.
    host = "0.0.0.0"
    port = int(os.getenv("PORT", "7860"))

    logger.info(f"🌟 Starting server on {host}:{port}")
    uvicorn.run(app, host=host, port=port, log_level="info", access_log=True)
EOF

# ==============================================================================
# BEGIN PATCH: fix the async call error in http_wrapper.py
# ------------------------------------------------------------------------------
# Reason: the script above uses asyncio.to_thread to call a function that is
#         itself asynchronous. That yields an un-executed coroutine object,
#         which then triggers a FastAPI serialization error.
# Fix:    edit the script with `sed` at image build time, replacing each
#         asyncio.to_thread call with a direct `await` of the callable.
# NOTE(review): this assumes every discovered callable is a coroutine function;
#         confirm against paper-search-mcp.
# ==============================================================================
RUN sed -i \
    -e 's/result = await asyncio.to_thread(search_function.search, request.query, request.max_results)/result = await search_function.search(request.query, request.max_results)/g' \
    -e 's/result = await asyncio.to_thread(search_function, request.query, request.max_results)/result = await search_function(request.query, request.max_results)/g' \
    /app/http_wrapper.py
# ==============================================================================
# END PATCH
# ==============================================================================
# Runtime environment variables
ENV SEMANTIC_SCHOLAR_API_KEY=""
ENV PYTHONPATH=/app
ENV PORT=7860

# Declare the port (Hugging Face Spaces uses 7860 by default)
EXPOSE 7860

# Health check.
# The wrapper binds to $PORT, so the probe reads the same variable instead of
# a hard-coded 7860; otherwise overriding PORT at run time would make every
# probe fail. The fallback mirrors the default used by http_wrapper.py.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl -f "http://localhost:${PORT:-7860}/health" || exit 1

# Start the HTTP wrapper
CMD ["python", "/app/http_wrapper.py"]