# ============================================================================
# Async OpenAI Compatible Model Example
# ============================================================================
# This script demonstrates how to use the async_openai_compatible model with:
#   1. Basic video/image evaluation (without MCP tools)
#   2. Tool-enabled evaluation (with MCP client)
#
# The AsyncOpenAIChat class supports asynchronous processing of requests
# using OpenAI-compatible API servers (e.g., vLLM, or local LLMs behind an
# OpenAI wrapper).
# ============================================================================
# --- Server connection settings ---------------------------------------------
# Endpoint of the OpenAI-compatible server (e.g., a local vLLM instance).
export OPENAI_API_BASE="http://localhost:8000/v1"
# Local servers typically do not validate the key; 'EMPTY' is the
# conventional placeholder.
export OPENAI_API_KEY='EMPTY'
# Model name sent with every API call.
MODEL_VERSION="Qwen/Qwen3-VL-4B-Instruct"
# ============================================================================
# EXAMPLE 1: Basic Usage (Without MCP Tools)
# ============================================================================
# This is the simplest usage pattern, without tool calling.
# The model will process video/image queries and return responses.
#
# Key Parameters:
#   - model_version: The model name (used in API calls)
#   - max_pixels: Maximum pixels for image resolution (default: 151200)
#   - base_url: OpenAI API base URL
#   - api_key: API key (use 'EMPTY' for local servers)
#   - num_cpus: Number of concurrent workers (controls parallelism)
#   - timeout: Request timeout in seconds
#   - is_qwen3_vl: Set to True for Qwen3-VL-specific formatting; set to
#     False for other models
# Run the evaluation: a single process on a fixed rendezvous port.
# The --model_args value is quoted so the expanded variables survive word
# splitting and globbing intact (ShellCheck SC2086); the 6000 s timeout is
# deliberately generous because video requests can be slow.
accelerate launch --num_processes=1 --main_process_port 12345 -m lmms_eval \
  --model async_openai \
  --model_args "model_version=${MODEL_VERSION},max_pixels=151200,base_url=${OPENAI_API_BASE},api_key=${OPENAI_API_KEY},num_cpus=8,timeout=6000,is_qwen3_vl=True" \
  --tasks videomme \
  --batch_size 1 \
  --output_path ./logs/ \
  --log_samples --verbosity DEBUG
# ============================================================================
# EXAMPLE 2: With MCP Client Tools (Tool-Enabled Evaluation)
# ============================================================================
# This example demonstrates how to enable an MCP (Model Context Protocol)
# client for tool calling. The model can then use external tools during
# inference.
#
# IMPORTANT: Before running this, you need to:
#   1. Create an MCP server that exposes tools (e.g., image processing,
#      web search)
#   2. The MCP server should be a Python script that implements tool
#      definitions
#   3. Pass the path to the MCP server script via the mcp_server_path
#      parameter
#
# How MCP Tool Calling Works:
# ---------------------------
#   1. User sends a request with a question
#   2. The model receives the message and processes it
#   3. The OpenAI API may decide to call a tool (finish_reason == "tool_calls")
#   4. The MCPClient retrieves tool definitions from the MCP server
#   5. The model calls the tool via MCPClient.run_tool()
#   6. The tool result is converted to OpenAI format
#   7. The result is sent back to the model for continuation
#   8. Steps 3-7 repeat in a loop until the model produces final text output
#
# Tool Calling Loop in Code (from async_openai.py):
# -------------------------------------------------
#   while response.choices[0].finish_reason == "tool_calls":
#       for tool_call in response.choices[0].message.tool_calls:
#           result = await self.mcp_client.run_tool(tool_call.function.name, args)
#           # Convert result to OpenAI format
#           tool_messages.append({"role": "tool", "content": result})
#       # Send tool results back to the model for the next iteration
#       response = await self.client.chat.completions.create(
#           model=model_version,
#           messages=messages + tool_messages,
#           tools=tool_definitions,
#           tool_choice="auto"
#       )
#
# Example with MCP tools enabled:
# (Uncomment the following lines to use)
#
# accelerate launch --num_processes=1 --main_process_port 12345 -m lmms_eval \
#   --model async_openai \
#   --model_args "model_version=${MODEL_VERSION},max_pixels=151200,base_url=${OPENAI_API_BASE},api_key=${OPENAI_API_KEY},num_cpus=8,timeout=6000,mcp_server_path=/path/to/mcp_server.py,work_dir=/tmp/mcp_work" \
#   --tasks videomme \
#   --batch_size 1 \
#   --output_path ./logs/ \
#   --log_samples --verbosity DEBUG
# ============================================================================
# Parameter Reference
# ============================================================================
# model_version   : Model name for API calls (required)
# base_url        : OpenAI API endpoint (required)
# api_key         : API key (required; use 'EMPTY' for local servers)
# num_cpus        : Number of concurrent async workers (default: cpu_count//2)
# timeout         : Request timeout in seconds (default: 600)
# max_retries     : Number of retries on failure (default: 5)
# max_pixels      : Max image resolution (default: 151200)
# min_pixels      : Min image resolution (default: 28*28)
# max_frames      : Max frames for videos (default: 768)
# fps             : Frames per second for video sampling (optional)
# nframes         : Fixed number of frames for video (default: 64)
# is_qwen3_vl     : Enable Qwen3-VL-specific formatting (default: False)
# mcp_server_path : Path to MCP server script for tool calling (optional)
# work_dir        : Working directory for MCP tools (default: /tmp/...)