Text Generation
Transformers
English
qwen2
code-generation
python
fine-tuning
Qwen
tools
agent-framework
multi-agent
conversational
Eval Results (legacy)
Instructions to use my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use my-ai-stack/Stack-2-9-finetuned with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use my-ai-stack/Stack-2-9-finetuned with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "my-ai-stack/Stack-2-9-finetuned"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
- SGLang
How to use my-ai-stack/Stack-2-9-finetuned with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "my-ai-stack/Stack-2-9-finetuned" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
| """MCP Server for Stack 2.9 - Exposes Stack tools via Model Context Protocol""" | |
| import asyncio | |
| import os | |
| import sys | |
| from typing import Any | |
| # Ensure project root is on the path so 'from src.tools import' works | |
| _project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | |
| if _project_root not in sys.path: | |
| sys.path.insert(0, _project_root) | |
| from mcp.server.fastmcp import FastMCP | |
| # Import all Stack 2.9 tools (triggers auto-registration) | |
| from src.tools import ( | |
| BaseTool, | |
| ToolResult, | |
| get_registry, | |
| file_read, | |
| grep_tool, | |
| task_management, | |
| team_tool, | |
| agent_tool, | |
| ) | |
def _tool_to_mcp(tool: BaseTool) -> dict[str, Any]:
    """Describe a Stack 2.9 tool as an MCP-style tool dict.

    ``tool.input_schema`` may be either a ready-made value or a
    zero-argument callable; a callable is invoked and its return value is
    used as the schema.

    Args:
        tool: Object exposing ``name``, ``description`` and ``input_schema``.

    Returns:
        A dict with the keys ``name``, ``description`` and ``inputSchema``.
    """
    declared = tool.input_schema
    resolved_schema = declared() if callable(declared) else declared
    return dict(
        name=tool.name,
        description=tool.description,
        inputSchema=resolved_schema,
    )
def _call_tool_sync(tool: BaseTool, arguments: dict[str, Any]) -> Any:
    """Invoke a tool from synchronous code and normalise its result.

    Dispatch rules:

    * If ``tool.execute`` is a coroutine function it is called with
      ``**arguments`` and driven to completion.
    * Otherwise, if the tool has a non-callable ``input_schema`` attribute,
      ``execute`` receives the whole ``arguments`` dict as one positional
      argument (input_data dict style).
    * Otherwise ``execute`` is called with ``**arguments``.

    Args:
        tool: The tool whose ``execute`` should be run.
        arguments: Arguments supplied by the caller.

    Returns:
        ``{"success": True, "data": ...}`` or
        ``{"success": False, "error": ...}`` when ``execute`` returns a
        ``ToolResult``; otherwise whatever ``execute`` returned, unchanged.

    Raises:
        Any exception raised by ``execute`` propagates to the caller.
    """
    import concurrent.futures
    import inspect

    execute = tool.execute

    if inspect.iscoroutinefunction(execute):
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: drive the coroutine here.
            result = asyncio.run(execute(**arguments))
        else:
            # This function is called from an async handler, i.e. from inside
            # a running loop, where run_until_complete() raises
            # "This event loop is already running".  Run the coroutine on a
            # private loop in a helper thread and block until it finishes.
            # NOTE(review): the coroutine therefore runs on a different loop
            # and thread than the caller's - confirm no tool depends on
            # loop-bound state.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                result = pool.submit(asyncio.run, execute(**arguments)).result()
    elif hasattr(tool, "input_schema") and not callable(tool.input_schema):
        # Sync execute (uses input_data dict style)
        result = execute(arguments)
    else:
        result = execute(**arguments)

    if isinstance(result, ToolResult):
        if result.success:
            return {"success": True, "data": result.data}
        return {"success": False, "error": result.error}
    return result
def _register_tool(mcp: FastMCP, tool: BaseTool) -> None:
    """Expose one Stack tool on the given MCP server.

    An async ``handler`` that forwards its ``arguments`` dict to
    ``_call_tool_sync`` is registered through ``mcp.add_tool`` under the
    tool's own name and description.

    Args:
        mcp: Server object providing ``add_tool``.
        tool: Tool to expose; its ``name``, ``input_schema`` and
            ``description`` attributes are read.
    """
    registered_name = tool.name

    # The schema is resolved here (a callable schema is invoked), but the
    # resolved value is not forwarded to add_tool() below.
    declared_schema = tool.input_schema
    if callable(declared_schema):
        declared_schema = declared_schema()

    async def handler(arguments: dict[str, Any]) -> dict[str, Any]:
        return _call_tool_sync(tool, arguments)

    mcp.add_tool(
        handler,
        name=registered_name,
        description=tool.description,
    )
def _register_all_tools(mcp: FastMCP) -> int:
    """Register every tool in the Stack 2.9 registry on ``mcp``.

    Registration is best-effort: a tool whose registration raises is
    reported and skipped so that the remaining tools are still exposed.

    Args:
        mcp: Server on which the tools are registered.

    Returns:
        The number of tools that were registered successfully.
    """
    registry = get_registry()
    count = 0
    for tool in registry._tools.values():
        try:
            _register_tool(mcp, tool)
        except Exception as e:
            # Report on stderr: when the server speaks MCP over stdio, stdout
            # is the protocol channel and stray text there corrupts it.
            print(
                f"Failed to register tool {getattr(tool, 'name', 'unknown')}: {e}",
                file=sys.stderr,
            )
        else:
            count += 1
    return count
# Create the FastMCP server. This module-level instance is the one that
# main() registers the Stack tools on and then runs.
mcp = FastMCP("Stack2.9")
def main():
    """Main entry point - register tools and run the server.

    Imports the tool modules (the imported names are not used directly; the
    imports exist so the tools are registered), registers every tool from
    the registry on the module-level ``mcp`` server, reports how many were
    registered, and then runs the server.
    """
    # Import all tools to ensure registration
    from src.tools import (  # noqa: F401
        agent_tool,
        ask_question,
        brief_tool,
        config_tool,
        file_edit,
        file_read,
        file_write,
        glob_tool,
        grep_tool,
        messaging,
        plan_mode,
        remote_trigger,
        scheduling,
        skill_tool,
        sleep_tool,
        synthetic_output,
        task_get,
        task_management,
        team_delete,
        team_tool,
        todo_tool,
        tool_discovery,
        web_fetch,
        web_search,
        worktree_tool,
    )

    # Register all tools from registry
    count = _register_all_tools(mcp)

    # Status goes to stderr: mcp.run() is called without arguments, and with
    # a stdio transport stdout must carry protocol messages only.
    print(f"Registered {count} Stack 2.9 tools as MCP tools", file=sys.stderr)

    # Run the MCP server
    mcp.run()
# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()