Text Generation
Transformers
English
qwen2
code-generation
python
fine-tuning
Qwen
tools
agent-framework
multi-agent
conversational
Eval Results (legacy)
Instructions to use my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use my-ai-stack/Stack-2-9-finetuned with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use my-ai-stack/Stack-2-9-finetuned with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "my-ai-stack/Stack-2-9-finetuned"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "my-ai-stack/Stack-2-9-finetuned",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
- SGLang
How to use my-ai-stack/Stack-2-9-finetuned with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "my-ai-stack/Stack-2-9-finetuned",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "my-ai-stack/Stack-2-9-finetuned" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "my-ai-stack/Stack-2-9-finetuned",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
- Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
walidsobhie-code
feat: Add remaining RTMP tools (FileRead, FileWrite, Sleep, AskQuestion, Brief, TaskGet, TeamDelete, MCPTool, Worktree, SyntheticOutput)
5dc5419 | """BriefTool - Generate briefings for Stack 2.9""" | |
| import json | |
| from datetime import datetime | |
| from pathlib import Path | |
| from typing import Any, Dict, List, Optional | |
| from .base import BaseTool, ToolResult | |
| from .registry import tool_registry | |
class BriefTool(BaseTool):
    """Generate a structured briefing for a task.

    The briefing is a plain dictionary holding a timestamp-derived id, the
    task text, the creation time, and whichever optional sections the caller
    supplied.
    """

    name = "brief"
    description = "Generate a structured briefing for a task"
    input_schema = {
        "type": "object",
        "properties": {
            "task": {"type": "string", "description": "Main task or goal"},
            "context": {"type": "string", "description": "Additional context"},
            "constraints": {"type": "array", "items": {"type": "string"}, "description": "Constraints or requirements"},
            "hints": {"type": "array", "items": {"type": "string"}, "description": " Helpful hints"},
            "format": {"type": "string", "enum": ["concise", "detailed"], "default": "concise"}
        },
        "required": ["task"]
    }

    async def execute(
        self,
        task: str,
        context: Optional[str] = None,
        constraints: Optional[List[str]] = None,
        hints: Optional[List[str]] = None,
        format: str = "concise",
    ) -> ToolResult:
        """Build the briefing dictionary and wrap it in a successful result.

        Args:
            task: Main task or goal; always included in the briefing.
            context: Additional context; included only when non-empty.
            constraints: Constraints or requirements; included only when
                non-empty.
            hints: Helpful hints; included only when non-empty.
            format: ``"detailed"`` adds placeholder ``priority`` and
                ``estimated_complexity`` fields. Any other value produces
                the ``"concise"`` layout. The name shadows the builtin but
                is part of the tool's public interface, so it is kept.

        Returns:
            ``ToolResult`` with ``success=True`` and the briefing as ``data``.
        """
        # Read the clock once so "id" and "created_at" always describe the
        # same instant; two separate reads could straddle a second boundary.
        now = datetime.now()

        sections: Dict[str, Any] = {
            "id": f"brief_{now.strftime('%Y%m%d_%H%M%S')}",
            "task": task,
            "created_at": now.isoformat(),
        }

        # Optional sections are omitted when missing or empty.
        if context:
            sections["context"] = context
        if constraints:
            sections["constraints"] = constraints
        if hints:
            sections["hints"] = hints

        if format == "detailed":
            sections["format_version"] = "detailed"
            sections["priority"] = "medium"
            sections["estimated_complexity"] = "unknown"
        else:
            sections["format_version"] = "concise"

        return ToolResult(success=True, data=sections)
class BriefSummaryTool(BaseTool):
    """Summarize content by extracting its leading non-blank lines."""

    name = "brief_summary"
    description = "Generate a summary briefing"
    input_schema = {
        "type": "object",
        "properties": {
            "content": {"type": "string", "description": "Content to summarize"},
            "max_points": {"type": "number", "default": 5, "description": "Maximum key points"}
        },
        "required": ["content"]
    }

    async def execute(self, content: str, max_points: int = 5) -> ToolResult:
        """Return the first ``max_points`` non-blank lines of ``content``.

        Args:
            content: Text to summarize; split on newlines, with each line
                stripped of surrounding whitespace and blank lines dropped.
            max_points: Maximum number of lines to keep. Fractional values
                are truncated toward zero and negative values are treated
                as zero.

        Returns:
            ``ToolResult`` with ``success=True`` whose ``data`` holds the
            extracted lines (``summary``), the count of non-blank lines
            (``total_lines``), and the number kept (``points_extracted``).
        """
        # The schema types max_points as "number", so a float such as 5.0 may
        # arrive, and slicing rejects floats. A negative bound would slice
        # from the end instead of yielding nothing, so clamp at zero.
        limit = max(0, int(max_points))

        # Simple extractive summarization: keep the leading non-blank lines.
        stripped_lines = (raw.strip() for raw in content.split('\n'))
        lines = [line for line in stripped_lines if line]
        points = lines[:limit]

        return ToolResult(success=True, data={
            "summary": points,
            "total_lines": len(lines),
            "points_extracted": len(points)
        })
# Instantiate each tool in turn and add it to the shared registry on import.
for _tool_cls in (BriefTool, BriefSummaryTool):
    tool_registry.register(_tool_cls())