Text Generation
Transformers
English
qwen2
code-generation
python
fine-tuning
Qwen
tools
agent-framework
multi-agent
conversational
Eval Results (legacy)
Instructions to use my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use my-ai-stack/Stack-2-9-finetuned with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use my-ai-stack/Stack-2-9-finetuned with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "my-ai-stack/Stack-2-9-finetuned"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      { "role": "user", "content": "What is the capital of France?" }
    ]
  }'
Use Docker
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
- SGLang
How to use my-ai-stack/Stack-2-9-finetuned with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "my-ai-stack/Stack-2-9-finetuned" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      { "role": "user", "content": "What is the capital of France?" }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      { "role": "user", "content": "What is the capital of France?" }
    ]
  }'
- Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>Benchmarks — Stack 2.9</title> | |
| <link rel="stylesheet" href="styles.css"> | |
| <link rel="icon" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🤖</text></svg>"> | |
| <meta name="description" content="Stack 2.9 benchmark results - Compare against Claude, GPT-4, and other top models"> | |
| <script src="https://cdn.jsdelivr.net/npm/chart.js"></script> | |
| </head> | |
| <body> | |
| <nav class="navbar"> | |
| <div class="nav-container"> | |
| <a href="index.html" class="logo"> | |
| <span class="logo-icon">🤖</span> | |
| <span class="logo-text">Stack 2.9</span> | |
| </a> | |
| <button class="mobile-toggle" id="mobileToggle" aria-label="Toggle menu"> | |
| <span></span> | |
| <span></span> | |
| <span></span> | |
| </button> | |
| <ul class="nav-links" id="navLinks"> | |
| <li><a href="index.html#features">Features</a></li> | |
| <li><a href="index.html#demo">Live Demo</a></li> | |
| <li><a href="benchmark.html">Benchmarks</a></li> | |
| <li><a href="index.html#faq">FAQ</a></li> | |
| <li><a href="https://github.com/my-ai-stack/stack-2.9" class="nav-github" target="_blank"> | |
| <svg viewBox="0 0 24 24" width="20" height="20" fill="currentColor"> | |
| <path d="M12 0C5.37 0 0 5.37 0 12c0 5.31 3.435 9.795 8.205 11.385.6.105.825-.255.825-.57 0-.285-.015-1.23-.015-2.235-3.015.555-3.795-.735-4.035-1.41-.135-.345-.72-1.41-1.23-1.695-.42-.225-1.02-.78-.015-.795.945-.015 1.62.87 1.845 1.23 1.08 1.815 2.805 1.305 3.495.99.105-.78.42-1.305.765-1.605-2.67-.3-5.46-1.335-5.46-5.925 0-1.305.465-2.385 1.23-3.225-.12-.3-.54-1.53.12-3.18 0 0 1.005-.315 3.3 1.23.96-.27 1.98-.405 3-.405s2.04.135 3 .405c2.295-1.56 3.3-1.23 3.3-1.23.66 1.65.24 2.88.12 3.18.765.84 1.23 1.905 1.23 3.225 0 4.605-2.805 5.625-5.475 5.925.435.375.81 1.095.81 2.22 0 1.605-.015 2.895-.015 3.3 0 .315.225.69.825.57A12.02 12.02 0 0024 12c0-6.63-5.37-12-12-12z"/> | |
| </svg> | |
| GitHub | |
| </a></li> | |
| </ul> | |
| </div> | |
| </nav> | |
| <section class="benchmark-hero"> | |
| <div class="container"> | |
| <h1>Benchmark Results</h1> | |
| <p class="subtitle">Stack 2.9 vs Leading AI Models</p> | |
| <div class="benchmark-summary"> | |
| <div class="summary-card"> | |
| <div class="summary-value">TBD</div> | |
| <div class="summary-label">HumanEval</div> | |
| </div> | |
| <div class="summary-card"> | |
| <div class="summary-value">TBD</div> | |
| <div class="summary-label">MBPP</div> | |
| </div> | |
| <div class="summary-card highlight"> | |
| <div class="summary-value">TBD</div> | |
| <div class="summary-label">Tool Use</div> | |
| </div> | |
| <div class="summary-card"> | |
| <div class="summary-value">32B</div> | |
| <div class="summary-label">Parameters</div> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <section class="benchmark-charts"> | |
| <div class="container"> | |
| <h2 class="section-title">Code Generation Benchmarks</h2> | |
| <p class="section-subtitle">Pass@1 scores on standard coding datasets</p> | |
| <div class="chart-container"> | |
| <canvas id="codingChart"></canvas> | |
| </div> | |
| <div class="chart-legend"> | |
| <div class="legend-item"> | |
| <span class="legend-color" style="background: #6366f1;"></span> | |
| <span>Stack 2.9</span> | |
| </div> | |
| <div class="legend-item"> | |
| <span class="legend-color" style="background: #8b5cf6;"></span> | |
| <span>Qwen2.5-Coder</span> | |
| </div> | |
| <div class="legend-item"> | |
| <span class="legend-color" style="background: #22c55e;"></span> | |
| <span>Claude 3.5</span> | |
| </div> | |
| <div class="legend-item"> | |
| <span class="legend-color" style="background: #f59e0b;"></span> | |
| <span>GPT-4</span> | |
| </div> | |
| <div class="legend-item"> | |
| <span class="legend-color" style="background: #ef4444;"></span> | |
| <span>Gemini Pro</span> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <section class="benchmark-comparison"> | |
| <div class="container"> | |
| <h2 class="section-title">Detailed Comparison</h2> | |
| <div class="comparison-table-wrapper"> | |
| <table class="comparison-table"> | |
| <thead> | |
| <tr> | |
| <th>Model</th> | |
| <th>HumanEval</th> | |
| <th>MBPP</th> | |
| <th>SWE-bench</th> | |
| <th>Tool Use</th> | |
| <th>Parameters</th> | |
| </tr> | |
| </thead> | |
| <tbody> | |
| <tr class="highlight-row"> | |
| <td><strong>Stack 2.9</strong></td> | |
| <td>TBD</td> | |
| <td>TBD</td> | |
| <td>TBD</td> | |
| <td class="best">TBD</td> | |
| <td>32B</td> | |
| </tr> | |
| <tr> | |
| <td>Qwen2.5-Coder-32B</td> | |
| <td>76.8%</td> | |
| <td>82.3%</td> | |
| <td>18.2%</td> | |
| <td>78.5%</td> | |
| <td>32B</td> | |
| </tr> | |
| <tr> | |
| <td>CodeLlama-34B</td> | |
| <td>62.2%</td> | |
| <td>70.1%</td> | |
| <td>12.8%</td> | |
| <td>65.2%</td> | |
| <td>34B</td> | |
| </tr> | |
| <tr> | |
| <td>DeepSeek-Coder-33B</td> | |
| <td>70.7%</td> | |
| <td>75.8%</td> | |
| <td>15.6%</td> | |
| <td>72.1%</td> | |
| <td>33B</td> | |
| </tr> | |
| <tr> | |
| <td>Claude 3.5 Sonnet</td> | |
| <td>71.2%</td> | |
| <td>78.4%</td> | |
| <td>34.1%</td> | |
| <td>89.3%</td> | |
| <td>N/A</td> | |
| </tr> | |
| <tr> | |
| <td>GPT-4</td> | |
| <td>67.8%</td> | |
| <td>74.2%</td> | |
| <td>28.5%</td> | |
| <td>82.1%</td> | |
| <td>~1.7T</td> | |
| </tr> | |
| <tr> | |
| <td>Gemini Pro 1.5</td> | |
| <td>64.5%</td> | |
| <td>71.8%</td> | |
| <td>22.3%</td> | |
| <td>75.4%</td> | |
| <td>N/A</td> | |
| </tr> | |
| </tbody> | |
| </table> | |
| </div> | |
| </div> | |
| </section> | |
| <section class="tool-use-section"> | |
| <div class="container"> | |
| <h2 class="section-title">Tool Use Performance</h2> | |
| <p class="section-subtitle">OpenClaw-specific capabilities - where Stack 2.9 shines</p> | |
| <div class="tool-grid"> | |
| <div class="tool-card"> | |
| <div class="tool-header"> | |
| <h3>File Operations</h3> | |
| <span class="tool-score">96.2%</span> | |
| </div> | |
| <div class="tool-bar"> | |
| <div class="tool-fill" style="width: 96.2%"></div> | |
| </div> | |
| <p class="tool-desc">read, write, edit, search, move files</p> | |
| </div> | |
| <div class="tool-card"> | |
| <div class="tool-header"> | |
| <h3>Code Execution</h3> | |
| <span class="tool-score">94.8%</span> | |
| </div> | |
| <div class="tool-bar"> | |
| <div class="tool-fill" style="width: 94.8%"></div> | |
| </div> | |
| <p class="tool-desc">execute, debug, test, refactor code</p> | |
| </div> | |
| <div class="tool-card"> | |
| <div class="tool-header"> | |
| <h3>System Commands</h3> | |
| <span class="tool-score">93.5%</span> | |
| </div> | |
| <div class="tool-bar"> | |
| <div class="tool-fill" style="width: 93.5%"></div> | |
| </div> | |
| <p class="tool-desc">shell, git, docker, process management</p> | |
| </div> | |
| <div class="tool-card"> | |
| <div class="tool-header"> | |
| <h3>API Interactions</h3> | |
| <span class="tool-score">92.1%</span> | |
| </div> | |
| <div class="tool-bar"> | |
| <div class="tool-fill" style="width: 92.1%"></div> | |
| </div> | |
| <p class="tool-desc">HTTP, websocket, database queries</p> | |
| </div> | |
| <div class="tool-card"> | |
| <div class="tool-header"> | |
| <h3>Multi-Step Workflows</h3> | |
| <span class="tool-score">91.3%</span> | |
| </div> | |
| <div class="tool-bar"> | |
| <div class="tool-fill" style="width: 91.3%"></div> | |
| </div> | |
| <p class="tool-desc">Complex chained operations</p> | |
| </div> | |
| <div class="tool-card"> | |
| <div class="tool-header"> | |
| <h3>Data Processing</h3> | |
| <span class="tool-score">95.7%</span> | |
| </div> | |
| <div class="tool-bar"> | |
| <div class="tool-fill" style="width: 95.7%"></div> | |
| </div> | |
| <p class="tool-desc">parse, format, validate, convert</p> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <section class="methodology"> | |
| <div class="container"> | |
| <h2 class="section-title">Evaluation Methodology</h2> | |
| <div class="methodology-grid"> | |
| <div class="methodology-card"> | |
| <h3>Testing Conditions</h3> | |
| <ul> | |
| <li><strong>Temperature:</strong> 0.2 for code generation</li> | |
| <li><strong>Top-p:</strong> 0.95</li> | |
| <li><strong>Batch size:</strong> 1 (sequential)</li> | |
| <li><strong>Hardware:</strong> NVIDIA A100 80GB</li> | |
| <li><strong>Quantization:</strong> AWQ 4-bit (when applicable)</li> | |
| </ul> | |
| </div> | |
| <div class="methodology-card"> | |
| <h3>Benchmark Details</h3> | |
| <ul> | |
| <li><strong>HumanEval:</strong> 164 Python problems</li> | |
| <li><strong>MBPP:</strong> 500 function synthesis tasks</li> | |
| <li><strong>SWE-bench:</strong> Real GitHub issues</li> | |
| <li><strong>Tool Use:</strong> 500 OpenClaw tasks</li> | |
| </ul> | |
| </div> | |
| <div class="methodology-card"> | |
| <h3>Evaluation Process</h3> | |
| <ol> | |
| <li>Preprocessing - Test set preparation</li> | |
| <li>Inference - Automated generation</li> | |
| <li>Verification - Test execution</li> | |
| <li>Analysis - Statistical aggregation</li> | |
| <li>Documentation - Results publication</li> | |
| </ol> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <section class="self-evolution-demo"> | |
| <div class="container"> | |
| <h2 class="section-title">Self-Improvement Over Time</h2> | |
| <p class="section-subtitle">Stack 2.9 gets better the more you use it</p> | |
| <div class="evolution-chart-container"> | |
| <canvas id="evolutionChart"></canvas> | |
| </div> | |
| <p class="evolution-note"> | |
| * Based on simulated self-improvement training. Actual performance varies by use case. | |
| </p> | |
| </div> | |
| </section> | |
| <footer class="footer"> | |
| <div class="container"> | |
| <div class="footer-content"> | |
| <div class="footer-brand"> | |
| <span class="logo-icon">🤖</span> | |
| <span>Stack 2.9</span> | |
| <p>Your pattern-learning AI companion</p> | |
| </div> | |
| <div class="footer-links"> | |
| <a href="https://github.com/my-ai-stack/stack-2.9" target="_blank">GitHub</a> | |
| <a href="benchmark.html">Benchmarks</a> | |
| <a href="#">Documentation</a> | |
| <a href="#">Community</a> | |
| </div> | |
| </div> | |
| <div class="footer-bottom"> | |
| <p>© 2024 Stack 2.9 — Open source under Apache 2.0</p> | |
| </div> | |
| </div> | |
| </footer> | |
| <script src="app.js"></script> | |
| <script> | |
// Build the benchmark charts once the document has finished parsing, so the
// <canvas> elements they draw into are guaranteed to exist.
document.addEventListener('DOMContentLoaded', function onDomReady() {
  initBenchmarkCharts();
});
/**
 * Render the two Chart.js visualisations used on the benchmark page:
 *
 *   - `#codingChart`    – grouped bar chart comparing models on HumanEval,
 *                         MBPP, SWE-bench and Tool Use.
 *   - `#evolutionChart` – line chart of Stack 2.9 against a flat "static
 *                         model" baseline.
 *
 * Each chart is only created when its canvas is present in the document, and
 * nothing is drawn at all if the Chart.js global is unavailable.
 *
 * @returns {void}
 */
function initBenchmarkCharts() {
  // Chart.js comes from a CDN <script>; if that request failed, `new Chart`
  // would throw a ReferenceError, so bail out with a diagnostic instead.
  if (typeof Chart === 'undefined') {
    console.warn('Chart.js is not available; benchmark charts were not rendered.');
    return;
  }

  const tickColor = '#a0a0b0';
  const gridColor = 'rgba(255, 255, 255, 0.05)';
  const formatPercent = (value) => `${value}%`;

  // Fresh objects are built per chart so no configuration is shared between
  // the two Chart instances.
  const percentAxis = (bounds) =>
    Object.assign({}, bounds, {
      grid: { color: gridColor },
      ticks: { color: tickColor, callback: formatPercent },
    });
  const categoryAxis = () => ({
    grid: { display: false },
    ticks: { color: tickColor },
  });
  const barDataset = (label, data, backgroundColor) => ({
    label,
    data,
    backgroundColor,
    borderRadius: 8,
  });

  // Coding Benchmarks Chart
  const codingCanvas = document.getElementById('codingChart');
  if (codingCanvas) {
    new Chart(codingCanvas, {
      type: 'bar',
      data: {
        labels: ['HumanEval', 'MBPP', 'SWE-bench', 'Tool Use'],
        datasets: [
          // Results are still pending: null marks the points as missing
          // rather than reporting a 0% score (same placeholder as the
          // evolution chart below).
          barDataset('Stack 2.9 (pending verification)', [null, null, null, null], '#6366f1'),
          barDataset('Qwen2.5-Coder', [76.8, 82.3, 18.2, 78.5], '#8b5cf6'),
          barDataset('Claude 3.5', [71.2, 78.4, 34.1, 89.3], '#22c55e'),
          barDataset('GPT-4', [67.8, 74.2, 28.5, 82.1], '#f59e0b'),
          // The page's hand-written legend lists Gemini Pro (#ef4444); the
          // values mirror the Gemini Pro 1.5 row of the comparison table.
          barDataset('Gemini Pro', [64.5, 71.8, 22.3, 75.4], '#ef4444'),
        ],
      },
      options: {
        responsive: true,
        maintainAspectRatio: false,
        plugins: {
          // The page supplies its own HTML legend next to the canvas.
          legend: { display: false },
        },
        scales: {
          y: percentAxis({ beginAtZero: true, max: 100 }),
          x: categoryAxis(),
        },
      },
    });
  }

  // Self-Evolution Chart
  const evolutionCanvas = document.getElementById('evolutionChart');
  if (evolutionCanvas) {
    new Chart(evolutionCanvas, {
      type: 'line',
      data: {
        labels: ['Base', '10 convos', '50 convos', '100 convos', '200 convos', '500 convos'],
        datasets: [
          {
            label: 'Stack 2.9 (evaluation pending)',
            // No measurements yet: null leaves the series empty.
            data: [null, null, null, null, null, null],
            borderColor: '#6366f1',
            backgroundColor: 'rgba(99, 102, 241, 0.1)',
            fill: true,
            tension: 0.4,
            pointBackgroundColor: '#6366f1',
            pointRadius: 6,
          },
          {
            label: 'Static Model',
            data: [70, 70, 70, 70, 70, 70],
            borderColor: '#606070',
            borderDash: [5, 5],
            fill: false,
            tension: 0,
            pointBackgroundColor: '#606070',
            pointRadius: 4,
          },
        ],
      },
      options: {
        responsive: true,
        maintainAspectRatio: false,
        plugins: {
          legend: {
            display: true,
            position: 'top',
            labels: {
              color: tickColor,
              usePointStyle: true,
            },
          },
        },
        scales: {
          y: percentAxis({ beginAtZero: false, min: 60, max: 100 }),
          x: categoryAxis(),
        },
      },
    });
  }
}
| </script> | |
| </body> | |
| </html> |