Text Generation
Transformers
English
qwen2
code-generation
python
fine-tuning
Qwen
tools
agent-framework
multi-agent
conversational
Eval Results (legacy)
Instructions to use my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use my-ai-stack/Stack-2-9-finetuned with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use my-ai-stack/Stack-2-9-finetuned with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "my-ai-stack/Stack-2-9-finetuned"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
- SGLang
How to use my-ai-stack/Stack-2-9-finetuned with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "my-ai-stack/Stack-2-9-finetuned" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
| #!/usr/bin/env python3 | |
| """ | |
| model_info.py — Extract and report Stack 2.9 model metadata. | |
| Reads from models/registry.json and optionally from a model checkpoint | |
| directory to extract/verify metadata. | |
| Usage: | |
| python scripts/model_info.py # Show all models | |
| python scripts/model_info.py --model stack-2.9-1.5B | |
| python scripts/model_info.py --model stack-2.9-7B-QLoRA --verbose | |
| python scripts/model_info.py --export-json /path/to/output.json | |
| """ | |
| import argparse | |
| import json | |
| import os | |
| import sys | |
| from pathlib import Path | |
| from typing import Optional | |
# Default registry location: models/registry.json, resolved two directory
# levels above this file.
REGISTRY_PATH = Path(__file__).parent.parent.joinpath("models", "registry.json")
def load_registry(registry_path: Path = REGISTRY_PATH) -> dict:
    """Load the model registry JSON.

    Args:
        registry_path: Location of the registry file. Defaults to
            ``REGISTRY_PATH``.

    Returns:
        The parsed JSON document.

    If the file does not exist, an error is written to stderr and the
    process exits with status 1.
    """
    if not registry_path.exists():
        print(f"ERROR: Registry not found at {registry_path}", file=sys.stderr)
        sys.exit(1)
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(registry_path, encoding="utf-8") as f:
        return json.load(f)
def format_params(n: int) -> str:
    """Format a parameter count as a short human-readable string.

    Counts of at least one billion are rendered with one decimal and a
    ``B`` suffix, counts of at least one million as whole millions with an
    ``M`` suffix, and anything smaller as the plain number.

    Args:
        n: Number of parameters.

    Returns:
        For example ``"1.5B"``, ``"494M"`` or ``"999"``.
    """
    if n >= 1_000_000_000:
        return f"{n / 1_000_000_000:.1f}B"
    if n >= 1_000_000:
        millions = round(n / 1_000_000)
        # Values just under one billion round up to 1000 million; show
        # them as "1.0B" instead of the odd-looking "1000M".
        if millions >= 1000:
            return f"{n / 1_000_000_000:.1f}B"
        return f"{millions}M"
    return str(n)
def format_lora(config: Optional[dict]) -> str:
    """Format a LoRA configuration as a multi-line readable string.

    Args:
        config: The LoRA settings dict, or ``None``/empty for a model
            without LoRA.

    Returns:
        ``"N/A (full model)"`` when ``config`` is falsy; otherwise one
        line each for rank, alpha, dropout and target modules, plus a
        "Modules to Save" line when that entry is present and non-empty.
    """
    if not config:
        return "N/A (full model)"
    # `or []` also covers an explicit null value, which `.get(key, [])`
    # would hand straight to join() and crash on.
    target_modules = config.get("target_modules") or []
    lines = [
        f" Rank (r): {config.get('rank', 'N/A')}",
        f" Alpha: {config.get('alpha', 'N/A')}",
        f" Dropout: {config.get('dropout', 'N/A')}",
        f" Target Modules: {', '.join(target_modules)}",
    ]
    modules_to_save = config.get("modules_to_save")
    if modules_to_save:
        lines.append(f" Modules to Save: {', '.join(modules_to_save)}")
    return "\n".join(lines)
def format_performance(metrics: dict) -> str:
    """Format benchmark metrics as one line per benchmark.

    Args:
        metrics: Mapping that may contain the keys ``hellaswag``,
            ``arc_challenge``, ``mmlu``, ``humaneval`` and ``loss``.
            ``None`` is treated like an empty mapping.

    Returns:
        ``" No benchmarks yet"`` when none of the known metrics has a
        value; otherwise five lines, with ``N/A`` for each missing metric.
    """
    metrics = metrics or {}
    benchmarks = {
        "HellaSwag": metrics.get("hellaswag"),
        "ARC-Challenge": metrics.get("arc_challenge"),
        "MMLU": metrics.get("mmlu"),
        "HumanEval": metrics.get("humaneval"),
        "Training Loss": metrics.get("loss"),
    }
    # Decide on the fallback before building lines: every benchmark always
    # yields a line, so checking for an empty line list would never trigger.
    if all(value is None for value in benchmarks.values()):
        return " No benchmarks yet"
    lines = [
        f" {name:20s} {'N/A' if value is None else value}"
        for name, value in benchmarks.items()
    ]
    return "\n".join(lines)
def status_emoji(status: str) -> str:
    """Map a model status code to its emoji-prefixed display label.

    A status that is not one of the known codes is returned wrapped in
    parentheses.
    """
    labels = {
        "in_training": "🟡 IN TRAINING",
        "planned": "🔴 PLANNED",
        "released": "🟢 RELEASED",
        "deprecated": "⚠️ DEPRECATED",
    }
    if status in labels:
        return labels[status]
    return f"({status})"
def _format_count(value) -> str:
    """Render a number with thousands separators, or ``N/A`` if missing."""
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return f"{value:,}"
    return "N/A" if value is None else str(value)


def print_model(model: dict, verbose: bool = False):
    """Print a single model's info to stdout.

    Args:
        model: One registry entry. The keys ``version``, ``status``,
            ``base_model``, ``parameters``, ``dataset`` and ``use_case``
            are required; the remaining fields fall back to placeholders.
        verbose: Accepted for the CLI flag; currently has no effect on
            the output.

    Raises:
        KeyError: If a required key is missing from ``model``.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f" {model['version']} [{status_emoji(model['status'])}]")
    print(f"{rule}")
    print(f"\n Base Model: {model['base_model']}")
    print(f" Parameters: {format_params(model['parameters'])} ({model['parameters']:,})")
    print(f" Quantization: {model.get('quantization') or 'None (full precision)'}")
    print(f" Precision: {model.get('precision', 'N/A')}")
    # The ',' format spec is only valid for numbers; applying it directly to
    # an 'N/A' default raises ValueError, so missing values go via the helper.
    print(f" Context Length: {_format_count(model.get('context_length'))} tokens")
    print(f" Vocab Size: {_format_count(model.get('vocabulary_size'))}")
    print(f" Dataset: {model['dataset']}")
    print(f" Created: {model.get('created_at') or 'TBD'}")
    print("\n LoRA Config:")
    print(f" {format_lora(model.get('lora'))}")
    print("\n Performance Metrics:")
    print(f" {format_performance(model.get('performance', {}))}")
    print(f"\n Use Case: {model['use_case']}")
    if model.get("notes"):
        print(f" Notes: {model['notes']}")
def main():
    """Command-line entry point: print registry models and optionally export them.

    Parses the command-line arguments, loads the registry, prints either
    the model named by ``--model`` or every model, and, when
    ``--export-json`` is given, writes the selected models to that file.
    Exits with status 1 if the requested model is not in the registry.
    """
    parser = argparse.ArgumentParser(
        description="Extract and report Stack 2.9 model metadata."
    )
    parser.add_argument(
        "--model", "-m",
        help="Specific model version to show (e.g., stack-2.9-1.5B). "
        "If omitted, shows all models."
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show verbose output (same as default)."
    )
    parser.add_argument(
        "--export-json", "-o",
        metavar="PATH",
        help="Export selected model(s) as JSON to a file."
    )
    parser.add_argument(
        "--registry",
        default=REGISTRY_PATH,
        metavar="PATH",
        help=f"Path to registry.json (default: {REGISTRY_PATH})."
    )
    args = parser.parse_args()

    registry_path = Path(args.registry)
    registry = load_registry(registry_path)
    models = registry.get("models", [])

    if args.model:
        selected = [m for m in models if m["version"] == args.model]
        if not selected:
            # Keep the whole diagnostic on stderr so stdout stays clean
            # for callers that capture or pipe the report.
            print(f"ERROR: Model '{args.model}' not found in registry.", file=sys.stderr)
            print(
                "Available models:",
                ", ".join(m["version"] for m in models),
                file=sys.stderr,
            )
            sys.exit(1)
    else:
        selected = models

    for model in selected:
        print_model(model, verbose=args.verbose)

    # Export to JSON if requested
    if args.export_json:
        output = {"registry_version": registry.get("registry_version"), "models": selected}
        # Write as UTF-8 explicitly rather than the platform's locale encoding.
        with open(args.export_json, "w", encoding="utf-8") as f:
            json.dump(output, f, indent=2)
        print(f"\n✓ Exported to {args.export_json}")
    print()


if __name__ == "__main__":
    main()