Text Generation
Transformers
English
qwen2
code-generation
python
fine-tuning
Qwen
tools
agent-framework
multi-agent
conversational
Eval Results (legacy)
Instructions to use my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use my-ai-stack/Stack-2-9-finetuned with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use my-ai-stack/Stack-2-9-finetuned with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "my-ai-stack/Stack-2-9-finetuned"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      { "role": "user", "content": "What is the capital of France?" }
    ]
  }'
```
Use Docker
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
- SGLang
How to use my-ai-stack/Stack-2-9-finetuned with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "my-ai-stack/Stack-2-9-finetuned" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      { "role": "user", "content": "What is the capital of France?" }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      { "role": "user", "content": "What is the capital of France?" }
    ]
  }'
```
- Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
| #!/usr/bin/env python3 | |
| """ | |
| Merge Multiple LoRA Adapters | |
| Combines multiple LoRA adapters using weighted averaging based on success rates. | |
| The merged adapter can be used to combine patterns learned by different users | |
| or from different sources. | |
| Usage: | |
| python merge_lora_adapters.py \ | |
| --adapters adapter1.safetensors adapter2.safetensors \ | |
| --weights 0.6 0.4 \ | |
| --output merged.safetensors | |
| # Or with success rates (auto-computes weights proportional to success) | |
| python merge_lora_adapters.py \ | |
| --adapters adapter1.safetensors adapter2.safetensors \ | |
| --success-rates 0.85 0.65 \ | |
| --output merged.safetensors | |
| """ | |
| import argparse | |
| import json | |
| import os | |
| import sys | |
| from pathlib import Path | |
| from typing import Optional | |
| # Try to import required libraries | |
| try: | |
| import torch | |
| import torch.nn as nn | |
| from safetensors.torch import load_file, save_file | |
| HAS_LIBS = True | |
| except ImportError: | |
| HAS_LIBS = False | |
def load_adapter(path: str) -> dict:
    """Read a LoRA adapter's tensors from a safetensors file.

    Args:
        path: Filesystem path of the adapter file.

    Returns:
        Whatever ``load_file`` returns for ``path``.

    Raises:
        FileNotFoundError: If nothing exists at ``path``.
    """
    if os.path.exists(path):
        return load_file(path)
    raise FileNotFoundError(f"Adapter not found: {path}")
def compute_weights_from_success_rates(success_rates: list[float]) -> list[float]:
    """Compute normalized weights proportional to success rates.

    Args:
        success_rates: One success rate per adapter.

    Returns:
        A list of the same length as ``success_rates`` in which each entry
        is the corresponding rate divided by the sum of all rates. If the
        rates sum to zero, every entry is ``1 / len(success_rates)``. An
        empty input yields an empty list.
    """
    # Guard first: the equal-weight fallback below divides by the length,
    # which would raise ZeroDivisionError for an empty list.
    if not success_rates:
        return []

    total = sum(success_rates)
    if total == 0:
        # Equal weights if the success rates sum to 0
        return [1.0 / len(success_rates)] * len(success_rates)
    return [rate / total for rate in success_rates]
def merge_adapters_weighted(
    adapters: list[dict],
    weights: list[float],
    output_path: str
) -> dict:
    """Merge multiple LoRA adapters by weighted averaging and save the result.

    The given weights are normalized to sum to 1. For every tensor key, the
    merged tensor is the weighted average of that key's tensors over the
    adapters that contain it; when a key is missing from some adapters, the
    weights of the adapters that do have it are re-normalized for that key.

    Args:
        adapters: One mapping of tensor name to tensor per adapter.
        weights: One weight per adapter, in the same order as ``adapters``.
        output_path: Where the merged tensors are written with ``save_file``.

    Returns:
        The merged mapping of tensor name to tensor.

    Raises:
        ValueError: If the number of adapters and weights differ, if the
            weights sum to zero, or if tensors stored under the same key
            have different shapes.
    """
    if len(adapters) != len(weights):
        raise ValueError("Number of adapters must match number of weights")

    # Normalize weights
    total_weight = sum(weights)
    if total_weight == 0:
        raise ValueError("Sum of weights cannot be zero")
    normalized_weights = [w / total_weight for w in weights]
    print(f"Merging {len(adapters)} adapters with weights: {normalized_weights}")

    # Start from the first adapter's keys and widen to the union of all keys.
    all_keys = set(adapters[0].keys())
    for i, adapter in enumerate(adapters[1:], 1):
        adapter_keys = set(adapter.keys())
        if adapter_keys != all_keys:
            print(f"Warning: Adapter {i} has different keys. Taking union.", file=sys.stderr)
            all_keys = all_keys.union(adapter_keys)

    merged = {}
    # Sorted so the output key order does not depend on set iteration order.
    for key in sorted(all_keys):
        # Pair each available tensor with its adapter's weight.
        present = [
            (adapter[key], weight)
            for adapter, weight in zip(adapters, normalized_weights)
            if key in adapter
        ]
        if not present:
            continue

        # Different shapes would either fail deep inside the arithmetic or,
        # worse, be silently broadcast into a tensor of the wrong shape.
        shapes = {tuple(tensor.shape) for tensor, _ in present}
        if len(shapes) > 1:
            raise ValueError(
                f"Shape mismatch for tensor '{key}' across adapters: {sorted(shapes)}"
            )

        # Re-normalize over the adapters that actually provide this key.
        total_valid = sum(weight for _, weight in present)
        if total_valid == 0:
            continue

        # Weighted average
        merged[key] = sum(tensor * (weight / total_valid) for tensor, weight in present)

    # Save merged adapter
    save_file(merged, output_path)
    print(f"Merged adapter saved to: {output_path}")
    return merged
def compute_adapter_stats(adapter: dict) -> dict:
    """Summarize an adapter's tensors for debugging output.

    Args:
        adapter: Mapping of tensor name to tensor.

    Returns:
        A dict with ``num_tensors`` (number of entries), ``total_params``
        (sum of ``numel()`` over all tensors), ``dtype_counts`` (tensor
        count per ``str(dtype)``) and ``shape_counts`` (tensor count per
        stringified shape tuple).
    """
    param_total = 0
    by_dtype = {}
    by_shape = {}

    for tensor in adapter.values():
        param_total += tensor.numel()

        dtype_name = str(tensor.dtype)
        by_dtype[dtype_name] = by_dtype.get(dtype_name, 0) + 1

        shape_name = str(tuple(tensor.shape))
        by_shape[shape_name] = by_shape.get(shape_name, 0) + 1

    return {
        "num_tensors": len(adapter),
        "total_params": param_total,
        "dtype_counts": by_dtype,
        "shape_counts": by_shape,
    }
def main(argv: Optional[list[str]] = None):
    """Command-line entry point: parse options, merge adapters, write metadata.

    Weights come from ``--weights`` or from ``--success-rates`` (the two are
    mutually exclusive); with neither, every adapter gets an equal weight.
    After merging, a ``<output>.meta.json`` file records the adapter paths,
    the weights and the adapter count.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Exits with status 1 (after printing a message to stderr) when the
    required libraries are missing, the options are inconsistent, an adapter
    file does not exist, or the merge rejects its inputs.
    """
    parser = argparse.ArgumentParser(
        description="Merge multiple LoRA adapters using weighted averaging"
    )
    parser.add_argument(
        "--adapters",
        type=str,
        nargs="+",
        required=True,
        help="Paths to LoRA adapter safetensors files"
    )
    parser.add_argument(
        "--weights",
        type=float,
        nargs="+",
        default=None,
        help="Manual weights for each adapter (must sum to 1 or will be normalized)"
    )
    parser.add_argument(
        "--success-rates",
        type=float,
        nargs="+",
        default=None,
        help="Success rates for each adapter (weights computed proportionally)"
    )
    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="Output path for merged adapter"
    )
    parser.add_argument(
        "--stats",
        action="store_true",
        help="Print adapter statistics"
    )
    args = parser.parse_args(argv)

    if not HAS_LIBS:
        print("Error: Required libraries not found.", file=sys.stderr)
        print("Install with: pip install torch safetensors", file=sys.stderr)
        sys.exit(1)

    # Validate inputs
    if args.weights is not None and args.success_rates is not None:
        print("Error: Cannot specify both --weights and --success-rates", file=sys.stderr)
        sys.exit(1)

    if args.weights is not None:
        if len(args.adapters) != len(args.weights):
            print("Error: Number of --adapters must match number of --weights", file=sys.stderr)
            sys.exit(1)
        weights = args.weights
    elif args.success_rates is not None:
        if len(args.adapters) != len(args.success_rates):
            print("Error: Number of --adapters must match number of --success-rates", file=sys.stderr)
            sys.exit(1)
        weights = compute_weights_from_success_rates(args.success_rates)
        print(f"Computed weights from success rates: {weights}")
    else:
        # Equal weights
        weights = [1.0 / len(args.adapters)] * len(args.adapters)

    # Load adapters
    print(f"Loading {len(args.adapters)} adapters...")
    adapters = []
    for i, path in enumerate(args.adapters):
        print(f" Loading {i+1}: {path}")
        try:
            adapter = load_adapter(path)
        except FileNotFoundError as exc:
            # Report a missing file the same way as the other input errors
            # instead of letting a traceback escape.
            print(f"Error: {exc}", file=sys.stderr)
            sys.exit(1)
        adapters.append(adapter)
        if args.stats:
            stats = compute_adapter_stats(adapter)
            print(f" Stats: {stats['num_tensors']} tensors, {stats['total_params']:,} params")

    # Merge
    try:
        merge_adapters_weighted(adapters, weights, args.output)
    except ValueError as exc:
        # e.g. weights that sum to zero
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)

    # Print merge info
    print("\nMerge complete!")
    print(f" Output: {args.output}")
    print(f" Adapters merged: {len(args.adapters)}")

    # Save merge metadata
    metadata_path = args.output + ".meta.json"
    metadata = {
        "adapters": args.adapters,
        "weights": weights,
        "num_adapters": len(args.adapters)
    }
    # Explicit encoding so the file does not depend on the platform default.
    with open(metadata_path, 'w', encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)
    print(f" Metadata: {metadata_path}")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()