Instructions to use my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use my-ai-stack/Stack-2-9-finetuned with Transformers:

# Quick start: let the high-level pipeline helper handle tokenization and generation
from transformers import pipeline

# Build a text-generation pipeline from the model repository id.
pipe = pipeline(task="text-generation", model="my-ai-stack/Stack-2-9-finetuned")

# Single-turn conversation expressed as a list of role/content messages.
messages = [{"role": "user", "content": "Who are you?"}]

# Call the pipeline on the conversation.
pipe(messages)

# Lower-level alternative: load the tokenizer and model yourself
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")

# Single-turn conversation expressed as a list of role/content messages.
messages = [{"role": "user", "content": "Who are you?"}]

# Apply the tokenizer's chat template, asking for a dict of PyTorch tensors
# with the generation prompt appended.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
)
# Move the encoded inputs to the device the model is on.
inputs = inputs.to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)

# Skip the first `prompt_len` positions of the first output sequence so that
# only the tokens after the prompt are decoded and printed.
prompt_len = inputs["input_ids"].shape[-1]
print(tokenizer.decode(outputs[0][prompt_len:]))

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use my-ai-stack/Stack-2-9-finetuned with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "my-ai-stack/Stack-2-9-finetuned"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/my-ai-stack/Stack-2-9-finetuned

SGLang

How to use my-ai-stack/Stack-2-9-finetuned with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "my-ai-stack/Stack-2-9-finetuned" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
```
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
```

Stack-2-9-finetuned

File size: 11,452 Bytes

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# \ud83d\ude80 Stack 2.9 - Kaggle Training\n",
    "\n",
    "Free GPU training on Kaggle using Qwen2.5-Coder-1.5B.\n",
    "\n",
    "\u23f1\ufe0f **Runtime:** 2-4 hours  |  \ud83d\udcbe **VRAM:** ~14GB (bfloat16, no bitsandbytes)\n",
    "\n",
    "**Setup:**\n",
    "1. Settings \u2192 Accelerator \u2192 GPU **T4**\n",
    "2. Run all cells in order\n",
    "3. Download merged model from Output tab when done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check GPU\n",
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clone repository\n",
    "import os, shutil, subprocess\n",
    "\n",
    "os.chdir('/kaggle/working')\n",
    "REPO_DIR = '/kaggle/working/stack-2.9'\n",
    "OUTPUT_DIR = os.path.join(REPO_DIR, 'training_output')\n",
    "\n",
    "if os.path.exists(REPO_DIR):\n",
    "    shutil.rmtree(REPO_DIR)\n",
    "subprocess.run(['git', 'clone', 'https://github.com/my-ai-stack/stack-2.9.git', REPO_DIR], check=True)\n",
    "os.chdir(REPO_DIR)\n",
    "print('\u2705 Repo ready:', REPO_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save to Kaggle output (download before session ends!)\n",
    "# Kaggle sessions expire after 9 hours - download outputs immediately!\n",
    "\n",
    "# Make sure the training output directory exists\n",
    "OUTPUT_DIR = os.path.join(REPO_DIR, 'training_output')\n",
    "os.makedirs(OUTPUT_DIR, exist_ok=True)\n",
    "\n",
    "print(f\"\u2705 Output directory: {OUTPUT_DIR}\")\n",
    "print(\"\u26a0\ufe0f IMPORTANT: Download outputs from 'Output' tab before session expires!\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install PyTorch (force CUDA 11.8 build for sm_60 Pascal GPU compatibility)\n",
    "# Kaggle sometimes assigns P100 (sm_60) which requires CUDA 11.x builds of PyTorch\n",
    "!pip uninstall -y torch torchvision torchaudio\n",
    "!pip install torch==2.2.0+cu118 torchvision==0.17.0+cu118 torchaudio==2.2.0+cu118 --index-url https://download.pytorch.org/whl/cu118\n",
    "print('\u2705 PyTorch ready')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install other dependencies (NO bitsandbytes \u2014 bfloat16 only)\n!pip install -q transformers==4.40.0 peft==0.10.0 accelerate==0.34.0 datasets==3.0.0 pyyaml tqdm scipy numpy\nprint('\u2705 Dependencies ready')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fix NumPy 2.0 compatibility (downgrade to <2.0)\n",
    "!pip install -q \"numpy<2\" --force-reinstall\n",
    "print('\u2705 NumPy downgraded to <2.0')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare training data (auto-detect or synthetic fallback)\n",
    "import os, json\n",
    "\n",
    "REPO_TRAIN_DATA = os.path.join(REPO_DIR, 'training-data/final/train.jsonl')\n",
    "MINI_DATA_DIR = os.path.join(REPO_DIR, 'data_mini')\n",
    "MINI_DATA_FILE = os.path.join(MINI_DATA_DIR, 'train_mini.jsonl')\n",
    "SYNTHETIC_FILE = os.path.join(REPO_DIR, 'data/synthetic.jsonl')\n",
    "\n",
    "print('\ud83d\udd0d Data check')\n",
    "\n",
    "if os.path.exists(REPO_TRAIN_DATA):\n",
    "    os.makedirs(MINI_DATA_DIR, exist_ok=True)\n",
    "    if not os.path.exists(MINI_DATA_FILE):\n",
    "        print('   Building mini dataset (1K samples) from full data...')\n",
    "        !python scripts/create_mini_dataset.py --size 1000 --output {MINI_DATA_FILE} --source {REPO_TRAIN_DATA}\n",
    "    DATA_FILE = MINI_DATA_FILE\n",
    "    print('   Using mini dataset')\n",
    "elif os.path.exists(MINI_DATA_FILE):\n",
    "    DATA_FILE = MINI_DATA_FILE\n",
    "    print('   Using existing mini dataset')\n",
    "else:\n",
    "    print('   Creating synthetic data (last resort)')\n",
    "    examples = [\n",
    "        {'instruction': 'Write a Python function to reverse a string', 'output': 'def reverse_string(s):\\n    return s[::-1]'},\n",
    "        {'instruction': 'Write a function to check if a number is prime', 'output': 'def is_prime(n):\\n    if n <= 1:\\n        return False\\n    for i in range(2, int(n**0.5) + 1):\\n        if n % i == 0:\\n            return False\\n    return True'},\n",
    "        {'instruction': 'Write a binary search function', 'output': 'def binary_search(arr, target):\\n    left, right = 0, len(arr) - 1\\n    while left <= right:\\n        mid = (left + right) // 2\\n        if arr[mid] == target:\\n            return mid\\n        elif arr[mid] < target:\\n            left = mid + 1\\n        else:\\n            right = mid - 1\\n    return -1'},\n",
    "    ]\n",
    "    samples = examples * 333\n",
    "    os.makedirs(os.path.dirname(SYNTHETIC_FILE), exist_ok=True)\n",
    "    with open(SYNTHETIC_FILE, 'w') as f:\n",
    "        for s in samples:\n",
    "            f.write(json.dumps(s) + '\\n')\n",
    "    DATA_FILE = SYNTHETIC_FILE\n",
    "    print(f'   Synthetic dataset: {len(samples)} examples')\n",
    "\n",
    "print(f'\\n\u2705 Data: {DATA_FILE}')\n",
    "!ls -lh {DATA_FILE}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate training configuration\n",
    "# 16-bit training without bitsandbytes (avoids CUDA 13 dependency issues); the config below sets fp16=True, bf16=False\n",
    "import yaml\n",
    "\n",
    "os.makedirs(OUTPUT_DIR, exist_ok=True)\n",
    "\n",
    "config = {\n",
    "    'model': {'name': 'Qwen/Qwen2.5-Coder-1.5B', 'trust_remote_code': True},\n",
    "    'data': {'input_path': DATA_FILE, 'max_length': 2048, 'train_split': 0.999},\n",
    "    'lora': {'r': 8, 'lora_alpha': 16, 'dropout': 0.05, 'target_modules': ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'], 'bias': 'none', 'task_type': 'CAUSAL_LM'},\n",
    "    'training': {'num_epochs': 1, 'batch_size': 1, 'gradient_accumulation': 4, 'learning_rate': 2e-4, 'warmup_steps': 50, 'weight_decay': 0.01, 'max_grad_norm': 1.0, 'logging_steps': 10, 'save_steps': 100, 'save_total_limit': 2, 'fp16': True, 'bf16': False, 'gradient_checkpointing': True},\n",
    "    'output': {'lora_dir': os.path.join(OUTPUT_DIR, 'lora'), 'logging_dir': os.path.join(OUTPUT_DIR, 'logs')},\n",
    "    'quantization': {'enabled': False},\n",
    "    'hardware': {'device': 'cuda', 'num_gpus': 1, 'use_4bit': False, 'use_8bit': False}\n",
    "}\n",
    "\n",
    "config_path = os.path.join(OUTPUT_DIR, 'train_config.yaml')\n",
    "with open(config_path, 'w') as f:\n",
    "    yaml.dump(config, f, default_flow_style=False)\n",
    "\n",
    "print(f'\u2705 Config: {config_path}')\n",
    "print(f\"   Model: {config['model']['name']}\")\n",
    "print(f\"   Data: {config['data']['input_path']}\")\n",
    "print(f\"   bf16={config['training']['bf16']}, fp16={config['training']['fp16']}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train (using standalone train_simple_nobnb.py - bfloat16, no quantization)\n",
    "print('='*60)\n",
    "print('STARTING TRAINING (bfloat16, no quantization)')\n",
    "print('='*60)\n",
    "\n",
    "!cd {REPO_DIR} && python train_simple_nobnb.py --config {config_path}\n",
    "\n",
    "print('\\n\u2705 Training step finished')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Merge LoRA adapter into final model\n",
    "lora_dir = os.path.join(OUTPUT_DIR, 'lora')\n",
    "merged_dir = os.path.join(OUTPUT_DIR, 'merged')\n",
    "\n",
    "print('='*60)\n",
    "print('MERGING LORA ADAPTER')\n",
    "print('='*60)\n",
    "\n",
    "!cd {REPO_DIR} && python merge_simple.py \\\n",
    "    --base-model {config['model']['name']} \\\n",
    "    --adapter-path {lora_dir} \\\n",
    "    --output-path {merged_dir} \\\n",
    "    --use-safetensors\n",
    "\n",
    "print('\\n\u2705 Merge complete!')\n",
    "print(f'Merged model: {merged_dir}')\n",
    "!ls -lh {merged_dir}\n",
    "\n",
    "print(\"\\n\u26a0\ufe0f DOWNLOAD THE MODEL NOW: Go to Output tab and download 'merged' folder!\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Push merged model to GitHub LFS (optional - for permanent storage)\n",
    "# This saves the model to your GitHub repo so you can download anytime\n",
    "\n",
    "# Configure Git LFS\n",
    "!git lfs install 2>/dev/null || echo 'Git LFS already installed'\n",
    "\n",
    "# Clone the repo if not already there\n",
    "import subprocess\n",
    "repo_url = 'https://github.com/my-ai-stack/stack-2.9.git'\n",
    "local_repo = '/kaggle/working/stack-2.9-repo'\n",
    "\n",
    "if not os.path.exists(local_repo):\n",
    "    subprocess.run(['git', 'clone', repo_url, local_repo], check=True)\n",
    "\n",
    "# Copy merged model to repo\n",
    "import shutil\n",
    "target_dir = os.path.join(local_repo, 'models/stack-2.9-finetuned')\n",
    "os.makedirs(target_dir, exist_ok=True)\n",
    "\n",
    "if os.path.exists(merged_dir):\n",
    "    # Copy files\n",
    "    for f in os.listdir(merged_dir):\n",
    "        src = os.path.join(merged_dir, f)\n",
    "        dst = os.path.join(target_dir, f)\n",
    "        if os.path.isdir(src):\n",
    "            shutil.copytree(src, dst, dirs_exist_ok=True)\n",
    "        else:\n",
    "            shutil.copy2(src, dst)\n",
    "    \n",
    "    print(f'\u2705 Copied model to {target_dir}')\n",
    "    \n",
    "    # Push to GitHub\n",
    "    os.chdir(local_repo)\n",
    "    subprocess.run(['git', 'add', 'models/stack-2.9-finetuned/'], check=True)\n",
    "    subprocess.run(['git', 'config', 'user.email', 'kaggle@kaggle.com'], check=True)\n",
    "    subprocess.run(['git', 'config', 'user.name', 'Kaggle Auto-Push'], check=True)\n",
    "    subprocess.run(['git', 'commit', '-m', 'feat: add fine-tuned model from Kaggle'], check=True)\n",
    "    \n",
    "    # Push (you may need a GitHub token for private repos)\n",
    "    result = subprocess.run(['git', 'push', 'origin', 'main'], capture_output=True, text=True)\n",
    "    if result.returncode == 0:\n",
    "        print('\u2705 Model pushed to GitHub!')\n",
    "    else:\n",
    "        print(f'\u26a0\ufe0f Push failed: {result.stderr}')\n",
    "        print('   You can still download from Kaggle Output tab.')\n",
    "else:\n",
    "    print('\u26a0\ufe0f Merged model not found. Train first!')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## \ud83d\udce5 Download Model\n",
    "\n",
    "1. Open **Output** tab on the right\n",
    "2. Find `training_output/merged/`\n",
    "3. Select all files and **Download**\n",
    "\n",
    "\u26a0\ufe0f **Do this before Kaggle session ends!**"
   ]
  }
 ],
 "metadata": {
  "kaggle": {
   "accelerator": "gpu"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}