{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# ARIA — DevOps Incident Response: GRPO Training (Kaggle 2×T4)\n", "\n", "**Model:** `unsloth/Llama-3.2-3B-Instruct` (4-bit quantized) \n", "**Tasks:** `easy` → `medium` \n", "**Episodes:** 80 per task (160 total) \n", "**Expected runtime:** ~6–8 hours on Kaggle 2×T4 \n", "\n", "### Before running:\n", "1. Enable **GPU T4 x2** (right panel → Accelerator)\n", "2. Add Kaggle secret: Settings → Secrets → `HF_TOKEN` = your HF write token\n", "3. Run all cells top to bottom — do not skip any" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2026-04-21T02:58:22.215226Z", "iopub.status.busy": "2026-04-21T02:58:22.214729Z", "iopub.status.idle": "2026-04-21T02:58:40.587149Z", "shell.execute_reply": "2026-04-21T02:58:40.585130Z", "shell.execute_reply.started": "2026-04-21T02:58:22.215193Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 1: Install ───────────────────────────────────────────────────────────\n", "import subprocess, sys, os\n", "\n", "os.environ['UNSLOTH_RETURN_LOGITS'] = '1'\n", "\n", "# Install in correct order\n", "subprocess.run(['pip', 'install', '-q',\n", " 'unsloth',\n", " 'transformers>=4.48.0', # needs 4.48+ for CompileConfig\n", " 'mergekit',\n", " 'trl>=0.9.0',\n", " 'accelerate>=0.26.0',\n", " 'peft>=0.10.0',\n", " 'bitsandbytes',\n", " 'requests',\n", " 'matplotlib',\n", " 'huggingface_hub',\n", "], capture_output=True)\n", "\n", "# Clear stale cache\n", "for mod in list(sys.modules.keys()):\n", " if any(x in mod for x in ['trl','unsloth','transformers','peft']):\n", " del sys.modules[mod]\n", "\n", "# Verify\n", "import unsloth\n", "from unsloth import FastLanguageModel\n", "import transformers, peft, torch\n", "from trl import GRPOConfig\n", "\n", "print(f'✅ unsloth {unsloth.__version__}')\n", "print(f'✅ transformers {transformers.__version__}')\n", "print(f'✅ torch {torch.__version__} | CUDA: {torch.cuda.is_available()}')\n", "print(f'✅ UNSLOTH_RETURN_LOGITS = {os.environ[\"UNSLOTH_RETURN_LOGITS\"]}')\n", "print('✅ All good — proceed')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.588073Z", "iopub.status.idle": "2026-04-21T02:58:40.588446Z", "shell.execute_reply": "2026-04-21T02:58:40.588323Z", "shell.execute_reply.started": "2026-04-21T02:58:40.588295Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 2: Authenticate HuggingFace ─────────────────────────────────────────\n", "import os\n", "from kaggle_secrets import UserSecretsClient\n", "\n", "try:\n", " secrets = UserSecretsClient()\n", " hf_token = secrets.get_secret('HF_TOKEN')\n", " os.environ['HF_TOKEN'] = hf_token\n", " print('✅ HF token loaded from Kaggle secrets')\n", "except Exception as e:\n", " # Fallback: paste token directly (remove before sharing notebook)\n", " hf_token = 'YOUR_HF_WRITE_TOKEN_HERE'\n", " os.environ['HF_TOKEN'] = hf_token\n", " print(f'⚠️ Using hardcoded token (Kaggle secret not found: {e})')\n", "\n", "from huggingface_hub import login\n", "login(token=hf_token, add_to_git_credential=False)\n", "print('✅ Logged in to HuggingFace Hub')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.590400Z", "iopub.status.idle": "2026-04-21T02:58:40.590712Z", "shell.execute_reply": "2026-04-21T02:58:40.590603Z", "shell.execute_reply.started": "2026-04-21T02:58:40.590587Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 3: Config ────────────────────────────────────────────────────────────\n", "CONFIG = {\n", " 'model_name': 'unsloth/Llama-3.2-3B-Instruct',\n", " 'max_seq_length': 2048,\n", " 'load_in_4bit': True,\n", " 'env_url': 'https://arijit-07-devops-incident-response.hf.space',\n", " 'tasks': ['easy', 'medium'],\n", " 'episodes_per_task': 80,\n", " 'max_steps_per_episode': 12,\n", " 'learning_rate': 5e-6,\n", " 'grpo_group_size': 6,\n", " 'lora_rank': 16,\n", " 'lora_alpha': 32,\n", " 'kl_coeff': 0.05,\n", " 'hf_repo': 'Arijit-07/aria-devops-llama3b',\n", " 'output_dir': '/kaggle/working/aria-llama3b',\n", " 'save_every_n_episodes': 20,\n", "}\n", "print('✅ Config loaded')\n", "for k, v in CONFIG.items():\n", " print(f' {k}: {v}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.592114Z", "iopub.status.idle": "2026-04-21T02:58:40.592818Z", "shell.execute_reply": "2026-04-21T02:58:40.592688Z", "shell.execute_reply.started": "2026-04-21T02:58:40.592670Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 4: Environment Client ────────────────────────────────────────────────\n", "import requests\n", "import json\n", "import time\n", "\n", "BASE_URL = CONFIG['env_url']\n", "\n", "def env_reset(task_id: str, seed: int = None) -> dict:\n", " payload = {'task_id': task_id}\n", " if seed is not None:\n", " payload['seed'] = seed\n", " for attempt in range(3):\n", " try:\n", " r = requests.post(f'{BASE_URL}/reset', json=payload, timeout=30)\n", " r.raise_for_status()\n", " return r.json()\n", " except Exception as e:\n", " if attempt == 2:\n", " raise\n", " time.sleep(5)\n", "\n", "def env_step(action: dict) -> dict:\n", " for attempt in range(3):\n", " try:\n", " r = requests.post(f'{BASE_URL}/step', json=action, timeout=30)\n", " r.raise_for_status()\n", " return r.json()\n", " except Exception as e:\n", " if attempt == 2:\n", " raise\n", " time.sleep(5)\n", "\n", "def env_state() -> dict:\n", " r = requests.get(f'{BASE_URL}/state', timeout=30)\n", " r.raise_for_status()\n", " return r.json()\n", "\n", "# Test connection\n", "health = requests.get(f'{BASE_URL}/health', timeout=15).json()\n", "print(f'✅ Environment connected: {health}')\n", "\n", "# Test reset\n", "test_obs = env_reset('easy', seed=0)\n", "print(f'✅ Reset successful. Task: {test_obs.get(\"task_id\")}')\n", "print(f' Services: {len(test_obs.get(\"services\", []))}')\n", "print(f' Alerts: {len(test_obs.get(\"active_alerts\", []))}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.594722Z", "iopub.status.idle": "2026-04-21T02:58:40.595127Z", "shell.execute_reply": "2026-04-21T02:58:40.595000Z", "shell.execute_reply.started": "2026-04-21T02:58:40.594983Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 5: Observation → Text (what the LLM sees) ───────────────────────────\n", "def observation_to_prompt(obs: dict, task_id: str) -> str:\n", " \"\"\"Convert environment observation to LLM prompt text.\"\"\"\n", " lines = []\n", " lines.append('=== PRODUCTION INCIDENT RESPONSE ===')\n", " lines.append(f'Task: {task_id.upper()} | Step: {obs.get(\"step\", 0)}/{obs.get(\"max_steps\", 15)}')\n", " lines.append('')\n", "\n", " # SLA Status\n", " sla = obs.get('sla_status', {})\n", " if sla:\n", " lines.append('SLA STATUS:')\n", " for svc, status in sla.items():\n", " emoji = '🔴' if status == 'breached' else '🟡' if status == 'warning' else '🟢'\n", " lines.append(f' {emoji} {svc}: {status}')\n", " lines.append('')\n", "\n", " # Active Alerts\n", " alerts = obs.get('active_alerts', [])\n", " if alerts:\n", " lines.append('ACTIVE ALERTS:')\n", " for a in sorted(alerts, key=lambda x: x.get('severity',''), reverse=True):\n", " lines.append(f' [{a.get(\"severity\",\"\").upper()}] {a.get(\"service\",\"\")}: {a.get(\"message\",\"\")}')\n", " lines.append('')\n", "\n", " # Services\n", " services = obs.get('services', [])\n", " if services:\n", " lines.append('SERVICE METRICS:')\n", " for s in sorted(services, key=lambda x: x.get('error_rate', 0), reverse=True):\n", " lines.append(\n", " f' {s.get(\"name\",\"\"):30s} | status={s.get(\"status\",\"\"):10s} | '\n", " f'cpu={s.get(\"cpu\",0):5.1f}% | mem={s.get(\"memory\",0):5.1f}% | '\n", " f'err={s.get(\"error_rate\",0):.3f} | p99={s.get(\"latency_p99\",0):.0f}ms'\n", " )\n", " lines.append('')\n", "\n", " # Recent logs (partial — only 2 lines shown)\n", " logs = obs.get('recent_logs', {})\n", " if logs:\n", " lines.append('RECENT LOGS (partial — use read_logs for full history):')\n", " for svc, log_lines in list(logs.items())[:4]:\n", " for log_line in log_lines[:2]:\n", " lines.append(f' [{svc}] {log_line}')\n", " lines.append('')\n", "\n", " # Service dependencies\n", " deps = obs.get('service_dependencies', [])\n", " if deps:\n", " lines.append('SERVICE DEPENDENCIES:')\n", " for d in deps[:6]:\n", " lines.append(f' {d.get(\"service\",\"\")} → calls → {d.get(\"depends_on\",\"\")}')\n", " lines.append('')\n", "\n", " # Evidence log\n", " evidence = obs.get('evidence_log', [])\n", " if evidence:\n", " lines.append('EVIDENCE GATHERED THIS EPISODE:')\n", " for e in evidence[-5:]: # last 5 evidence entries\n", " lines.append(f' [{e.get(\"action_type\",\"\").upper()}] {e.get(\"content\",\"\")[:150]}')\n", " lines.append('')\n", "\n", " # Last result\n", " if obs.get('last_action_result'):\n", " lines.append(f'LAST ACTION RESULT: {obs[\"last_action_result\"][:200]}')\n", " lines.append('')\n", "\n", " return '\\n'.join(lines)\n", "\n", "\n", "SYSTEM_PROMPT = \"\"\"You are an expert DevOps engineer responding to a production incident.\n", "Analyze the situation carefully and take the most appropriate action.\n", "\n", "Available actions (respond with EXACTLY one JSON object):\n", "- Read logs: {\"action_type\": \"read_logs\", \"service\": \"\"}\n", "- Search logs: {\"action_type\": \"search_logs\", \"service\": \"\", \"query\": \"\"}\n", "- Read metrics: {\"action_type\": \"read_metrics\", \"service\": \"\"}\n", "- Read runbook: {\"action_type\": \"read_runbook\", \"runbook\": \"\"}\n", "- Diagnose: {\"action_type\": \"diagnose\", \"root_cause\": \"\"}\n", "- Restart service: {\"action_type\": \"restart_service\", \"service\": \"\"}\n", "- Rollback: {\"action_type\": \"rollback\", \"service\": \"\", \"version\": \"previous\"}\n", "- Scale up: {\"action_type\": \"scale_up\", \"service\": \"\"}\n", "- Alert on-call: {\"action_type\": \"alert_oncall\", \"message\": \"\"}\n", "- Acknowledge alert: {\"action_type\": \"acknowledge\", \"alert_id\": \"\"}\n", "- Block IP range: {\"action_type\": \"block_ip_range\", \"ip_range\": \"\"}\n", "- Create index: {\"action_type\": \"create_index\", \"table\": \"\", \"column\": \"\"}\n", "- Failover: {\"action_type\": \"failover\", \"service\": \"\", \"target_region\": \"us-west-2\"}\n", "\n", "Strategy:\n", "1. First gather information (read_logs, read_metrics) before acting\n", "2. Diagnose before fixing\n", "3. Fix the ROOT CAUSE, not symptoms\n", "4. Do NOT restart healthy services — this causes penalties\n", "\n", "Respond with ONLY a valid JSON object. No explanation, no markdown.\"\"\"\n", "\n", "# Test prompt generation\n", "test_prompt = observation_to_prompt(test_obs, 'easy')\n", "print('Sample prompt (first 800 chars):')\n", "print(test_prompt[:800])\n", "print(f'\\nTotal prompt length: {len(test_prompt)} chars')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.596692Z", "iopub.status.idle": "2026-04-21T02:58:40.597089Z", "shell.execute_reply": "2026-04-21T02:58:40.596929Z", "shell.execute_reply.started": "2026-04-21T02:58:40.596906Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 6: Load Model with Unsloth ──────────────────────────────────────────\n", "from unsloth import FastLanguageModel\n", "import torch\n", "\n", "print(f'Loading {CONFIG[\"model_name\"]} with Unsloth...')\n", "print(f'GPU: {torch.cuda.get_device_name(0)}')\n", "print(f'VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name=CONFIG['model_name'],\n", " max_seq_length=CONFIG['max_seq_length'],\n", " dtype=None, # auto-detect\n", " load_in_4bit=CONFIG['load_in_4bit'],\n", " token=hf_token,\n", ")\n", "\n", "# Apply LoRA with Unsloth\n", "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r=CONFIG['lora_rank'],\n", " target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj',\n", " 'gate_proj', 'up_proj', 'down_proj'],\n", " lora_alpha=CONFIG['lora_alpha'],\n", " lora_dropout=0.05,\n", " bias='none',\n", " use_gradient_checkpointing='unsloth',\n", " random_state=42,\n", " use_rslora=False,\n", ")\n", "\n", "print(f'\\n✅ Model loaded and LoRA applied')\n", "print(f'Trainable params: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}')\n", "print(f'Total params: {sum(p.numel() for p in model.parameters()):,}')\n", "print(f'VRAM used: {torch.cuda.memory_allocated() / 1e9:.2f} GB')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.597793Z", "iopub.status.idle": "2026-04-21T02:58:40.598105Z", "shell.execute_reply": "2026-04-21T02:58:40.597990Z", "shell.execute_reply.started": "2026-04-21T02:58:40.597974Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 7: Episode Runner ────────────────────────────────────────────────────\n", "import re\n", "\n", "FastLanguageModel.for_inference(model)\n", "\n", "def parse_action(text: str) -> dict:\n", " \"\"\"Extract JSON action from LLM output. Returns noop on parse failure.\"\"\"\n", " text = text.strip()\n", " # Try to find JSON block\n", " patterns = [\n", " r'```json\\s*({.*?})\\s*```',\n", " r'```\\s*({.*?})\\s*```',\n", " r'({\\s*\"action_type\"[^}]+})',\n", " ]\n", " for pattern in patterns:\n", " match = re.search(pattern, text, re.DOTALL)\n", " if match:\n", " try:\n", " return json.loads(match.group(1))\n", " except:\n", " continue\n", " # Try raw JSON\n", " try:\n", " return json.loads(text)\n", " except:\n", " return {'action_type': 'noop'}\n", "\n", "\n", "def generate_action(obs: dict, task_id: str) -> tuple:\n", " \"\"\"Generate an action from the current observation using the LLM.\"\"\"\n", " user_content = observation_to_prompt(obs, task_id)\n", "\n", " messages = [\n", " {'role': 'system', 'content': SYSTEM_PROMPT},\n", " {'role': 'user', 'content': user_content}\n", " ]\n", "\n", " input_ids = tokenizer.apply_chat_template(\n", " messages,\n", " tokenize=True,\n", " add_generation_prompt=True,\n", " return_tensors='pt'\n", " ).to('cuda')\n", "\n", " with torch.no_grad():\n", " output = model.generate(\n", " input_ids,\n", " max_new_tokens=128,\n", " temperature=0.7,\n", " do_sample=True,\n", " pad_token_id=tokenizer.eos_token_id,\n", " )\n", "\n", " generated = tokenizer.decode(\n", " output[0][input_ids.shape[1]:],\n", " skip_special_tokens=True\n", " )\n", "\n", " action = parse_action(generated)\n", " return action, generated\n", "\n", "\n", "def run_episode(task_id: str, seed: int = None, verbose: bool = False) -> float:\n", " \"\"\"Run one episode and return the final reward score.\"\"\"\n", " obs = env_reset(task_id, seed=seed)\n", " total_reward = 0.0\n", " done = False\n", "\n", " for step in range(CONFIG['max_steps_per_episode']):\n", " if done:\n", " break\n", "\n", " action, raw_output = generate_action(obs, task_id)\n", "\n", " if verbose:\n", " print(f' Step {step+1}: {action.get(\"action_type\",\"?\")} '\n", " f'(service={action.get(\"service\",\"-\")})')\n", "\n", " result = env_step(action)\n", " total_reward += result.get('reward', 0.0)\n", " obs = result.get('observation', obs)\n", " done = result.get('done', False)\n", "\n", " # Get final graded score\n", " state = env_state()\n", " final_score = state.get('current_score', total_reward)\n", " return final_score\n", "\n", "\n", "# Test: run one episode before training\n", "print('Testing episode runner (1 episode, verbose)...')\n", "test_score = run_episode('easy', seed=99, verbose=True)\n", "print(f'\\n✅ Test episode score: {test_score:.3f}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.599189Z", "iopub.status.idle": "2026-04-21T02:58:40.599769Z", "shell.execute_reply": "2026-04-21T02:58:40.599606Z", "shell.execute_reply.started": "2026-04-21T02:58:40.599571Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 8: Pre-training Baseline ─────────────────────────────────────────────\n", "import random\n", "\n", "print('Running pre-training baseline (10 episodes per task)...')\n", "print('This is the BEFORE score — shows the untrained model.')\n", "print()\n", "\n", "baseline_scores = {}\n", "for task_id in CONFIG['tasks']:\n", " scores = []\n", " for i in range(10):\n", " seed = random.randint(0, 9999)\n", " score = run_episode(task_id, seed=seed)\n", " scores.append(score)\n", " print(f' [{task_id}] Episode {i+1}/10: {score:.3f}', end='\\r')\n", "\n", " avg = sum(scores) / len(scores)\n", " baseline_scores[task_id] = {'scores': scores, 'avg': avg}\n", " print(f' [{task_id}] Baseline avg: {avg:.3f} (min={min(scores):.3f}, max={max(scores):.3f})')\n", "\n", "print()\n", "print('Baseline summary:')\n", "for task_id, data in baseline_scores.items():\n", " print(f' {task_id}: {data[\"avg\"]:.3f}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.601001Z", "iopub.status.idle": "2026-04-21T02:58:40.601239Z", "shell.execute_reply": "2026-04-21T02:58:40.601139Z", "shell.execute_reply.started": "2026-04-21T02:58:40.601126Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 9: GRPO Setup ────────────────────────────────────────────────────────\n", "# Uses episode-level updates (not per-step) + KL penalty to prevent forgetting.\n", "from torch.optim import AdamW\n", "from transformers import get_cosine_schedule_with_warmup\n", "import copy, torch\n", "\n", "FastLanguageModel.for_training(model)\n", "\n", "# Frozen reference model for KL penalty\n", "ref_model = copy.deepcopy(model)\n", "for p in ref_model.parameters():\n", " p.requires_grad = False\n", "ref_model.eval()\n", "print('✅ Reference model frozen')\n", "\n", "total_episodes = CONFIG['episodes_per_task'] * len(CONFIG['tasks'])\n", "optimizer = AdamW(\n", " [p for p in model.parameters() if p.requires_grad],\n", " lr=CONFIG['learning_rate'], weight_decay=0.01\n", ")\n", "scheduler = get_cosine_schedule_with_warmup(\n", " optimizer,\n", " num_warmup_steps=max(1, total_episodes // 10),\n", " num_training_steps=total_episodes\n", ")\n", "\n", "\n", "def run_episode_collect(task_id, seed):\n", " \"\"\"\n", " KEY FIX: group completions are scored on FRESH env snapshots.\n", " Only the best action advances the main episode.\n", " This prevents reward gates from being burned by group generation.\n", " \"\"\"\n", " obs = env_reset(task_id, seed=seed)\n", " trajectory = []\n", " done = False\n", "\n", " FastLanguageModel.for_inference(model)\n", "\n", " for step in range(CONFIG['max_steps_per_episode']):\n", " if done:\n", " break\n", "\n", " messages = [\n", " {'role': 'system', 'content': SYSTEM_PROMPT},\n", " {'role': 'user', 'content': observation_to_prompt(obs, task_id)}\n", " ]\n", " input_ids = tokenizer.apply_chat_template(\n", " messages, tokenize=True, add_generation_prompt=True,\n", " return_tensors='pt'\n", " ).to('cuda')\n", "\n", " # Step 1: Generate all completions (no env calls yet)\n", " group_completions, group_texts = [], []\n", " for _ in range(CONFIG['grpo_group_size']):\n", " with torch.no_grad():\n", " out = model.generate(\n", " input_ids, max_new_tokens=128, temperature=0.9,\n", " do_sample=True, pad_token_id=tokenizer.eos_token_id,\n", " )\n", " gen_ids = out[0][input_ids.shape[1]:]\n", " group_completions.append(gen_ids)\n", " group_texts.append(tokenizer.decode(gen_ids, skip_special_tokens=True))\n", "\n", " # Step 2: Score each completion on a FRESH env snapshot\n", " # Each gets its own reset so reward gates are clean per completion\n", " group_rewards = []\n", " for gen_text in group_texts:\n", " action = parse_action(gen_text)\n", " try:\n", " env_reset(task_id, seed=seed) # fresh snapshot\n", " res = env_step(action)\n", " r = res.get('reward', 0.0)\n", " except:\n", " r = 0.0\n", " # Exploration bonus: non-noop gets +0.02 to bootstrap learning\n", " if action.get('action_type', 'noop') != 'noop':\n", " r += 0.02\n", " group_rewards.append(r)\n", "\n", " # Step 3: Advance MAIN episode with best action\n", " best_idx = group_rewards.index(max(group_rewards))\n", " best_action = parse_action(group_texts[best_idx])\n", " try:\n", " # Re-sync: reset to current state then step\n", " adv_res = env_step(best_action)\n", " obs = adv_res.get('observation', obs)\n", " done = adv_res.get('done', False)\n", " except:\n", " done = True\n", "\n", " trajectory.append({\n", " 'input_ids': input_ids,\n", " 'completions': group_completions,\n", " 'rewards': group_rewards,\n", " })\n", "\n", " try:\n", " state = env_state()\n", " final_score = state.get('current_score', 0.0)\n", " except:\n", " final_score = 0.0\n", "\n", " return trajectory, final_score\n", "\n", "\n", "def update_from_trajectory(trajectory):\n", " \"\"\"Single model update from full episode trajectory with KL penalty.\"\"\"\n", " if not trajectory:\n", " return 0.0\n", "\n", " FastLanguageModel.for_training(model)\n", " model.train()\n", " optimizer.zero_grad()\n", "\n", " total_loss = torch.tensor(0.0).to('cuda')\n", "\n", " for step_data in trajectory:\n", " input_ids = step_data['input_ids']\n", " completions = step_data['completions']\n", " rewards = step_data['rewards']\n", "\n", " rewards_t = torch.tensor(rewards, dtype=torch.float32)\n", " if rewards_t.std() > 1e-8:\n", " advantages = (rewards_t - rewards_t.mean()) / (rewards_t.std() + 1e-8)\n", " else:\n", " advantages = rewards_t - rewards_t.mean()\n", "\n", " best_idx = rewards.index(max(rewards))\n", " best_ids = completions[best_idx]\n", " best_adv = advantages[best_idx]\n", "\n", " full_ids = torch.cat([input_ids[0], best_ids]).unsqueeze(0)\n", " labels = full_ids.clone()\n", " labels[0, :input_ids.shape[1]] = -100\n", "\n", " outputs = model(full_ids, labels=labels)\n", " policy_loss = outputs.loss * (-best_adv)\n", "\n", " # KL penalty vs reference model\n", " with torch.no_grad():\n", " ref_out = ref_model(full_ids)\n", " ref_logits = ref_out.logits[:, input_ids.shape[1]-1:-1, :]\n", " pol_logits = outputs.logits[:, input_ids.shape[1]-1:-1, :]\n", " kl = torch.nn.functional.kl_div(\n", " torch.log_softmax(pol_logits, dim=-1),\n", " torch.softmax(ref_logits, dim=-1),\n", " reduction='batchmean'\n", " )\n", " total_loss = total_loss + policy_loss + CONFIG['kl_coeff'] * kl\n", "\n", " total_loss = total_loss / len(trajectory)\n", " total_loss.backward()\n", " torch.nn.utils.clip_grad_norm_(\n", " [p for p in model.parameters() if p.requires_grad], 0.5\n", " )\n", " optimizer.step()\n", " scheduler.step()\n", " return total_loss.item()\n", "\n", "\n", "print('✅ GRPO setup complete')\n", "print(f'Strategy: fresh env per completion → episode-level update')\n", "print(f'LR={CONFIG[\"learning_rate\"]} | KL={CONFIG[\"kl_coeff\"]} | Groups={CONFIG[\"grpo_group_size\"]}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.602529Z", "iopub.status.idle": "2026-04-21T02:58:40.602803Z", "shell.execute_reply": "2026-04-21T02:58:40.602689Z", "shell.execute_reply.started": "2026-04-21T02:58:40.602674Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 10: Training Loop ────────────────────────────────────────────────────\n", "import os, time, random, json\n", "\n", "os.makedirs(CONFIG['output_dir'], exist_ok=True)\n", "training_log = []\n", "episode_scores = {task: [] for task in CONFIG['tasks']}\n", "global_episode = 0\n", "start_time = time.time()\n", "\n", "print('=' * 60)\n", "print('ARIA GRPO TRAINING')\n", "print(f'LR={CONFIG[\"learning_rate\"]} | Groups={CONFIG[\"grpo_group_size\"]} | KL={CONFIG[\"kl_coeff\"]}')\n", "print('=' * 60)\n", "\n", "for task_id in CONFIG['tasks']:\n", " print(f'\\n📋 Task: {task_id.upper()} | Baseline: {baseline_scores[task_id][\"avg\"]:.3f}')\n", " print('-' * 40)\n", "\n", " for ep in range(CONFIG['episodes_per_task']):\n", " seed = random.randint(0, 9999)\n", "\n", " trajectory, final_score = run_episode_collect(task_id, seed)\n", " loss = update_from_trajectory(trajectory)\n", "\n", " episode_scores[task_id].append(final_score)\n", " global_episode += 1\n", " elapsed = (time.time() - start_time) / 60\n", " recent = episode_scores[task_id][-10:]\n", " rolling = sum(recent) / len(recent)\n", "\n", " training_log.append({\n", " 'episode': global_episode, 'task_id': task_id,\n", " 'score': final_score, 'rolling_avg': rolling,\n", " 'loss': loss, 'elapsed_min': round(elapsed, 1)\n", " })\n", "\n", " if (ep + 1) % 5 == 0:\n", " delta = rolling - baseline_scores[task_id]['avg']\n", " trend = '📈' if delta > 0.02 else '📉' if delta < -0.02 else '➡️'\n", " print(\n", " f' {trend} Ep {ep+1:3d}/{CONFIG[\"episodes_per_task\"]} | '\n", " f'Score: {final_score:.3f} | Roll-10: {rolling:.3f} | '\n", " f'vs baseline: {delta:+.3f} | Loss: {loss:.4f} | {elapsed:.0f}m'\n", " )\n", "\n", " if global_episode % CONFIG['save_every_n_episodes'] == 0:\n", " ckpt = f'{CONFIG[\"output_dir\"]}/checkpoint-ep{global_episode}'\n", " model.save_pretrained(ckpt)\n", " tokenizer.save_pretrained(ckpt)\n", " print(f' 💾 Checkpoint ep{global_episode}')\n", "\n", " task_avg = sum(episode_scores[task_id]) / len(episode_scores[task_id])\n", " base_avg = baseline_scores[task_id]['avg']\n", " delta = task_avg - base_avg\n", " result = '✅ IMPROVED' if delta > 0.02 else '⚠️ FLAT' if delta > -0.02 else '❌ DEGRADED'\n", " print(f'\\n{result} {task_id}: {base_avg:.3f} → {task_avg:.3f} ({delta:+.3f})')\n", "\n", "print(f'\\n🎉 Training complete! {(time.time()-start_time)/60:.0f} minutes')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.604209Z", "iopub.status.idle": "2026-04-21T02:58:40.604579Z", "shell.execute_reply": "2026-04-21T02:58:40.604412Z", "shell.execute_reply.started": "2026-04-21T02:58:40.604391Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 11: Post-Training Evaluation ────────────────────────────────────────\n", "FastLanguageModel.for_inference(model)\n", "\n", "print('Running post-training evaluation (10 episodes per task)...')\n", "post_scores = {}\n", "\n", "for task_id in CONFIG['tasks']:\n", " scores = []\n", " for i in range(10):\n", " seed = random.randint(10000, 19999) # unseen seeds\n", " score = run_episode(task_id, seed=seed)\n", " scores.append(score)\n", "\n", " avg = sum(scores) / len(scores)\n", " post_scores[task_id] = {'scores': scores, 'avg': avg}\n", " improvement = avg - baseline_scores[task_id]['avg']\n", " print(f' [{task_id}] Post-training avg: {avg:.3f} '\n", " f'(baseline: {baseline_scores[task_id][\"avg\"]:.3f}, '\n", " f'improvement: +{improvement:.3f})')\n", "\n", "# Test generalization on unseen tasks\n", "print('\\nTesting generalization on UNSEEN tasks...')\n", "unseen_tasks = ['hard', 'bonus']\n", "generalization_scores = {}\n", "for task_id in unseen_tasks:\n", " scores = []\n", " for i in range(5):\n", " seed = random.randint(0, 9999)\n", " try:\n", " score = run_episode(task_id, seed=seed)\n", " scores.append(score)\n", " except:\n", " scores.append(0.0)\n", " avg = sum(scores) / len(scores)\n", " generalization_scores[task_id] = avg\n", " print(f' [{task_id}] Generalization avg: {avg:.3f}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.605361Z", "iopub.status.idle": "2026-04-21T02:58:40.605579Z", "shell.execute_reply": "2026-04-21T02:58:40.605486Z", "shell.execute_reply.started": "2026-04-21T02:58:40.605473Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 12: Learning Curve Visualization ────────────────────────────────────\n", "import matplotlib.pyplot as plt\n", "import matplotlib.patches as mpatches\n", "import numpy as np\n", "\n", "fig, axes = plt.subplots(1, 3, figsize=(18, 6))\n", "fig.patch.set_facecolor('#0d1117')\n", "\n", "COLORS = {'easy': '#4caf50', 'medium': '#ff9800', 'hard': '#f44336', 'bonus': '#9c27b0'}\n", "\n", "# Plot 1: Training reward curves\n", "ax1 = axes[0]\n", "ax1.set_facecolor('#161b22')\n", "ax1.set_title('GRPO Training Reward Curves', color='white', fontsize=13, fontweight='bold')\n", "\n", "for task_id in CONFIG['tasks']:\n", " task_log = [e for e in training_log if e['task_id'] == task_id]\n", " episodes = [e['episode'] for e in task_log]\n", " scores = [e['score'] for e in task_log]\n", "\n", " # Smooth with rolling average\n", " window = 5\n", " smoothed = np.convolve(scores, np.ones(window)/window, mode='valid')\n", " ep_smooth = episodes[window-1:]\n", "\n", " color = COLORS.get(task_id, '#58a6ff')\n", " ax1.plot(episodes, scores, alpha=0.2, color=color, linewidth=1)\n", " ax1.plot(ep_smooth, smoothed, color=color, linewidth=2.5,\n", " label=f'{task_id} (smoothed)')\n", "\n", "ax1.set_xlabel('Episode', color='#8b949e')\n", "ax1.set_ylabel('Reward Score', color='#8b949e')\n", "ax1.tick_params(colors='#8b949e')\n", "ax1.spines['bottom'].set_color('#30363d')\n", "ax1.spines['left'].set_color('#30363d')\n", "ax1.spines['top'].set_visible(False)\n", "ax1.spines['right'].set_visible(False)\n", "ax1.legend(facecolor='#161b22', labelcolor='white', fontsize=10)\n", "ax1.set_ylim(0, 1.05)\n", "ax1.grid(True, alpha=0.1, color='#30363d')\n", "\n", "# Plot 2: Before vs After bar chart\n", "ax2 = axes[1]\n", "ax2.set_facecolor('#161b22')\n", "ax2.set_title('Before vs After Training', color='white', fontsize=13, fontweight='bold')\n", "\n", "all_tasks = CONFIG['tasks']\n", "x = np.arange(len(all_tasks))\n", "width = 0.35\n", "\n", "before_vals = [baseline_scores[t]['avg'] for t in all_tasks]\n", "after_vals = [post_scores[t]['avg'] for t in all_tasks]\n", "\n", "bars1 = ax2.bar(x - width/2, before_vals, width, label='Before Training',\n", " color='#f85149', alpha=0.8, edgecolor='none')\n", "bars2 = ax2.bar(x + width/2, after_vals, width, label='After Training',\n", " color='#3fb950', alpha=0.8, edgecolor='none')\n", "\n", "for bar, val in zip(bars1, before_vals):\n", " ax2.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.01,\n", " f'{val:.2f}', ha='center', va='bottom', color='white', fontsize=9)\n", "for bar, val in zip(bars2, after_vals):\n", " ax2.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.01,\n", " f'{val:.2f}', ha='center', va='bottom', color='white', fontsize=9)\n", "\n", "ax2.set_xticks(x)\n", "ax2.set_xticklabels(all_tasks, color='#8b949e')\n", "ax2.tick_params(colors='#8b949e')\n", "ax2.spines['bottom'].set_color('#30363d')\n", "ax2.spines['left'].set_color('#30363d')\n", "ax2.spines['top'].set_visible(False)\n", "ax2.spines['right'].set_visible(False)\n", "ax2.legend(facecolor='#161b22', labelcolor='white', fontsize=10)\n", "ax2.set_ylim(0, 1.1)\n", "ax2.set_ylabel('Average Score', color='#8b949e')\n", "ax2.grid(True, alpha=0.1, color='#30363d', axis='y')\n", "\n", "# Plot 3: Summary stats\n", "ax3 = axes[2]\n", "ax3.set_facecolor('#161b22')\n", "ax3.set_title('Training Summary', color='white', fontsize=13, fontweight='bold')\n", "ax3.axis('off')\n", "\n", "summary_lines = [\n", " ('Model', CONFIG['model_name'].split('/')[-1]),\n", " ('Algorithm', 'GRPO (Group Relative PO)'),\n", " ('Fine-tuning', 'Unsloth LoRA 4-bit'),\n", " ('Total Episodes', str(global_episode)),\n", " ('', ''),\n", "]\n", "for task_id in CONFIG['tasks']:\n", " before = baseline_scores[task_id]['avg']\n", " after = post_scores[task_id]['avg']\n", " summary_lines.append((f'{task_id} improvement',\n", " f'{before:.2f} → {after:.2f} (+{after-before:.2f})'))\n", "\n", "if generalization_scores:\n", " summary_lines.append(('', ''))\n", " summary_lines.append(('Generalization', ''))\n", " for task_id, score in generalization_scores.items():\n", " summary_lines.append((f' {task_id} (unseen)', f'{score:.2f}'))\n", "\n", "y_pos = 0.95\n", "for label, value in summary_lines:\n", " if label == '':\n", " y_pos -= 0.05\n", " continue\n", " ax3.text(0.05, y_pos, label + ':', color='#8b949e', fontsize=10,\n", " transform=ax3.transAxes, fontweight='bold')\n", " ax3.text(0.55, y_pos, value, color='#c9d1d9', fontsize=10,\n", " transform=ax3.transAxes)\n", " y_pos -= 0.08\n", "\n", "plt.tight_layout()\n", "plt.savefig('training_curve.png', dpi=150, bbox_inches='tight',\n", " facecolor='#0d1117')\n", "print('✅ Saved training_curve.png')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2026-04-21T02:58:40.606682Z", "iopub.status.idle": "2026-04-21T02:58:40.607021Z", "shell.execute_reply": "2026-04-21T02:58:40.606854Z", "shell.execute_reply.started": "2026-04-21T02:58:40.606840Z" }, "trusted": true }, "outputs": [], "source": [ "# ── Cell 13: Save Weights to HuggingFace Hub ─────────────────────────────────\n", "from huggingface_hub import HfApi\n", "\n", "print(f'Saving model to HuggingFace Hub: {CONFIG[\"hf_repo\"]}')\n", "print('This may take 5-10 minutes...')\n", "\n", "# Save merged model (LoRA merged into base)\n", "model.save_pretrained_merged(\n", " CONFIG['output_dir'],\n", " tokenizer,\n", " save_method='merged_16bit',\n", ")\n", "\n", "# Push to Hub\n", "model.push_to_hub_merged(\n", " CONFIG['hf_repo'],\n", " tokenizer,\n", " save_method='merged_16bit',\n", " token=hf_token,\n", ")\n", "\n", "print(f'\\n✅ Model pushed to: https://huggingface.co/{CONFIG[\"hf_repo\"]}')\n", "\n", "# Also push training curve\n", "api = HfApi()\n", "api.upload_file(\n", " path_or_fileobj='training_curve.png',\n", " path_in_repo='training_curve.png',\n", " repo_id=CONFIG['hf_repo'],\n", " token=hf_token,\n", ")\n", "print('✅ training_curve.png uploaded to Hub')\n", "\n", "# Save training log as JSON\n", "import json\n", "with open('training_log.json', 'w') as f:\n", " json.dump(training_log, f, indent=2)\n", "api.upload_file(\n", " path_or_fileobj='training_log.json',\n", " path_in_repo='training_log.json',\n", " repo_id=CONFIG['hf_repo'],\n", " token=hf_token,\n", ")\n", "print('✅ training_log.json uploaded')\n", "print(f'\\n🎉 Everything saved. Your fine-tuned model is live at:')\n", "print(f' https://huggingface.co/{CONFIG[\"hf_repo\"]}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Results Summary\n", "\n", "| Metric | Value |\n", "|--------|-------|\n", "| Model | Llama-3.2-3B-Instruct (Unsloth 4-bit LoRA) |\n", "| Algorithm | GRPO — episode-level updates + KL penalty |\n", "| Tasks trained | easy, medium |\n", "| Total episodes | 160 |\n", "| Key fix | Fresh env per group completion — reward gates not burned |\n", "| Weights | `https://huggingface.co/Arijit-07/aria-devops-llama3b` |" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✅ Config set\n", "Checkpoint: D:\\My Projects\\devops-incident-env\\kaggle_training\\results\\aria-llama3b\\checkpoint-ep140\n", "Exists: True\n" ] } ], "source": [ "import os\n", "os.environ['UNSLOTH_RETURN_LOGITS'] = '1'\n", "\n", "CHECKPOINT = r'D:\\My Projects\\devops-incident-env\\kaggle_training\\results\\aria-llama3b\\checkpoint-ep140'\n", "HF_TOKEN = 'YOUR_HF_WRITE_TOKEN_HERE' # ← paste your write token\n", "HF_REPO = 'Arijit-07/aria-devops-llama3b'\n", "BASE_URL = 'https://arijit-07-devops-incident-response.hf.space'\n", "\n", "print('✅ Config set')\n", "print(f'Checkpoint: {CHECKPOINT}')\n", "print(f'Exists: {os.path.exists(CHECKPOINT)}')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "NotImplementedError", "evalue": "Unsloth cannot find any torch accelerator? You need a GPU.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNotImplementedError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[2], line 9\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01msubprocess\u001b[39;00m\n\u001b[0;32m 2\u001b[0m subprocess\u001b[38;5;241m.\u001b[39mrun([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpip\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124minstall\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m-q\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 3\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124munsloth\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 4\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtransformers>=4.48.0\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 5\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpeft\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124maccelerate\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbitsandbytes\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 6\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrequests\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmatplotlib\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhuggingface_hub\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtorch\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 7\u001b[0m ], capture_output\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m----> 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01munsloth\u001b[39;00m\u001b[38;5;241m,\u001b[39m \u001b[38;5;21;01mtransformers\u001b[39;00m\u001b[38;5;241m,\u001b[39m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[0;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m✅ unsloth \u001b[39m\u001b[38;5;132;01m{\u001b[39;00munsloth\u001b[38;5;241m.\u001b[39m__version__\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m✅ transformers \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtransformers\u001b[38;5;241m.\u001b[39m__version__\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\unsloth\\__init__.py:105\u001b[0m\n\u001b[0;32m 93\u001b[0m \u001b[38;5;28mprint\u001b[39m(\n\u001b[0;32m 94\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 95\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDo this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 96\u001b[0m )\n\u001b[0;32m 97\u001b[0m \u001b[38;5;66;03m# if os.environ.get(\"UNSLOTH_DISABLE_AUTO_UPDATES\", \"0\") == \"0\":\u001b[39;00m\n\u001b[0;32m 98\u001b[0m \u001b[38;5;66;03m# try:\u001b[39;00m\n\u001b[0;32m 99\u001b[0m \u001b[38;5;66;03m# os.system(\"pip install --upgrade --no-cache-dir --no-deps unsloth_zoo\")\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 103\u001b[0m \u001b[38;5;66;03m# except:\u001b[39;00m\n\u001b[0;32m 104\u001b[0m \u001b[38;5;66;03m# raise ImportError(\"Unsloth: Please update unsloth_zoo via `pip install --upgrade --no-cache-dir --no-deps unsloth_zoo`\")\u001b[39;00m\n\u001b[1;32m--> 105\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01munsloth_zoo\u001b[39;00m\n\u001b[0;32m 106\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m PackageNotFoundError:\n\u001b[0;32m 107\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[0;32m 108\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsloth: Please install unsloth_zoo via `pip install unsloth_zoo` then retry!\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 109\u001b[0m )\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\unsloth_zoo\\__init__.py:220\u001b[0m\n\u001b[0;32m 217\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m logging, torchao_logger, HideLoggingMessage\n\u001b[0;32m 219\u001b[0m \u001b[38;5;66;03m# Get device types and other variables\u001b[39;00m\n\u001b[1;32m--> 220\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdevice_type\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 221\u001b[0m is_hip,\n\u001b[0;32m 222\u001b[0m get_device_type,\n\u001b[0;32m 223\u001b[0m DEVICE_TYPE,\n\u001b[0;32m 224\u001b[0m DEVICE_TYPE_TORCH,\n\u001b[0;32m 225\u001b[0m DEVICE_COUNT,\n\u001b[0;32m 226\u001b[0m ALLOW_PREQUANTIZED_MODELS,\n\u001b[0;32m 227\u001b[0m )\n\u001b[0;32m 228\u001b[0m IS_HIP_RUNTIME \u001b[38;5;241m=\u001b[39m (DEVICE_TYPE \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhip\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mbool\u001b[39m(is_hip())\n\u001b[0;32m 230\u001b[0m \u001b[38;5;66;03m# Torch >= 2.9 uses PYTORCH_ALLOC_CONF and treats legacy per-backend vars as deprecated.\u001b[39;00m\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\unsloth_zoo\\device_type.py:231\u001b[0m\n\u001b[0;32m 229\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsloth currently only works on NVIDIA, AMD and Intel GPUs.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 230\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[1;32m--> 231\u001b[0m DEVICE_TYPE : \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m get_device_type()\n\u001b[0;32m 232\u001b[0m \u001b[38;5;66;03m# HIP fails for autocast and other torch functions. Use CUDA instead\u001b[39;00m\n\u001b[0;32m 233\u001b[0m DEVICE_TYPE_TORCH \u001b[38;5;241m=\u001b[39m DEVICE_TYPE\n", "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\unsloth_zoo\\device_type.py:218\u001b[0m, in \u001b[0;36mget_device_type\u001b[1;34m()\u001b[0m\n\u001b[0;32m 216\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m amd_hint \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 217\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(amd_hint)\n\u001b[1;32m--> 218\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsloth cannot find any torch accelerator? You need a GPU.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 219\u001b[0m accelerator \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(torch\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39mcurrent_accelerator())\n\u001b[0;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m accelerator \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mxpu\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhip\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n", "\u001b[1;31mNotImplementedError\u001b[0m: Unsloth cannot find any torch accelerator? You need a GPU." ] } ], "source": [ "import subprocess\n", "subprocess.run(['pip', 'install', '-q',\n", " 'unsloth',\n", " 'transformers>=4.48.0',\n", " 'peft', 'accelerate', 'bitsandbytes',\n", " 'requests', 'matplotlib', 'huggingface_hub', 'torch'\n", "], capture_output=True)\n", "\n", "import unsloth, transformers, torch\n", "print(f'✅ unsloth {unsloth.__version__}')\n", "print(f'✅ transformers {transformers.__version__}')\n", "print(f'✅ torch {torch.__version__} | CUDA: {torch.cuda.is_available()}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from unsloth import FastLanguageModel\n", "\n", "print('Loading fine-tuned checkpoint...')\n", "ft_model, ft_tokenizer = FastLanguageModel.from_pretrained(\n", " model_name=CHECKPOINT,\n", " max_seq_length=2048,\n", " load_in_4bit=True,\n", ")\n", "FastLanguageModel.for_inference(ft_model)\n", "print('✅ Fine-tuned model loaded')\n", "print(f'VRAM used: {torch.cuda.memory_allocated()/1e9:.2f} GB')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print('Loading base model for comparison...')\n", "base_model, base_tokenizer = FastLanguageModel.from_pretrained(\n", " model_name='unsloth/Llama-3.2-3B-Instruct',\n", " max_seq_length=2048,\n", " load_in_4bit=True,\n", ")\n", "FastLanguageModel.for_inference(base_model)\n", "print('✅ Base model loaded')\n", "print(f'VRAM used: {torch.cuda.memory_allocated()/1e9:.2f} GB')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print('Loading base model for comparison...')\n", "base_model, base_tokenizer = FastLanguageModel.from_pretrained(\n", " model_name='unsloth/Llama-3.2-3B-Instruct',\n", " max_seq_length=2048,\n", " load_in_4bit=True,\n", ")\n", "FastLanguageModel.for_inference(base_model)\n", "print('✅ Base model loaded')\n", "print(f'VRAM used: {torch.cuda.memory_allocated()/1e9:.2f} GB')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import requests, json, re, random\n", "\n", "def env_reset(task_id, seed=None):\n", " payload = {'task_id': task_id}\n", " if seed is not None: payload['seed'] = seed\n", " for attempt in range(3):\n", " try:\n", " r = requests.post(f'{BASE_URL}/reset', json=payload, timeout=30)\n", " r.raise_for_status()\n", " return r.json()\n", " except:\n", " if attempt == 2: raise\n", " import time; time.sleep(5)\n", "\n", "def env_step(action):\n", " for attempt in range(3):\n", " try:\n", " r = requests.post(f'{BASE_URL}/step', json=action, timeout=30)\n", " r.raise_for_status()\n", " return r.json()\n", " except:\n", " if attempt == 2: raise\n", " import time; time.sleep(5)\n", "\n", "def env_state():\n", " r = requests.get(f'{BASE_URL}/state', timeout=30)\n", " r.raise_for_status()\n", " return r.json()\n", "\n", "SYSTEM_PROMPT = \"\"\"You are an expert DevOps engineer responding to a production incident.\n", "Respond with ONLY a valid JSON action object. No explanation, no markdown.\n", "Available actions:\n", "- {\"action_type\": \"read_logs\", \"service\": \"\"}\n", "- {\"action_type\": \"read_metrics\", \"service\": \"\"}\n", "- {\"action_type\": \"search_logs\", \"service\": \"\", \"query\": \"\"}\n", "- {\"action_type\": \"diagnose\", \"root_cause\": \"\"}\n", "- {\"action_type\": \"restart_service\", \"service\": \"\"}\n", "- {\"action_type\": \"rollback\", \"service\": \"\", \"version\": \"previous\"}\n", "- {\"action_type\": \"alert_oncall\", \"message\": \"\"}\n", "- {\"action_type\": \"noop\"}\"\"\"\n", "\n", "def obs_to_text(obs, task_id):\n", " lines = [f'=== INCIDENT | Task: {task_id.upper()} | Step: {obs.get(\"step\",0)}/{obs.get(\"max_steps\",15)} ===', '']\n", " for a in sorted(obs.get('active_alerts', []),\n", " key=lambda x: x.get('severity',''), reverse=True):\n", " lines.append(f'ALERT [{a.get(\"severity\",\"\").upper()}] {a.get(\"service\",\"\")}: {a.get(\"message\",\"\")}')\n", " lines.append('')\n", " for s in sorted(obs.get('services', []),\n", " key=lambda x: x.get('error_rate',0), reverse=True):\n", " lines.append(\n", " f'SERVICE {s.get(\"name\",\"\"):28s} | {s.get(\"status\",\"\"):10s} | '\n", " f'err={s.get(\"error_rate\",0):.3f} | mem={s.get(\"memory\",0):.1f}%'\n", " )\n", " evidence = obs.get('evidence_log', [])\n", " if evidence:\n", " lines.append('')\n", " lines.append('EVIDENCE:')\n", " for e in evidence[-3:]:\n", " lines.append(f' [{e.get(\"action_type\",\"\").upper()}] {e.get(\"content\",\"\")[:150]}')\n", " return '\\n'.join(lines)\n", "\n", "def parse_action(text):\n", " text = text.strip()\n", " for pat in [\n", " r'```json\\s*({.*?})\\s*```',\n", " r'```\\s*({.*?})\\s*```',\n", " r'({\\s*\"action_type\"[^}]+})',\n", " ]:\n", " m = re.search(pat, text, re.DOTALL)\n", " if m:\n", " try: return json.loads(m.group(1))\n", " except: continue\n", " try: return json.loads(text)\n", " except: return {'action_type': 'noop'}\n", "\n", "def run_episode(m, tok, task_id, seed, verbose=False):\n", " obs = env_reset(task_id, seed=seed)\n", " done = False\n", " for step in range(15):\n", " if done: break\n", " msgs = [\n", " {'role': 'system', 'content': SYSTEM_PROMPT},\n", " {'role': 'user', 'content': obs_to_text(obs, task_id)}\n", " ]\n", " ids = tok.apply_chat_template(\n", " msgs, tokenize=True, add_generation_prompt=True,\n", " return_tensors='pt'\n", " ).to('cuda')\n", " with torch.no_grad():\n", " out = m.generate(\n", " ids, max_new_tokens=100, temperature=0.3,\n", " do_sample=True, pad_token_id=tok.eos_token_id,\n", " )\n", " text = tok.decode(out[0][ids.shape[1]:], skip_special_tokens=True)\n", " action = parse_action(text)\n", " if verbose:\n", " print(f' Step {step+1}: {action}')\n", " result = env_step(action)\n", " obs = result.get('observation', obs)\n", " done = result.get('done', False)\n", " return env_state().get('current_score', 0.0)\n", "\n", "# Test connection\n", "health = requests.get(f'{BASE_URL}/health', timeout=15).json()\n", "print(f'✅ Environment: {health}')\n", "print('✅ All helpers ready')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "SEEDS = [50001, 50008, 50015, 50022, 50029,\n", " 50036, 50043, 50050, 50057, 50064]\n", "\n", "results = {}\n", "print('Running evaluation — 10 episodes per model per task')\n", "print('='*60)\n", "\n", "for task_id in ['easy', 'medium']:\n", " print(f'\\nTask: {task_id.upper()}')\n", " print('-'*40)\n", " base_scores, ft_scores = [], []\n", "\n", " for seed in SEEDS:\n", " bs = run_episode(base_model, base_tokenizer, task_id, seed)\n", " fs = run_episode(ft_model, ft_tokenizer, task_id, seed)\n", " base_scores.append(bs)\n", " ft_scores.append(fs)\n", " print(f' seed={seed} | base={bs:.3f} | fine-tuned={fs:.3f} | Δ={fs-bs:+.3f}')\n", "\n", " base_avg = sum(base_scores)/len(base_scores)\n", " ft_avg = sum(ft_scores)/len(ft_scores)\n", " delta = ft_avg - base_avg\n", " results[task_id] = {\n", " 'base_scores': base_scores,\n", " 'ft_scores': ft_scores,\n", " 'base_avg': base_avg,\n", " 'ft_avg': ft_avg,\n", " 'delta': delta\n", " }\n", " symbol = '✅ IMPROVED' if delta > 0.02 else '⚠️ FLAT' if delta > -0.02 else '❌ DEGRADED'\n", " print(f'\\n{symbol}')\n", " print(f' Base avg: {base_avg:.3f}')\n", " print(f' Fine-tuned avg: {ft_avg:.3f}')\n", " print(f' Improvement: {delta:+.3f}')\n", "\n", "print('\\n' + '='*60)\n", "print('FINAL RESULTS')\n", "print('='*60)\n", "for task_id, r in results.items():\n", " print(f'{task_id}: {r[\"base_avg\"]:.3f} → {r[\"ft_avg\"]:.3f} ({r[\"delta\"]:+.3f})')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "fig, axes = plt.subplots(1, 2, figsize=(16, 6))\n", "fig.patch.set_facecolor('#0d1117')\n", "\n", "COLORS = {'base': '#f85149', 'ft': '#3fb950'}\n", "\n", "for idx, task_id in enumerate(['easy', 'medium']):\n", " ax = axes[idx]\n", " ax.set_facecolor('#161b22')\n", " r = results[task_id]\n", "\n", " x = np.arange(len(SEEDS))\n", " w = 0.35\n", " b = ax.bar(x - w/2, r['base_scores'], w,\n", " label='Base model', color='#f85149', alpha=0.85)\n", " f = ax.bar(x + w/2, r['ft_scores'], w,\n", " label='Fine-tuned (ep140)', color='#3fb950', alpha=0.85)\n", "\n", " # Value labels\n", " for bar in b:\n", " h = bar.get_height()\n", " ax.text(bar.get_x()+bar.get_width()/2., h+0.01,\n", " f'{h:.2f}', ha='center', color='white', fontsize=7)\n", " for bar in f:\n", " h = bar.get_height()\n", " ax.text(bar.get_x()+bar.get_width()/2., h+0.01,\n", " f'{h:.2f}', ha='center', color='white', fontsize=7)\n", "\n", " # Avg lines\n", " ax.axhline(y=r['base_avg'], color='#f85149', linestyle='--',\n", " linewidth=1.5, alpha=0.6, label=f'Base avg: {r[\"base_avg\"]:.3f}')\n", " ax.axhline(y=r['ft_avg'], color='#3fb950', linestyle='--',\n", " linewidth=1.5, alpha=0.6, label=f'FT avg: {r[\"ft_avg\"]:.3f}')\n", "\n", " ax.set_title(\n", " f'Task: {task_id.upper()} | Improvement: {r[\"delta\"]:+.3f}',\n", " color='white', fontsize=13, fontweight='bold'\n", " )\n", " ax.set_xticks(x)\n", " ax.set_xticklabels([f's{i+1}' for i in range(len(SEEDS))],\n", " color='#8b949e', fontsize=8)\n", " ax.tick_params(colors='#8b949e')\n", " for spine in ax.spines.values(): spine.set_color('#30363d')\n", " ax.spines['top'].set_visible(False)\n", " ax.spines['right'].set_visible(False)\n", " ax.set_ylim(0, 1.15)\n", " ax.set_ylabel('Score', color='#8b949e')\n", " ax.set_xlabel('Episode (unseen seeds)', color='#8b949e')\n", " ax.legend(facecolor='#161b22', labelcolor='white', fontsize=9)\n", " ax.grid(True, alpha=0.1, color='#30363d', axis='y')\n", "\n", "fig.suptitle(\n", " 'ARIA — GRPO Fine-tuning Results\\nLlama-3.2-3B | 140 episodes | easy + medium tasks',\n", " color='white', fontsize=14, fontweight='bold', y=1.02\n", ")\n", "plt.tight_layout()\n", "plt.savefig('training_curve.png', dpi=150, bbox_inches='tight',\n", " facecolor='#0d1117')\n", "print('✅ Saved training_curve.png')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from huggingface_hub import HfApi, login\n", "import os\n", "\n", "login(token=HF_TOKEN)\n", "api = HfApi()\n", "\n", "# Create repo\n", "api.create_repo(\n", " repo_id=HF_REPO,\n", " repo_type='model',\n", " exist_ok=True,\n", " token=HF_TOKEN,\n", ")\n", "print(f'✅ Repo ready: {HF_REPO}')\n", "\n", "# Upload checkpoint files\n", "print('Uploading adapter weights...')\n", "for filename in os.listdir(CHECKPOINT):\n", " filepath = os.path.join(CHECKPOINT, filename)\n", " if os.path.isfile(filepath):\n", " api.upload_file(\n", " path_or_fileobj=filepath,\n", " path_in_repo=filename,\n", " repo_id=HF_REPO,\n", " token=HF_TOKEN,\n", " )\n", " print(f' ✅ {filename}')\n", "\n", "# Upload training curve\n", "api.upload_file(\n", " path_or_fileobj='training_curve.png',\n", " path_in_repo='training_curve.png',\n", " repo_id=HF_REPO,\n", " token=HF_TOKEN,\n", ")\n", "print(' ✅ training_curve.png')\n", "\n", "print(f'\\n🎉 Everything live at: https://huggingface.co/{HF_REPO}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "easy_r = results['easy']\n", "medium_r = results['medium']\n", "\n", "model_card = f\"\"\"---\n", "base_model: unsloth/Llama-3.2-3B-Instruct\n", "library_name: peft\n", "pipeline_tag: text-generation\n", "tags:\n", "- lora\n", "- unsloth\n", "- grpo\n", "- reinforcement-learning\n", "- devops\n", "- incident-response\n", "---\n", "\n", "# ARIA — DevOps Incident Response Agent\n", "## Llama-3.2-3B fine-tuned with GRPO\n", "\n", "Fine-tuned on the [ARIA DevOps Incident Response](https://huggingface.co/spaces/Arijit-07/devops-incident-response) \n", "RL environment using Group Relative Policy Optimization (GRPO).\n", "\n", "## Training\n", "\n", "- **Algorithm:** GRPO (Group Relative Policy Optimization)\n", "- **Base model:** Llama-3.2-3B-Instruct\n", "- **Fine-tuning:** Unsloth LoRA (rank=16, alpha=32, 4-bit quantized)\n", "- **Episodes:** 140 (easy + medium tasks)\n", "- **Environment:** Live DevOps incident response simulation\n", "\n", "## Results (10 unseen episodes per task)\n", "\n", "| Task | Base Model | Fine-tuned | Improvement |\n", "|------|-----------|------------|-------------|\n", "| easy | {easy_r['base_avg']:.3f} | {easy_r['ft_avg']:.3f} | {easy_r['delta']:+.3f} |\n", "| medium | {medium_r['base_avg']:.3f} | {medium_r['ft_avg']:.3f} | {medium_r['delta']:+.3f} |\n", "\n", "## Environment\n", "\n", "The agent learns to diagnose and fix production incidents:\n", "- 7 task types: OOM crashes, cascading failures, silent corruption, DDoS, DB degradation, multi-region failover\n", "- 14 action types including read_logs, diagnose, restart_service, rollback\n", "- Dense reward shaping with collateral damage penalties\n", "- Partial log observability — agents must learn to query\n", "\n", "## Usage\n", "\n", "```python\n", "from peft import PeftModel\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "\n", "base = AutoModelForCausalLM.from_pretrained(\"unsloth/Llama-3.2-3B-Instruct\")\n", "model = PeftModel.from_pretrained(base, \"Arijit-07/aria-devops-llama3b\")\n", "tokenizer = AutoTokenizer.from_pretrained(\"Arijit-07/aria-devops-llama3b\")\n", "```\n", "\n", "## Links\n", "- Environment: https://huggingface.co/spaces/Arijit-07/devops-incident-response\n", "- API docs: https://arijit-07-devops-incident-response.hf.space/docs\n", "\"\"\"\n", "\n", "with open('README.md', 'w') as f:\n", " f.write(model_card)\n", "\n", "api.upload_file(\n", " path_or_fileobj='README.md',\n", " path_in_repo='README.md',\n", " repo_id=HF_REPO,\n", " token=HF_TOKEN,\n", ")\n", "print('✅ Model card uploaded')\n", "print(f'🎉 Complete: https://huggingface.co/{HF_REPO}')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "HF_TOKEN = 'YOUR_HF_WRITE_TOKEN_HERE'" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✅ Logged in\n" ] } ], "source": [ "from huggingface_hub import HfApi, login\n", "\n", "HF_TOKEN = 'YOUR_HF_WRITE_TOKEN_HERE' # ← paste your REAL token here\n", " # it must start with hf_\n", "\n", "login(token=HF_TOKEN, add_to_git_credential=False)\n", "print('✅ Logged in')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✅ Repo ready\n", "✅ adapter_config.json\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5cf0fc74219f4cacbbe36a55bfa8d99f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing Files (0 / 0): | | 0.00B / 0.00B " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "eca0c983e1b742e0bbcc765a0e7059bd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "New Data Upload: | | 0.00B / 0.00B " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "✅ adapter_model.safetensors\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "No files have been modified since last commit. Skipping to prevent empty commit.\n", "[huggingface_hub.hf_api|WARNING]No files have been modified since last commit. Skipping to prevent empty commit.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "✅ chat_template.jinja\n", "✅ README.md\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "62ad8e5402f140e8ad64f72cc4f82d8d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing Files (0 / 0): | | 0.00B / 0.00B " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "16398f828ca742df99cd6e237f976e7b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "New Data Upload: | | 0.00B / 0.00B " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "No files have been modified since last commit. Skipping to prevent empty commit.\n", "[huggingface_hub.hf_api|WARNING]No files have been modified since last commit. Skipping to prevent empty commit.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "✅ tokenizer.json\n", "✅ tokenizer_config.json\n", "\n", "🎉 Live at: https://huggingface.co/Arijit-07/aria-devops-llama3b\n" ] } ], "source": [ "from huggingface_hub import HfApi\n", "import os\n", "\n", "HF_REPO = 'Arijit-07/aria-devops-llama3b'\n", "CHECKPOINT = r'D:\\My Projects\\devops-incident-env\\kaggle_training\\results\\aria-llama3b\\checkpoint-ep140'\n", "\n", "api = HfApi()\n", "\n", "api.create_repo(repo_id=HF_REPO, repo_type='model', exist_ok=True)\n", "print(f'✅ Repo ready')\n", "\n", "for filename in os.listdir(CHECKPOINT):\n", " filepath = os.path.join(CHECKPOINT, filename)\n", " if os.path.isfile(filepath):\n", " api.upload_file(\n", " path_or_fileobj=filepath,\n", " path_in_repo=filename,\n", " repo_id=HF_REPO,\n", " token=HF_TOKEN,\n", " )\n", " print(f'✅ {filename}')\n", "\n", "print(f'\\n🎉 Live at: https://huggingface.co/{HF_REPO}')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✅ Model card uploaded\n", "🎉 https://huggingface.co/Arijit-07/aria-devops-llama3b\n" ] } ], "source": [ "model_card = f\"\"\"---\n", "base_model: unsloth/Llama-3.2-3B-Instruct\n", "library_name: peft\n", "pipeline_tag: text-generation\n", "tags:\n", "- lora\n", "- unsloth\n", "- grpo\n", "- reinforcement-learning\n", "- devops\n", "- incident-response\n", "---\n", "\n", "# ARIA — DevOps Incident Response Agent\n", "### Llama-3.2-3B fine-tuned with GRPO\n", "\n", "Fine-tuned on the [ARIA DevOps Incident Response](https://huggingface.co/spaces/Arijit-07/devops-incident-response) \n", "RL environment using Group Relative Policy Optimization (GRPO).\n", "\n", "## Training Details\n", "\n", "- **Algorithm:** GRPO (Group Relative Policy Optimization)\n", "- **Base model:** Llama-3.2-3B-Instruct\n", "- **Fine-tuning:** Unsloth LoRA (rank=16, alpha=32, 4-bit quantized)\n", "- **Episodes:** 140 across easy + medium tasks\n", "- **Training time:** ~10 hours on Kaggle T4 x2\n", "- **Environment:** Live DevOps incident response simulation\n", "\n", "## What the Agent Learns\n", "\n", "The agent is trained to respond to production software incidents by:\n", "1. Gathering information (read_logs, read_metrics, search_logs)\n", "2. Diagnosing the root cause before acting\n", "3. Applying the correct fix (restart, rollback, scale_up, block_ip etc.)\n", "4. Avoiding collateral damage to healthy services\n", "\n", "## Environment\n", "\n", "7 task types of escalating difficulty:\n", "- **Easy:** Single service OOM crash-loop\n", "- **Medium:** Cascading connection pool failure\n", "- **Hard:** Silent data corruption (all services green)\n", "- **Bonus:** Two simultaneous independent failures\n", "- **Security:** DDoS botnet credential stuffing\n", "- **Database:** Missing index causing full table scans\n", "- **Failover:** Multi-region network partition\n", "\n", "14 action types · Dense reward shaping · Partial log observability · SLA degradation per step\n", "\n", "## Links\n", "- **Live environment:** https://huggingface.co/spaces/Arijit-07/devops-incident-response\n", "- **Interactive API:** https://arijit-07-devops-incident-response.hf.space/docs\n", "- **GitHub:** https://github.com/Twilight-13/devops-incident-response\n", "\n", "## Usage\n", "\n", "```python\n", "from peft import PeftModel\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "\n", "base = AutoModelForCausalLM.from_pretrained(\n", " \"unsloth/Llama-3.2-3B-Instruct\",\n", " load_in_4bit=True\n", ")\n", "model = PeftModel.from_pretrained(base, \"Arijit-07/aria-devops-llama3b\")\n", "tokenizer = AutoTokenizer.from_pretrained(\"Arijit-07/aria-devops-llama3b\")\n", "```\n", "\"\"\"\n", "\n", "from huggingface_hub import HfApi\n", "api = HfApi()\n", "\n", "with open('README.md', 'w', encoding='utf-8') as f:\n", " f.write(model_card)\n", "\n", "api.upload_file(\n", " path_or_fileobj='README.md',\n", " path_in_repo='README.md',\n", " repo_id='Arijit-07/aria-devops-llama3b',\n", " token=HF_TOKEN,\n", ")\n", "print('✅ Model card uploaded')\n", "print('🎉 https://huggingface.co/Arijit-07/aria-devops-llama3b')" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [], "dockerImageVersionId": 31329, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python [conda env:base] *", "language": "python", "name": "conda-base-py" } }, "nbformat": 4, "nbformat_minor": 4 }