teolm30 commited on
Commit
25337cd
·
verified ·
1 Parent(s): 26463a3

Add Colab notebook for free GPU training

Browse files
Files changed (1) hide show
  1. fox1.3_colab.ipynb +327 -0
fox1.3_colab.ipynb ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4",
8
+ "accelerator": "GPU"
9
+ },
10
+ "kernelspec": {
11
+ "name": "python3",
12
+ "display_name": "Python 3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ }
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "markdown",
21
+ "metadata": {},
22
+ "source": [
23
+ "# 🦊 Fox1.3 Training & Evaluation Pipeline\n",
24
+ "\n",
25
+ "**This notebook:**\n",
26
+ "1. Clones Fox1.3 from HuggingFace\n",
27
+ "2. Runs HumanEval + MBPP benchmarks (baseline)\n",
28
+ "3. Fine-tunes with LoRA on CodeAlpaca_20K\n",
29
+ "4. Runs benchmarks again (improved score)\n",
30
+ "5. Pushes back to HuggingFace\n",
31
+ "\n",
32
+ "**Runtime:** Runtime → Change runtime type → GPU (T4 recommended)"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": null,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "# Install dependencies\n",
42
+ "!pip install -q transformers peft bitsandbytes accelerate datasets scipy torch\n",
43
+ "!pip install -q huggingface_hub"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": null,
49
+ "metadata": {},
50
+ "outputs": [],
51
+ "source": [
52
+ "# Login to HuggingFace (use your token)\n",
53
+ "from huggingface_hub import login\n",
54
+ "import getpass; HF_TOKEN = getpass.getpass(\"Enter your HF token (hf_...): \")\n",
55
+ "login(token=HF_TOKEN)"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": null,
61
+ "metadata": {},
62
+ "outputs": [],
63
+ "source": [
64
+ "# Clone the Fox1.3 repo\n",
65
+ "!git clone https://huggingface.co/teolm30/fox1.3 fox1.3-repo\n",
66
+ "%cd fox1.3-repo\n",
67
+ "!ls"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": null,
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "%%writefile evaluate.py\n",
77
+ "#!/usr/bin/env python3\n",
78
+ "\"\"\"Fox1.3 Evaluation - HumanEval + MBPP\"\"\"\n",
79
+ "import torch\n",
80
+ "from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
81
+ "from datasets import load_dataset\n",
82
+ "import json\n",
83
+ "\n",
84
+ "MODEL_NAME = \"teolm30/fox1.3\"\n",
85
+ "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
86
+ "print(f\"Using device: {DEVICE}\")\n",
87
+ "\n",
88
+ "def load_model():\n",
89
+ " tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n",
90
+ " tokenizer.pad_token = tokenizer.eos_token\n",
91
+ " model = AutoModelForCausalLM.from_pretrained(\n",
92
+ " MODEL_NAME,\n",
93
+ " torch_dtype=torch.float16,\n",
94
+ " device_map=\"auto\",\n",
95
+ " trust_remote_code=True\n",
96
+ " )\n",
97
+ " return model, tokenizer\n",
98
+ "\n",
99
+ "def run_humaneval(model, tokenizer):\n",
100
+ " dataset = load_dataset(\"openai/openai_humaneval\", split=\"test\")\n",
101
+ " pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer,\n",
102
+ " max_new_tokens=256, do_sample=False, pad_token_id=tokenizer.eos_token_id)\n",
103
+ " correct = 0\n",
104
+ " for i, item in enumerate(dataset):\n",
105
+ " prompt = item[\"prompt\"]\n",
106
+ " test = item[\"test\"]\n",
107
+ " try:\n",
108
+ " out = pipe(prompt, pad_token_id=tokenizer.eos_token_id)\n",
109
+ " code = out[0][\"generated_text\"][len(prompt):].strip()\n",
110
+ " if \"```python\" in code:\n",
111
+ " code = code.split(\"```python\")[1].split(\"```\")[0].strip()\n",
112
+ " exec_globals = {}\n",
113
+ " exec(code, exec_globals)\n",
114
+ " exec(test, exec_globals)\n",
115
+ " correct += 1\n",
116
+ "        except Exception:\n",
117
+ " pass\n",
118
+ " if (i+1) % 20 == 0:\n",
119
+ " print(f\"HumanEval: {i+1}/{len(dataset)} | Running score: {correct}/{i+1}\")\n",
120
+ " print(f\"HumanEval PASS@1: {correct}/{len(dataset)} = {correct/len(dataset):.4f}\")\n",
121
+ " return correct / len(dataset)\n",
122
+ "\n",
123
+ "def run_mbpp(model, tokenizer):\n",
124
+ "    dataset = load_dataset(\"google-research-datasets/mbpp\", \"sanitized\", split=\"test\")\n",
125
+ " pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer,\n",
126
+ " max_new_tokens=256, do_sample=False, pad_token_id=tokenizer.eos_token_id)\n",
127
+ " correct = 0\n",
128
+ "    for i, item in enumerate(dataset):\n",
129
+ " prompt = f\"### Instruction:\\nWrite a Python function.\\n\\n### Input:\\n{item['prompt']}\\n\\n### Response:\\n\"\n",
130
+ " try:\n",
131
+ " out = pipe(prompt, pad_token_id=tokenizer.eos_token_id)\n",
132
+ " code = out[0][\"generated_text\"][len(prompt):].strip()\n",
133
+ " if \"```python\" in code:\n",
134
+ " code = code.split(\"```python\")[1].split(\"```\")[0].strip()\n",
135
+ " exec_globals = {}\n",
136
+ " exec(code, exec_globals)\n",
137
+ " all_pass = True\n",
138
+ " for test in item[\"test_list\"]:\n",
139
+ " try:\n",
140
+ " exec(test, exec_globals)\n",
141
+ "            except Exception:\n",
142
+ " all_pass = False\n",
143
+ " break\n",
144
+ " if all_pass:\n",
145
+ " correct += 1\n",
146
+ "        except Exception:\n",
147
+ " pass\n",
148
+ " if (i+1) % 50 == 0:\n",
149
+ "            print(f\"MBPP: {i+1}/{len(dataset)} | Running score: {correct}/{i+1}\")\n",
150
+ "    print(f\"MBPP PASS@1: {correct}/{len(dataset)} = {correct/len(dataset):.4f}\")\n",
151
+ "    return correct / len(dataset)\n",
152
+ "\n",
153
+ "print(\"Loading model...\")\n",
154
+ "model, tokenizer = load_model()\n",
155
+ "print(\"\\n=== BASELINE BENCHMARK ===\")\n",
156
+ "he_score = run_humaneval(model, tokenizer)\n",
157
+ "mbpp_score = run_mbpp(model, tokenizer)\n",
158
+ "print(f\"\\nBaseline: HumanEval={he_score:.4f}, MBPP={mbpp_score:.4f}\")"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": null,
164
+ "metadata": {},
165
+ "outputs": [],
166
+ "source": [
167
+ "%%writefile train.py\n",
168
+ "#!/usr/bin/env python3\n",
169
+ "\"\"\"Fox1.3 LoRA Fine-tuning\"\"\"\n",
170
+ "import os\n",
171
+ "import torch\n",
172
+ "from datasets import load_dataset\n",
173
+ "from transformers import (\n",
174
+ " AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,\n",
175
+ " TrainingArguments, Trainer, DataCollatorForLanguageModeling\n",
176
+ ")\n",
177
+ "from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training\n",
178
+ "import logging\n",
179
+ "logging.basicConfig(level=logging.INFO)\n",
180
+ "\n",
181
+ "MODEL_NAME = \"teolm30/fox1.3\"\n",
182
+ "DATASET_NAME = \"HuggingFaceH4/CodeAlpaca_20K\"\n",
183
+ "OUTPUT_DIR = \"/tmp/fox1.3-checkpoints\"\n",
184
+ "\n",
185
+ "def format_instruction(example):\n",
186
+ " inst = example.get(\"instruction\", \"\")\n",
187
+ " inp = example.get(\"input\", \"\")\n",
188
+ " out = example.get(\"output\", \"\")\n",
189
+ " if inp:\n",
190
+ " text = f\"### Instruction:\\n{inst}\\n\\n### Input:\\n{inp}\\n\\n### Response:\\n{out}\"\n",
191
+ " else:\n",
192
+ " text = f\"### Instruction:\\n{inst}\\n\\n### Response:\\n{out}\"\n",
193
+ " return {\"text\": text}\n",
194
+ "\n",
195
+ "print(\"Loading tokenizer...\")\n",
196
+ "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n",
197
+ "tokenizer.pad_token = tokenizer.eos_token\n",
198
+ "\n",
199
+ "print(\"Loading model with 4-bit quantization...\")\n",
200
+ "bnb_config = BitsAndBytesConfig(\n",
201
+ " load_in_4bit=True, bnb_4bit_quant_type=\"nf4\",\n",
202
+ " bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True,\n",
203
+ ")\n",
204
+ "model = AutoModelForCausalLM.from_pretrained(\n",
205
+ " MODEL_NAME, quantization_config=bnb_config, device_map=\"auto\", trust_remote_code=True\n",
206
+ ")\n",
207
+ "model = prepare_model_for_kbit_training(model)\n",
208
+ "\n",
209
+ "lora_config = LoraConfig(\n",
210
+ " r=8, lora_alpha=16,\n",
211
+ " target_modules=[\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\"],\n",
212
+ " lora_dropout=0.05, bias=\"none\", task_type=\"CAUSAL_LM\"\n",
213
+ ")\n",
214
+ "model = get_peft_model(model, lora_config)\n",
215
+ "model.print_trainable_parameters()\n",
216
+ "\n",
217
+ "print(\"Loading dataset...\")\n",
218
+ "dataset = load_dataset(DATASET_NAME, split=\"train\")\n",
219
+ "dataset = dataset.map(format_instruction, remove_columns=dataset.column_names)\n",
220
+ "dataset = dataset.filter(lambda x: x[\"text\"] is not None)\n",
221
+ "\n",
222
+ "def tokenize(example):\n",
223
+ " return tokenizer(example[\"text\"], truncation=True, max_length=1024, padding=\"max_length\")\n",
224
+ "\n",
225
+ "dataset = dataset.map(tokenize, batched=True, remove_columns=[\"text\"])\n",
226
+ "dataset = dataset.train_test_split(test_size=0.1)\n",
227
+ "train_ds, eval_ds = dataset[\"train\"], dataset[\"test\"]\n",
228
+ "\n",
229
+ "training_args = TrainingArguments(\n",
230
+ " output_dir=OUTPUT_DIR,\n",
231
+ " num_train_epochs=3,\n",
232
+ " per_device_train_batch_size=4,\n",
233
+ " per_device_eval_batch_size=4,\n",
234
+ " learning_rate=2e-4,\n",
235
+ " warmup_steps=50,\n",
236
+ " logging_steps=20,\n",
237
+ " eval_strategy=\"epoch\",\n",
238
+ " save_strategy=\"epoch\",\n",
239
+ "    fp16=True,\n",
240
+ "    tf32=False,\n",
241
+ " optim=\"paged_adamw_8bit\",\n",
242
+ " group_by_length=True,\n",
243
+ " report_to=\"none\",\n",
244
+ ")\n",
245
+ "\n",
246
+ "trainer = Trainer(\n",
247
+ " model=model,\n",
248
+ " args=training_args,\n",
249
+ " train_dataset=train_ds,\n",
250
+ " eval_dataset=eval_ds,\n",
251
+ " data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),\n",
252
+ ")\n",
253
+ "\n",
254
+ "print(\"Starting training... (this will take ~30-60 min on T4)\")\n",
255
+ "trainer.train()\n",
256
+ "print(\"Training complete!\")\n",
257
+ "\n",
258
+ "# Save merged model\n",
259
+ "print(\"Merging and saving...\")\n",
260
+ "merged_model = model.merge_and_unload()\n",
261
+ "merged_model.save_pretrained(\"/tmp/fox1.3-improved\")\n",
262
+ "tokenizer.save_pretrained(\"/tmp/fox1.3-improved\")\n",
263
+ "print(\"Done! Model saved to /tmp/fox1.3-improved\")"
264
+ ]
265
+ },
266
+ {
267
+ "cell_type": "code",
268
+ "execution_count": null,
269
+ "metadata": {},
270
+ "outputs": [],
271
+ "source": [
272
+ "# Run baseline benchmark\n",
273
+ "!python3 evaluate.py 2>&1"
274
+ ]
275
+ },
276
+ {
277
+ "cell_type": "code",
278
+ "execution_count": null,
279
+ "metadata": {},
280
+ "outputs": [],
281
+ "source": [
282
+ "# Run training (this takes 30-60 min on free T4)\n",
283
+ "!python3 train.py 2>&1"
284
+ ]
285
+ },
286
+ {
287
+ "cell_type": "code",
288
+ "execution_count": null,
289
+ "metadata": {},
290
+ "outputs": [],
291
+ "source": [
292
+ "# Push improved model to HF\n",
293
+ "from huggingface_hub import HfApi, create_repo\n",
294
+ "import os\n",
295
+ "\n",
296
+ "api = HfApi()\n",
297
+ "repo_id = \"teolm30/fox1.3\"\n",
298
+ "\n",
299
+ "print(\"Uploading improved model...\")\n",
300
+ "api.upload_folder(\n",
301
+ " folder_path=\"/tmp/fox1.3-improved\",\n",
302
+ " repo_id=repo_id,\n",
303
+ " repo_type=\"model\",\n",
304
+ " commit_message=\"Fine-tuned on CodeAlpaca_20K (LoRA, 3 epochs)\",\n",
305
+ ")\n",
306
+ "print(f\"\\n✅ Uploaded! https://huggingface.co/{repo_id}\")"
307
+ ]
308
+ },
309
+ {
310
+ "cell_type": "code",
311
+ "execution_count": null,
312
+ "metadata": {},
313
+ "outputs": [],
314
+ "source": [
315
+ "# Run benchmark on improved model\n",
316
+ "import torch\n",
317
+ "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
318
+ "MODEL_NAME = \"teolm30/fox1.3\"\n",
319
+ "print(\"Loading improved model...\")\n",
320
+ "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n",
321
+ "tokenizer.pad_token = tokenizer.eos_token\n",
322
+ "model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map=\"auto\", trust_remote_code=True)\n",
323
+ "print(\"Model loaded! Run evaluate.py to get final scores.\")"
324
+ ]
325
+ }
326
+ ]
327
+ }