Spaces:
Running
Running
| """Generate a self-contained Colab notebook that QLoRA-trains an adapter | |
| on the user's selected document chunks. | |
| The user downloads the .ipynb from the EvoLLM UI, opens it in Colab | |
| (free T4 is sufficient for small corpora), clicks 'Run all', and | |
| downloads two files at the end: the LoRA adapter as GGUF and a | |
| manifest.json. They re-upload both into EvoLLM via the 'Import trained | |
| adapter' button, and the new adapter joins the pool. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import uuid | |
| from datetime import datetime | |
| from pathlib import Path | |
| def _cell(cell_type: str, source: str) -> dict: | |
| return { | |
| "cell_type": cell_type, | |
| "metadata": {}, | |
| "source": source.splitlines(keepends=True), | |
| **({"execution_count": None, "outputs": []} if cell_type == "code" else {}), | |
| } | |
# Static cell templates. They are plain (non-f) strings, so every `{NAME}`
# below is left verbatim for the notebook's own f-strings and IPython `!`
# shell interpolation to resolve when the cell runs in Colab.

_SETUP_CODE = """!nvidia-smi
!pip install -q -U \\
    "transformers>=4.46" "peft>=0.13" "trl>=0.12" \\
    "datasets>=3.1" "accelerate>=1.1" "bitsandbytes>=0.44" "sentencepiece"
"""

# NOTE(review): this cell passes `tokenizer=` to SFTTrainer and
# `max_seq_length=` to SFTConfig while the setup cell installs the newest
# release satisfying `trl>=0.12`; presumably newer TRL versions renamed these
# keywords - confirm against the TRL docs and pin or update accordingly.
# NOTE(review): bf16 is requested although the intro recommends a T4 GPU;
# confirm bf16 training works there or fall back to fp16.
_TRAIN_CODE = """from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer, SFTConfig
from datasets import Dataset

bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

print(f"Loading base: {BASE_MODEL}")
tok = AutoTokenizer.from_pretrained(BASE_MODEL)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL, quantization_config=bnb, device_map="auto", torch_dtype=torch.bfloat16,
)
model = prepare_model_for_kbit_training(model)

peft_cfg = LoraConfig(
    r=LORA_RANK, lora_alpha=LORA_ALPHA, lora_dropout=0.05,
    bias="none", task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)
model = get_peft_model(model, peft_cfg)
model.print_trainable_parameters()

ds = Dataset.from_list(DATASET_ROWS)

cfg = SFTConfig(
    output_dir=str(OUT_DIR),
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM,
    learning_rate=LEARNING_RATE,
    bf16=True,
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=1,
    report_to="none",
    max_seq_length=1024,
    warmup_ratio=0.03,
    dataset_text_field="text",
)

trainer = SFTTrainer(model=model, tokenizer=tok, train_dataset=ds, args=cfg)
trainer.train()
trainer.save_model(str(OUT_DIR))
print(f"\\nAdapter saved to {OUT_DIR}")

del model, trainer; gc.collect(); torch.cuda.empty_cache()
"""

# NOTE(review): `--base` receives the hub model id stored in BASE_MODEL;
# confirm the llama.cpp converter accepts a hub id there rather than
# requiring a local directory.
_CONVERT_CODE = """# Convert the LoRA adapter to GGUF for llama.cpp
!apt-get install -y -qq cmake build-essential
!git clone --depth 1 https://github.com/ggerganov/llama.cpp /content/llama.cpp 2>/dev/null || echo 'already cloned'
!pip install -q -r /content/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt

GGUF_PATH = OUT_DIR / f"{ADAPTER_ID}.gguf"
!python /content/llama.cpp/convert_lora_to_gguf.py {OUT_DIR} --base {BASE_MODEL} --outfile {GGUF_PATH}

print(f"\\nGGUF adapter at: {GGUF_PATH}")
print(f"Size: {GGUF_PATH.stat().st_size / 1024 / 1024:.1f} MB")
"""

_PACKAGE_CODE = """# Save manifest and prepare downloads
import shutil

manifest_path = OUT_DIR / f"{ADAPTER_ID}.json"
manifest_path.write_text(json.dumps(MANIFEST, ensure_ascii=False, indent=2), encoding="utf-8")

# Stage the two files at /content for easy download
shutil.copy(GGUF_PATH, f"/content/{ADAPTER_ID}.gguf")
shutil.copy(manifest_path, f"/content/{ADAPTER_ID}.json")

print("\\n" + "=" * 60)
print("READY TO DOWNLOAD")
print("=" * 60)
print(f" /content/{ADAPTER_ID}.gguf")
print(f" /content/{ADAPTER_ID}.json")
print()
print("In the Colab Files panel (left side), right-click each file → Download.")
print("Then in EvoLLM: 🧬 Adapter Pool tab → 📥 Import trained adapter → drop both files.")
"""


def generate_training_notebook(
    adapter_name: str,
    chunks: list[str],
    source_doc_names: list[str],
    base_model: str = "HuggingFaceTB/SmolLM2-1.7B-Instruct",
    lora_rank: int = 16,
    lora_alpha: int | None = None,
    learning_rate: float = 2e-4,
    num_epochs: int = 3,
    batch_size: int = 2,
    grad_accum: int = 4,
    output_path: str | Path = "evollm_training_notebook.ipynb",
    description: str = "",
) -> Path:
    """Produce a configured .ipynb the user can run on Colab.

    The notebook embeds the training examples and a manifest inline, then
    contains cells that install dependencies, train a LoRA adapter, convert
    it to GGUF and stage ``<adapter_id>.gguf`` / ``<adapter_id>.json`` under
    ``/content``.

    Args:
        adapter_name: Display name of the adapter; when blank, the generated
            ``user_<8 hex chars>`` adapter id is used instead.
        chunks: Training texts. Empty and whitespace-only entries are dropped.
        source_doc_names: Names of the documents the chunks came from; listed
            in the intro cell and stored in the manifest.
        base_model: Model identifier written into the notebook and manifest.
        lora_rank: LoRA rank.
        lora_alpha: LoRA alpha; defaults to ``2 * lora_rank`` when ``None``.
        learning_rate: Learning rate written into the notebook.
        num_epochs: Number of training epochs.
        batch_size: Per-device train batch size.
        grad_accum: Gradient accumulation steps.
        output_path: Where the notebook is written; missing parent
            directories are created.
        description: Manifest description; a default mentioning the number of
            source documents is used when empty.

    Returns:
        The path of the written notebook as a ``Path``.

    Raises:
        TypeError: If a manifest or dataset value is not JSON-serializable.
    """
    # Function-scope imports keep this block self-contained with respect to
    # the module's existing import list.
    from datetime import timezone
    from pprint import pformat

    if lora_alpha is None:
        lora_alpha = lora_rank * 2

    adapter_id = f"user_{uuid.uuid4().hex[:8]}"
    safe_adapter_name = adapter_name.strip() or adapter_id
    # Same naive-UTC ISO format that `datetime.utcnow()` produced, without
    # calling the deprecated API.
    created_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    dataset_rows = [{"text": c} for c in chunks if c and c.strip()]
    example_count = len(dataset_rows)

    manifest = {
        "adapter_id": adapter_id,
        "name": safe_adapter_name,
        "description": description
        or f"User-trained adapter on {len(source_doc_names)} document(s)",
        "base_model": base_model,
        "source_documents": source_doc_names,
        "lora_rank": lora_rank,
        "lora_alpha": lora_alpha,
        "learning_rate": learning_rate,
        "num_epochs": num_epochs,
        "training_examples": example_count,
        "trained_at": created_at,
        "trained_from_knowledge": True,
    }

    doc_list = ", ".join(source_doc_names) or "(none)"
    intro_md = "\n".join([
        "# EvoLLM — Train your own adapter",
        "",
        f"This notebook produces a **{safe_adapter_name}** LoRA adapter from your",
        "selected documents.",
        "",
        f"**Source documents**: {doc_list}",
        f"**Base model**: `{base_model}`",
        f"**LoRA rank**: {lora_rank} (alpha = {lora_alpha})",
        f"**Epochs**: {num_epochs} · **LR**: {learning_rate} · **Examples**: {example_count}",
        "",
        "## How to run",
        "",
        "1. **Runtime → Change runtime type → T4 GPU** (or A100 if you have Colab Pro).",
        "2. Click **Runtime → Run all**.",
        "3. When training finishes, you'll get two download links:",
        f"   - `{adapter_id}.gguf` — the LoRA adapter in llama.cpp format",
        f"   - `{adapter_id}.json` — the manifest",
        "4. Back in EvoLLM, go to the **🧬 Adapter Pool** tab → **📥 Import trained adapter** → drop both files.",
        "",
        f"Approximate runtime on free T4: ~20–60 minutes for {example_count} examples.",
        "",
    ])

    # The string values are emitted with repr() rather than json.dumps():
    # JSON escapes characters outside the BMP as surrogate pairs, which a
    # Python string literal would read as two lone surrogates.
    config_code = "\n".join([
        "import json, gc, torch",
        "from pathlib import Path",
        "",
        f'ADAPTER_ID = "{adapter_id}"',
        f"ADAPTER_NAME = {safe_adapter_name!r}",
        f"BASE_MODEL = {base_model!r}",
        f"LORA_RANK = {lora_rank}",
        f"LORA_ALPHA = {lora_alpha}",
        f"LEARNING_RATE = {learning_rate}",
        f"NUM_EPOCHS = {num_epochs}",
        f"BATCH_SIZE = {batch_size}",
        f"GRAD_ACCUM = {grad_accum}",
        "",
        'OUT_DIR = Path(f"/content/{ADAPTER_ID}")',
        "OUT_DIR.mkdir(parents=True, exist_ok=True)",
        "",
    ])

    # Inline the dataset as a JSON list. For small corpora this is fine;
    # very large corpora should switch to a side file, but that's edge case.
    # A JSON array of string-only objects is also a valid Python literal.
    dataset_code = "DATASET_ROWS = " + json.dumps(
        dataset_rows, ensure_ascii=False, indent=2
    )

    # The manifest holds a boolean, and JSON spells it `true`, which is an
    # undefined name in a Python cell. Round-trip through JSON (so that
    # non-serializable values still fail here, at generation time) and emit
    # the result as a Python literal instead.
    manifest_literal = pformat(json.loads(json.dumps(manifest)), sort_dicts=False)
    manifest_code = "MANIFEST = " + manifest_literal

    notebook = {
        "cells": [
            _cell("markdown", intro_md),
            _cell("markdown", "## 0. Setup"),
            _cell("code", _SETUP_CODE),
            _cell("markdown", "## 1. Configuration & dataset"),
            _cell("code", config_code),
            _cell("code", dataset_code),
            _cell("code", manifest_code),
            _cell("markdown", "## 2. Train"),
            _cell("code", _TRAIN_CODE),
            _cell("markdown", "## 3. Convert to GGUF"),
            _cell("code", _CONVERT_CODE),
            _cell("markdown", "## 4. Package for EvoLLM"),
            _cell("code", _PACKAGE_CODE),
        ],
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3",
            },
            "language_info": {"name": "python", "version": "3.10"},
            "accelerator": "GPU",
            "colab": {"provenance": [], "gpuType": "T4"},
        },
        "nbformat": 4,
        "nbformat_minor": 4,
    }

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # The payload contains raw non-ASCII text (ensure_ascii=False, emoji in
    # the templates), so the encoding must not depend on the platform locale.
    output_path.write_text(
        json.dumps(notebook, ensure_ascii=False, indent=1), encoding="utf-8"
    )
    return output_path