krystv
/

ArtFlow

Model card Files Files and versions

xet

Community

krystv committed on Apr 28

Commit

3ab3cbd

verified ·

1 Parent(s): 4c58a98

v2: Updated notebook with real dataset training

Browse files

Files changed (1) hide show

ArtFlow_Training.ipynb +47 -73

ArtFlow_Training.ipynb CHANGED Viewed

@@ -4,10 +4,10 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 🎨 ArtFlow Complete Training Notebook\n",
-    "**Reasoning-Native Artistic Image Generation for Mobile Devices**\n",
     "\n",
-    "Downloads model + training scripts from the HF repo, then trains all 5 stages."
    ]
   },
   {
@@ -18,7 +18,7 @@
    "source": [
     "# ===== 0. Setup =====\n",
     "!pip install -q torch torchvision torchaudio\n",
-    "!pip install -q huggingface_hub matplotlib numpy tqdm\n",
     "\n",
     "from huggingface_hub import hf_hub_download\n",
     "import shutil\n",
@@ -41,7 +41,7 @@
     "# ===== 1. Create Model =====\n",
     "from artflow_model import ArtFlow, ArtFlowConfig\n",
     "from artflow_train import (\n",
-    "    TrainConfig, SyntheticDataset, freeze_for_stage, train\n",
     ")\n",
     "\n",
     "DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
@@ -61,7 +61,7 @@
     "\n",
     "model = ArtFlow(config).to(DEVICE)\n",
     "p = sum(x.numel() for x in model.parameters())\n",
-    "print(f'Model: {p:,} params ({p/1e6:.1f}M)')"
    ]
   },
   {
@@ -70,9 +70,22 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ===== 2. Dataset =====\n",
-    "# Synthetic for smoke-testing. Replace with real latents for real training.\n",
-    "dataset = SyntheticDataset(n=10000, config=config)"
    ]
   },
   {
@@ -82,7 +95,7 @@
     "---\n",
     "## Stage 1: Base Generation\n",
     "**Frozen:** style, mood, concept  \n",
-    "**Trains:** WaveMamba backbone + cross-attention  \n",
     "**Goal:** Learn denoising dynamics"
    ]
   },
@@ -114,7 +127,7 @@
     "    print(f'Loss: {np.mean(L[:10]):.4f} → {np.mean(L[-10:]):.4f}')\n",
     "    sm = np.convolve(L, np.ones(min(20, len(L)//4))/min(20, len(L)//4), 'valid')\n",
     "    plt.figure(figsize=(10,3))\n",
-    "    plt.plot(sm); plt.title('Stage 1 Loss'); plt.xlabel('Step'); plt.show()"
    ]
   },
   {
@@ -122,10 +135,8 @@
    "metadata": {},
    "source": [
     "---\n",
-    "## Stage 2: Style Matrix Training\n",
-    "**Frozen:** mood, concept  \n",
-    "**Trains:** style matrix + backbone (joint fine-tune)  \n",
-    "**Goal:** Disentangled art style vectors"
    ]
   },
   {
@@ -134,24 +145,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
     "model = freeze_for_stage(model, 2)\n",
-    "tcfg2 = TrainConfig(\n",
-    "    lr=5e-5, batch_size=2, grad_accum=32,\n",
-    "    num_steps=25000, warmup_steps=500,\n",
-    "    log_every=100, save_every=5000, stage=2,\n",
-    ")\n",
-    "engine2 = train(model, config, tcfg2, dataset, DEVICE)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "---\n",
-    "## Stage 3: Resolution Scaling + Reasoning\n",
-    "**Frozen:** mood, concept  \n",
-    "**Trains:** backbone + style + reasoning  \n",
-    "**Goal:** Higher res, enable recursive latent reasoning"
    ]
   },
   {
@@ -160,24 +159,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
     "model = freeze_for_stage(model, 3)\n",
-    "tcfg3 = TrainConfig(\n",
-    "    lr=3e-5, batch_size=2, grad_accum=32,\n",
-    "    num_steps=25000, warmup_steps=500,\n",
-    "    log_every=100, save_every=5000, stage=3,\n",
-    ")\n",
-    "engine3 = train(model, config, tcfg3, dataset, DEVICE)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "---\n",
-    "## Stage 4: Concept & Mood Training\n",
-    "**Frozen:** backbone + style  \n",
-    "**Trains:** concept engine + mood controller only  \n",
-    "**Goal:** Scene understanding, emotional atmosphere"
    ]
   },
   {
@@ -186,24 +173,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
     "model = freeze_for_stage(model, 4)\n",
-    "tcfg4 = TrainConfig(\n",
-    "    lr=2e-5, batch_size=2, grad_accum=32,\n",
-    "    num_steps=15000, warmup_steps=300,\n",
-    "    log_every=100, save_every=5000, stage=4,\n",
-    ")\n",
-    "engine4 = train(model, config, tcfg4, dataset, DEVICE)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "---\n",
-    "## Stage 5: Quality Post-Training\n",
-    "**Frozen:** nothing (all trainable)  \n",
-    "**Trains:** everything at low LR  \n",
-    "**Goal:** Final quality alignment"
    ]
   },
   {
@@ -212,13 +187,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
     "model = freeze_for_stage(model, 5)\n",
-    "tcfg5 = TrainConfig(\n",
-    "    lr=1e-5, batch_size=2, grad_accum=32,\n",
-    "    num_steps=5000, warmup_steps=200,\n",
-    "    log_every=50, save_every=2500, stage=5,\n",
-    ")\n",
-    "engine5 = train(model, config, tcfg5, dataset, DEVICE)"
    ]
   },
   {
@@ -231,7 +205,7 @@
     "# from huggingface_hub import HfApi\n",
     "# engine5.save('./artflow_final.pt')\n",
     "# HfApi().upload_file('./artflow_final.pt', 'artflow_final.pt', 'krystv/ArtFlow')\n",
-    "print('🎉 All 5 stages complete!')"
    ]
   }
  ],

    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "# 🎨 ArtFlow v2 Complete Training Notebook\n",
+    "**Real Mamba SSM backbone — No CUDA extensions needed!**\n",
     "\n",
+    "Downloads the model and training scripts from the HF repo, then trains on real art datasets."
    ]
   },
   {
    "source": [
     "# ===== 0. Setup =====\n",
     "!pip install -q torch torchvision torchaudio\n",
+    "!pip install -q huggingface_hub matplotlib numpy tqdm datasets\n",
     "\n",
     "from huggingface_hub import hf_hub_download\n",
     "import shutil\n",
     "# ===== 1. Create Model =====\n",
     "from artflow_model import ArtFlow, ArtFlowConfig\n",
     "from artflow_train import (\n",
+    "    TrainConfig, SyntheticDataset, RealArtDataset, freeze_for_stage, train\n",
     ")\n",
     "\n",
     "DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
     "\n",
     "model = ArtFlow(config).to(DEVICE)\n",
     "p = sum(x.numel() for x in model.parameters())\n",
+    "print(f'Model: {p:,} params ({p/1e6:.1f}M) — Real Mamba SSM!')"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "# ===== 2. Dataset (REAL — not synthetic!) =====\n",
+    "# Choose one:\n",
+    "#   - 'huggan/wikiart'                    (80K art paintings, 27 styles)\n",
+    "#   - 'Fazzie/Teyvat'                     (anime illustrations)\n",
+    "#   - 'diffusers/pokemon-gpt4-captions'   (800 pokemon, good captions)\n",
+    "#   - 'lambdalabs/naruto-blip-captions'   (anime faces)\n",
+    "\n",
+    "dataset = RealArtDataset(\n",
+    "    'diffusers/pokemon-gpt4-captions',  # Small but high quality\n",
+    "    config=config,\n",
+    "    max_samples=None,  # Use all samples\n",
+    ")\n",
+    "print(f'Dataset: {len(dataset)} samples')\n",
+    "\n",
+    "# For smoke test, use synthetic:\n",
+    "# dataset = SyntheticDataset(n=10000, config=config)"
    ]
   },
   {
     "---\n",
     "## Stage 1: Base Generation\n",
     "**Frozen:** style, mood, concept  \n",
+    "**Trains:** WaveMamba backbone (Real Mamba SSM!) + cross-attention  \n",
     "**Goal:** Learn denoising dynamics"
    ]
   },
     "    print(f'Loss: {np.mean(L[:10]):.4f} → {np.mean(L[-10:]):.4f}')\n",
     "    sm = np.convolve(L, np.ones(min(20, len(L)//4))/min(20, len(L)//4), 'valid')\n",
     "    plt.figure(figsize=(10,3))\n",
+    "    plt.plot(sm); plt.title('Stage 1 Loss (Real Mamba SSM)'); plt.xlabel('Step'); plt.show()"
    ]
   },
   {
    "metadata": {},
    "source": [
     "---\n",
+    "## Stages 2–5: Style, Resolution, Concept, Quality\n",
+    "These stages follow the same schedule as in v1, but now run on the Real Mamba SSM backbone."
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "# Stage 2: Style Matrix\n",
     "model = freeze_for_stage(model, 2)\n",
+    "engine2 = train(model, config, TrainConfig(\n",
+    "    lr=5e-5, batch_size=2, grad_accum=32, num_steps=25000,\n",
+    "    warmup_steps=500, log_every=100, save_every=5000, stage=2,\n",
+    "), dataset, DEVICE)"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "# Stage 3: Resolution + Reasoning\n",
     "model = freeze_for_stage(model, 3)\n",
+    "engine3 = train(model, config, TrainConfig(\n",
+    "    lr=3e-5, batch_size=2, grad_accum=32, num_steps=25000,\n",
+    "    warmup_steps=500, log_every=100, save_every=5000, stage=3,\n",
+    "), dataset, DEVICE)"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "# Stage 4: Concept & Mood\n",
     "model = freeze_for_stage(model, 4)\n",
+    "engine4 = train(model, config, TrainConfig(\n",
+    "    lr=2e-5, batch_size=2, grad_accum=32, num_steps=15000,\n",
+    "    warmup_steps=300, log_every=100, save_every=5000, stage=4,\n",
+    "), dataset, DEVICE)"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "# Stage 5: Quality Alignment\n",
     "model = freeze_for_stage(model, 5)\n",
+    "engine5 = train(model, config, TrainConfig(\n",
+    "    lr=1e-5, batch_size=2, grad_accum=32, num_steps=5000,\n",
+    "    warmup_steps=200, log_every=50, save_every=2500, stage=5,\n",
+    "), dataset, DEVICE)"
    ]
   },
   {
     "# from huggingface_hub import HfApi\n",
     "# engine5.save('./artflow_final.pt')\n",
     "# HfApi().upload_file('./artflow_final.pt', 'artflow_final.pt', 'krystv/ArtFlow')\n",
+    "print('🎉 All 5 stages complete — Real Mamba SSM, real datasets!')"
    ]
   }
  ],