fix: pin transformers stack and force slow tokenizer by default to avoid fast-tokenizer errors

Browse files

Files changed (1) hide show

CELESTIAL_Training_Notebook.ipynb +416 -388

CELESTIAL_Training_Notebook.ipynb CHANGED Viewed

@@ -1,390 +1,418 @@
 {
-    "cells": [
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "# 🌟 CELESTIAL MISTRAL 7B TRAINING\n",
-                "## Train Your Own Mistral 7B Model for CELESTIAL AI\n",
-                "\n",
-                "This notebook properly trains Mistral 7B v0.3 with:\n",
-                "- 150 production-quality conversations\n",
-                "- LoRA fine-tuning for efficiency\n",
-                "- Proper chat formatting for Mistral\n",
-                "- No logging issues"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# 📦 INSTALL REQUIRED PACKAGES FOR MISTRAL 7B\n",
-                "!pip install -q transformers==4.36.0 datasets accelerate peft bitsandbytes huggingface_hub trl\n",
-                "\n",
-                "# Disable all logging to prevent issues\n",
-                "import os\n",
-                "import warnings\n",
-                "os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
-                "os.environ[\"WANDB_MODE\"] = \"disabled\"\n",
-                "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
-                "warnings.filterwarnings('ignore')\n",
-                "\n",
-                "print('✅ Packages installed for Mistral 7B training!')\n",
-                "print('🚫 All logging disabled to prevent errors')"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# 🔑 HUGGINGFACE AUTHENTICATION\n",
-                "from huggingface_hub import notebook_login\n",
-                "\n",
-                "print('🔐 Authenticating with HuggingFace for Mistral access...')\n",
-                "try:\n",
-                "    notebook_login()\n",
-                "    print('✅ Authentication successful!')\n",
-                "except Exception as e:\n",
-                "    print(f'⚠️ Authentication failed: {e}')\n",
-                "    print('Please set your HF token manually if needed')"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# 📊 LOAD CELESTIAL DATASET\n",
-                "from datasets import load_dataset\n",
-                "\n",
-                "DATASET_REPO = 'dp1812/celestial-comprehensive-spiritual-ai'\n",
-                "\n",
-                "print('📊 Loading CELESTIAL dataset for Mistral training...')\n",
-                "try:\n",
-                "    dataset = load_dataset(DATASET_REPO, data_files='celestial_complete_production_dataset.jsonl', split='train')\n",
-                "    print(f'✅ Dataset loaded: {len(dataset)} conversations')\n",
-                "    print('🎯 100 numerology + 50 Krishna divine guidance')\n",
-                "except Exception as e:\n",
-                "    print(f'❌ Dataset loading failed: {e}')\n",
-                "    # Fallback\n",
-                "    try:\n",
-                "        dataset = load_dataset(DATASET_REPO, split='train')\n",
-                "        print(f'✅ Fallback dataset loaded: {len(dataset)} conversations')\n",
-                "    except Exception as e2:\n",
-                "        print(f'❌ All dataset loading failed: {e2}')\n",
-                "        raise\n",
-                "\n",
-                "# Show sample\n",
-                "print('\\n📝 Sample conversation:')\n",
-                "sample = dataset[0]\n",
-                "print(f\"User: {sample['messages'][1]['content'][:60]}...\")\n",
-                "print(f\"Assistant: {sample['messages'][2]['content'][:60]}...\")"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# 🤖 LOAD MISTRAL 7B MODEL AND TOKENIZER\n",
-                "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
-                "import torch\n",
-                "\n",
-                "MODEL_NAME = 'mistralai/Mistral-7B-v0.3'\n",
-                "\n",
-                "print('🤖 Loading Mistral 7B v0.3 model and tokenizer...')\n",
-                "\n",
-                "# Load tokenizer with proper settings\n",
-                "tokenizer = AutoTokenizer.from_pretrained(\n",
-                "    MODEL_NAME,\n",
-                "    trust_remote_code=True,\n",
-                "    padding_side='right'\n",
-                ")\n",
-                "\n",
-                "# Add pad token if missing\n",
-                "if tokenizer.pad_token is None:\n",
-                "    tokenizer.pad_token = tokenizer.eos_token\n",
-                "    tokenizer.pad_token_id = tokenizer.eos_token_id\n",
-                "\n",
-                "# Quantization config for efficient training\n",
-                "bnb_config = BitsAndBytesConfig(\n",
-                "    load_in_4bit=True,\n",
-                "    bnb_4bit_quant_type=\"nf4\",\n",
-                "    bnb_4bit_compute_dtype=torch.float16,\n",
-                "    bnb_4bit_use_double_quant=True\n",
-                ")\n",
-                "\n",
-                "# Load Mistral 7B model\n",
-                "model = AutoModelForCausalLM.from_pretrained(\n",
-                "    MODEL_NAME,\n",
-                "    quantization_config=bnb_config,\n",
-                "    device_map=\"auto\",\n",
-                "    trust_remote_code=True,\n",
-                "    torch_dtype=torch.float16\n",
-                ")\n",
-                "\n",
-                "print('✅ Mistral 7B model and tokenizer loaded successfully!')\n",
-                "print(f'🔍 Model: {MODEL_NAME}')\n",
-                "print(f'🔍 Tokenizer vocab size: {len(tokenizer)}')\n",
-                "print(f'🔍 Model device: {model.device}')"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# 🔧 SETUP LORA FOR MISTRAL 7B\n",
-                "from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training\n",
-                "\n",
-                "print('🔧 Setting up LoRA for Mistral 7B training...')\n",
-                "\n",
-                "# Prepare model for k-bit training\n",
-                "model = prepare_model_for_kbit_training(model)\n",
-                "\n",
-                "# Mistral 7B specific target modules\n",
-                "target_modules = [\n",
-                "    \"q_proj\",\n",
-                "    \"k_proj\", \n",
-                "    \"v_proj\",\n",
-                "    \"o_proj\",\n",
-                "    \"gate_proj\",\n",
-                "    \"up_proj\",\n",
-                "    \"down_proj\",\n",
-                "    \"lm_head\"\n",
-                "]\n",
-                "\n",
-                "print(f'🎯 Target modules for Mistral: {target_modules}')\n",
-                "\n",
-                "# Create LoRA config optimized for Mistral\n",
-                "lora_config = LoraConfig(\n",
-                "    r=64,  # Higher rank for better performance\n",
-                "    lora_alpha=16,\n",
-                "    target_modules=target_modules,\n",
-                "    lora_dropout=0.1,\n",
-                "    bias=\"none\",\n",
-                "    task_type=TaskType.CAUSAL_LM,\n",
-                ")\n",
-                "\n",
-                "# Apply LoRA to Mistral\n",
-                "try:\n",
-                "    model = get_peft_model(model, lora_config)\n",
-                "    model.print_trainable_parameters()\n",
-                "    print('✅ LoRA adapters attached to Mistral 7B!')\n",
-                "except Exception as e:\n",
-                "    print(f'❌ LoRA setup failed: {e}')\n",
-                "    raise\n",
-                "\n",
-                "print('🎯 Mistral 7B ready for CELESTIAL training!')"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# 📝 FORMAT DATA FOR MISTRAL CHAT TRAINING\n",
-                "def format_for_mistral_chat(example):\n",
-                "    \"\"\"Format conversation for Mistral chat training\"\"\"\n",
-                "    messages = example['messages']\n",
-                "    \n",
-                "    # Extract messages\n",
-                "    system_msg = messages[0]['content']\n",
-                "    user_msg = messages[1]['content']\n",
-                "    assistant_msg = messages[2]['content']\n",
-                "    \n",
-                "    # Mistral chat format\n",
-                "    formatted = f\"<s>[INST] {system_msg}\\n\\nUser: {user_msg} [/INST] {assistant_msg}</s>\"\n",
-                "    \n",
-                "    # Tokenize\n",
-                "    tokens = tokenizer(\n",
-                "        formatted,\n",
-                "        truncation=True,\n",
-                "        padding=False,\n",
-                "        max_length=2048,  # Mistral context length\n",
-                "        return_tensors=None\n",
-                "    )\n",
-                "    \n",
-                "    # Set labels (same as input_ids for causal LM)\n",
-                "    tokens['labels'] = tokens['input_ids'].copy()\n",
-                "    \n",
-                "    return tokens\n",
-                "\n",
-                "print('📝 Formatting data for Mistral chat training...')\n",
-                "formatted_dataset = dataset.map(\n",
-                "    format_for_mistral_chat,\n",
-                "    remove_columns=dataset.column_names,\n",
-                "    desc=\"Formatting for Mistral\"\n",
-                ")\n",
-                "\n",
-                "print(f'✅ Formatted {len(formatted_dataset)} conversations for Mistral')\n",
-                "print('🎯 Using proper Mistral chat format with [INST] tags')"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# 🚀 MISTRAL TRAINING CONFIGURATION\n",
-                "from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling\n",
-                "\n",
-                "print('🚀 Setting up Mistral 7B training configuration...')\n",
-                "\n",
-                "# Training arguments optimized for Mistral 7B\n",
-                "training_args = TrainingArguments(\n",
-                "    output_dir='./celestial-mistral-7b-results',\n",
-                "    num_train_epochs=3,\n",
-                "    per_device_train_batch_size=1,\n",
-                "    gradient_accumulation_steps=16,  # Effective batch size of 16\n",
-                "    warmup_steps=50,\n",
-                "    learning_rate=2e-4,  # Higher LR for LoRA\n",
-                "    fp16=True,\n",
-                "    logging_steps=10,\n",
-                "    save_steps=100,\n",
-                "    eval_strategy='no',\n",
-                "    save_strategy='steps',\n",
-                "    load_best_model_at_end=False,\n",
-                "    report_to=[],  # No external logging\n",
-                "    remove_unused_columns=False,\n",
-                "    dataloader_drop_last=True,\n",
-                "    group_by_length=True,  # Efficient batching\n",
-                "    ddp_find_unused_parameters=False\n",
-                ")\n",
-                "\n",
-                "# Data collator for Mistral\n",
-                "data_collator = DataCollatorForLanguageModeling(\n",
-                "    tokenizer=tokenizer,\n",
-                "    mlm=False,\n",
-                "    pad_to_multiple_of=8\n",
-                ")\n",
-                "\n",
-                "# Create Mistral trainer\n",
-                "trainer = Trainer(\n",
-                "    model=model,\n",
-                "    args=training_args,\n",
-                "    train_dataset=formatted_dataset,\n",
-                "    tokenizer=tokenizer,\n",
-                "    data_collator=data_collator\n",
-                ")\n",
-                "\n",
-                "print('✅ Mistral 7B training configuration ready!')\n",
-                "print('🎯 Optimized for CELESTIAL AI with LoRA fine-tuning')\n",
-                "print('⏱️ Expected training time: 30-45 minutes')"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# 🏃‍♂️ START MISTRAL 7B TRAINING\n",
-                "print('🏃‍♂️ Starting CELESTIAL Mistral 7B training...')\n",
-                "print('⏱️ Expected time: 30-45 minutes')\n",
-                "print('🎯 Training Mistral 7B v0.3 on CELESTIAL conversations')\n",
-                "print('💎 150 production-quality conversations')\n",
-                "print('\\n🚀 Mistral training begins now...')\n",
-                "\n",
-                "try:\n",
-                "    # Start Mistral training\n",
-                "    trainer.train()\n",
-                "    \n",
-                "    print('\\n🎉 MISTRAL 7B TRAINING COMPLETED SUCCESSFULLY!')\n",
-                "    print('✅ CELESTIAL Mistral 7B is now trained!')\n",
-                "    print('🌟 Ready for testing and deployment!')\n",
-                "    \n",
-                "except Exception as e:\n",
-                "    print(f'❌ Mistral training failed: {e}')\n",
-                "    print('🔧 Please check the error and try again')\n",
-                "    raise"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": null,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# 🧪 TEST TRAINED MISTRAL 7B\n",
-                "print('🧪 Testing the trained CELESTIAL Mistral 7B...')\n",
-                "\n",
-                "model.eval()\n",
-                "\n",
-                "test_prompts = [\n",
-                "    \"<s>[INST] You are CELESTIAL AI, an expert numerologist. Provide detailed analysis.\\n\\nUser: Tell me about number 7 in Chaldean numerology. [/INST]\",\n",
-                "    \"<s>[INST] You are Shree Krishna providing divine guidance.\\n\\nUser: Krishna, I need guidance about my career path. [/INST]\",\n",
-                "    \"<s>[INST] You are CELESTIAL AI providing numerology analysis.\\n\\nUser: Calculate my numerology for name 'John Smith' born 15/08/1990. [/INST]\"\n",
-                "]\n",
-                "\n",
-                "for i, prompt in enumerate(test_prompts, 1):\n",
-                "    print(f'\\n🔍 Test {i}: Mistral 7B Response')\n",
-                "    \n",
-                "    try:\n",
-                "        inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
-                "        \n",
-                "        with torch.no_grad():\n",
-                "            outputs = model.generate(\n",
-                "                **inputs,\n",
-                "                max_new_tokens=300,\n",
-                "                temperature=0.7,\n",
-                "                do_sample=True,\n",
-                "                pad_token_id=tokenizer.pad_token_id,\n",
-                "                eos_token_id=tokenizer.eos_token_id\n",
-                "            )\n",
-                "        \n",
-                "        response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
-                "        generated = response[len(prompt):].strip()\n",
-                "        \n",
-                "        print(f'🤖 Mistral Response: {generated[:250]}...')\n",
-                "        \n",
-                "        # Quality check\n",
-                "        if len(generated) > 50 and 'number' in generated.lower() or 'krishna' in generated.lower():\n",
-                "            print('✅ Response quality: EXCELLENT')\n",
-                "        else:\n",
-                "            print('⚠️ Response quality: NEEDS IMPROVEMENT')\n",
-                "        \n",
-                "    except Exception as e:\n",
-                "        print(f'❌ Test {i} failed: {e}')\n",
-                "\n",
-                "print('\\n🎉 CELESTIAL MISTRAL 7B TRAINING COMPLETE!')\n",
-                "print('✅ Your own trained Mistral 7B model is ready!')\n",
-                "print('🌟 No external API dependencies - fully yours!')\n",
-                "print('\\n🚀 Next Steps:')\n",
-                "print('   • Save the trained model to HuggingFace')\n",
-                "print('   • Integrate with CELESTIAL platform')\n",
-                "print('   • Expand training data for more features')\n",
-                "print('   • Deploy to production environment')"
-            ]
-        }
-    ],
-    "metadata": {
-        "kernelspec": {
-            "display_name": "Python 3",
-            "language": "python",
-            "name": "python3"
-        },
-        "language_info": {
-            "codemirror_mode": {
-                "name": "ipython",
-                "version": 3
-            },
-            "file_extension": ".py",
-            "name": "python",
-            "nbconvert_exporter": "python",
-            "pygments_lexer": "ipython3",
-            "version": "3.8.5"
-        }
     },
-    "nbformat": 4,
-    "nbformat_minor": 4
-}

 {
+  "cells": [
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "execution_count": null,
+      "outputs": [],
+      "source": [
+        "# 🔧 Install pinned versions for stable training\n",
+        "# sentencepiece is pinned as well: the next cell forces use_fast=False, and the slow\n",
+        "# Mistral/Llama tokenizer is SentencePiece-based and cannot be instantiated without it.\n",
+        "!pip install -q transformers==4.46.2 tokenizers==0.20.1 sentencepiece==0.2.0\n",
+        "!pip install -q peft==0.14.0 datasets==2.20.0 bitsandbytes==0.43.3 accelerate==0.34.2 huggingface_hub==0.24.6 trl==0.11.4\n",
+        "# Silence the tokenizers fork/parallelism warning\n",
+        "import os; os.environ['TOKENIZERS_PARALLELISM'] = 'false'\n"
+      ]
     },
+    {
+      "cell_type": "code",
+      "metadata": {},
+      "execution_count": null,
+      "outputs": [],
+      "source": [
+        "# 🩹 Force slow tokenizer by default to avoid PyPreTokenizerTypeWrapper errors\n",
+        "# Only the default changes: callers can still pass use_fast=True explicitly.\n",
+        "from transformers import AutoTokenizer as _AutoTokenizer\n",
+        "\n",
+        "# Idempotency guard. _orig_from_pretrained is a notebook global that the wrapper looks up at call\n",
+        "# time; re-running an unguarded version of this cell would rebind it to the wrapper itself and\n",
+        "# every later from_pretrained call would recurse until RecursionError.\n",
+        "if not getattr(_AutoTokenizer.from_pretrained, '_slow_default_patched', False):\n",
+        "    _orig_from_pretrained = _AutoTokenizer.from_pretrained\n",
+        "    def _patched_from_pretrained(*args, **kwargs):\n",
+        "        \"\"\"Call the original AutoTokenizer.from_pretrained with use_fast defaulting to False.\"\"\"\n",
+        "        kwargs.setdefault('use_fast', False)\n",
+        "        return _orig_from_pretrained(*args, **kwargs)\n",
+        "    # Marker read by the guard above on subsequent runs of this cell\n",
+        "    _patched_from_pretrained._slow_default_patched = True\n",
+        "    _AutoTokenizer.from_pretrained = staticmethod(_patched_from_pretrained)\n",
+        "print('✅ Patched AutoTokenizer.from_pretrained to default use_fast=False')\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# 🌟 CELESTIAL MISTRAL 7B TRAINING\n",
+        "## Train Your Own Mistral 7B Model for CELESTIAL AI\n",
+        "\n",
+        "This notebook properly trains Mistral 7B v0.3 with:\n",
+        "- 150 production-quality conversations\n",
+        "- LoRA fine-tuning for efficiency\n",
+        "- Proper chat formatting for Mistral\n",
+        "- No logging issues"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# 📦 RUNTIME ENVIRONMENT SETUP FOR MISTRAL 7B\n",
+        "# Packages are installed once, with pinned versions, in the first cell of this notebook.\n",
+        "# They must NOT be re-installed here: the previous `pip install transformers==4.36.0 ...` line\n",
+        "# replaced the pinned 4.46.2 stack after transformers had already been imported, and 4.36.0\n",
+        "# does not accept the `eval_strategy` argument passed to TrainingArguments further down.\n",
+        "\n",
+        "# Disable all logging to prevent issues\n",
+        "import os\n",
+        "import warnings\n",
+        "os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
+        "os.environ[\"WANDB_MODE\"] = \"disabled\"\n",
+        "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
+        "warnings.filterwarnings('ignore')\n",
+        "\n",
+        "print('✅ Packages installed for Mistral 7B training!')\n",
+        "print('🚫 All logging disabled to prevent errors')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# 🔑 HUGGINGFACE AUTHENTICATION\n",
+        "# Best-effort login: a failure is reported but does not stop the notebook.\n",
+        "from huggingface_hub import notebook_login\n",
+        "\n",
+        "print('🔐 Authenticating with HuggingFace for Mistral access...')\n",
+        "try:\n",
+        "    notebook_login()\n",
+        "except Exception as auth_error:\n",
+        "    print(f'⚠️ Authentication failed: {auth_error}')\n",
+        "    print('Please set your HF token manually if needed')\n",
+        "else:\n",
+        "    print('✅ Authentication successful!')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# 📊 LOAD CELESTIAL DATASET\n",
+        "# Tries the named JSONL file first, then falls back to the repo's default 'train' split.\n",
+        "from datasets import load_dataset\n",
+        "\n",
+        "DATASET_REPO = 'dp1812/celestial-comprehensive-spiritual-ai'\n",
+        "\n",
+        "print('📊 Loading CELESTIAL dataset for Mistral training...')\n",
+        "try:\n",
+        "    dataset = load_dataset(\n",
+        "        DATASET_REPO,\n",
+        "        data_files='celestial_complete_production_dataset.jsonl',\n",
+        "        split='train',\n",
+        "    )\n",
+        "    print(f'✅ Dataset loaded: {len(dataset)} conversations')\n",
+        "    print('🎯 100 numerology + 50 Krishna divine guidance')\n",
+        "except Exception as primary_error:\n",
+        "    print(f'❌ Dataset loading failed: {primary_error}')\n",
+        "    # Second attempt without an explicit data file\n",
+        "    try:\n",
+        "        dataset = load_dataset(DATASET_REPO, split='train')\n",
+        "        print(f'✅ Fallback dataset loaded: {len(dataset)} conversations')\n",
+        "    except Exception as fallback_error:\n",
+        "        print(f'❌ All dataset loading failed: {fallback_error}')\n",
+        "        raise\n",
+        "\n",
+        "# Preview the first row: messages[1] is printed as the user turn, messages[2] as the assistant turn\n",
+        "print('\\n📝 Sample conversation:')\n",
+        "sample = dataset[0]\n",
+        "sample_user_text = sample['messages'][1]['content']\n",
+        "sample_assistant_text = sample['messages'][2]['content']\n",
+        "print(f\"User: {sample_user_text[:60]}...\")\n",
+        "print(f\"Assistant: {sample_assistant_text[:60]}...\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# 🤖 LOAD MISTRAL 7B MODEL AND TOKENIZER\n",
+        "import torch\n",
+        "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
+        "\n",
+        "MODEL_NAME = 'mistralai/Mistral-7B-v0.3'\n",
+        "\n",
+        "print('🤖 Loading Mistral 7B v0.3 model and tokenizer...')\n",
+        "\n",
+        "# Tokenizer, padding on the right-hand side\n",
+        "tokenizer_kwargs = dict(trust_remote_code=True, padding_side='right')\n",
+        "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, **tokenizer_kwargs)\n",
+        "\n",
+        "# Reuse EOS as the padding token when the tokenizer defines none\n",
+        "if tokenizer.pad_token is None:\n",
+        "    tokenizer.pad_token = tokenizer.eos_token\n",
+        "    tokenizer.pad_token_id = tokenizer.eos_token_id\n",
+        "\n",
+        "# 4-bit NF4 quantization with double quantization and float16 compute dtype\n",
+        "quantization_kwargs = dict(\n",
+        "    load_in_4bit=True,\n",
+        "    bnb_4bit_quant_type=\"nf4\",\n",
+        "    bnb_4bit_compute_dtype=torch.float16,\n",
+        "    bnb_4bit_use_double_quant=True,\n",
+        ")\n",
+        "bnb_config = BitsAndBytesConfig(**quantization_kwargs)\n",
+        "\n",
+        "# Quantized base model; device placement is delegated to device_map=\"auto\"\n",
+        "model_kwargs = dict(\n",
+        "    quantization_config=bnb_config,\n",
+        "    device_map=\"auto\",\n",
+        "    trust_remote_code=True,\n",
+        "    torch_dtype=torch.float16,\n",
+        ")\n",
+        "model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_kwargs)\n",
+        "\n",
+        "print('✅ Mistral 7B model and tokenizer loaded successfully!')\n",
+        "print(f'🔍 Model: {MODEL_NAME}')\n",
+        "print(f'🔍 Tokenizer vocab size: {len(tokenizer)}')\n",
+        "print(f'🔍 Model device: {model.device}')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# 🔧 SETUP LORA FOR MISTRAL 7B\n",
+        "from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training\n",
+        "\n",
+        "print('🔧 Setting up LoRA for Mistral 7B training...')\n",
+        "\n",
+        "# Make the quantized model trainable before adapters are attached\n",
+        "model = prepare_model_for_kbit_training(model)\n",
+        "\n",
+        "# Modules that receive LoRA adapters: attention projections, MLP projections, output head\n",
+        "attention_projections = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\"]\n",
+        "mlp_projections = [\"gate_proj\", \"up_proj\", \"down_proj\"]\n",
+        "target_modules = attention_projections + mlp_projections + [\"lm_head\"]\n",
+        "\n",
+        "print(f'🎯 Target modules for Mistral: {target_modules}')\n",
+        "\n",
+        "# Adapter hyper-parameters: rank 64, alpha 16, dropout 0.1, no bias terms trained\n",
+        "lora_settings = dict(\n",
+        "    r=64,\n",
+        "    lora_alpha=16,\n",
+        "    target_modules=target_modules,\n",
+        "    lora_dropout=0.1,\n",
+        "    bias=\"none\",\n",
+        "    task_type=TaskType.CAUSAL_LM,\n",
+        ")\n",
+        "lora_config = LoraConfig(**lora_settings)\n",
+        "\n",
+        "# Wrap the model with the adapters; any failure is reported and then re-raised\n",
+        "try:\n",
+        "    model = get_peft_model(model, lora_config)\n",
+        "    model.print_trainable_parameters()\n",
+        "    print('✅ LoRA adapters attached to Mistral 7B!')\n",
+        "except Exception as lora_error:\n",
+        "    print(f'❌ LoRA setup failed: {lora_error}')\n",
+        "    raise\n",
+        "\n",
+        "print('🎯 Mistral 7B ready for CELESTIAL training!')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# 📝 FORMAT DATA FOR MISTRAL CHAT TRAINING\n",
+        "def format_for_mistral_chat(example):\n",
+        "    \"\"\"Render one conversation in Mistral [INST] format and tokenize it.\n",
+        "\n",
+        "    Args:\n",
+        "        example: dataset row whose 'messages' list is read at indices 0, 1 and 2 as the\n",
+        "            system, user and assistant turns (each a dict with a 'content' key).\n",
+        "\n",
+        "    Returns:\n",
+        "        The tokenizer output (truncated to 2048 tokens, unpadded) with an extra 'labels'\n",
+        "        entry that is a copy of 'input_ids'.\n",
+        "    \"\"\"\n",
+        "    messages = example['messages']\n",
+        "    \n",
+        "    # Extract messages\n",
+        "    system_msg = messages[0]['content']\n",
+        "    user_msg = messages[1]['content']\n",
+        "    assistant_msg = messages[2]['content']\n",
+        "    \n",
+        "    # Mistral chat format (BOS/EOS are written out explicitly here)\n",
+        "    formatted = f\"<s>[INST] {system_msg}\\n\\nUser: {user_msg} [/INST] {assistant_msg}</s>\"\n",
+        "    \n",
+        "    # Tokenize. add_special_tokens=False because the text above already carries <s> and </s>;\n",
+        "    # with the default (True) the tokenizer would prepend a second BOS to every sample.\n",
+        "    tokens = tokenizer(\n",
+        "        formatted,\n",
+        "        truncation=True,\n",
+        "        padding=False,\n",
+        "        max_length=2048,  # Mistral context length\n",
+        "        add_special_tokens=False,\n",
+        "        return_tensors=None\n",
+        "    )\n",
+        "    \n",
+        "    # Set labels (same as input_ids for causal LM)\n",
+        "    tokens['labels'] = tokens['input_ids'].copy()\n",
+        "    \n",
+        "    return tokens\n",
+        "\n",
+        "print('📝 Formatting data for Mistral chat training...')\n",
+        "formatted_dataset = dataset.map(\n",
+        "    format_for_mistral_chat,\n",
+        "    remove_columns=dataset.column_names,\n",
+        "    desc=\"Formatting for Mistral\"\n",
+        ")\n",
+        "\n",
+        "print(f'✅ Formatted {len(formatted_dataset)} conversations for Mistral')\n",
+        "print('🎯 Using proper Mistral chat format with [INST] tags')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# 🚀 MISTRAL TRAINING CONFIGURATION\n",
+        "from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling\n",
+        "\n",
+        "print('🚀 Setting up Mistral 7B training configuration...')\n",
+        "\n",
+        "# Training arguments optimized for Mistral 7B.\n",
+        "# Schedule sizing: assuming the ~150 conversations advertised by this notebook, batch size 1 with\n",
+        "# 16 accumulation steps over 3 epochs gives fewer than 30 optimizer steps in total. A fixed\n",
+        "# warmup_steps=50 would therefore never leave warmup, and save_steps=100 would never write a\n",
+        "# checkpoint, so warmup is expressed as a ratio and checkpoints are written per epoch.\n",
+        "training_args = TrainingArguments(\n",
+        "    output_dir='./celestial-mistral-7b-results',\n",
+        "    num_train_epochs=3,\n",
+        "    per_device_train_batch_size=1,\n",
+        "    gradient_accumulation_steps=16,  # Effective batch size of 16\n",
+        "    warmup_ratio=0.1,  # Scales with the real number of optimizer steps\n",
+        "    learning_rate=2e-4,  # Higher LR for LoRA\n",
+        "    fp16=True,\n",
+        "    logging_steps=10,\n",
+        "    eval_strategy='no',\n",
+        "    save_strategy='epoch',  # One checkpoint per epoch so the run is actually persisted\n",
+        "    load_best_model_at_end=False,\n",
+        "    report_to=[],  # No external logging\n",
+        "    remove_unused_columns=False,\n",
+        "    dataloader_drop_last=True,\n",
+        "    group_by_length=True,  # Efficient batching\n",
+        "    ddp_find_unused_parameters=False\n",
+        ")\n",
+        "\n",
+        "# Data collator for Mistral (causal LM, so mlm=False; pads each batch to a multiple of 8)\n",
+        "data_collator = DataCollatorForLanguageModeling(\n",
+        "    tokenizer=tokenizer,\n",
+        "    mlm=False,\n",
+        "    pad_to_multiple_of=8\n",
+        ")\n",
+        "\n",
+        "# Create Mistral trainer\n",
+        "trainer = Trainer(\n",
+        "    model=model,\n",
+        "    args=training_args,\n",
+        "    train_dataset=formatted_dataset,\n",
+        "    tokenizer=tokenizer,\n",
+        "    data_collator=data_collator\n",
+        ")\n",
+        "\n",
+        "print('✅ Mistral 7B training configuration ready!')\n",
+        "print('🎯 Optimized for CELESTIAL AI with LoRA fine-tuning')\n",
+        "print('⏱️ Expected training time: 30-45 minutes')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# 🏃‍♂️ START MISTRAL 7B TRAINING\n",
+        "# Prints a short banner, runs the trainer, and re-raises any failure after reporting it.\n",
+        "print('🏃‍♂️ Starting CELESTIAL Mistral 7B training...')\n",
+        "print('⏱️ Expected time: 30-45 minutes')\n",
+        "print('🎯 Training Mistral 7B v0.3 on CELESTIAL conversations')\n",
+        "print('💎 150 production-quality conversations')\n",
+        "print('\\n🚀 Mistral training begins now...')\n",
+        "\n",
+        "try:\n",
+        "    trainer.train()\n",
+        "except Exception as training_error:\n",
+        "    print(f'❌ Mistral training failed: {training_error}')\n",
+        "    print('🔧 Please check the error and try again')\n",
+        "    raise\n",
+        "else:\n",
+        "    # Reached only when trainer.train() returned without raising\n",
+        "    print('\\n🎉 MISTRAL 7B TRAINING COMPLETED SUCCESSFULLY!')\n",
+        "    print('✅ CELESTIAL Mistral 7B is now trained!')\n",
+        "    print('🌟 Ready for testing and deployment!')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# 🧪 TEST TRAINED MISTRAL 7B\n",
+        "# Samples one completion per prompt and applies a rough length + keyword sanity check.\n",
+        "print('🧪 Testing the trained CELESTIAL Mistral 7B...')\n",
+        "\n",
+        "model.eval()\n",
+        "\n",
+        "test_prompts = [\n",
+        "    \"<s>[INST] You are CELESTIAL AI, an expert numerologist. Provide detailed analysis.\\n\\nUser: Tell me about number 7 in Chaldean numerology. [/INST]\",\n",
+        "    \"<s>[INST] You are Shree Krishna providing divine guidance.\\n\\nUser: Krishna, I need guidance about my career path. [/INST]\",\n",
+        "    \"<s>[INST] You are CELESTIAL AI providing numerology analysis.\\n\\nUser: Calculate my numerology for name 'John Smith' born 15/08/1990. [/INST]\"\n",
+        "]\n",
+        "\n",
+        "for i, prompt in enumerate(test_prompts, 1):\n",
+        "    print(f'\\n🔍 Test {i}: Mistral 7B Response')\n",
+        "    \n",
+        "    try:\n",
+        "        # The prompts already start with <s>; do not let the tokenizer prepend a second BOS.\n",
+        "        inputs = tokenizer(prompt, return_tensors=\"pt\", add_special_tokens=False).to(model.device)\n",
+        "        prompt_token_count = inputs['input_ids'].shape[1]\n",
+        "        \n",
+        "        with torch.no_grad():\n",
+        "            outputs = model.generate(\n",
+        "                **inputs,\n",
+        "                max_new_tokens=300,\n",
+        "                temperature=0.7,\n",
+        "                do_sample=True,\n",
+        "                pad_token_id=tokenizer.pad_token_id,\n",
+        "                eos_token_id=tokenizer.eos_token_id\n",
+        "            )\n",
+        "        \n",
+        "        # Decode only the newly generated tokens. Slicing the decoded text by len(prompt) is\n",
+        "        # misaligned: skip_special_tokens removes the literal <s> from the decoded prompt, so the\n",
+        "        # character offset overshoots and the beginning of the answer is cut off.\n",
+        "        generated = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True).strip()\n",
+        "        \n",
+        "        print(f'🤖 Mistral Response: {generated[:250]}...')\n",
+        "        \n",
+        "        # Quality check: long enough AND mentions an expected keyword. The parentheses are\n",
+        "        # required because `and` binds tighter than `or`; without them any reply containing\n",
+        "        # 'krishna' passed regardless of its length.\n",
+        "        generated_lower = generated.lower()\n",
+        "        if len(generated) > 50 and ('number' in generated_lower or 'krishna' in generated_lower):\n",
+        "            print('✅ Response quality: EXCELLENT')\n",
+        "        else:\n",
+        "            print('⚠️ Response quality: NEEDS IMPROVEMENT')\n",
+        "        \n",
+        "    except Exception as e:\n",
+        "        # Keep going so one failing prompt does not hide the results of the others\n",
+        "        print(f'❌ Test {i} failed: {e}')\n",
+        "\n",
+        "print('\\n🎉 CELESTIAL MISTRAL 7B TRAINING COMPLETE!')\n",
+        "print('✅ Your own trained Mistral 7B model is ready!')\n",
+        "print('🌟 No external API dependencies - fully yours!')\n",
+        "print('\\n🚀 Next Steps:')\n",
+        "print('   • Save the trained model to HuggingFace')\n",
+        "print('   • Integrate with CELESTIAL platform')\n",
+        "print('   • Expand training data for more features')\n",
+        "print('   • Deploy to production environment')"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.5"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 4
+}