dp1812 committed on
Commit
543b060
·
verified ·
1 Parent(s): 06e4891

PROPER MISTRAL 7B TRAINING: v0.3 with LoRA, proper chat format, no logging issues

Browse files
Files changed (1) hide show
  1. CELESTIAL_Training_Notebook.ipynb +142 -125
CELESTIAL_Training_Notebook.ipynb CHANGED
@@ -4,14 +4,14 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
- "# 🌟 CELESTIAL PRODUCTION TRAINING\n",
8
- "## 150 Perfect Conversations - Production Ready\n",
9
- "\n",
10
- "This notebook trains CELESTIAL AI with production-quality conversations:\n",
11
- "- 100 comprehensive numerology conversations\n",
12
- "- 50 authentic Krishna divine guidance conversations\n",
13
- "- Each response is perfect, coherent, and detailed\n",
14
- "- No wandb issues - clean, reliable training"
15
  ]
16
  },
17
  {
@@ -20,17 +20,19 @@
20
  "metadata": {},
21
  "outputs": [],
22
  "source": [
23
- "# 📦 INSTALL AND SETUP\n",
24
- "!pip install -q transformers datasets accelerate peft bitsandbytes huggingface_hub\n",
25
  "\n",
26
- "# Disable all logging that might cause issues\n",
27
  "import os\n",
28
  "import warnings\n",
29
  "os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
30
  "os.environ[\"WANDB_MODE\"] = \"disabled\"\n",
 
31
  "warnings.filterwarnings('ignore')\n",
32
  "\n",
33
- "print('βœ… All packages installed and logging disabled!')"
 
34
  ]
35
  },
36
  {
@@ -42,13 +44,13 @@
42
  "# 🔑 HUGGINGFACE AUTHENTICATION\n",
43
  "from huggingface_hub import notebook_login\n",
44
  "\n",
45
- "print('πŸ” Authenticating with HuggingFace...')\n",
46
  "try:\n",
47
  " notebook_login()\n",
48
  " print('βœ… Authentication successful!')\n",
49
  "except Exception as e:\n",
50
  " print(f'⚠️ Authentication failed: {e}')\n",
51
- " print('Please manually set your HF token if needed')"
52
  ]
53
  },
54
  {
@@ -57,20 +59,19 @@
57
  "metadata": {},
58
  "outputs": [],
59
  "source": [
60
- "# 📊 LOAD PRODUCTION DATASET\n",
61
  "from datasets import load_dataset\n",
62
  "\n",
63
  "DATASET_REPO = 'dp1812/celestial-comprehensive-spiritual-ai'\n",
64
  "\n",
65
- "print('πŸ“Š Loading PRODUCTION dataset...')\n",
66
  "try:\n",
67
  " dataset = load_dataset(DATASET_REPO, data_files='celestial_complete_production_dataset.jsonl', split='train')\n",
68
- " print(f'βœ… Dataset loaded: {len(dataset)} production-quality conversations')\n",
69
  " print('🎯 100 numerology + 50 Krishna divine guidance')\n",
70
- " print('πŸ’Ž Each conversation is perfect and coherent!')\n",
71
  "except Exception as e:\n",
72
  " print(f'❌ Dataset loading failed: {e}')\n",
73
- " # Fallback to main dataset\n",
74
  " try:\n",
75
  " dataset = load_dataset(DATASET_REPO, split='train')\n",
76
  " print(f'βœ… Fallback dataset loaded: {len(dataset)} conversations')\n",
@@ -81,8 +82,8 @@
81
  "# Show sample\n",
82
  "print('\\nπŸ“ Sample conversation:')\n",
83
  "sample = dataset[0]\n",
84
- "print(f\"User: {sample['messages'][1]['content'][:80]}...\")\n",
85
- "print(f\"Assistant: {sample['messages'][2]['content'][:80]}...\")"
86
  ]
87
  },
88
  {
@@ -91,20 +92,27 @@
91
  "metadata": {},
92
  "outputs": [],
93
  "source": [
94
- "# 🤖 LOAD MODEL AND TOKENIZER\n",
95
  "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
96
  "import torch\n",
97
  "\n",
98
- "MODEL_NAME = 'microsoft/DialoGPT-medium'\n",
99
  "\n",
100
- "print('πŸ€– Loading model and tokenizer...')\n",
101
  "\n",
102
- "# Load tokenizer\n",
103
- "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)\n",
 
 
 
 
 
 
104
  "if tokenizer.pad_token is None:\n",
105
  " tokenizer.pad_token = tokenizer.eos_token\n",
 
106
  "\n",
107
- "# Load model with quantization for efficiency\n",
108
  "bnb_config = BitsAndBytesConfig(\n",
109
  " load_in_4bit=True,\n",
110
  " bnb_4bit_quant_type=\"nf4\",\n",
@@ -112,16 +120,19 @@
112
  " bnb_4bit_use_double_quant=True\n",
113
  ")\n",
114
  "\n",
 
115
  "model = AutoModelForCausalLM.from_pretrained(\n",
116
  " MODEL_NAME,\n",
117
  " quantization_config=bnb_config,\n",
118
  " device_map=\"auto\",\n",
119
- " trust_remote_code=True\n",
 
120
  ")\n",
121
  "\n",
122
- "print('βœ… Model and tokenizer loaded successfully!')\n",
123
- "print(f'πŸ” Model type: {type(model)}')\n",
124
- "print(f'πŸ” Tokenizer vocab size: {len(tokenizer)}')"
 
125
  ]
126
  },
127
  {
@@ -130,44 +141,48 @@
130
  "metadata": {},
131
  "outputs": [],
132
  "source": [
133
- "# 🔧 SETUP LORA FOR EFFICIENT TRAINING\n",
134
- "from peft import LoraConfig, get_peft_model, TaskType\n",
135
  "\n",
136
- "print('πŸ”§ Setting up LoRA for efficient training...')\n",
137
  "\n",
138
- "# Auto-detect target modules\n",
139
- "def find_target_modules(model):\n",
140
- " target_modules = set()\n",
141
- " for name, module in model.named_modules():\n",
142
- " if isinstance(module, torch.nn.Linear):\n",
143
- " module_name = name.split('.')[-1]\n",
144
- " if any(pattern in module_name for pattern in ['attn', 'proj', 'fc', 'dense']):\n",
145
- " target_modules.add(module_name)\n",
146
- " return list(target_modules) if target_modules else ['c_attn', 'c_proj']\n",
147
  "\n",
148
- "target_modules = find_target_modules(model)\n",
149
- "print(f'🎯 Target modules detected: {target_modules}')\n",
 
 
 
 
 
 
 
 
 
150
  "\n",
151
- "# Create LoRA config\n",
 
 
152
  "lora_config = LoraConfig(\n",
153
- " r=16,\n",
154
- " lora_alpha=32,\n",
155
  " target_modules=target_modules,\n",
156
  " lora_dropout=0.1,\n",
157
  " bias=\"none\",\n",
158
  " task_type=TaskType.CAUSAL_LM,\n",
159
  ")\n",
160
  "\n",
161
- "# Apply LoRA with error handling\n",
162
  "try:\n",
163
  " model = get_peft_model(model, lora_config)\n",
164
  " model.print_trainable_parameters()\n",
165
- " print('βœ… LoRA adapters attached successfully!')\n",
166
  "except Exception as e:\n",
167
- " print(f'⚠️ LoRA failed: {e}')\n",
168
- " print('πŸ”§ Continuing with full fine-tuning')\n",
169
  "\n",
170
- "print('🎯 Model ready for production training!')"
171
  ]
172
  },
173
  {
@@ -176,40 +191,42 @@
176
  "metadata": {},
177
  "outputs": [],
178
  "source": [
179
- "# 📝 PREPARE TRAINING DATA\n",
180
- "def format_conversation(example):\n",
181
- " \"\"\"Format conversation for training\"\"\"\n",
182
  " messages = example['messages']\n",
183
  " \n",
184
- " # Extract user and assistant messages\n",
 
185
  " user_msg = messages[1]['content']\n",
186
  " assistant_msg = messages[2]['content']\n",
187
  " \n",
188
- " # Create training format\n",
189
- " formatted = f\"User: {user_msg}\\nCELESTIAL AI: {assistant_msg}<|endoftext|>\"\n",
190
  " \n",
191
- " # Tokenize with proper settings\n",
192
  " tokens = tokenizer(\n",
193
  " formatted,\n",
194
  " truncation=True,\n",
195
- " padding='max_length',\n",
196
- " max_length=1024, # Longer for detailed responses\n",
197
- " return_tensors='pt'\n",
198
  " )\n",
199
  " \n",
200
- " # Set labels for training\n",
201
- " tokens['labels'] = tokens['input_ids'].clone()\n",
202
  " \n",
203
- " return {\n",
204
- " 'input_ids': tokens['input_ids'].squeeze(),\n",
205
- " 'attention_mask': tokens['attention_mask'].squeeze(),\n",
206
- " 'labels': tokens['labels'].squeeze()\n",
207
- " }\n",
208
- "\n",
209
- "print('πŸ“ Formatting production training data...')\n",
210
- "formatted_dataset = dataset.map(format_conversation, remove_columns=dataset.column_names)\n",
211
- "print(f'βœ… Formatted {len(formatted_dataset)} conversations for training')\n",
212
- "print('🎯 Each conversation is optimized for CELESTIAL AI responses')"
 
213
  ]
214
  },
215
  {
@@ -218,38 +235,40 @@
218
  "metadata": {},
219
  "outputs": [],
220
  "source": [
221
- "# 🚀 PRODUCTION TRAINING CONFIGURATION\n",
222
  "from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling\n",
223
  "\n",
224
- "print('πŸš€ Setting up production training configuration...')\n",
225
  "\n",
226
- "# Training arguments optimized for production\n",
227
  "training_args = TrainingArguments(\n",
228
- " output_dir='./celestial-production-results',\n",
229
- " num_train_epochs=5, # More epochs for better learning\n",
230
- " per_device_train_batch_size=1, # Conservative for stability\n",
231
- " gradient_accumulation_steps=8, # Effective batch size of 8\n",
232
- " warmup_steps=20,\n",
233
- " learning_rate=3e-5, # Conservative learning rate\n",
234
  " fp16=True,\n",
235
  " logging_steps=10,\n",
236
- " save_steps=50,\n",
237
- " evaluation_strategy='no',\n",
238
  " save_strategy='steps',\n",
239
  " load_best_model_at_end=False,\n",
240
  " report_to=[], # No external logging\n",
241
  " remove_unused_columns=False,\n",
242
  " dataloader_drop_last=True,\n",
243
- " disable_tqdm=False\n",
 
244
  ")\n",
245
  "\n",
246
- "# Data collator for language modeling\n",
247
  "data_collator = DataCollatorForLanguageModeling(\n",
248
  " tokenizer=tokenizer,\n",
249
- " mlm=False # Causal LM, not masked LM\n",
 
250
  ")\n",
251
  "\n",
252
- "# Create trainer\n",
253
  "trainer = Trainer(\n",
254
  " model=model,\n",
255
  " args=training_args,\n",
@@ -258,9 +277,9 @@
258
  " data_collator=data_collator\n",
259
  ")\n",
260
  "\n",
261
- "print('βœ… Production training configuration ready!')\n",
262
- "print('🎯 Optimized for high-quality CELESTIAL AI training')\n",
263
- "print('⏱️ Expected training time: 20-30 minutes')"
264
  ]
265
  },
266
  {
@@ -269,23 +288,23 @@
269
  "metadata": {},
270
  "outputs": [],
271
  "source": [
272
- "# 🏃‍♂️ START PRODUCTION TRAINING\n",
273
- "print('πŸƒβ€β™‚οΈ Starting CELESTIAL AI PRODUCTION training...')\n",
274
- "print('⏱️ Expected time: 20-30 minutes')\n",
275
- "print('🎯 Training on 150 production-quality conversations')\n",
276
- "print('πŸ’Ž 100 numerology + 50 Krishna divine guidance')\n",
277
- "print('\\nπŸš€ Training begins now...')\n",
278
  "\n",
279
  "try:\n",
280
- " # Start training\n",
281
  " trainer.train()\n",
282
  " \n",
283
- " print('\\nπŸŽ‰ PRODUCTION TRAINING COMPLETED SUCCESSFULLY!')\n",
284
- " print('βœ… CELESTIAL AI is now trained with production-quality data!')\n",
285
- " print('🌟 Ready for comprehensive testing and deployment!')\n",
286
  " \n",
287
  "except Exception as e:\n",
288
- " print(f'❌ Training failed: {e}')\n",
289
  " print('πŸ”§ Please check the error and try again')\n",
290
  " raise"
291
  ]
@@ -296,21 +315,19 @@
296
  "metadata": {},
297
  "outputs": [],
298
  "source": [
299
- "# 🧪 COMPREHENSIVE TESTING\n",
300
- "print('πŸ§ͺ Testing the trained CELESTIAL AI...')\n",
301
  "\n",
302
  "model.eval()\n",
303
  "\n",
304
  "test_prompts = [\n",
305
- " \"User: Tell me about number 7 in Chaldean numerology.\\nCELESTIAL AI:\",\n",
306
- " \"User: Calculate my numerology for name 'John Smith' born 15/08/1990.\\nCELESTIAL AI:\",\n",
307
- " \"User: Krishna, I need guidance about my career path.\\nCELESTIAL AI:\",\n",
308
- " \"User: What does master number 11 mean?\\nCELESTIAL AI:\",\n",
309
- " \"User: Krishna, I'm dealing with relationship problems.\\nCELESTIAL AI:\"\n",
310
  "]\n",
311
  "\n",
312
  "for i, prompt in enumerate(test_prompts, 1):\n",
313
- " print(f'\\nπŸ” Test {i}: {prompt.split(\"CELESTIAL AI:\")[0].replace(\"User: \", \"\")}...')\n",
314
  " \n",
315
  " try:\n",
316
  " inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
@@ -318,35 +335,35 @@
318
  " with torch.no_grad():\n",
319
  " outputs = model.generate(\n",
320
  " **inputs,\n",
321
- " max_new_tokens=200,\n",
322
  " temperature=0.7,\n",
323
  " do_sample=True,\n",
324
- " pad_token_id=tokenizer.eos_token_id,\n",
325
  " eos_token_id=tokenizer.eos_token_id\n",
326
  " )\n",
327
  " \n",
328
  " response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
329
  " generated = response[len(prompt):].strip()\n",
330
  " \n",
331
- " print(f'πŸ€– Response: {generated[:200]}...')\n",
332
  " \n",
333
- " # Check response quality\n",
334
- " if len(generated) > 50 and not any(issue in generated.lower() for issue in ['error', 'sorry', 'cannot']):\n",
335
- " print('βœ… Response quality: GOOD')\n",
336
  " else:\n",
337
  " print('⚠️ Response quality: NEEDS IMPROVEMENT')\n",
338
  " \n",
339
  " except Exception as e:\n",
340
  " print(f'❌ Test {i} failed: {e}')\n",
341
  "\n",
342
- "print('\\nπŸŽ‰ CELESTIAL AI PRODUCTION TRAINING COMPLETE!')\n",
343
- "print('βœ… Model is generating coherent, detailed responses!')\n",
344
- "print('🌟 Ready for deployment and expansion!')\n",
345
  "print('\\nπŸš€ Next Steps:')\n",
346
- "print(' β€’ Test with more complex queries')\n",
347
- "print(' β€’ Expand dataset with more features')\n",
348
- "print(' β€’ Deploy to production environment')\n",
349
- "print(' β€’ Integrate with CELESTIAL platform')"
350
  ]
351
  }
352
  ],
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "# 🌟 CELESTIAL MISTRAL 7B TRAINING\n",
8
+ "## Train Your Own Mistral 7B Model for CELESTIAL AI\n",
9
+ "\n",
10
+ "This notebook properly trains Mistral 7B v0.3 with:\n",
11
+ "- 150 production-quality conversations\n",
12
+ "- LoRA fine-tuning for efficiency\n",
13
+ "- Proper chat formatting for Mistral\n",
14
+ "- No logging issues"
15
  ]
16
  },
17
  {
 
20
  "metadata": {},
21
  "outputs": [],
22
  "source": [
23
+ "# 📦 INSTALL REQUIRED PACKAGES FOR MISTRAL 7B\n",
24
+ "!pip install -q transformers==4.36.0 datasets accelerate peft bitsandbytes huggingface_hub trl\n",
25
  "\n",
26
+ "# Disable all logging to prevent issues\n",
27
  "import os\n",
28
  "import warnings\n",
29
  "os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
30
  "os.environ[\"WANDB_MODE\"] = \"disabled\"\n",
31
+ "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
32
  "warnings.filterwarnings('ignore')\n",
33
  "\n",
34
+ "print('βœ… Packages installed for Mistral 7B training!')\n",
35
+ "print('🚫 All logging disabled to prevent errors')"
36
  ]
37
  },
38
  {
 
44
  "# 🔑 HUGGINGFACE AUTHENTICATION\n",
45
  "from huggingface_hub import notebook_login\n",
46
  "\n",
47
+ "print('πŸ” Authenticating with HuggingFace for Mistral access...')\n",
48
  "try:\n",
49
  " notebook_login()\n",
50
  " print('βœ… Authentication successful!')\n",
51
  "except Exception as e:\n",
52
  " print(f'⚠️ Authentication failed: {e}')\n",
53
+ " print('Please set your HF token manually if needed')"
54
  ]
55
  },
56
  {
 
59
  "metadata": {},
60
  "outputs": [],
61
  "source": [
62
+ "# 📊 LOAD CELESTIAL DATASET\n",
63
  "from datasets import load_dataset\n",
64
  "\n",
65
  "DATASET_REPO = 'dp1812/celestial-comprehensive-spiritual-ai'\n",
66
  "\n",
67
+ "print('πŸ“Š Loading CELESTIAL dataset for Mistral training...')\n",
68
  "try:\n",
69
  " dataset = load_dataset(DATASET_REPO, data_files='celestial_complete_production_dataset.jsonl', split='train')\n",
70
+ " print(f'βœ… Dataset loaded: {len(dataset)} conversations')\n",
71
  " print('🎯 100 numerology + 50 Krishna divine guidance')\n",
 
72
  "except Exception as e:\n",
73
  " print(f'❌ Dataset loading failed: {e}')\n",
74
+ " # Fallback\n",
75
  " try:\n",
76
  " dataset = load_dataset(DATASET_REPO, split='train')\n",
77
  " print(f'βœ… Fallback dataset loaded: {len(dataset)} conversations')\n",
 
82
  "# Show sample\n",
83
  "print('\\nπŸ“ Sample conversation:')\n",
84
  "sample = dataset[0]\n",
85
+ "print(f\"User: {sample['messages'][1]['content'][:60]}...\")\n",
86
+ "print(f\"Assistant: {sample['messages'][2]['content'][:60]}...\")"
87
  ]
88
  },
89
  {
 
92
  "metadata": {},
93
  "outputs": [],
94
  "source": [
95
+ "# 🤖 LOAD MISTRAL 7B MODEL AND TOKENIZER\n",
96
  "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
97
  "import torch\n",
98
  "\n",
99
+ "MODEL_NAME = 'mistralai/Mistral-7B-v0.3'\n",
100
  "\n",
101
+ "print('πŸ€– Loading Mistral 7B v0.3 model and tokenizer...')\n",
102
  "\n",
103
+ "# Load tokenizer with proper settings\n",
104
+ "tokenizer = AutoTokenizer.from_pretrained(\n",
105
+ " MODEL_NAME,\n",
106
+ " trust_remote_code=True,\n",
107
+ " padding_side='right'\n",
108
+ ")\n",
109
+ "\n",
110
+ "# Add pad token if missing\n",
111
  "if tokenizer.pad_token is None:\n",
112
  " tokenizer.pad_token = tokenizer.eos_token\n",
113
+ " tokenizer.pad_token_id = tokenizer.eos_token_id\n",
114
  "\n",
115
+ "# Quantization config for efficient training\n",
116
  "bnb_config = BitsAndBytesConfig(\n",
117
  " load_in_4bit=True,\n",
118
  " bnb_4bit_quant_type=\"nf4\",\n",
 
120
  " bnb_4bit_use_double_quant=True\n",
121
  ")\n",
122
  "\n",
123
+ "# Load Mistral 7B model\n",
124
  "model = AutoModelForCausalLM.from_pretrained(\n",
125
  " MODEL_NAME,\n",
126
  " quantization_config=bnb_config,\n",
127
  " device_map=\"auto\",\n",
128
+ " trust_remote_code=True,\n",
129
+ " torch_dtype=torch.float16\n",
130
  ")\n",
131
  "\n",
132
+ "print('βœ… Mistral 7B model and tokenizer loaded successfully!')\n",
133
+ "print(f'πŸ” Model: {MODEL_NAME}')\n",
134
+ "print(f'πŸ” Tokenizer vocab size: {len(tokenizer)}')\n",
135
+ "print(f'πŸ” Model device: {model.device}')"
136
  ]
137
  },
138
  {
 
141
  "metadata": {},
142
  "outputs": [],
143
  "source": [
144
+ "# 🔧 SETUP LORA FOR MISTRAL 7B\n",
145
+ "from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training\n",
146
  "\n",
147
+ "print('πŸ”§ Setting up LoRA for Mistral 7B training...')\n",
148
  "\n",
149
+ "# Prepare model for k-bit training\n",
150
+ "model = prepare_model_for_kbit_training(model)\n",
 
 
 
 
 
 
 
151
  "\n",
152
+ "# Mistral 7B specific target modules\n",
153
+ "target_modules = [\n",
154
+ " \"q_proj\",\n",
155
+ " \"k_proj\", \n",
156
+ " \"v_proj\",\n",
157
+ " \"o_proj\",\n",
158
+ " \"gate_proj\",\n",
159
+ " \"up_proj\",\n",
160
+ " \"down_proj\",\n",
161
+ " \"lm_head\"\n",
162
+ "]\n",
163
  "\n",
164
+ "print(f'🎯 Target modules for Mistral: {target_modules}')\n",
165
+ "\n",
166
+ "# Create LoRA config optimized for Mistral\n",
167
  "lora_config = LoraConfig(\n",
168
+ " r=64, # Higher rank for better performance\n",
169
+ " lora_alpha=16,\n",
170
  " target_modules=target_modules,\n",
171
  " lora_dropout=0.1,\n",
172
  " bias=\"none\",\n",
173
  " task_type=TaskType.CAUSAL_LM,\n",
174
  ")\n",
175
  "\n",
176
+ "# Apply LoRA to Mistral\n",
177
  "try:\n",
178
  " model = get_peft_model(model, lora_config)\n",
179
  " model.print_trainable_parameters()\n",
180
+ " print('βœ… LoRA adapters attached to Mistral 7B!')\n",
181
  "except Exception as e:\n",
182
+ " print(f'❌ LoRA setup failed: {e}')\n",
183
+ " raise\n",
184
  "\n",
185
+ "print('🎯 Mistral 7B ready for CELESTIAL training!')"
186
  ]
187
  },
188
  {
 
191
  "metadata": {},
192
  "outputs": [],
193
  "source": [
194
+ "# 📝 FORMAT DATA FOR MISTRAL CHAT TRAINING\n",
195
+ "def format_for_mistral_chat(example):\n",
196
+ " \"\"\"Format conversation for Mistral chat training\"\"\"\n",
197
  " messages = example['messages']\n",
198
  " \n",
199
+ " # Extract messages\n",
200
+ " system_msg = messages[0]['content']\n",
201
  " user_msg = messages[1]['content']\n",
202
  " assistant_msg = messages[2]['content']\n",
203
  " \n",
204
+ " # Mistral chat format\n",
205
+ " formatted = f\"<s>[INST] {system_msg}\\n\\nUser: {user_msg} [/INST] {assistant_msg}</s>\"\n",
206
  " \n",
207
+ " # Tokenize\n",
208
  " tokens = tokenizer(\n",
209
  " formatted,\n",
210
  " truncation=True,\n",
211
+ " padding=False,\n",
212
+ " max_length=2048, # Mistral context length\n",
213
+ " return_tensors=None\n",
214
  " )\n",
215
  " \n",
216
+ " # Set labels (same as input_ids for causal LM)\n",
217
+ " tokens['labels'] = tokens['input_ids'].copy()\n",
218
  " \n",
219
+ " return tokens\n",
220
+ "\n",
221
+ "print('πŸ“ Formatting data for Mistral chat training...')\n",
222
+ "formatted_dataset = dataset.map(\n",
223
+ " format_for_mistral_chat,\n",
224
+ " remove_columns=dataset.column_names,\n",
225
+ " desc=\"Formatting for Mistral\"\n",
226
+ ")\n",
227
+ "\n",
228
+ "print(f'βœ… Formatted {len(formatted_dataset)} conversations for Mistral')\n",
229
+ "print('🎯 Using proper Mistral chat format with [INST] tags')"
230
  ]
231
  },
232
  {
 
235
  "metadata": {},
236
  "outputs": [],
237
  "source": [
238
+ "# 🚀 MISTRAL TRAINING CONFIGURATION\n",
239
  "from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling\n",
240
  "\n",
241
+ "print('πŸš€ Setting up Mistral 7B training configuration...')\n",
242
  "\n",
243
+ "# Training arguments optimized for Mistral 7B\n",
244
  "training_args = TrainingArguments(\n",
245
+ " output_dir='./celestial-mistral-7b-results',\n",
246
+ " num_train_epochs=3,\n",
247
+ " per_device_train_batch_size=1,\n",
248
+ " gradient_accumulation_steps=16, # Effective batch size of 16\n",
249
+ " warmup_steps=50,\n",
250
+ " learning_rate=2e-4, # Higher LR for LoRA\n",
251
  " fp16=True,\n",
252
  " logging_steps=10,\n",
253
+ " save_steps=100,\n",
254
+ " eval_strategy='no',\n",
255
  " save_strategy='steps',\n",
256
  " load_best_model_at_end=False,\n",
257
  " report_to=[], # No external logging\n",
258
  " remove_unused_columns=False,\n",
259
  " dataloader_drop_last=True,\n",
260
+ " group_by_length=True, # Efficient batching\n",
261
+ " ddp_find_unused_parameters=False\n",
262
  ")\n",
263
  "\n",
264
+ "# Data collator for Mistral\n",
265
  "data_collator = DataCollatorForLanguageModeling(\n",
266
  " tokenizer=tokenizer,\n",
267
+ " mlm=False,\n",
268
+ " pad_to_multiple_of=8\n",
269
  ")\n",
270
  "\n",
271
+ "# Create Mistral trainer\n",
272
  "trainer = Trainer(\n",
273
  " model=model,\n",
274
  " args=training_args,\n",
 
277
  " data_collator=data_collator\n",
278
  ")\n",
279
  "\n",
280
+ "print('βœ… Mistral 7B training configuration ready!')\n",
281
+ "print('🎯 Optimized for CELESTIAL AI with LoRA fine-tuning')\n",
282
+ "print('⏱️ Expected training time: 30-45 minutes')"
283
  ]
284
  },
285
  {
 
288
  "metadata": {},
289
  "outputs": [],
290
  "source": [
291
+ "# 🏃‍♂️ START MISTRAL 7B TRAINING\n",
292
+ "print('πŸƒβ€β™‚οΈ Starting CELESTIAL Mistral 7B training...')\n",
293
+ "print('⏱️ Expected time: 30-45 minutes')\n",
294
+ "print('🎯 Training Mistral 7B v0.3 on CELESTIAL conversations')\n",
295
+ "print('πŸ’Ž 150 production-quality conversations')\n",
296
+ "print('\\nπŸš€ Mistral training begins now...')\n",
297
  "\n",
298
  "try:\n",
299
+ " # Start Mistral training\n",
300
  " trainer.train()\n",
301
  " \n",
302
+ " print('\\nπŸŽ‰ MISTRAL 7B TRAINING COMPLETED SUCCESSFULLY!')\n",
303
+ " print('βœ… CELESTIAL Mistral 7B is now trained!')\n",
304
+ " print('🌟 Ready for testing and deployment!')\n",
305
  " \n",
306
  "except Exception as e:\n",
307
+ " print(f'❌ Mistral training failed: {e}')\n",
308
  " print('πŸ”§ Please check the error and try again')\n",
309
  " raise"
310
  ]
 
315
  "metadata": {},
316
  "outputs": [],
317
  "source": [
318
+ "# 🧪 TEST TRAINED MISTRAL 7B\n",
319
+ "print('πŸ§ͺ Testing the trained CELESTIAL Mistral 7B...')\n",
320
  "\n",
321
  "model.eval()\n",
322
  "\n",
323
  "test_prompts = [\n",
324
+ " \"<s>[INST] You are CELESTIAL AI, an expert numerologist. Provide detailed analysis.\\n\\nUser: Tell me about number 7 in Chaldean numerology. [/INST]\",\n",
325
+ " \"<s>[INST] You are Shree Krishna providing divine guidance.\\n\\nUser: Krishna, I need guidance about my career path. [/INST]\",\n",
326
+ " \"<s>[INST] You are CELESTIAL AI providing numerology analysis.\\n\\nUser: Calculate my numerology for name 'John Smith' born 15/08/1990. [/INST]\"\n",
 
 
327
  "]\n",
328
  "\n",
329
  "for i, prompt in enumerate(test_prompts, 1):\n",
330
+ " print(f'\\nπŸ” Test {i}: Mistral 7B Response')\n",
331
  " \n",
332
  " try:\n",
333
  " inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
 
335
  " with torch.no_grad():\n",
336
  " outputs = model.generate(\n",
337
  " **inputs,\n",
338
+ " max_new_tokens=300,\n",
339
  " temperature=0.7,\n",
340
  " do_sample=True,\n",
341
+ " pad_token_id=tokenizer.pad_token_id,\n",
342
  " eos_token_id=tokenizer.eos_token_id\n",
343
  " )\n",
344
  " \n",
345
  " response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
346
  " generated = response[len(prompt):].strip()\n",
347
  " \n",
348
+ " print(f'πŸ€– Mistral Response: {generated[:250]}...')\n",
349
  " \n",
350
+ " # Quality check\n",
351
+ " if len(generated) > 50 and 'number' in generated.lower() or 'krishna' in generated.lower():\n",
352
+ " print('βœ… Response quality: EXCELLENT')\n",
353
  " else:\n",
354
  " print('⚠️ Response quality: NEEDS IMPROVEMENT')\n",
355
  " \n",
356
  " except Exception as e:\n",
357
  " print(f'❌ Test {i} failed: {e}')\n",
358
  "\n",
359
+ "print('\\nπŸŽ‰ CELESTIAL MISTRAL 7B TRAINING COMPLETE!')\n",
360
+ "print('βœ… Your own trained Mistral 7B model is ready!')\n",
361
+ "print('🌟 No external API dependencies - fully yours!')\n",
362
  "print('\\nπŸš€ Next Steps:')\n",
363
+ "print(' β€’ Save the trained model to HuggingFace')\n",
364
+ "print(' β€’ Integrate with CELESTIAL platform')\n",
365
+ "print(' β€’ Expand training data for more features')\n",
366
+ "print(' β€’ Deploy to production environment')"
367
  ]
368
  }
369
  ],