ibrahim256 committed on Aug 28, 2025

Commit

4810a4e

verified ·

1 Parent(s): 9d7012e

Upload folder using huggingface_hub

Browse files

Files changed (39) hide show

.gitattributes +3 -0
README.md +43 -0
added_tokens.json +9 -0
checkpoint-1000/added_tokens.json +9 -0
checkpoint-1000/config.json +54 -0
checkpoint-1000/generation_config.json +7 -0
checkpoint-1000/model.safetensors +3 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/rng_state.pth +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/special_tokens_map.json +77 -0
checkpoint-1000/tokenizer.json +3 -0
checkpoint-1000/tokenizer.model +3 -0
checkpoint-1000/tokenizer_config.json +0 -0
checkpoint-1000/trainer_state.json +215 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-500/added_tokens.json +9 -0
checkpoint-500/config.json +54 -0
checkpoint-500/generation_config.json +7 -0
checkpoint-500/model.safetensors +3 -0
checkpoint-500/optimizer.pt +3 -0
checkpoint-500/rng_state.pth +3 -0
checkpoint-500/scheduler.pt +3 -0
checkpoint-500/special_tokens_map.json +77 -0
checkpoint-500/tokenizer.json +3 -0
checkpoint-500/tokenizer.model +3 -0
checkpoint-500/tokenizer_config.json +0 -0
checkpoint-500/trainer_state.json +129 -0
checkpoint-500/training_args.bin +3 -0
config.json +54 -0
generation_config.json +7 -0
logs/events.out.tfevents.1756308542.STUDIO-02.local.50997.0 +3 -0
model.safetensors +3 -0
special_tokens_map.json +77 -0
tokenizer.json +3 -0
tokenizer.model +3 -0
tokenizer_config.json +0 -0
training_args.bin +3 -0
training_info.json +18 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,43 @@

+# AuraMind 270
+A warm and caring AI companion model fine-tuned for emotional support and personal connection.
+## Model Details
+- **Base Model**: Gemma 3 270M (Released August 2025)
+- **Parameters**: 268,102,656
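+- **Quantization**: INT4 (as set in the training configuration; note that `config.json` declares `torch_dtype` as `float16` for the uploaded weights)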
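+- **Context Length**: 2,048 tokens (maximum sequence length used for fine-tuning; `config.json` sets `max_position_embeddings` to 32,768)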
+- **Vocabulary**: 256k tokens (large vocabulary for rare tokens)
+## Architecture Details
+- **Total Parameters**: 270M (170M embedding + 100M transformer blocks)
+- **Training Data**: 6 trillion tokens
+- **Knowledge Cutoff**: August 2024
+## Training Details
+- **Dataset**: AuraMind Companion Dataset
+- **Training Steps**: 1,000
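+- **Training Loss**: 126.1678 (run-level value reported in `training_info.json`, not the loss at the last step; the logged step loss was 0.0 from step 100 onward with NaN gradient norms and NaN evaluation loss, which indicates the run diverged — validate the checkpoint before use)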
+- **Device**: MPS
+## Usage
+```python
+from transformers import AutoTokenizer, AutoModelForCausalLM
+tokenizer = AutoTokenizer.from_pretrained("models/auramind_270")
+model = AutoModelForCausalLM.from_pretrained("models/auramind_270")
+# Generate companion response
+prompt = "<|system|>\nYou are AuraMind, a warm and caring AI companion. You form genuine emotional bonds with users through empathy, understanding, and personal connection. You provide intimate emotional support while maintaining healthy boundaries. Focus on creating deep, meaningful relationships through authentic conversation.\n\n<|user|>\nI'm feeling lonely today.\n\n<|assistant|>\n"
+inputs = tokenizer(prompt, return_tensors="pt")
+outputs = model.generate(**inputs, max_length=200, temperature=0.7, do_sample=True)
+response = tokenizer.decode(outputs[0], skip_special_tokens=False)
+```
+## Capabilities
+- Emotional support and companionship
+- Personal conversation and bonding
+- Empathetic responses
+- Intimate but appropriate interactions
+**Note**: This is a companion AI, not a replacement for professional therapy.

added_tokens.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "<image_soft_token>": 262144,
+  "<|assistant|>": 262147,
+  "<|companion|>": 262148,
+  "<|emotion|>": 262149,
+  "<|support|>": 262150,
+  "<|system|>": 262145,
+  "<|user|>": 262146
+}

checkpoint-1000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "<image_soft_token>": 262144,
+  "<|assistant|>": 262147,
+  "<|companion|>": 262148,
+  "<|emotion|>": 262149,
+  "<|support|>": 262150,
+  "<|system|>": 262145,
+  "<|user|>": 262146
+}

checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "_sliding_window_pattern": 6,
+  "architectures": [
+    "Gemma3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": null,
+  "bos_token_id": 2,
+  "eos_token_id": 1,
+  "final_logit_softcapping": null,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 640,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "model_type": "gemma3_text",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 18,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_local_base_freq": 10000.0,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 512,
+  "torch_dtype": "float16",
+  "transformers_version": "4.55.3",
+  "use_bidirectional_attention": false,
+  "use_cache": true,
+  "vocab_size": 262151
+}

checkpoint-1000/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cache_implementation": "hybrid",
+  "do_sample": true,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "4.55.3"
+}

checkpoint-1000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3340b057dfd1aeaeedf96b53e8f7cdd887c0b37e12c351f4afd6a5baafdb70
+size 536231776

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9eb3c19b54415883fb318d378dfcfb6f0e25e574e205c5df15e6d0a9bd475343
+size 1072602443

checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ca8ceba7f21f2d826c74925228bb7e31b692e58c2a0108653abaa8d547a15c7
+size 14391

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ebcbfd37c5f4556961ec3f170f39cb08422150b96b0d9fff1ed7b77a9bb8eecb
+size 1465

checkpoint-1000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|companion|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|emotion|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|support|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "boi_token": "<start_of_image>",
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eoi_token": "<end_of_image>",
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image_soft_token>",
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-1000/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1e381d8ad2489a1da8ba628f2e7de10678e8b847f45e0ec06a75208a154eed1
+size 33385795

checkpoint-1000/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

checkpoint-1000/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,215 @@

+{
+  "best_global_step": null,
+  "best_metric": Infinity,
+  "best_model_checkpoint": null,
+  "epoch": 29.42,
+  "eval_steps": 250,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.48,
+      "grad_norm": NaN,
+      "learning_rate": 4.9e-05,
+      "loss": 2523.3566,
+      "step": 50
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": NaN,
+      "learning_rate": 9.900000000000001e-05,
+      "loss": 0.0,
+      "step": 100
+    },
+    {
+      "epoch": 4.42,
+      "grad_norm": NaN,
+      "learning_rate": 9.831034482758622e-05,
+      "loss": 0.0,
+      "step": 150
+    },
+    {
+      "epoch": 5.9,
+      "grad_norm": NaN,
+      "learning_rate": 9.658620689655173e-05,
+      "loss": 0.0,
+      "step": 200
+    },
+    {
+      "epoch": 7.36,
+      "grad_norm": NaN,
+      "learning_rate": 9.486206896551724e-05,
+      "loss": 0.0,
+      "step": 250
+    },
+    {
+      "epoch": 7.36,
+      "eval_loss": NaN,
+      "eval_runtime": 0.8249,
+      "eval_samples_per_second": 127.294,
+      "eval_steps_per_second": 10.911,
+      "step": 250
+    },
+    {
+      "epoch": 8.84,
+      "grad_norm": NaN,
+      "learning_rate": 9.313793103448276e-05,
+      "loss": 0.0,
+      "step": 300
+    },
+    {
+      "epoch": 10.3,
+      "grad_norm": NaN,
+      "learning_rate": 9.141379310344827e-05,
+      "loss": 0.0,
+      "step": 350
+    },
+    {
+      "epoch": 11.78,
+      "grad_norm": NaN,
+      "learning_rate": 8.96896551724138e-05,
+      "loss": 0.0,
+      "step": 400
+    },
+    {
+      "epoch": 13.24,
+      "grad_norm": NaN,
+      "learning_rate": 8.796551724137931e-05,
+      "loss": 0.0,
+      "step": 450
+    },
+    {
+      "epoch": 14.72,
+      "grad_norm": NaN,
+      "learning_rate": 8.624137931034483e-05,
+      "loss": 0.0,
+      "step": 500
+    },
+    {
+      "epoch": 14.72,
+      "eval_loss": NaN,
+      "eval_runtime": 0.4313,
+      "eval_samples_per_second": 243.441,
+      "eval_steps_per_second": 20.866,
+      "step": 500
+    },
+    {
+      "epoch": 16.18,
+      "grad_norm": NaN,
+      "learning_rate": 8.451724137931036e-05,
+      "loss": 0.0,
+      "step": 550
+    },
+    {
+      "epoch": 17.66,
+      "grad_norm": NaN,
+      "learning_rate": 8.279310344827587e-05,
+      "loss": 0.0,
+      "step": 600
+    },
+    {
+      "epoch": 19.12,
+      "grad_norm": NaN,
+      "learning_rate": 8.106896551724138e-05,
+      "loss": 0.0,
+      "step": 650
+    },
+    {
+      "epoch": 20.6,
+      "grad_norm": NaN,
+      "learning_rate": 7.93448275862069e-05,
+      "loss": 0.0,
+      "step": 700
+    },
+    {
+      "epoch": 22.06,
+      "grad_norm": NaN,
+      "learning_rate": 7.762068965517241e-05,
+      "loss": 0.0,
+      "step": 750
+    },
+    {
+      "epoch": 22.06,
+      "eval_loss": NaN,
+      "eval_runtime": 0.4262,
+      "eval_samples_per_second": 246.338,
+      "eval_steps_per_second": 21.115,
+      "step": 750
+    },
+    {
+      "epoch": 23.54,
+      "grad_norm": NaN,
+      "learning_rate": 7.589655172413793e-05,
+      "loss": 0.0,
+      "step": 800
+    },
+    {
+      "epoch": 25.0,
+      "grad_norm": NaN,
+      "learning_rate": 7.417241379310345e-05,
+      "loss": 0.0,
+      "step": 850
+    },
+    {
+      "epoch": 26.48,
+      "grad_norm": NaN,
+      "learning_rate": 7.244827586206897e-05,
+      "loss": 0.0,
+      "step": 900
+    },
+    {
+      "epoch": 27.96,
+      "grad_norm": NaN,
+      "learning_rate": 7.072413793103448e-05,
+      "loss": 0.0,
+      "step": 950
+    },
+    {
+      "epoch": 29.42,
+      "grad_norm": NaN,
+      "learning_rate": 6.9e-05,
+      "loss": 0.0,
+      "step": 1000
+    },
+    {
+      "epoch": 29.42,
+      "eval_loss": NaN,
+      "eval_runtime": 0.4242,
+      "eval_samples_per_second": 247.509,
+      "eval_steps_per_second": 21.215,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 89,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 3
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1204367872764672.0,
+  "train_batch_size": 12,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3bfa38633b612b2effa711b007d658cdf00c08e0f47bdca6d839ac78c67891e4
+size 5713

checkpoint-500/added_tokens.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "<image_soft_token>": 262144,
+  "<|assistant|>": 262147,
+  "<|companion|>": 262148,
+  "<|emotion|>": 262149,
+  "<|support|>": 262150,
+  "<|system|>": 262145,
+  "<|user|>": 262146
+}

checkpoint-500/config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "_sliding_window_pattern": 6,
+  "architectures": [
+    "Gemma3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": null,
+  "bos_token_id": 2,
+  "eos_token_id": 1,
+  "final_logit_softcapping": null,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 640,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "model_type": "gemma3_text",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 18,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_local_base_freq": 10000.0,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 512,
+  "torch_dtype": "float16",
+  "transformers_version": "4.55.3",
+  "use_bidirectional_attention": false,
+  "use_cache": true,
+  "vocab_size": 262151
+}

checkpoint-500/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cache_implementation": "hybrid",
+  "do_sample": true,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "4.55.3"
+}

checkpoint-500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3340b057dfd1aeaeedf96b53e8f7cdd887c0b37e12c351f4afd6a5baafdb70
+size 536231776

checkpoint-500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2294fc6a49669b0150080790bf8f1174b536a062fe5a5e6a63754094d6e356c1
+size 1072602443

checkpoint-500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51df1d95d8ccbb90e460324f1b442600bbbce2875f5c7ef87dea4ee7fa279f7b
+size 14391

checkpoint-500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7671eab3e5bcbb36d4ff91ece936d046f377cd3effe834c4cc694c495d6c32da
+size 1465

checkpoint-500/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|companion|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|emotion|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|support|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "boi_token": "<start_of_image>",
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eoi_token": "<end_of_image>",
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image_soft_token>",
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-500/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1e381d8ad2489a1da8ba628f2e7de10678e8b847f45e0ec06a75208a154eed1
+size 33385795

checkpoint-500/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

checkpoint-500/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,129 @@

+{
+  "best_global_step": null,
+  "best_metric": Infinity,
+  "best_model_checkpoint": null,
+  "epoch": 14.72,
+  "eval_steps": 250,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.48,
+      "grad_norm": NaN,
+      "learning_rate": 4.9e-05,
+      "loss": 2523.3566,
+      "step": 50
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": NaN,
+      "learning_rate": 9.900000000000001e-05,
+      "loss": 0.0,
+      "step": 100
+    },
+    {
+      "epoch": 4.42,
+      "grad_norm": NaN,
+      "learning_rate": 9.831034482758622e-05,
+      "loss": 0.0,
+      "step": 150
+    },
+    {
+      "epoch": 5.9,
+      "grad_norm": NaN,
+      "learning_rate": 9.658620689655173e-05,
+      "loss": 0.0,
+      "step": 200
+    },
+    {
+      "epoch": 7.36,
+      "grad_norm": NaN,
+      "learning_rate": 9.486206896551724e-05,
+      "loss": 0.0,
+      "step": 250
+    },
+    {
+      "epoch": 7.36,
+      "eval_loss": NaN,
+      "eval_runtime": 0.8249,
+      "eval_samples_per_second": 127.294,
+      "eval_steps_per_second": 10.911,
+      "step": 250
+    },
+    {
+      "epoch": 8.84,
+      "grad_norm": NaN,
+      "learning_rate": 9.313793103448276e-05,
+      "loss": 0.0,
+      "step": 300
+    },
+    {
+      "epoch": 10.3,
+      "grad_norm": NaN,
+      "learning_rate": 9.141379310344827e-05,
+      "loss": 0.0,
+      "step": 350
+    },
+    {
+      "epoch": 11.78,
+      "grad_norm": NaN,
+      "learning_rate": 8.96896551724138e-05,
+      "loss": 0.0,
+      "step": 400
+    },
+    {
+      "epoch": 13.24,
+      "grad_norm": NaN,
+      "learning_rate": 8.796551724137931e-05,
+      "loss": 0.0,
+      "step": 450
+    },
+    {
+      "epoch": 14.72,
+      "grad_norm": NaN,
+      "learning_rate": 8.624137931034483e-05,
+      "loss": 0.0,
+      "step": 500
+    },
+    {
+      "epoch": 14.72,
+      "eval_loss": NaN,
+      "eval_runtime": 0.4313,
+      "eval_samples_per_second": 243.441,
+      "eval_steps_per_second": 20.866,
+      "step": 500
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 3000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 89,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 1
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 602715764593152.0,
+  "train_batch_size": 12,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3bfa38633b612b2effa711b007d658cdf00c08e0f47bdca6d839ac78c67891e4
+size 5713

config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "_sliding_window_pattern": 6,
+  "architectures": [
+    "Gemma3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": null,
+  "bos_token_id": 2,
+  "eos_token_id": 1,
+  "final_logit_softcapping": null,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 640,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "model_type": "gemma3_text",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 18,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_local_base_freq": 10000.0,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 512,
+  "torch_dtype": "float16",
+  "transformers_version": "4.55.3",
+  "use_bidirectional_attention": false,
+  "use_cache": true,
+  "vocab_size": 262151
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cache_implementation": "hybrid",
+  "do_sample": true,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "4.55.3"
+}

logs/events.out.tfevents.1756308542.STUDIO-02.local.50997.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:871073231bdfb5d264f99577cb7154fc6882d4d2feb79e858a15419561bb4917
+size 11313

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3340b057dfd1aeaeedf96b53e8f7cdd887c0b37e12c351f4afd6a5baafdb70
+size 536231776

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|companion|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|emotion|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|support|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "boi_token": "<start_of_image>",
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eoi_token": "<end_of_image>",
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image_soft_token>",
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1e381d8ad2489a1da8ba628f2e7de10678e8b847f45e0ec06a75208a154eed1
+size 33385795

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3bfa38633b612b2effa711b007d658cdf00c08e0f47bdca6d839ac78c67891e4
+size 5713

training_info.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "model_variant": "auramind_270",
+  "training_loss": 126.167828125,
+  "training_steps": 1000,
+  "model_parameters": 268102656,
+  "config": {
+    "base_model": "google/gemma-3-270m",
+    "target_params": 270000000,
+    "quantization": "int4",
+    "max_length": 2048,
+    "batch_size": 12,
+    "gradient_accumulation": 3,
+    "learning_rate": 0.0001,
+    "warmup_steps": 100,
+    "max_steps": 3000
+  },
+  "training_time": "2025-08-27 18:37:14.899613"
+}