Upload 8 files

Browse files

Files changed (8) hide show

config.json +40 -0
echo_llm.bin +3 -0
echo_llm.safetensors +3 -0
model_config.json +38 -0
project_config.json +8 -0
special_tokens_map.json +9 -0
tokenizer.json +0 -0
tokenizer_config.json +74 -0

config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "activation": "gelu",
+  "architecture": "EchoLLM",
+  "batch_size": 8,
+  "checkpoint_frequency": 1,
+  "curriculum_learning": true,
+  "d_model": 768,
+  "dim_feedforward": 3072,
+  "dropout": 0.1,
+  "eval_metric": "perplexity",
+  "export_formats": [
+    "bin",
+    "safetensors"
+  ],
+  "gradient_accumulation_steps": 4,
+  "learning_rate": 5e-05,
+  "load_from_checkpoint": null,
+  "loss_function": "CrossEntropyLoss",
+  "max_position_embeddings": 8192,
+  "max_token_length": 2048,
+  "memory_size": 2048,
+  "model_name": "Echo",
+  "model_type": "echollm",
+  "num_epochs": 10,
+  "num_experts": 4,
+  "num_heads": 12,
+  "num_layers": 12,
+  "optimizer": "Adafactor",
+  "output_dir": "EchoLLM_Model\\model",
+  "precision": "float32",
+  "save_best_model": true,
+  "scheduler": "cosine",
+  "transformers_version": "4.49.0",
+  "use_gradient_checkpointing": true,
+  "use_memory": true,
+  "use_mixed_precision": true,
+  "vocab_size": 32000,
+  "warmup_steps": 1000,
+  "weight_decay": 0.01
+}

echo_llm.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a03d1b56ae381b1f20d31575812081aabd13cbbeed420274fe73ee026185e880
+size 1234593397

echo_llm.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:393c80ae4793655b24367d115250508d4accaba05559b63bb07aed4e464006c7
+size 1234493648

model_config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+    "model_name": "Echo",
+    "architecture": "EchoLLM",
+    "vocab_size": 32000,
+    "max_position_embeddings": 8192,
+    "d_model": 768,
+    "num_layers": 12,
+    "num_heads": 12,
+    "dim_feedforward": 3072,
+    "dropout": 0.1,
+    "activation": "gelu",
+    "num_experts": 4,
+    "use_memory": true,
+    "memory_size": 2048,
+    "batch_size": 8,
+    "learning_rate": 5e-05,
+    "num_epochs": 10,
+    "optimizer": "Adafactor",
+    "scheduler": "cosine",
+    "warmup_steps": 1000,
+    "weight_decay": 0.01,
+    "curriculum_learning": true,
+    "loss_function": "CrossEntropyLoss",
+    "eval_metric": "perplexity",
+    "output_dir": "EchoLLM_Model\\model",
+    "checkpoint_frequency": 1,
+    "save_best_model": true,
+    "load_from_checkpoint": null,
+    "use_gradient_checkpointing": true,
+    "use_mixed_precision": true,
+    "gradient_accumulation_steps": 4,
+    "export_formats": [
+        "bin",
+        "safetensors"
+    ],
+    "precision": "float32",
+    "max_token_length": 2048
+}

project_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "project_name": "Echo",
+    "architecture": "EchoLLM",
+    "version": "1.0.0",
+    "seed": 42,
+    "logging_level": "INFO",
+    "base_output_dir": "EchoLLM_Model"
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "bos_token": "<SOS>",
+  "cls_token": "<CLS>",
+  "eos_token": "<EOS>",
+  "mask_token": "<MASK>",
+  "pad_token": "<PAD>",
+  "sep_token": "<SEP>",
+  "unk_token": "<UNK>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,74 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<PAD>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<UNK>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<SOS>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<EOS>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<CLS>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<SEP>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<MASK>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<SOS>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<CLS>",
+  "do_lower_case": false,
+  "eos_token": "<EOS>",
+  "extra_special_tokens": {},
+  "mask_token": "<MASK>",
+  "model_max_length": 16384,
+  "pad_token": "<PAD>",
+  "padding_side": "right",
+  "sep_token": "<SEP>",
+  "tokenizer_class": "PreTrainedTokenizer",
+  "truncation_side": "right",
+  "unk_token": "<UNK>"
+}