pgryko committed on Jul 5, 2025

Commit

5774ee2

verified ·

1 Parent(s): 4debf53

Upload Llama-10M-1M model

Browse files

Files changed (16) hide show

README.md +40 -1
checkpoint-220/model.safetensors +1 -1
checkpoint-220/optimizer.pt +1 -1
checkpoint-220/trainer_state.json +31 -31
checkpoint-220/training_args.bin +1 -1
dataset_info.json +19 -0
evaluation_plots.png +2 -2
evaluation_results.json +23 -23
generation_examples.json +80 -0
metrics_summary.json +16 -0
model.safetensors +1 -1
model_card_metadata.yaml +22 -0
model_info.json +31 -0
performance_benchmarks.json +24 -0
training_args.bin +1 -1
training_metrics.json +4 -4

README.md CHANGED Viewed

@@ -21,6 +21,12 @@ model_index:
     - type: perplexity
       value: N/A
       name: Perplexity
 ---
 # Llama-10M-1M
@@ -40,12 +46,25 @@ A 10M parameter LLaMA model trained on 1M synthetic tokens using the BabyLlama f
 ## Training Details
-- **Training Loss**: 2.5500883795998313
 - **Evaluation Loss**: N/A
 - **Perplexity**: N/A
 - **Learning Rate**: 3e-4
 - **Batch Size**: 32
 - **Epochs**: 2
 ## Usage
@@ -88,3 +107,23 @@ If you use this model in your research, please cite:
 ## License
 This model is released under the MIT License.

     - type: perplexity
       value: N/A
       name: Perplexity
+    - type: loss
+      value: 2.499714469909668
+      name: Training Loss
+    - type: loss
+      value: N/A
+      name: Evaluation Loss
 ---
 # Llama-10M-1M
 ## Training Details
+- **Training Loss**: 2.499714469909668
 - **Evaluation Loss**: N/A
 - **Perplexity**: N/A
 - **Learning Rate**: 3e-4
 - **Batch Size**: 32
 - **Epochs**: 2
+- **Training Time**: 29.3597 seconds
+- **Training Samples**: 3,519
+## Evaluation Metrics
+| Metric | Value |
+|--------|-------|
+| Perplexity | N/A |
+| Training Loss | 2.499714469909668 |
+| Evaluation Loss | N/A |
+| Training Time | 29.3597s |
+| Parameters | 3,652,032 |
+| Training Samples | 3,519 |
 ## Usage
 ## License
 This model is released under the MIT License.
+## Detailed Evaluation Results
+### Generation Quality Metrics
+- **Diversity Score**: 0.932
+- **Repetition Score**: 0.528 (lower is better)
+- **Average Top Token Probability**: 0.356
+- **Average Entropy**: 2.015
+- **Low Confidence Ratio**: 0.791
+### Sample Generations
+1. "A child teaches slowly at the office, therefore the teacher writes happily. The bird reads thoughtfully in the garden. An artist writes carefully outside, afterwards the engineer explores eagerly. A child walks quickly in the park, meanwhile a writer creates sadly. A student"
+2. "The cat designs carefully at the library. A child jumps eagerly in the school, furthermore an artist learns thoughtfully. The engineer explores carefully in the school. The cat discovers eagerly on the street, and the scientist teaches quickly. The bird explores slowly in the"
+3. "The scientist teaches quickly in the park, however the engineer imagines creatively. A child thinks sadly in the lab, however a writer walks carefully. A dog writes sadly at the office. A dog explores patiently in the classroom. The engineer creates sadly in the"
+4. "A writer thinks sadly at the library. A writer reads carefully on the street, but the cat builds quickly. A student jumps patiently in the school. A student runs happily in the school, moreover a writer reads quickly. The cat creates brilliantly in the"
+5. "The engineer learns creatively at the office, afterwards a student runs quickly. The teacher thinks creatively in the school, and the scientist creates patiently. The scientist writes brilliantly in the lab, therefore the scientist designs brilliantly. A writer imagines creatively in the school."
+### Evaluation Plots
+![Evaluation Plots](evaluation_plots.png)

checkpoint-220/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e25793471728cb6b312aba089827e86543414bf3d7255639385ac7e6ad26f313
 size 14614216

 version https://git-lfs.github.com/spec/v1
+oid sha256:44c4152579055e34dc5ec45941f07908ba6328eb43a9851f23d71b427b97b242
 size 14614216

checkpoint-220/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af90b61af45e8618189307f9f9e4e51f51f782d87d3ce371290a30e98f642236
 size 29264715

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae9e8f0c14808224676da1acdb2933f7088e30c97b867502269734e5fa06a9bc
 size 29264715

checkpoint-220/trainer_state.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "best_global_step": 220,
-  "best_metric": 1.4725391864776611,
   "best_model_checkpoint": "models/Llama-10M-1M/checkpoint-220",
   "epoch": 2.0,
   "eval_steps": 500,
@@ -11,95 +11,95 @@
   "log_history": [
     {
       "epoch": 0.18181818181818182,
-      "grad_norm": 2.203366756439209,
       "learning_rate": 5.6999999999999996e-05,
-      "loss": 5.382,
       "step": 20
     },
     {
       "epoch": 0.36363636363636365,
-      "grad_norm": 2.104827880859375,
       "learning_rate": 0.000117,
-      "loss": 4.7236,
       "step": 40
     },
     {
       "epoch": 0.5454545454545454,
-      "grad_norm": 1.913203477859497,
       "learning_rate": 0.00017699999999999997,
-      "loss": 3.7918,
       "step": 60
     },
     {
       "epoch": 0.7272727272727273,
-      "grad_norm": 1.5432658195495605,
       "learning_rate": 0.000237,
-      "loss": 2.9107,
       "step": 80
     },
     {
       "epoch": 0.9090909090909091,
-      "grad_norm": 0.8627734184265137,
       "learning_rate": 0.00029699999999999996,
-      "loss": 2.1595,
       "step": 100
     },
     {
       "epoch": 1.0,
-      "eval_loss": 1.674540400505066,
-      "eval_runtime": 0.5467,
-      "eval_samples_per_second": 468.237,
-      "eval_steps_per_second": 58.53,
       "step": 110
     },
     {
       "epoch": 1.0909090909090908,
-      "grad_norm": 0.42695462703704834,
       "learning_rate": 0.00028182256689929475,
-      "loss": 1.6818,
       "step": 120
     },
     {
       "epoch": 1.2727272727272727,
-      "grad_norm": 0.38224583864212036,
       "learning_rate": 0.0002283747847073923,
-      "loss": 1.5296,
       "step": 140
     },
     {
       "epoch": 1.4545454545454546,
-      "grad_norm": 0.2458978146314621,
       "learning_rate": 0.00015392654224618098,
-      "loss": 1.4866,
       "step": 160
     },
     {
       "epoch": 1.6363636363636362,
-      "grad_norm": 0.26635897159576416,
       "learning_rate": 7.842618596105872e-05,
-      "loss": 1.4683,
       "step": 180
     },
     {
       "epoch": 1.8181818181818183,
-      "grad_norm": 0.2912762761116028,
       "learning_rate": 2.210397534688617e-05,
-      "loss": 1.4608,
       "step": 200
     },
     {
       "epoch": 2.0,
-      "grad_norm": 0.27549538016319275,
       "learning_rate": 5.1401253666411016e-08,
-      "loss": 1.4563,
       "step": 220
     },
     {
       "epoch": 2.0,
-      "eval_loss": 1.4725391864776611,
-      "eval_runtime": 0.5705,
-      "eval_samples_per_second": 448.749,
-      "eval_steps_per_second": 56.094,
       "step": 220
     }
   ],

 {
   "best_global_step": 220,
+  "best_metric": 1.4682797193527222,
   "best_model_checkpoint": "models/Llama-10M-1M/checkpoint-220",
   "epoch": 2.0,
   "eval_steps": 500,
   "log_history": [
     {
       "epoch": 0.18181818181818182,
+      "grad_norm": 2.169536828994751,
       "learning_rate": 5.6999999999999996e-05,
+      "loss": 5.3417,
       "step": 20
     },
     {
       "epoch": 0.36363636363636365,
+      "grad_norm": 2.4265267848968506,
       "learning_rate": 0.000117,
+      "loss": 4.6389,
       "step": 40
     },
     {
       "epoch": 0.5454545454545454,
+      "grad_norm": 1.8873662948608398,
       "learning_rate": 0.00017699999999999997,
+      "loss": 3.6383,
       "step": 60
     },
     {
       "epoch": 0.7272727272727273,
+      "grad_norm": 1.449324607849121,
       "learning_rate": 0.000237,
+      "loss": 2.7798,
       "step": 80
     },
     {
       "epoch": 0.9090909090909091,
+      "grad_norm": 0.8489532470703125,
       "learning_rate": 0.00029699999999999996,
+      "loss": 2.0772,
       "step": 100
     },
     {
       "epoch": 1.0,
+      "eval_loss": 1.6459211111068726,
+      "eval_runtime": 0.5498,
+      "eval_samples_per_second": 465.608,
+      "eval_steps_per_second": 58.201,
       "step": 110
     },
     {
       "epoch": 1.0909090909090908,
+      "grad_norm": 0.400846004486084,
       "learning_rate": 0.00028182256689929475,
+      "loss": 1.65,
       "step": 120
     },
     {
       "epoch": 1.2727272727272727,
+      "grad_norm": 0.38010889291763306,
       "learning_rate": 0.0002283747847073923,
+      "loss": 1.518,
       "step": 140
     },
     {
       "epoch": 1.4545454545454546,
+      "grad_norm": 0.23362764716148376,
       "learning_rate": 0.00015392654224618098,
+      "loss": 1.4804,
       "step": 160
     },
     {
       "epoch": 1.6363636363636362,
+      "grad_norm": 0.27331477403640747,
       "learning_rate": 7.842618596105872e-05,
+      "loss": 1.4636,
       "step": 180
     },
     {
       "epoch": 1.8181818181818183,
+      "grad_norm": 0.2885988652706146,
       "learning_rate": 2.210397534688617e-05,
+      "loss": 1.4567,
       "step": 200
     },
     {
       "epoch": 2.0,
+      "grad_norm": 0.27050530910491943,
       "learning_rate": 5.1401253666411016e-08,
+      "loss": 1.4523,
       "step": 220
     },
     {
       "epoch": 2.0,
+      "eval_loss": 1.4682797193527222,
+      "eval_runtime": 0.6071,
+      "eval_samples_per_second": 421.665,
+      "eval_steps_per_second": 52.708,
       "step": 220
     }
   ],

checkpoint-220/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7081ab95bdd29f298ff46535770fb8f791491cc9135ee1f83aa9fcac31132803
 size 5713

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b8b524ccc79f0e11610c634958c875c565569ce0ff90ffbb93a06434dc458fe
 size 5713

dataset_info.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "name": "synthetic_babylm",
+  "type": "synthetic",
+  "description": "Synthetically generated text data in BabyLM style",
+  "size": {
+    "train_samples": 3519,
+    "eval_samples": 256,
+    "sequence_length": 128
+  },
+  "preprocessing": {
+    "tokenizer": "GPT2TokenizerFast",
+    "vocab_size": 288,
+    "special_tokens": [
+      "<s>",
+      "</s>",
+      "<pad>"
+    ]
+  }
+}

evaluation_plots.png CHANGED Viewed

Git LFS Details

SHA256: 4920d9658bef69e263cf776fadd1fb8317a7558184b0f6cbb8d75a17afc6659b
Pointer size: 131 Bytes
Size of remote file: 117 kB

Git LFS Details

SHA256: 1b764ec245c47c6e41a540ea992df8bffc9ddc70a0de67cf43e1af5b369c2e5f
Pointer size: 131 Bytes
Size of remote file: 113 kB

evaluation_results.json CHANGED Viewed

@@ -1,28 +1,28 @@
 {
-  "perplexity": 33.73914194244188,
-  "average_loss": 3.518658645331541,
-  "std_loss": 0.35455904870960114,
-  "min_loss": 2.8167550563812256,
-  "max_loss": 4.2027974128723145,
   "num_sequences": 100,
-  "total_tokens": 2992,
-  "avg_diversity_score": 0.9286521711438311,
-  "avg_repetition_score": 0.5445736434108527,
   "generation_samples": [
-    "A student discovers sadly in the classroom. A writer thinks creatively on the street. A child builds brilliantly at the office, furthermore a dog thinks slowly. A student reads carefully in the park. The teacher reads creatively outside. The scientist learns patiently on the",
-    "The bird thinks sadly in the garden. The cat jumps thoughtfully on the street, afterwards the scientist jumps slowly. An artist teaches quickly at the library. A dog writes patiently on the street, consequently the engineer learns eagerly. The cat runs slowly on the",
-    "A child discovers quickly outside. The teacher walks carefully in the park, however the teacher reads brilliantly. The cat creates slowly in the lab. The bird teaches thoughtfully in the lab, therefore a dog learns carefully. A child writes brilliantly on the street,",
-    "An artist thinks carefully on the street, consequently an artist creates patiently. The teacher designs thoughtfully at the library, and a writer jumps eagerly. The engineer jumps creatively on the street, therefore the engineer learns creatively. The engineer creates brilliantly at home. An",
-    "A child imagines brilliantly in the garden. A dog reads creatively in the school. The scientist explores happily outside. The teacher discovers creatively on the street, but the scientist walks happily. The bird imagines patiently in the classroom. The cat writes creatively on the",
-    "The scientist designs happily at home, afterwards the cat jumps eagerly. A writer jumps happily at the library, but the bird runs creatively. The teacher reads quickly in the park. A child discovers brilliantly at home, however the cat builds happily. The teacher",
-    "A writer discovers carefully outside. The scientist jumps sadly in the garden, afterwards the bird runs brilliantly. A student thinks slowly in the lab, moreover the cat writes thoughtfully. The scientist discovers quickly outside. The teacher walks brilliantly in the park, additionally the",
-    "A student creates happily at the library, but a dog designs sadly. A writer writes thoughtfully in the park, furthermore a writer imagines happily. The cat jumps sadly in the classroom. The engineer runs sadly in the lab, additionally the cat explores quickly.",
-    "The cat learns brilliantly in the classroom. The teacher builds thoughtfully at the office. A dog teaches thoughtfully in the classroom. The bird teaches slowly at the office. A dog learns quickly in the classroom. A student reads happily in the garden, moreover a",
-    "A child reads brilliantly in the garden, and the cat creates quickly. The scientist reads carefully in the lab, but the bird runs happily. The scientist builds creatively in the garden. The cat builds eagerly in the garden, furthermore the scientist runs quickly."
   ],
-  "avg_top_token_prob": 0.3424334205046762,
-  "std_top_token_prob": 0.2698688641225257,
-  "avg_entropy": 2.0452219695628933,
-  "std_entropy": 0.9384522715673917,
-  "low_confidence_ratio": 0.8365885416666666
 }

 {
+  "perplexity": 46.30131494735422,
+  "average_loss": 3.8351703612797032,
+  "std_loss": 0.3615157261181016,
+  "min_loss": 3.2384719848632812,
+  "max_loss": 4.606306076049805,
   "num_sequences": 100,
+  "total_tokens": 2907,
+  "avg_diversity_score": 0.9324846102931076,
+  "avg_repetition_score": 0.5276691331923891,
   "generation_samples": [
+    "A child teaches slowly at the office, therefore the teacher writes happily. The bird reads thoughtfully in the garden. An artist writes carefully outside, afterwards the engineer explores eagerly. A child walks quickly in the park, meanwhile a writer creates sadly. A student",
+    "The cat designs carefully at the library. A child jumps eagerly in the school, furthermore an artist learns thoughtfully. The engineer explores carefully in the school. The cat discovers eagerly on the street, and the scientist teaches quickly. The bird explores slowly in the",
+    "The scientist teaches quickly in the park, however the engineer imagines creatively. A child thinks sadly in the lab, however a writer walks carefully. A dog writes sadly at the office. A dog explores patiently in the classroom. The engineer creates sadly in the",
+    "A writer thinks sadly at the library. A writer reads carefully on the street, but the cat builds quickly. A student jumps patiently in the school. A student runs happily in the school, moreover a writer reads quickly. The cat creates brilliantly in the",
+    "The engineer learns creatively at the office, afterwards a student runs quickly. The teacher thinks creatively in the school, and the scientist creates patiently. The scientist writes brilliantly in the lab, therefore the scientist designs brilliantly. A writer imagines creatively in the school.",
+    "The scientist explores slowly on the street, furthermore the cat walks eagerly. A child thinks creatively at the library. A writer imagines sadly at home, additionally the teacher writes patiently. A dog builds creatively in the garden. The cat builds patiently at home,",
+    "The engineer designs brilliantly at the library. A student thinks brilliantly in the lab. The scientist builds creatively at home, furthermore the engineer jumps slowly. The scientist teaches brilliantly at home, additionally a child jumps quickly. The teacher teaches patiently at the library.",
+    "A child runs thoughtfully in the park, and the engineer reads eagerly. A writer discovers happily on the street. The teacher writes creatively in the park, therefore a child writes brilliantly. A student explores eagerly in the school. A writer runs eagerly in the",
+    "A writer builds slowly at home. A writer thinks carefully in the lab, and a dog teaches sadly. A writer imagines creatively at the library, however the engineer jumps quickly. An artist builds patiently in the garden. The bird builds sadly in the garden",
+    "The cat learns eagerly at home, afterwards the scientist teaches brilliantly. A writer learns brilliantly at the library, furthermore the teacher writes carefully. A student jumps carefully in the park, however the engineer imagines creatively. The cat jumps slowly in the garden, therefore"
   ],
+  "avg_top_token_prob": 0.35648854288045845,
+  "std_top_token_prob": 0.2770798175977247,
+  "avg_entropy": 2.0150103131619814,
+  "std_entropy": 0.9688909622718542,
+  "low_confidence_ratio": 0.7913413768630234
 }

generation_examples.json ADDED Viewed

	@@ -0,0 +1,80 @@

+{
+  "examples": [
+    {
+      "id": 1,
+      "generated_text": "A student discovers sadly in the classroom. A writer thinks creatively on the street. A child builds brilliantly at the office, furthermore a dog thinks slowly. A student reads carefully in the park. The teacher reads creatively outside. The scientist learns patiently on the",
+      "method": "sampling",
+      "temperature": 0.8,
+      "top_p": 0.9
+    },
+    {
+      "id": 2,
+      "generated_text": "The bird thinks sadly in the garden. The cat jumps thoughtfully on the street, afterwards the scientist jumps slowly. An artist teaches quickly at the library. A dog writes patiently on the street, consequently the engineer learns eagerly. The cat runs slowly on the",
+      "method": "sampling",
+      "temperature": 0.8,
+      "top_p": 0.9
+    },
+    {
+      "id": 3,
+      "generated_text": "A child discovers quickly outside. The teacher walks carefully in the park, however the teacher reads brilliantly. The cat creates slowly in the lab. The bird teaches thoughtfully in the lab, therefore a dog learns carefully. A child writes brilliantly on the street,",
+      "method": "sampling",
+      "temperature": 0.8,
+      "top_p": 0.9
+    },
+    {
+      "id": 4,
+      "generated_text": "An artist thinks carefully on the street, consequently an artist creates patiently. The teacher designs thoughtfully at the library, and a writer jumps eagerly. The engineer jumps creatively on the street, therefore the engineer learns creatively. The engineer creates brilliantly at home. An",
+      "method": "sampling",
+      "temperature": 0.8,
+      "top_p": 0.9
+    },
+    {
+      "id": 5,
+      "generated_text": "A child imagines brilliantly in the garden. A dog reads creatively in the school. The scientist explores happily outside. The teacher discovers creatively on the street, but the scientist walks happily. The bird imagines patiently in the classroom. The cat writes creatively on the",
+      "method": "sampling",
+      "temperature": 0.8,
+      "top_p": 0.9
+    },
+    {
+      "id": 6,
+      "generated_text": "The scientist designs happily at home, afterwards the cat jumps eagerly. A writer jumps happily at the library, but the bird runs creatively. The teacher reads quickly in the park. A child discovers brilliantly at home, however the cat builds happily. The teacher",
+      "method": "sampling",
+      "temperature": 0.8,
+      "top_p": 0.9
+    },
+    {
+      "id": 7,
+      "generated_text": "A writer discovers carefully outside. The scientist jumps sadly in the garden, afterwards the bird runs brilliantly. A student thinks slowly in the lab, moreover the cat writes thoughtfully. The scientist discovers quickly outside. The teacher walks brilliantly in the park, additionally the",
+      "method": "sampling",
+      "temperature": 0.8,
+      "top_p": 0.9
+    },
+    {
+      "id": 8,
+      "generated_text": "A student creates happily at the library, but a dog designs sadly. A writer writes thoughtfully in the park, furthermore a writer imagines happily. The cat jumps sadly in the classroom. The engineer runs sadly in the lab, additionally the cat explores quickly.",
+      "method": "sampling",
+      "temperature": 0.8,
+      "top_p": 0.9
+    },
+    {
+      "id": 9,
+      "generated_text": "The cat learns brilliantly in the classroom. The teacher builds thoughtfully at the office. A dog teaches thoughtfully in the classroom. The bird teaches slowly at the office. A dog learns quickly in the classroom. A student reads happily in the garden, moreover a",
+      "method": "sampling",
+      "temperature": 0.8,
+      "top_p": 0.9
+    },
+    {
+      "id": 10,
+      "generated_text": "A child reads brilliantly in the garden, and the cat creates quickly. The scientist reads carefully in the lab, but the bird runs happily. The scientist builds creatively in the garden. The cat builds eagerly in the garden, furthermore the scientist runs quickly.",
+      "method": "sampling",
+      "temperature": 0.8,
+      "top_p": 0.9
+    }
+  ],
+  "generation_config": {
+    "temperature": 0.8,
+    "top_p": 0.9,
+    "max_new_tokens": 50,
+    "do_sample": true
+  }
+}

metrics_summary.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "training_metrics": {
+    "loss": 2.5500883795998313,
+    "runtime_seconds": 26.0941,
+    "samples_per_second": 269.717,
+    "steps_per_second": 8.431
+  },
+  "evaluation_metrics": {
+    "perplexity": 33.73914194244188,
+    "average_loss": 3.518658645331541,
+    "diversity_score": 0.9286521711438311,
+    "repetition_score": 0.5445736434108527,
+    "confidence_score": 0.3424334205046762,
+    "entropy": 2.0452219695628933
+  }
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e25793471728cb6b312aba089827e86543414bf3d7255639385ac7e6ad26f313
 size 14614216

 version https://git-lfs.github.com/spec/v1
+oid sha256:44c4152579055e34dc5ec45941f07908ba6328eb43a9851f23d71b427b97b242
 size 14614216

model_card_metadata.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+datasets:
+- synthetic
+key_metrics:
+  diversity_score: 0.9286521711438311
+  perplexity: 33.73914194244188
+  training_loss: 2.5500883795998313
+language: en
+license: mit
+metrics:
+- perplexity
+- loss
+- diversity
+model_name: Llama-10M-1M
+model_size: 3652032
+model_type: causal-lm
+tags:
+- text-generation
+- pytorch
+- causal-lm
+- babylm
+- small-language-model
+training_data_size: 3519

model_info.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "model_name": "Llama-10M-1M",
+  "model_type": "causal-lm",
+  "architecture": "LLaMA",
+  "framework": "transformers",
+  "created_at": "2025-07-05T16:11:06.857492",
+  "parameters": {
+    "total": 3652032,
+    "hidden_size": 192,
+    "num_layers": 6,
+    "num_heads": 6,
+    "vocab_size": 288,
+    "sequence_length": 128
+  },
+  "training": {
+    "dataset_size": 3519,
+    "epochs": 2,
+    "batch_size": 32,
+    "learning_rate": "3e-4",
+    "training_time_seconds": 26.0941,
+    "final_loss": 2.5500883795998313
+  },
+  "evaluation": {
+    "perplexity": 33.73914194244188,
+    "diversity_score": 0.9286521711438311,
+    "repetition_score": 0.5445736434108527,
+    "top_token_confidence": 0.3424334205046762,
+    "entropy": 2.0452219695628933,
+    "num_eval_samples": 100
+  }
+}

performance_benchmarks.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "text_generation": {
+    "perplexity": {
+      "value": 33.73914194244188,
+      "description": "Lower is better",
+      "benchmark_type": "intrinsic"
+    },
+    "diversity": {
+      "value": 0.9286521711438311,
+      "description": "Higher is better (0-1 scale)",
+      "benchmark_type": "quality"
+    },
+    "repetition": {
+      "value": 0.5445736434108527,
+      "description": "Lower is better (0-1 scale)",
+      "benchmark_type": "quality"
+    }
+  },
+  "efficiency": {
+    "parameters": 3652032,
+    "training_time": 26.0941,
+    "inference_speed": "Not measured"
+  }
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7081ab95bdd29f298ff46535770fb8f791491cc9135ee1f83aa9fcac31132803
 size 5713

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b8b524ccc79f0e11610c634958c875c565569ce0ff90ffbb93a06434dc458fe
 size 5713

training_metrics.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "train_runtime": 26.0941,
-  "train_samples_per_second": 269.717,
-  "train_steps_per_second": 8.431,
   "total_flos": 19441019879424.0,
-  "train_loss": 2.5500883795998313,
   "epoch": 2.0,
   "train_samples": 3519,
   "eval_samples": 256,

 {
+  "train_runtime": 29.3597,
+  "train_samples_per_second": 239.716,
+  "train_steps_per_second": 7.493,
   "total_flos": 19441019879424.0,
+  "train_loss": 2.499714469909668,
   "epoch": 2.0,
   "train_samples": 3519,
   "eval_samples": 256,