Shivansh Puri committed
Commit f6eb358 · 0 Parent(s)

Initial upload of MyAwesome-299M-Model
.gitattributes ADDED
@@ -0,0 +1,2 @@
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
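These attribute rules route `*.safetensors` and `*.bin` weight files through Git LFS rather than plain Git. For reference, a minimal sketch of reproducing this kind of upload with `huggingface_hub` (the local folder path is hypothetical; `upload_folder` handles LFS routing for binary files on the Hub automatically):

```python
from huggingface_hub import HfApi

api = HfApi()  # assumes you are already logged in (huggingface-cli login)
api.upload_folder(
    folder_path="./MyAwesome-299M-Model",  # hypothetical local checkpoint directory
    repo_id="shivash/MyAwesome-299M-Model",
    repo_type="model",
    commit_message="Initial upload of MyAwesome-299M-Model",
)
```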
README.md ADDED
@@ -0,0 +1,68 @@
+ ---
+ language:
+ - en
+ license: mit
+ library_name: transformers
+ pipeline_tag: text-generation
+ tags:
+ - demo
+ - llama
+ - efficient
+ base_model: gpt2-medium
+ model_type: llama
+ ---
+
+ # MyAwesome-299M-Model (Demo)
+
+ A compact demo language model showcasing the capabilities of the Transfer-First LLM Framework.
+
+ ## Model Details
+
+ - **Model Type:** Decoder-only transformer (Llama architecture)
+ - **Parameters:** ~57M (demo size; 228.8 MB checkpoint at float32)
+ - **Architecture:** 512d × 8 layers
+ - **Vocabulary:** 50,257 tokens (GPT-2 compatible)
+ - **Context Length:** 1,024 tokens
+ - **Attention:** Grouped-Query Attention (GQA)
+
+ ## Usage
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+
+ # Load model and tokenizer
+ model = AutoModelForCausalLM.from_pretrained("shivash/MyAwesome-299M-Model")
+ tokenizer = AutoTokenizer.from_pretrained("shivash/MyAwesome-299M-Model")
+
+ # Generate text
+ prompt = "The future of AI is"
+ inputs = tokenizer(prompt, return_tensors="pt")
+
+ with torch.no_grad():
+     outputs = model.generate(
+         **inputs,
+         max_new_tokens=50,
+         temperature=0.7,
+         do_sample=True,
+         pad_token_id=tokenizer.eos_token_id
+     )
+
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+ print(response)
+ ```
+
+ ## Framework
+
+ This model demonstrates the Transfer-First LLM Framework:
+ - Compact, efficient architecture
+ - Ready for adapter-based fine-tuning (see the LoRA sketch after this diff)
+ - Optimized for parameter efficiency
+
+ ## License
+
+ MIT License
+
+ ---
+
+ *Demo model from the Transfer-First LLM Framework*
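The card's "adapter-based fine-tuning" bullet doesn't pin down a recipe. A minimal sketch using the `peft` library, assuming LoRA on the usual Llama attention projections (the rank and target modules are illustrative choices, not part of this release):

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("shivash/MyAwesome-299M-Model")

lora = LoraConfig(
    r=8,                                  # illustrative rank
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  # common Llama attention targets (assumption)
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora)
model.print_trainable_parameters()  # adapters are a small fraction of the ~57M base weights
```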
config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "dtype": "float32",
+   "eos_token_id": 2,
+   "head_dim": 64,
+   "hidden_act": "silu",
+   "hidden_size": 512,
+   "initializer_range": 0.02,
+   "intermediate_size": 2048,
+   "max_position_embeddings": 1024,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 8,
+   "num_hidden_layers": 8,
+   "num_key_value_heads": 4,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": true,
+   "transformers_version": "4.56.2",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
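A quick sanity check on the parameter count, reading the fields straight off this config (tied embeddings, GQA with 4 KV heads); the result matches the float32 `model.safetensors` size recorded below:

```python
# All values taken from config.json above.
hidden, layers, vocab, inter = 512, 8, 50257, 2048
heads, kv_heads, head_dim = 8, 4, 64

embed = vocab * hidden                    # input embeddings; lm_head is tied, counted once
attn = hidden * heads * head_dim          # q_proj
attn += 2 * hidden * kv_heads * head_dim  # k_proj + v_proj (grouped-query attention)
attn += heads * head_dim * hidden         # o_proj
mlp = 3 * hidden * inter                  # gate_proj + up_proj + down_proj
norms = 2 * hidden                        # per-layer RMSNorms
total = embed + layers * (attn + mlp + norms) + hidden  # + final norm

print(f"{total:,} params")           # 57,197,568 -> ~57M
print(f"{total * 4:,} bytes fp32")   # 228,790,272 ≈ the 228,798,488-byte checkpoint (plus header)
```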
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "transformers_version": "4.56.2"
+ }
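This file only pins the BOS/EOS token IDs; sampling parameters are left to the caller, as in the README's `generate()` call. A small sketch of inspecting and overriding it (repo id as in the README):

```python
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("shivash/MyAwesome-299M-Model")
print(gen_cfg.bos_token_id, gen_cfg.eos_token_id)  # 1 2

# Defaults can be set once here instead of per generate() call:
gen_cfg.max_new_tokens = 50
gen_cfg.do_sample = True
gen_cfg.temperature = 0.7
# then: model.generate(**inputs, generation_config=gen_cfg)
```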
merges.txt ADDED
The diff for this file is too large to render.
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3398b1b6c998db4e93608362bb4182cce117e66315d3acf16d46d9b11d867c53
+ size 228798488
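The pointer above records the checkpoint's SHA-256 and byte size, so an integrity check after download is straightforward. A sketch (requires network access; `hf_hub_download` is part of `huggingface_hub`):

```python
import hashlib
import os
from huggingface_hub import hf_hub_download

path = hf_hub_download("shivash/MyAwesome-299M-Model", "model.safetensors")

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

# Both expected values come from the LFS pointer above.
assert h.hexdigest() == "3398b1b6c998db4e93608362bb4182cce117e66315d3acf16d46d9b11d867c53"
assert os.path.getsize(path) == 228798488
```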
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "pad_token": "<|endoftext|>",
+   "unk_token": "<|endoftext|>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render.
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|endoftext|>",
+   "extra_special_tokens": {},
+   "model_max_length": 1024,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
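All four special-token slots map to `<|endoftext|>` (id 50256), the standard GPT-2 convention, which is why the README passes `pad_token_id=tokenizer.eos_token_id` to `generate()`. A quick sketch to confirm what loads:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("shivash/MyAwesome-299M-Model")
print(tok.eos_token, tok.eos_token_id)  # <|endoftext|> 50256
print(tok.model_max_length)             # 1024, matching max_position_embeddings
print(len(tok))                         # 50257, matching vocab_size in config.json
```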
vocab.json ADDED
The diff for this file is too large to render.