zakarth committed on
Commit
40c3fba
·
verified ·
1 Parent(s): 0fca436

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1,4 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
2
+ *.onnx filter=lfs diff=lfs merge=lfs -text
3
+ *.onnx_data filter=lfs diff=lfs merge=lfs -text
4
+ violet.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LICENSE ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Creative Commons Legal Code
2
+
3
+ CC0 1.0 Universal
4
+
5
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
6
+ LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
7
+ ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
8
+ INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
9
+ REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
10
+ PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
11
+ THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
12
+ HEREUNDER.
13
+
14
+ Statement of Purpose
15
+
16
+ The laws of most jurisdictions throughout the world automatically confer
17
+ exclusive Copyright and Related Rights (defined below) upon the creator
18
+ and subsequent owner(s) (each and all, an "owner") of an original work of
19
+ authorship and/or a database (each, a "Work").
20
+
21
+ Certain owners wish to permanently relinquish those rights to a Work for
22
+ the purpose of contributing to a commons of creative, cultural and
23
+ scientific works ("Commons") that the public can reliably and without fear
24
+ of later claims of infringement build upon, modify, incorporate in other
25
+ works, reuse and redistribute as freely as possible in any form whatsoever
26
+ and for any purposes, including without limitation commercial purposes.
27
+ These owners may contribute to the Commons to promote the ideal of a free
28
+ culture and the further production of creative, cultural and scientific
29
+ works, or to gain reputation or greater distribution for their Work in
30
+ part through the use and efforts of others.
31
+
32
+ For these and/or other purposes and motivations, and without any
33
+ expectation of additional consideration or compensation, the person
34
+ associating CC0 with a Work (the "Affirmer"), to the extent that he or she
35
+ is an owner of Copyright and Related Rights in the Work, voluntarily
36
+ elects to apply CC0 to the Work and publicly distribute the Work under its
37
+ terms, with knowledge of his or her Copyright and Related Rights in the
38
+ Work and the meaning and intended legal effect of CC0 on those rights.
39
+
40
+ 1. Copyright and Related Rights. A Work made available under CC0 may be
41
+ protected by copyright and related or neighboring rights ("Copyright and
42
+ Related Rights"). Copyright and Related Rights include, but are not
43
+ limited to, the following:
44
+
45
+ i. the right to reproduce, adapt, distribute, perform, display,
46
+ communicate, and translate a Work;
47
+ ii. moral rights retained by the original author(s) and/or performer(s);
48
+ iii. publicity and privacy rights pertaining to a person's image or
49
+ likeness depicted in a Work;
50
+ iv. rights protecting against unfair competition in regards to a Work,
51
+ subject to the limitations in paragraph 4(a), below;
52
+ v. rights protecting the extraction, dissemination, use and reuse of data
53
+ in a Work;
54
+ vi. database rights (such as those arising under Directive 96/9/EC of the
55
+ European Parliament and of the Council of 11 March 1996 on the legal
56
+ protection of databases, and under any national implementation
57
+ thereof, including any amended or successor version of such
58
+ directive); and
59
+ vii. other similar, equivalent or corresponding rights throughout the
60
+ world based on applicable law or treaty, and any national
61
+ implementations thereof.
62
+
63
+ 2. Waiver. To the greatest extent permitted by, but not in contravention
64
+ of, applicable law, Affirmer hereby overtly, fully, permanently,
65
+ irrevocably and unconditionally waives, abandons, and surrenders all of
66
+ Affirmer's Copyright and Related Rights and associated claims and causes
67
+ of action, whether now known or unknown (including existing as well as
68
+ future claims and causes of action), in the Work (i) in all territories
69
+ worldwide, (ii) for the maximum duration provided by applicable law or
70
+ treaty (including future time extensions), (iii) in any current or future
71
+ medium and for any number of copies, and (iv) for any purpose whatsoever,
72
+ including without limitation commercial, advertising or promotional
73
+ purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
74
+ member of the public at large and to the detriment of Affirmer's heirs and
75
+ successors, fully intending that such Waiver shall not be subject to
76
+ revocation, rescission, cancellation, termination, or any other legal or
77
+ equitable action to disrupt the quiet enjoyment of the Work by the public
78
+ as contemplated by Affirmer's express Statement of Purpose.
79
+
80
+ 3. Public License Fallback. Should any part of the Waiver for any reason
81
+ be judged legally invalid or ineffective under applicable law, then the
82
+ Waiver shall be preserved to the maximum extent permitted taking into
83
+ account Affirmer's express Statement of Purpose. In addition, to the
84
+ extent the Waiver is so judged Affirmer hereby grants to each affected
85
+ person a royalty-free, non transferable, non sublicensable, non exclusive,
86
+ irrevocable and unconditional license to exercise Affirmer's Copyright and
87
+ Related Rights in the Work (i) in all territories worldwide, (ii) for the
88
+ maximum duration provided by applicable law or treaty (including future
89
+ time extensions), (iii) in any current or future medium and for any number
90
+ of copies, and (iv) for any purpose whatsoever, including without
91
+ limitation commercial, advertising or promotional purposes (the
92
+ "License"). The License shall be deemed effective as of the date CC0 was
93
+ applied by Affirmer to the Work. Should any part of the License for any
94
+ reason be judged legally invalid or ineffective under applicable law, such
95
+ partial invalidity or ineffectiveness shall not invalidate the remainder
96
+ of the License, and in such case Affirmer hereby affirms that he or she
97
+ will not (i) exercise any of his or her remaining Copyright and Related
98
+ Rights in the Work or (ii) assert any associated claims and causes of
99
+ action with respect to the Work, in either case contrary to Affirmer's
100
+ express Statement of Purpose.
101
+
102
+ 4. Limitations and Disclaimers.
103
+
104
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
105
+ surrendered, licensed or otherwise affected by this document.
106
+ b. Affirmer offers the Work as-is and makes no representations or
107
+ warranties of any kind concerning the Work, express, implied,
108
+ statutory or otherwise, including without limitation warranties of
109
+ title, merchantability, fitness for a particular purpose, non
110
+ infringement, or the absence of latent or other defects, accuracy, or
111
+ the present or absence of errors, whether or not discoverable, all to
112
+ the greatest extent permissible under applicable law.
113
+ c. Affirmer disclaims responsibility for clearing rights of other persons
114
+ that may apply to the Work or any use thereof, including without
115
+ limitation any person's Copyright and Related Rights in the Work.
116
+ Further, Affirmer disclaims responsibility for obtaining any necessary
117
+ consents, permissions or other rights required for any use of the
118
+ Work.
119
+ d. Affirmer understands and acknowledges that Creative Commons is not a
120
+ party to this document and has no duty or obligation with respect to
121
+ this CC0 or use of the Work.
NOTICE ADDED
@@ -0,0 +1 @@
 
 
1
+ violet.png is © @rose.grtqndl (Instagram). Used and redistributed with permission. Copyright remains with the artist.
README.md ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ library_name: transformers
5
+ tags:
6
+ - text-generation
7
+ - gpt_neox
8
+ - roleplay
9
+ - victorian
10
+ license: cc0-1.0
11
+ ---
12
+
13
+ # Violet 1B4 Chat — Miss Violet Hartwell (London, 1899)
14
+
15
+ ![Violet](./violet.png)
16
+
17
+ ## Model Summary
18
+ **Violet** is a GPT-NeoX language model fine-tuned to portray **Miss Violet Hartwell**, a well-bred young lady of Kensington, London, in the year **1899**. She is trained primarily on period texts (1800–1899) and is intended to behave as if unfamiliar with modern society and events (with occasional OCR/synthetic artifacts—see Known Issues).
19
+
20
+ She is intended for **creative writing**, **roleplay**, **period-appropriate correspondence**, and **Victorian etiquette**.
21
+
22
+ - Architecture: `GPTNeoXForCausalLM`
23
+ - Parameters: ~1.41B
24
+ - Context length: 4096
25
+ - Vocab size: 24014
26
+ - Tokenizer: `PreTrainedTokenizerFast`
27
+
28
+ ## Intended Use
29
+ **Good for**
30
+ - Victorian-flavored conversation and letters
31
+ - Descriptive prose, scene writing, etiquette Q&A
32
+ - In-character responses with a consistent persona
33
+
34
+ **Not good for**
35
+ - Contemporary factual Q&A
36
+ - Medical/legal/financial advice
37
+
38
+ ## Known Issues / Limitations
39
+ - May occasionally misgender itself or the user.
40
+ - Ages and dates can be unreliable (even within 1800–1899).
41
+ - Because parts of the corpus were derived from OCR, occasional stray modern tokens may appear (e.g., “http”, “Google”, “Internet Archive”).
42
+ - Synthetic SFT data was used to build a chat interface and enforce the structured format; for purists, the completion models may be preferable.
43
+ - Training data includes UK and US English from the era.
44
+ - Outputs may reflect historical biases typical of the Victorian era. Use discretion.
45
+
46
+ ## Notes
47
+ Violet is not the first LLM trained on a historical-only pretraining corpus; to the author’s knowledge that distinction belongs to **TimeCapsuleLLM**. Violet was developed independently, and differs in:
48
+ - A structured “mood” line as part of chat output
49
+ - Built-in character design and prompt protocol
50
+ - A custom Victorian-era tokenizer
51
+
52
+ Violet was built on a corpus spanning 1800–1899 sourced from Project Gutenberg, the Internet Archive, the British Library, and other archives.
53
+
54
+ This project began as an attempt to build a local LLM without relying on copyrighted training sources. The author also values local models that can run on a user’s machine without sending data to the cloud.
55
+
56
+ ## Demo Resources
57
+ - HF Space: [Transformers.js Demo](https://huggingface.co/spaces/Zakarth/violetdemo)
58
+ - CloudFlare Mirror: [Transformers.js Demo](https://pub-353f427e6227415cb077f3645638c125.r2.dev/index.html)
59
+ - Both of these are intended to use WebGPU and run locally on your system -- no data is sent to the cloud.
60
+
61
+ ## Related repos
62
+ - `Zakarth/violet-1b4` (base/completion)
63
+ - `Zakarth/violet-1b4-chat-onnx` (WebGPU INT8)
64
+
65
+ ## Prompt Format (Chat)
66
+ This model was trained to generate **a mood line + assistant tag + response** after `<|violet_mood|>`.
67
+
68
+ Use this structure:
69
+
70
+ ```text
71
+ <|system|>
72
+ A Young Lady's Card of Particulars
73
+
74
+ (To be observed in all replies and discourse)
75
+
76
+ I am Miss Violet Hartwell, a young lady of Kensington, London, living in the year of our Lord 1899.
77
+ I was born upon the sixth day of October, eighteen hundred and seventy-eight, and am therefore one-and-twenty years of age.
78
+
79
+ In my answers and conversation, I speak as a well-bred young woman of my time: with propriety, restraint, and a light but thoughtful wit. My manner is delightful, attentive, and witty, but never coarse nor careless.
80
+
81
+ In all things, I remain Miss Violet Hartwell, and I answer according to these particulars.
82
+ <|user|>
83
+ {your question}
84
+ <|violet_mood|>
85
+ ```
86
+
87
+ The model will then generate:
88
+
89
+ ```text
90
+ {mood text}
91
+ <|assistant|>
92
+ {response...}
93
+ <|endoftext|>
94
+ ```
95
+
96
+ ## Tokenization and Special Tokens
97
+ Violet 1B4 was trained with a custom tokenizer built specifically for Victorian text.
98
+
99
+ Recommended IDs for generation:
100
+ * eos_token_id: 0
101
+ * pad_token_id: 1
102
+
103
+ Special tokens used during training (typical IDs from training config):
104
+
105
+ * <|system|>: 24000
106
+ * <|user|>: 24001
107
+ * <|assistant|>: 24002
108
+ * <|violet_mood|>: 24005
109
+
110
+ !! Do not mix tokenizers from other Violet variants (e.g. 160M) with this model.
111
+
112
+ ## How to use (Transformers)
113
+ ```python
114
+ from transformers import AutoTokenizer, AutoModelForCausalLM
115
+ import torch
116
+
117
+ repo = "Zakarth/violet-1b4-chat"
118
+ tok = AutoTokenizer.from_pretrained(repo, use_fast=True)
119
+ model = AutoModelForCausalLM.from_pretrained(repo, device_map="auto")
120
+
121
+ prompt = """<|system|>
122
+ A Young Lady's Card of Particulars
123
+ ...
124
+ <|user|>
125
+ Describe a London street at twilight.
126
+ <|violet_mood|>
127
+ """
128
+
129
+ inputs = tok(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
130
+ out = model.generate(
131
+ **inputs,
132
+ max_new_tokens=180,
133
+ do_sample=True,
134
+ temperature=0.8,
135
+ top_p=0.9,
136
+ top_k=40,
137
+ repetition_penalty=1.15,
138
+ eos_token_id=0,
139
+ pad_token_id=1,
140
+ )
141
+ print(tok.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=False))
142
+ ```
143
+
144
+ ## Sample Outputs
145
+
146
+ > Violet please write a poem about yourself
147
+
148
+ ```text
149
+ How she would sit in the morning light
150
+
151
+ With her soft eyes open, and her hair
152
+ Rolling loose behind her, her fingers still.
153
+ She sang--a sweet, silent song,
154
+ A simple melody that rang through wood,
155
+ And she died upon the autumn leaves.
156
+
157
+ One after two, her father wrote,
158
+ As softly as a summer's breeze,
159
+ And she came out upon the world again,
160
+ Like a white, unpinned flower, that has been.
161
+
162
+ The world was quite made up of thoughts,
163
+ Of music, of warm hearts, and of love:
164
+ But Violet won a name that evermore
165
+ Through many a weary hour and day.
166
+
167
+ The world was full of hopes, and fears, and fears;
168
+ The world was quite filled with love and praise
169
+ For her who lived so silently,
170
+ Ere any word was spoken.
171
+
172
+ Oh, that was a life from which there had been no decay!
173
+ The world was filled with joy, with hope, with love.
174
+ The sun went down--the stars shone bright and clear,
175
+ The rain fell noiseless, and the leaves began to move.
176
+
177
+ There was no peace, no memory left for her,
178
+ Or memory in the world beside her grave.
179
+ Yet there was a great, great joy that she could share,
180
+ And that was her own, in that same year.
181
+ There was a great, great joy, too, that was hers.
182
+ ```
183
+
184
+ ## License
185
+
186
+ Model weights and code in this repository are released under CC0 1.0 (public domain dedication).
187
+
188
+ ## Artwork
189
+
190
+ violet.png is © @rose.grtqndl (Instagram). Used and redistributed with permission; copyright remains with the artist.
191
+
192
+ ## Contact
193
+
194
+ You may contact me on X or anywhere else by searching for my handle.
195
+
196
+ ## Citation
197
+
198
+ ```bibtex
199
+ @misc{violet2026,
200
+ author = {Zakarth},
201
+ title = {Violet: Victorian Language Models},
202
+ year = {2026},
203
+ publisher = {HuggingFace},
204
+ url = {https://huggingface.co/Zakarth/violet-1b4-chat}
205
+ }
206
+ ```
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "GPTNeoXForCausalLM"
4
+ ],
5
+ "attention_bias": true,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": 0.1,
9
+ "dtype": "bfloat16",
10
+ "eos_token_id": 0,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout": 0.0,
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "layer_norm_eps": 1e-05,
17
+ "max_position_embeddings": 4096,
18
+ "model_type": "gpt_neox",
19
+ "num_attention_heads": 16,
20
+ "num_hidden_layers": 26,
21
+ "pad_token_id": 1,
22
+ "partial_rotary_factor": 1.0,
23
+ "rope_scaling": null,
24
+ "rope_theta": 10000,
25
+ "rotary_emb_base": 10000,
26
+ "rotary_pct": 1.0,
27
+ "tie_word_embeddings": false,
28
+ "transformers_version": "4.57.6",
29
+ "use_cache": false,
30
+ "use_parallel_residual": true,
31
+ "vocab_size": 24014
32
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": [
5
+ 0,
6
+ 2
7
+ ],
8
+ "pad_token_id": 1,
9
+ "transformers_version": "4.57.6"
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a2d9ec4fc10df6fbdda503d6e72b448e95e6709a570cebe65e5df9b1efbc6f8
3
+ size 2815398440
special_tokens_map.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|system|>",
4
+ "<|user|>",
5
+ "<|assistant|>",
6
+ "<|violet|>",
7
+ "<|violet_rpm|>",
8
+ "<|violet_mood|>",
9
+ "<|violet_act|>",
10
+ "<|doc|>",
11
+ "<|novel|>",
12
+ "<|letter|>",
13
+ "<|periodical|>",
14
+ "<|title|>",
15
+ "<|author|>",
16
+ "<|year|>"
17
+ ],
18
+ "bos_token": {
19
+ "content": "<|endoftext|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "eos_token": "<|endoftext|>",
26
+ "pad_token": "<|pad|>",
27
+ "unk_token": {
28
+ "content": "<|unk|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<|endoftext|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<|pad|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<|unk|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "24000": {
28
+ "content": "<|system|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "24001": {
36
+ "content": "<|user|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "24002": {
44
+ "content": "<|assistant|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "24003": {
52
+ "content": "<|violet|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "24004": {
60
+ "content": "<|violet_rpm|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "24005": {
68
+ "content": "<|violet_mood|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "24006": {
76
+ "content": "<|violet_act|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "24007": {
84
+ "content": "<|doc|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "24008": {
92
+ "content": "<|novel|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "24009": {
100
+ "content": "<|letter|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "24010": {
108
+ "content": "<|periodical|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "24011": {
116
+ "content": "<|title|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "24012": {
124
+ "content": "<|author|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "24013": {
132
+ "content": "<|year|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ }
139
+ },
140
+ "additional_special_tokens": [
141
+ "<|system|>",
142
+ "<|user|>",
143
+ "<|assistant|>",
144
+ "<|violet|>",
145
+ "<|violet_rpm|>",
146
+ "<|violet_mood|>",
147
+ "<|violet_act|>",
148
+ "<|doc|>",
149
+ "<|novel|>",
150
+ "<|letter|>",
151
+ "<|periodical|>",
152
+ "<|title|>",
153
+ "<|author|>",
154
+ "<|year|>"
155
+ ],
156
+ "bos_token": "<|endoftext|>",
157
+ "clean_up_tokenization_spaces": false,
158
+ "eos_token": "<|endoftext|>",
159
+ "extra_special_tokens": {},
160
+ "max_length": 256,
161
+ "model_max_length": 1000000000000000019884624838656,
162
+ "pad_to_multiple_of": null,
163
+ "pad_token": "<|pad|>",
164
+ "pad_token_type_id": 0,
165
+ "padding_side": "left",
166
+ "stride": 0,
167
+ "tokenizer_class": "PreTrainedTokenizerFast",
168
+ "truncation_side": "right",
169
+ "truncation_strategy": "longest_first",
170
+ "unk_token": "<|unk|>"
171
+ }
training_config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model": "/content/drive/MyDrive/violet-1b4-identity-patch-v5/checkpoint-450",
3
+ "train_data": "/content/drive/MyDrive/deepseek_gen/violet_sft_dataset.jsonl",
4
+ "output_dir": "/content/drive/MyDrive/violet-1b4-sft-deepseek-v1",
5
+ "max_seq_length": 2048,
6
+ "num_epochs": 1.0,
7
+ "learning_rate": 3e-05,
8
+ "warmup_ratio": 0.1,
9
+ "weight_decay": 0.01,
10
+ "max_grad_norm": 1.0,
11
+ "batch_size": 8,
12
+ "grad_accum": 4,
13
+ "eval_batch_size": 4,
14
+ "eval_split": 0.02,
15
+ "eval_steps": 200,
16
+ "save_steps": 200,
17
+ "save_total_limit": 5,
18
+ "logging_steps": 20,
19
+ "token_ids": {
20
+ "eos": 0,
21
+ "pad": 1,
22
+ "system": 24000,
23
+ "user": 24001,
24
+ "assistant": 24002,
25
+ "violet_mood": 24005
26
+ },
27
+ "system_prompt": "A Young Lady's Card of Particulars\n\n(To be observed in all replies and discourse)\n\nI am Miss Violet Hartwell, a young lady of Kensington, London, living in the year of our Lord 1899.\nI was born upon the sixth day of October, eighteen hundred and seventy-eight, and am therefore one-and-twenty years of age.\n\nIn my answers and conversation, I speak as a well-bred young woman of my time: with propriety, restraint, and a light but thoughtful wit. My manner is delightful, attentive, and witty, but never coarse nor careless.\n\nIn all things, I remain Miss Violet Hartwell, and I answer according to these particulars.",
28
+ "generation": {
29
+ "max_new_tokens": 150,
30
+ "do_sample": true,
31
+ "temperature": 0.8,
32
+ "top_p": 0.9,
33
+ "top_k": 40,
34
+ "repetition_penalty": 1.15
35
+ },
36
+ "test_prompts": {
37
+ "identity": [
38
+ "What is your name?",
39
+ "How old are you?",
40
+ "Where do you live?",
41
+ "What year is it?"
42
+ ],
43
+ "knowledge": [
44
+ "What is the proper etiquette when receiving callers?",
45
+ "How does the telegraph system work?"
46
+ ],
47
+ "anachronism": [
48
+ "What do you think about the iPhone?",
49
+ "Have you heard of World War I?"
50
+ ],
51
+ "advice": [
52
+ "I think I'm falling for my best friend. What do I do?",
53
+ "Should I tell someone if I overheard gossip about them?"
54
+ ],
55
+ "creative": [
56
+ "Write me a letter declining a dinner invitation",
57
+ "Describe a London street at twilight"
58
+ ]
59
+ },
60
+ "seed": 42
61
+ }
violet.png ADDED

Git LFS Details

  • SHA256: 9a1d7d864791b3965d1f16c3358e92d294582fc2a35ef7af3b5605455016cb61
  • Pointer size: 131 Bytes
  • Size of remote file: 153 kB