RadAlienware commited on
Commit
d22ce26
·
verified ·
1 Parent(s): e345bbd

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tinystories.sentis filter=lfs diff=lfs merge=lfs -text
README (1).md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ library_name: unity-sentis
4
+ pipeline_tag: text-generation
5
+ ---
6
+
7
+ # Tiny Stories Model in Unity Sentis Format (Sentis 1.4.0-pre.2*)
8
+ *Sentis files created for version 1.3.0 are not compatible with Sentis 1.4.0 and need to be recreated or downloaded again.
9
+
10
+ This is the [Tiny Stories model](https://huggingface.co/roneneldan/TinyStories-33M) checked to run on Unity 2023. Tiny Stories is a Large Language Model that was trained on children's stories and can create stories based on the first couple of sentences.
11
+
12
+
13
+ ## How to Use
14
+ * Create a new scene in Unity 2023
15
+ * Install `com.unity.sentis` and `com.unity.nuget.newtonsoft-json` packages
16
+ * Add the RunTinyStories.cs file to the Main Camera
17
+ * Put `tinystories.sentis`, `vocab.json` and `merges.txt` in the Assets/StreamingAssets folder
18
+ * Adjust variables in the script as needed, for example the `outputString` string, which sets the prompt
19
+ * Press run
20
+ * The output will appear in the console window
21
+
22
+ ## Example Input
23
+ ```
24
+ One day an alien came down from Mars. It saw a chicken
25
+ ```
26
+ ## Example Output
27
+ ```
28
+ One day an alien came down from Mars. It saw a chicken and said, "Hello, little chicken. What are you doing here?"
29
+
30
+ The chicken replied, "I'm looking for a place to stay. I'm very tired."
31
+
32
+ The alien said, "You can stay here. I have a nice place for you. It's very comfortable."
33
+
34
+ The chicken was so happy. She thanked the alien and said, "Thank you. I'm very comfortable here."
35
+
36
+ The alien smiled and said, "You're welcome
37
+ ```
38
+
39
+ ## Unity Sentis
40
+ Unity Sentis is the inference engine that runs in Unity 2023. You can find out more about it [here](https://unity.com/products/sentis).
41
+
42
+ ## Disclaimer
43
+ The model was trained on children's stories, so it is very unlikely to produce undesirable text. As an extra precaution, we removed a few tokens from vocab.json that might not be suitable for younger audiences. The original JSON file can be found on the original Tiny Stories page.
RunTinyStories.cs ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ using System.Collections;
2
+ using System.Collections.Generic;
3
+ using UnityEngine;
4
+ using Unity.Sentis;
5
+ using System.IO;
6
+ using System.Text;
7
+ using FF = Unity.Sentis.Functional;
8
+
9
+ /*
10
+ * Tiny Stories Inference Code
11
+ * ===========================
12
+ *
13
+ * Put this script on the Main Camera
14
+ *
15
+ * In Assets/StreamingAssets put:
16
+ *
17
+ * tinystories.sentis (or put in asset folder and drag onto field)
18
+ * vocab.json
19
+ * merges.txt
20
+ *
21
+ * Install package com.unity.nuget.newtonsoft-json from the Package Manager
22
+ * Install package com.unity.sentis
23
+ *
24
+ */
25
+
26
+
27
/// <summary>
/// Generates text with the Tiny Stories model through Unity Sentis.
/// On <see cref="Start"/> the vocabulary, merge rules and model are loaded from
/// StreamingAssets and the prompt is tokenized; afterwards one token is sampled
/// per <see cref="Update"/> call and the growing text is written to the console.
/// </summary>
public class RunTinyStories : MonoBehaviour
{
    //Drop the tinystories.sentis or onnx file on here if using an asset:
    //public ModelAsset asset;
    const BackendType backend = BackendType.GPUCompute;

    //string outputString = "Once upon a time, there were three bears";
    string outputString = "One day an alien came down from Mars. It saw a chicken";

    // Length of the token window passed to the model on every step. It can be adjusted.
    const int maxTokens = 100;

    //Make this smaller for more randomness
    const float predictability = 5f;

    //Special tokens
    const int END_OF_TEXT = 50256;

    // Byte-preserving encoding used to move between raw bytes and characters.
    static readonly Encoding Latin1 = Encoding.GetEncoding("ISO-8859-1");

    //Store the vocabulary (token id -> token string)
    string[] tokens;

    IWorker engine;

    // Index in outputTokens of the most recent token.
    int currentToken = 0;
    int[] outputTokens = new int[maxTokens];

    // Used for special character decoding
    int[] whiteSpaceCharacters = new int[256];
    int[] encodedCharacters = new int[256];

    bool runInference = false;

    //stop after this many generated tokens
    const int stopAfter = 100;

    int totalTokens = 0;

    string[] merges;
    Dictionary<string, int> vocab;

    /// <summary>
    /// Loads the tokenizer data and the model, builds the sampling model,
    /// tokenizes the prompt and enables generation.
    /// </summary>
    void Start()
    {
        SetupWhiteSpaceShifts();

        LoadVocabulary();

        var model1 = ModelLoader.Load(Path.Join(Application.streamingAssetsPath, "tinystories.sentis"));
        //var model1 = ModelLoader.Load(asset);

        //Create a new model to select the random token:
        var model2 = FF.Compile(
            (input, currentToken) =>
            {
                // Take the row of the model output at the current position and sample one id from it.
                var row = FF.Select(model1.Forward(input)[8], 1, currentToken);
                return FF.Multinomial(predictability * row, 1);
            },
            (model1.inputs[0], InputDef.Int(new TensorShape()))
        );

        engine = WorkerFactory.CreateWorker(backend, model2);

        DecodePrompt(outputString);

        // With no prompt tokens currentToken is -1 and there is no position to sample from.
        runInference = currentToken >= 0;
        if (!runInference)
        {
            Debug.LogWarning("The prompt produced no tokens; inference was not started.");
        }
    }

    // Update is called once per frame; generates one token per frame while enabled.
    void Update()
    {
        if (runInference)
        {
            RunInference();
        }
    }

    /// <summary>
    /// Executes the model once, appends the sampled token to the window and to
    /// the output text, and stops generation on END_OF_TEXT or after
    /// <c>stopAfter</c> generated tokens.
    /// </summary>
    void RunInference()
    {
        using var tokensSoFar = new TensorInt(new TensorShape(1, maxTokens), outputTokens);
        using var index = new TensorInt(currentToken);

        engine.Execute(new Dictionary<string, Tensor> { { "input_0", tokensSoFar }, { "input_1", index } });

        var sampled = engine.PeekOutput() as TensorInt;
        if (sampled == null)
        {
            // Without an integer output there is nothing to decode; stop instead of failing every frame.
            Debug.LogError("The model output is not a TensorInt; stopping inference.");
            runInference = false;
            return;
        }

        sampled.CompleteOperationsAndDownload();

        int ID = sampled[0];

        //shift window down if got to the end
        if (currentToken >= maxTokens - 1)
        {
            System.Array.Copy(outputTokens, 1, outputTokens, 0, maxTokens - 1);
            currentToken--;
        }

        outputTokens[++currentToken] = ID;
        totalTokens++;

        if (ID == END_OF_TEXT || totalTokens >= stopAfter)
        {
            runInference = false;
        }
        else if (ID >= 0 && ID < tokens.Length)
        {
            outputString += GetUnicodeText(tokens[ID]);
        }

        Debug.Log(outputString);
    }

    /// <summary>
    /// Tokenizes <paramref name="text"/> and writes the ids to the start of the
    /// token window. When the prompt has more tokens than the window holds,
    /// only the last <c>maxTokens</c> tokens are kept.
    /// </summary>
    void DecodePrompt(string text)
    {
        var inputTokens = GetTokens(text);

        int skipped = Mathf.Max(0, inputTokens.Count - maxTokens);
        if (skipped > 0)
        {
            Debug.LogWarning("Prompt is longer than the token window; dropping the first " + skipped + " tokens.");
        }

        int count = inputTokens.Count - skipped;
        for (int i = 0; i < count; i++)
        {
            outputTokens[i] = inputTokens[skipped + i];
        }
        currentToken = count - 1;
    }

    /// <summary>
    /// Reads vocab.json and merges.txt from StreamingAssets and builds the
    /// id-to-token lookup table.
    /// </summary>
    void LoadVocabulary()
    {
        var jsonText = File.ReadAllText(Path.Join(Application.streamingAssetsPath, "vocab.json"));
        vocab = Newtonsoft.Json.JsonConvert.DeserializeObject<Dictionary<string, int>>(jsonText);

        // Size the table by the largest id rather than the entry count, so a
        // vocabulary with gaps in its ids cannot index past the end.
        int maxId = -1;
        foreach (var id in vocab.Values)
        {
            if (id > maxId) maxId = id;
        }

        tokens = new string[maxId + 1];
        for (int i = 0; i < tokens.Length; i++)
        {
            tokens[i] = "";   // ids without an entry decode to nothing
        }
        foreach (var item in vocab)
        {
            if (item.Value >= 0) tokens[item.Value] = item.Key;
        }

        merges = File.ReadAllLines(Path.Join(Application.streamingAssetsPath, "merges.txt"));
    }

    // Translates encoded special characters to Unicode
    string GetUnicodeText(string text)
    {
        if (string.IsNullOrEmpty(text)) return "";

        var bytes = Latin1.GetBytes(ShiftCharacterDown(text));
        return Encoding.UTF8.GetString(bytes);
    }

    // Converts text to its UTF-8 bytes, one character per byte, in the shifted form used by the vocabulary
    string GetASCIIText(string newText)
    {
        var bytes = Encoding.UTF8.GetBytes(newText);
        return ShiftCharacterUp(Latin1.GetString(bytes));
    }

    /// <summary>
    /// Maps characters in the range 256..511 back to the byte values stored in
    /// <c>whiteSpaceCharacters</c>; all other characters are left unchanged.
    /// </summary>
    string ShiftCharacterDown(string text)
    {
        var outText = new StringBuilder(text.Length);
        foreach (char letter in text)
        {
            int shifted = letter - 256;
            if (shifted >= 0 && shifted < whiteSpaceCharacters.Length)
            {
                // Character 256 is the first shifted entry, so the test must include it.
                outText.Append((char)whiteSpaceCharacters[shifted]);
            }
            else
            {
                outText.Append(letter);
            }
        }
        return outText.ToString();
    }

    /// <summary>
    /// Replaces every character (expected to be below 256) with its entry in
    /// <c>encodedCharacters</c>.
    /// </summary>
    string ShiftCharacterUp(string text)
    {
        var outText = new StringBuilder(text.Length);
        foreach (char letter in text)
        {
            outText.Append((char)encodedCharacters[(int)letter]);
        }
        return outText.ToString();
    }

    /// <summary>
    /// Fills the two lookup tables: each byte value for which
    /// <see cref="IsWhiteSpace"/> is true is assigned the next free code from
    /// 256 upwards, every other byte value maps to itself.
    /// </summary>
    void SetupWhiteSpaceShifts()
    {
        for (int i = 0, n = 0; i < 256; i++)
        {
            encodedCharacters[i] = i;
            if (IsWhiteSpace(i))
            {
                encodedCharacters[i] = n + 256;
                whiteSpaceCharacters[n++] = i;
            }
        }
    }

    bool IsWhiteSpace(int i)
    {
        //returns true if it is a whitespace character
        return i <= 32 || (i >= 127 && i <= 160) || i == 173;
    }

    /// <summary>
    /// Converts <paramref name="text"/> to vocabulary ids: the text is split
    /// into single encoded characters, the merge rules are applied, and each
    /// resulting symbol is looked up in the vocabulary.
    /// </summary>
    List<int> GetTokens(string text)
    {
        text = GetASCIIText(text);

        // Start with a list of single characters
        var inputTokens = new List<string>(text.Length);
        foreach (var letter in text)
        {
            inputTokens.Add(letter.ToString());
        }

        ApplyMerges(inputTokens);

        //Find the ids of the words in the vocab
        var ids = new List<int>(inputTokens.Count);
        foreach (var token in inputTokens)
        {
            if (vocab.TryGetValue(token, out int id))
            {
                ids.Add(id);
            }
            else
            {
                // Report symbols that are skipped because the vocabulary has no entry for them.
                Debug.LogWarning("Prompt symbol not found in vocab.json and skipped: " + token);
            }
        }

        return ids;
    }

    /// <summary>
    /// Applies every merge rule in file order: wherever the first symbol of a
    /// rule is directly followed by the second, the two list entries are joined.
    /// </summary>
    void ApplyMerges(List<string> inputTokens)
    {
        foreach (var merge in merges)
        {
            string[] pair = merge.Split(' ');
            if (pair.Length < 2) continue;   // blank or malformed line

            int n = inputTokens.IndexOf(pair[0]);
            while (n != -1)
            {
                if (n < inputTokens.Count - 1 && inputTokens[n + 1] == pair[1])
                {
                    inputTokens[n] += inputTokens[n + 1];
                    inputTokens.RemoveAt(n + 1);
                }
                n = inputTokens.IndexOf(pair[0], n + 1);
            }
        }
    }

    // Releases the worker when the component is destroyed.
    private void OnDestroy()
    {
        engine?.Dispose();
    }

}
gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tinystories.sentis filter=lfs diff=lfs merge=lfs -text
info.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "code": [
3
+ "RunTinyStories.cs"
4
+ ],
5
+ "models": [
6
+ "tinystories.sentis"
7
+ ],
8
+ "data": [
9
+ "vocab.json",
10
+ "merges.txt"
11
+ ],
12
+ "version": [
13
+ "1.4.0"
14
+ ]
15
+ }
merges (3).txt ADDED
The diff for this file is too large to render. See raw diff
 
tinystories (1).onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcb65da6a70db2b11f977b136683616958339ea0a23b99552258421b6f2c1e4b
3
+ size 436935967
tinystories.sentis ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7962eb7db56b241cc19cd3f0cffcf5d76d3c35639917f07effa6b3c242c91e9
3
+ size 478818076
vocab (2).json ADDED
The diff for this file is too large to render. See raw diff