Illidan1234 committed on
Commit
5e754bc
·
verified ·
1 Parent(s): 871f99c

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: text-generation
3
+ base_model:
4
+ - google/gemma-3-1b-it
5
+ library_name: transformers.js
6
+ license: gemma
7
+ new_version: onnx-community/gemma-3-1b-it-ONNX-GQA
8
+ ---
9
+
10
+ ## Usage
11
+
12
+ ### ONNXRuntime
13
+
14
+ ```py
15
+ from transformers import AutoConfig, AutoTokenizer
16
+ import onnxruntime
17
+ import numpy as np
18
+
19
+ # 1. Load config, processor, and model
20
+ path_to_model = "./gemma-3-1b-it-ONNX"
21
+ config = AutoConfig.from_pretrained(path_to_model)
22
+ tokenizer = AutoTokenizer.from_pretrained(path_to_model)
23
+ decoder_session = onnxruntime.InferenceSession(f"{path_to_model}/onnx/model.onnx")
24
+
25
+ ## Set config values
26
+ num_key_value_heads = config.num_key_value_heads
27
+ head_dim = config.head_dim
28
+ num_hidden_layers = config.num_hidden_layers
29
+ eos_token_id = 106 # 106 is for <end_of_turn>
30
+
31
+ # 2. Prepare inputs
32
+ ## Create input messages
33
+ messages = [
34
+ { "role": "system", "content": "You are a helpful assistant." },
35
+ { "role": "user", "content": "Write me a poem about Machine Learning." },
36
+ ]
37
+
38
+ ## Apply tokenizer
39
+ inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="np")
40
+
41
+ ## Prepare decoder inputs
42
+ batch_size = inputs['input_ids'].shape[0]
43
+ past_key_values = {
44
+ f'past_key_values.{layer}.{kv}': np.zeros([batch_size, num_key_value_heads, 0, head_dim], dtype=np.float32)
45
+ for layer in range(num_hidden_layers)
46
+ for kv in ('key', 'value')
47
+ }
48
+ input_ids = inputs['input_ids']
49
+ position_ids = np.tile(np.arange(1, input_ids.shape[-1] + 1), (batch_size, 1))
50
+
51
+ # 3. Generation loop
52
+ max_new_tokens = 1024
53
+ generated_tokens = np.array([[]], dtype=np.int64)
54
+ for i in range(max_new_tokens):
55
+ logits, *present_key_values = decoder_session.run(None, dict(
56
+ input_ids=input_ids,
57
+ position_ids=position_ids,
58
+ **past_key_values,
59
+ ))
60
+
61
+ ## Update values for next generation loop
62
+ input_ids = logits[:, -1].argmax(-1, keepdims=True)
63
+ position_ids = position_ids[:, -1:] + 1
64
+ for j, key in enumerate(past_key_values):
65
+ past_key_values[key] = present_key_values[j]
66
+
67
+ generated_tokens = np.concatenate([generated_tokens, input_ids], axis=-1)
68
+ if (input_ids == eos_token_id).all():
69
+ break
70
+
71
+ ## (Optional) Streaming
72
+ print(tokenizer.decode(input_ids[0]), end='', flush=True)
73
+ print()
74
+
75
+ # 4. Output result
76
+ print(tokenizer.batch_decode(generated_tokens))
77
+ ```
78
+
79
+ <details>
80
+ <summary>See example output</summary>
81
+
82
+ ```
83
+ Okay, here’s a poem about Machine Learning, aiming for a balance of technical and evocative language:
84
+
85
+ **The Silent Learner**
86
+
87
+ The data streams, a boundless flow,
88
+ A river vast, where patterns grow.
89
+ No human hand to guide the way,
90
+ Just algorithms, come what may.
91
+
92
+ Machine Learning, a subtle art,
93
+ To teach a system, a brand new start.
94
+ With weights and biases, finely tuned,
95
+ It seeks the truth, beneath the moon.
96
+
97
+ It learns from errors, big and small,
98
+ Adjusting swiftly, standing tall.
99
+ From pixels bright to voices clear,
100
+ It builds a model, banishing fear.
101
+
102
+ Of blind prediction, cold and stark,
103
+ It finds the meaning, leaves its mark.
104
+ A network deep, a complex grace,
105
+ Discovering insights, time and space.
106
+
107
+ It sees the trends, the subtle hue,
108
+ Predicting futures, fresh and new.
109
+ A silent learner, ever keen,
110
+ A digital mind, unseen, serene.
111
+
112
+ So let the code begin to gleam,
113
+ A blossoming of a learning dream.
114
+ Machine Learning, a wondrous sight,
115
+ Shaping the future, shining bright.
116
+
117
+ ---
118
+
119
+ Would you like me to:
120
+
121
+ * Adjust the tone or style? (e.g., more technical, more metaphorical)
122
+ * Focus on a specific aspect of ML (e.g., neural networks, data analysis)?
123
+ * Create a different length or format?
124
+ ```
125
+
126
+ </details>
127
+
128
+
129
+
130
+ ### Transformers.js
131
+ ```js
132
+ import { pipeline } from "@huggingface/transformers";
133
+
134
+ // Create a text generation pipeline
135
+ const generator = await pipeline(
136
+ "text-generation",
137
+ "onnx-community/gemma-3-1b-it-ONNX",
138
+ { dtype: "q4" },
139
+ );
140
+
141
+ // Define the list of messages
142
+ const messages = [
143
+ { role: "system", content: "You are a helpful assistant." },
144
+ { role: "user", content: "Write me a poem about Machine Learning." },
145
+ ];
146
+
147
+ // Generate a response
148
+ const output = await generator(messages, { max_new_tokens: 512, do_sample: false });
149
+ console.log(output[0].generated_text.at(-1).content);
150
+ ```
config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "_name_or_path": "google/gemma-3-1b-it",
4
+ "architectures": [
5
+ "Gemma3ForCausalLM"
6
+ ],
7
+ "attention_bias": false,
8
+ "attention_dropout": 0.0,
9
+ "attn_logit_softcapping": null,
10
+ "bos_token_id": 2,
11
+ "cache_implementation": "hybrid",
12
+ "eos_token_id": [
13
+ 1,
14
+ 106
15
+ ],
16
+ "final_logit_softcapping": null,
17
+ "head_dim": 256,
18
+ "hidden_activation": "gelu_pytorch_tanh",
19
+ "hidden_size": 1152,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 6912,
22
+ "max_position_embeddings": 32768,
23
+ "model_type": "gemma3_text",
24
+ "num_attention_heads": 4,
25
+ "num_hidden_layers": 26,
26
+ "num_key_value_heads": 1,
27
+ "pad_token_id": 0,
28
+ "query_pre_attn_scalar": 256,
29
+ "rms_norm_eps": 1e-06,
30
+ "rope_local_base_freq": 10000,
31
+ "rope_scaling": null,
32
+ "rope_theta": 1000000,
33
+ "sliding_window": 512,
34
+ "sliding_window_pattern": 6,
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.50.0.dev0",
37
+ "transformers.js_config": {
38
+ "use_external_data_format": {
39
+ "model.onnx": true
40
+ }
41
+ },
42
+ "use_cache": true,
43
+ "vocab_size": 262144
44
+ }
generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 2,
4
+ "cache_implementation": "hybrid",
5
+ "eos_token_id": [
6
+ 1,
7
+ 106
8
+ ],
9
+ "pad_token_id": 0,
10
+ "transformers_version": "4.50.0.dev0"
11
+ }
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcdd6cc5892fb4f3ea04b423657561b437239b37ab963b85bebae4532081a16f
3
+ size 1012725
onnx/model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f261e3a3b222e3110bd7d07f1a24d3bdcade389df85d163f11c82cfa1182b700
3
+ size 1602007661
onnx/model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7278853fe0e9babcdaad178c2d91aff09830b510a9ca88317634f0ef8194d19e
3
+ size 2000774696
onnx/model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d8ddeb9c637d43625df45933ad3a9e2337b8a027ab37a70dc230735ba285f5c
3
+ size 1001481982
onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd65478ade20f0bea7ed1e80455e3b3b6eb6f4242d611d63e7bcddc94cc0f108
3
+ size 1645616265
onnx/model_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a8cb5ab287f04050d29de31e47354f8868069c0dec8cab326376274a6a12508
3
+ size 997769309
onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d8ddeb9c637d43625df45933ad3a9e2337b8a027ab37a70dc230735ba285f5c
3
+ size 1001481982
onnx/model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:714ab717b4840136b736a3a288f853ac9386751f33c2edd8a753086530cf9dbc
3
+ size 1001482078
special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<eos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff