2796gauravc committed on
Commit
19f66e0
·
verified ·
1 Parent(s): 76d2fc7

Upload MLC model weights

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FunctionGemma Fine-tuned Model for WebLLM
2
+
3
+ This model can be used with [WebLLM](https://github.com/mlc-ai/web-llm).
4
+
5
+ ## Model Information
6
+ - Base Model: google/functiongemma-270m-it
7
+ - LoRA Adapter: 2796gauravc/functiongemma-physics-game-lora
8
+ - Quantization: q4f16_1
9
+
10
+ ## Usage with WebLLM
11
+
12
+ Since compiling to WASM requires building from source, you can use this model
13
+ with the pre-compiled Gemma WASM library from WebLLM:
14
+
15
+ ```javascript
16
+ import * as webllm from "@mlc-ai/web-llm";
17
+
18
+ const appConfig = {
19
+ model_list: [
20
+ {
21
+ model: "https://huggingface.co/2796gauravc/functiongemma-mlc",
22
+ model_id: "functiongemma-physics",
23
+ // Official Gemma-2B WASM; a model_lib must match this model's architecture and quantization, so verify that it loads
24
+ model_lib: "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/gemma-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm"
25
+ }
26
+ ]
27
+ };
28
+
29
+ const engine = await webllm.CreateMLCEngine(
30
+ "functiongemma-physics",
31
+ { appConfig }
32
+ );
33
+
34
+ const response = await engine.chat.completions.create({
35
+ messages: [{ role: "user", content: "Hello!" }]
36
+ });
37
+ ```
38
+
39
+ ## Alternative: Use Ollama for Local Testing
40
+
41
+ For local CPU/GPU inference without a browser:
42
+
43
+ ```bash
44
+ # Install llama-cpp-python (the model must first be converted to GGUF in a separate step)
45
+ pip install llama-cpp-python
46
+
47
+ # Then use with Ollama or llama.cpp
48
+ ```
49
+
50
+ ## Files in This Repo
51
+
52
+ - `params_shard_*.bin`: Model weights in MLC format
53
+ - `mlc-chat-config.json`: Model configuration
54
+ - `tokenizer.json`: Tokenizer
55
+ - `tokenizer_config.json`: Tokenizer configuration
56
+
57
+ ## Note on WASM Compilation
58
+
59
+ Compiling custom WASM libraries requires building MLC-LLM from source with
60
+ Emscripten, which takes 1-2 hours. For most use cases, using the official
61
+ Gemma WASM is sufficient, provided it matches this model's architecture and quantization (verify that it loads).
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<end_of_image>": 262145,
3
+ "<image_soft_token>": 262144
4
+ }
example.html ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>FunctionGemma Physics Demo</title>
5
+ <script type="module">
6
+ import * as webllm from "https://esm.run/@mlc-ai/web-llm";
7
+
8
+ const appConfig = {
9
+ model_list: [
10
+ {
11
+ model: "https://huggingface.co/2796gauravc/functiongemma-mlc",
12
+ model_id: "functiongemma-physics",
13
+ // Official Gemma-2B WASM. NOTE(review): mlc-chat-config.json declares gemma3_text; confirm this library actually loads it.
14
+ model_lib: "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/gemma-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm"
15
+ }
16
+ ]
17
+ };
18
+
19
+ async function initModel() {
20
+ const statusDiv = document.getElementById("status");
21
+ statusDiv.textContent = "Loading model...";
22
+
23
+ try {
24
+ const engine = await webllm.CreateMLCEngine(
25
+ "functiongemma-physics",
26
+ {
27
+ appConfig,
28
+ initProgressCallback: (progress) => {
29
+ statusDiv.textContent = progress.text;
30
+ }
31
+ }
32
+ );
33
+
34
+ statusDiv.textContent = "Model loaded! Type a message.";
35
+
36
+ document.getElementById("send").onclick = async () => {
37
+ const input = document.getElementById("input").value;
38
+ const output = document.getElementById("output");
39
+
40
+ output.textContent = "Thinking...";
41
+
42
+ const response = await engine.chat.completions.create({
43
+ messages: [{ role: "user", content: input }],
44
+ stream: false
45
+ });
46
+
47
+ output.textContent = response.choices[0].message.content;
48
+ };
49
+ } catch (error) {
50
+ statusDiv.textContent = "Error: " + error.message;
51
+ }
52
+ }
53
+
54
+ window.onload = initModel;
55
+ </script>
56
+ </head>
57
+ <body>
58
+ <h1>FunctionGemma Physics Game Assistant</h1>
59
+ <div id="status">Initializing...</div>
60
+ <br>
61
+ <input type="text" id="input" placeholder="Ask about physics..." style="width:400px">
62
+ <button id="send">Send</button>
63
+ <br><br>
64
+ <div id="output" style="border:1px solid #ccc; padding:10px; min-height:100px;"></div>
65
+ </body>
66
+ </html>
mlc-chat-config.json ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "gemma3_text",
4
+ "quantization": "q4f16_1",
5
+ "model_config": {
6
+ "text_config": {
7
+ "hidden_size": 640,
8
+ "intermediate_size": 2048,
9
+ "num_hidden_layers": 18,
10
+ "attention_bias": false,
11
+ "num_attention_heads": 4,
12
+ "num_key_value_heads": 1,
13
+ "head_dim": 256,
14
+ "rms_norm_eps": 1e-06,
15
+ "hidden_activation": "gelu_pytorch_tanh",
16
+ "position_embedding_base": 1000000.0,
17
+ "rope_scaling": null,
18
+ "context_window_size": 8192,
19
+ "prefill_chunk_size": 8192,
20
+ "query_pre_attn_scalar": 256,
21
+ "sliding_window_size": 512,
22
+ "kwargs": {
23
+ "_sliding_window_pattern": 6,
24
+ "architectures": [
25
+ "Gemma3ForCausalLM"
26
+ ],
27
+ "attention_dropout": 0.0,
28
+ "attn_logit_softcapping": null,
29
+ "bos_token_id": 2,
30
+ "dtype": "bfloat16",
31
+ "eos_token_id": [
32
+ 1,
33
+ 50
34
+ ],
35
+ "final_logit_softcapping": null,
36
+ "initializer_range": 0.02,
37
+ "layer_types": [
38
+ "sliding_attention",
39
+ "sliding_attention",
40
+ "sliding_attention",
41
+ "sliding_attention",
42
+ "sliding_attention",
43
+ "full_attention",
44
+ "sliding_attention",
45
+ "sliding_attention",
46
+ "sliding_attention",
47
+ "sliding_attention",
48
+ "sliding_attention",
49
+ "full_attention",
50
+ "sliding_attention",
51
+ "sliding_attention",
52
+ "sliding_attention",
53
+ "sliding_attention",
54
+ "sliding_attention",
55
+ "full_attention"
56
+ ],
57
+ "max_position_embeddings": 32768,
58
+ "model_type": "gemma3_text",
59
+ "pad_token_id": 0,
60
+ "rope_local_base_freq": 10000.0,
61
+ "sliding_window": 512,
62
+ "transformers_version": "4.57.3",
63
+ "use_bidirectional_attention": false,
64
+ "use_cache": true
65
+ }
66
+ },
67
+ "vocab_size": 262144,
68
+ "tensor_parallel_shards": 1,
69
+ "max_batch_size": 128,
70
+ "context_window_size": 8192,
71
+ "sliding_window_size": 512,
72
+ "prefill_chunk_size": 8192,
73
+ "is_text_model": true
74
+ },
75
+ "vocab_size": 262144,
76
+ "context_window_size": 8192,
77
+ "sliding_window_size": 512,
78
+ "prefill_chunk_size": 8192,
79
+ "attention_sink_size": -1,
80
+ "tensor_parallel_shards": 1,
81
+ "pipeline_parallel_stages": 1,
82
+ "active_vocab_size": 262146,
83
+ "temperature": 1.0,
84
+ "presence_penalty": 0.0,
85
+ "frequency_penalty": 0.0,
86
+ "repetition_penalty": 1.0,
87
+ "top_p": 0.95,
88
+ "tokenizer_files": [
89
+ "tokenizer.model",
90
+ "tokenizer.json",
91
+ "added_tokens.json",
92
+ "tokenizer_config.json"
93
+ ],
94
+ "tokenizer_info": {
95
+ "token_postproc_method": "byte_fallback",
96
+ "prepend_space_in_encode": false,
97
+ "strip_space_in_decode": false
98
+ },
99
+ "conv_template": {
100
+ "name": "gemma_instruction",
101
+ "system_template": "{system_message}",
102
+ "system_message": "",
103
+ "system_prefix_token_ids": [
104
+ 2
105
+ ],
106
+ "add_role_after_system_message": true,
107
+ "roles": {
108
+ "user": "<start_of_turn>user",
109
+ "assistant": "<start_of_turn>model"
110
+ },
111
+ "role_templates": {
112
+ "user": "{user_message}",
113
+ "assistant": "{assistant_message}",
114
+ "tool": "{tool_message}"
115
+ },
116
+ "messages": [],
117
+ "seps": [
118
+ "<end_of_turn>\n"
119
+ ],
120
+ "role_content_sep": "\n",
121
+ "role_empty_sep": "\n",
122
+ "stop_str": [
123
+ "<end_of_turn>"
124
+ ],
125
+ "stop_token_ids": [
126
+ 1,
127
+ 106
128
+ ],
129
+ "function_string": "",
130
+ "use_function_calling": false
131
+ },
132
+ "pad_token_id": 0,
133
+ "bos_token_id": 2,
134
+ "eos_token_id": [
135
+ 1,
136
+ 50,
137
+ 106
138
+ ]
139
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f952551766a057de045a7d12981247388fdf487479e0bfe2b234bbef4462810
3
+ size 83886080
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24c7a77bea53581c66cec96797f7aa3b791e32762397096966a979a1468348bf
3
+ size 33201408
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a705931eeb9387fae6338fe96211ea3716124b8485aa9ab1b7e6a7d268256bca
3
+ size 33336064
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd14018d9cf05ea824c6a4e2aa1438b094113277d8c3b3aecf409290f0ffd09
3
+ size 462080
tensor-cache.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6b09a0b4a803ad453063ca4bb49a784540e8120004e2450e025df2b27d41fb2
3
+ size 33384899
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa009fcbc3589a9904d30d04834094fea4653c2ac6d2de2cd1262d4f7a50ceb3
3
+ size 4689144
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff