INC4AI committed on
Commit
55c3d07
·
verified ·
1 Parent(s): 5b906c3

Upload folder using huggingface_hub

Browse files
chat_template.jinja ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro to_text(content) -%}
2
+ {%- if content is string -%}
3
+ {{- content -}}
4
+ {%- elif content is iterable and content is not mapping -%}
5
+ {%- for item in content -%}
6
+ {%- if item is mapping and item.type == 'text' and item.text is defined -%}
7
+ {{- item.text -}}
8
+ {%- elif item is mapping and (item.type == 'audio' or 'audio' in item) -%}
9
+ <|begin_of_audio|><|pad|><|end_of_audio|><|user|>
10
+ {% elif item is string -%}
11
+ {{- item -}}
12
+ {%- endif -%}
13
+ {%- endfor -%}
14
+ {%- else -%}
15
+ {{- content -}}
16
+ {%- endif -%}
17
+ {%- endmacro -%}
18
+ {%- for m in messages -%}
19
+ {%- if m.role == 'system' -%}
20
+ <|system|>
21
+ {{ to_text(m.content) | trim }}
22
+ {%- elif m.role == 'user' -%}
23
+ <|user|>
24
+ {{ to_text(m.content) | trim }}
25
+ {%- elif m.role == 'assistant' -%}
26
+ <|assistant|>
27
+ {{ to_text(m.content) | trim }}
28
+ {%- endif -%}
29
+ {%- endfor -%}
30
+ {%- if add_generation_prompt -%}
31
+ <|assistant|>
32
+ {% endif -%}
config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "GlmAsrForConditionalGeneration"
4
+ ],
5
+ "audio_config": {
6
+ "attention_dropout": 0.0,
7
+ "dtype": "bfloat16",
8
+ "head_dim": 64,
9
+ "hidden_act": "gelu",
10
+ "hidden_size": 1280,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 5120,
13
+ "max_position_embeddings": 1500,
14
+ "model_type": "glmasr_encoder",
15
+ "num_attention_heads": 20,
16
+ "num_hidden_layers": 32,
17
+ "num_key_value_heads": 20,
18
+ "num_mel_bins": 128,
19
+ "partial_rotary_factor": 0.5,
20
+ "rope_parameters": {
21
+ "partial_rotary_factor": 0.5,
22
+ "rope_theta": 10000.0,
23
+ "rope_type": "default"
24
+ }
25
+ },
26
+ "audio_token_id": 59260,
27
+ "dtype": "bfloat16",
28
+ "hidden_size": 2048,
29
+ "model_type": "glmasr",
30
+ "projector_hidden_act": "gelu",
31
+ "quantization_config": {
32
+ "autoround_version": "0.9.5",
33
+ "bits": 4,
34
+ "block_name_to_quantize": "audio_tower.layers,language_model.model.layers",
35
+ "data_type": "int",
36
+ "group_size": 128,
37
+ "iters": 0,
38
+ "packing_format": "auto_round:auto_gptq",
39
+ "quant_method": "auto-round",
40
+ "sym": true
41
+ },
42
+ "text_config": {
43
+ "attention_bias": false,
44
+ "attention_dropout": 0.0,
45
+ "bos_token_id": 1,
46
+ "dtype": "bfloat16",
47
+ "eos_token_id": [
48
+ 59246,
49
+ 59253,
50
+ 59255
51
+ ],
52
+ "head_dim": 128,
53
+ "hidden_act": "silu",
54
+ "hidden_size": 2048,
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 6144,
57
+ "max_position_embeddings": 8192,
58
+ "mlp_bias": false,
59
+ "model_type": "llama",
60
+ "num_attention_heads": 16,
61
+ "num_hidden_layers": 28,
62
+ "num_key_value_heads": 4,
63
+ "pad_token_id": null,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_parameters": {
67
+ "rope_theta": 10000.0,
68
+ "rope_type": "default"
69
+ },
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 59264
73
+ },
74
+ "transformers_version": "5.0.0.dev0",
75
+ "vocab_size": 59264
76
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": [
5
+ 59246,
6
+ 59253,
7
+ 59255
8
+ ],
9
+ "transformers_version": "5.0.0.dev0"
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c6141ae61cf6ab57dae5ca7ef36906abb91663383cd0f44494445f1958a8958
3
+ size 1584969104
processor_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|pad|>",
3
+ "default_transcription_prompt": "Please transcribe this audio into text",
4
+ "feature_extractor": {
5
+ "chunk_length": 30,
6
+ "dither": 0.0,
7
+ "feature_extractor_type": "WhisperFeatureExtractor",
8
+ "feature_size": 128,
9
+ "hop_length": 160,
10
+ "n_fft": 400,
11
+ "n_samples": 480000,
12
+ "nb_max_frames": 3000,
13
+ "padding_side": "right",
14
+ "padding_value": 0.0,
15
+ "return_attention_mask": false,
16
+ "sampling_rate": 16000
17
+ },
18
+ "max_audio_len": 655,
19
+ "processor_class": "GlmAsrProcessor"
20
+ }
quantization_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "sym": true,
5
+ "data_type": "int",
6
+ "iters": 0,
7
+ "autoround_version": "0.9.5",
8
+ "block_name_to_quantize": "audio_tower.layers,language_model.model.layers",
9
+ "quant_method": "auto-round",
10
+ "packing_format": "auto_round:auto_gptq"
11
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "clean_up_tokenization_spaces": false,
4
+ "do_lower_case": false,
5
+ "eos_token": "<|endoftext|>",
6
+ "extra_special_tokens": [
7
+ "<|endoftext|>",
8
+ "[MASK]",
9
+ "[gMASK]",
10
+ "[sMASK]",
11
+ "<sop>",
12
+ "<eop>",
13
+ "<|system|>",
14
+ "<|user|>",
15
+ "<|assistant|>",
16
+ "<|observation|>",
17
+ "<|begin_of_image|>",
18
+ "<|end_of_image|>",
19
+ "<|begin_of_video|>",
20
+ "<|end_of_video|>",
21
+ "<|pad|>",
22
+ "<|begin_of_audio|>",
23
+ "<|end_of_audio|>"
24
+ ],
25
+ "is_local": true,
26
+ "model_input_names": [
27
+ "input_ids",
28
+ "attention_mask"
29
+ ],
30
+ "model_max_length": 65536,
31
+ "model_specific_special_tokens": {},
32
+ "pad_token": "<|endoftext|>",
33
+ "padding_side": "left",
34
+ "processor_class": "GlmAsrProcessor",
35
+ "remove_space": false,
36
+ "tokenizer_class": "TokenizersBackend"
37
+ }