Wonder-Griffin committed on
Commit
fc84ba8
·
verified ·
1 Parent(s): c838758

Initial ZeusMM: tiny-pretrained weights, tokenizer, config, custom code

Browse files
Files changed (3) hide show
  1. config.json +1 -0
  2. special_tokens_map.json +38 -74
  3. zeus_mm.py +14 -5
config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "architectures": [
3
  "ZeusForCausalLM"
4
  ],
 
1
  {
2
+ "_name_or_path": "ZeusMM",
3
  "architectures": [
4
  "ZeusForCausalLM"
5
  ],
special_tokens_map.json CHANGED
@@ -1,78 +1,42 @@
1
  {
2
  "additional_special_tokens": [
3
- {
4
- "content": "<|system|>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "<|user|>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "<|assistant|>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "<image>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "</image>",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "<audio>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "</audio>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
- {
53
- "content": "<kb>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false
58
- },
59
- {
60
- "content": "</kb>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false
65
- },
66
- {
67
- "content": "<|end|>",
68
- "lstrip": false,
69
- "normalized": false,
70
- "rstrip": false,
71
- "single_word": false
72
- }
73
  ],
74
- "bos_token": "<|endoftext|>",
75
- "eos_token": "<|endoftext|>",
76
- "pad_token": "<|endoftext|>",
77
- "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  }
 
1
  {
2
  "additional_special_tokens": [
3
+ "<|system|>",
4
+ "<|user|>",
5
+ "<|assistant|>",
6
+ "<image>",
7
+ "</image>",
8
+ "<audio>",
9
+ "</audio>",
10
+ "<kb>",
11
+ "</kb>",
12
+ "<|end|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  ],
14
+ "bos_token": {
15
+ "content": "<|endoftext|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "eos_token": {
22
+ "content": "<|endoftext|>",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ "pad_token": {
29
+ "content": "<|endoftext|>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ },
35
+ "unk_token": {
36
+ "content": "<|endoftext|>",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false
41
+ }
42
  }
zeus_mm.py CHANGED
@@ -514,15 +514,20 @@ class ZeusForCausalLM(PreTrainedModel):
514
  def forward(
515
  self,
516
  input_ids: torch.LongTensor,
517
- attention_mask: Optional[torch.LongTensor] = None, # [B,T_new]
518
  labels: Optional[torch.LongTensor] = None,
519
- role_ids: Optional[torch.LongTensor] = None, # [B,T_total] (0/1/2)
520
  past_key_values: Optional[List[Tuple[torch.Tensor, torch.Tensor]]] = None,
521
  use_cache: Optional[bool] = None,
522
 
 
 
 
 
 
523
  # ---- Raw inputs for backends OR precomputed memories ----
524
  # Vision
525
- pixel_values: Optional[torch.FloatTensor] = None, # [B,3,H,W] normalized
526
  image_memory: Optional[torch.FloatTensor] = None, # [B,Li,D]
527
  # Audio
528
  input_values: Optional[torch.FloatTensor] = None, # [B,T_audio]
@@ -532,11 +537,15 @@ class ZeusForCausalLM(PreTrainedModel):
532
  retr_input_ids: Optional[torch.LongTensor] = None, # [B,Nr]
533
  retr_attention_mask: Optional[torch.LongTensor] = None, # [B,Nr]
534
  retr_memory: Optional[torch.FloatTensor] = None, # [B,Lr,D]
535
-
536
- # Pre-assembled (advanced): concat memory & mask + pooled summary
537
  memory_mask: Optional[torch.LongTensor] = None, # [B,Lm]
538
  media_summary: Optional[torch.FloatTensor] = None, # [B,D]
 
 
 
539
  ):
 
 
540
  B, T = input_ids.shape
541
  x = self.embed_tokens(input_ids)
542
  x = self.drop(x)
 
514
  def forward(
515
  self,
516
  input_ids: torch.LongTensor,
517
+ attention_mask: Optional[torch.LongTensor] = None,
518
  labels: Optional[torch.LongTensor] = None,
519
+ role_ids: Optional[torch.LongTensor] = None,
520
  past_key_values: Optional[List[Tuple[torch.Tensor, torch.Tensor]]] = None,
521
  use_cache: Optional[bool] = None,
522
 
523
+ # HF Generation adds these — accept & ignore
524
+ return_dict: Optional[bool] = None,
525
+ output_attentions: Optional[bool] = None,
526
+ output_hidden_states: Optional[bool] = None,
527
+
528
  # ---- Raw inputs for backends OR precomputed memories ----
529
  # Vision
530
+ pixel_values: Optional[torch.FloatTensor] = None, # [B,3,H,W]
531
  image_memory: Optional[torch.FloatTensor] = None, # [B,Li,D]
532
  # Audio
533
  input_values: Optional[torch.FloatTensor] = None, # [B,T_audio]
 
537
  retr_input_ids: Optional[torch.LongTensor] = None, # [B,Nr]
538
  retr_attention_mask: Optional[torch.LongTensor] = None, # [B,Nr]
539
  retr_memory: Optional[torch.FloatTensor] = None, # [B,Lr,D]
540
+ # Pre-assembled
 
541
  memory_mask: Optional[torch.LongTensor] = None, # [B,Lm]
542
  media_summary: Optional[torch.FloatTensor] = None, # [B,D]
543
+
544
+ # future-proof
545
+ **unused,
546
  ):
547
+
548
+
549
  B, T = input_ids.shape
550
  x = self.embed_tokens(input_ids)
551
  x = self.drop(x)