omdeep22 committed on
Commit
985fc2e
·
verified ·
1 Parent(s): 144d992

Fix: float causal mask, weight tying, attention_mask, config aliases

Browse files
Files changed (1) hide show
  1. config.json +27 -67
config.json CHANGED
@@ -1,66 +1,11 @@
1
  {
2
- "return_dict": true,
3
- "output_hidden_states": false,
4
- "torchscript": false,
5
- "dtype": null,
6
- "pruned_heads": {},
7
- "tie_word_embeddings": true,
8
- "chunk_size_feed_forward": 0,
9
- "is_encoder_decoder": false,
10
- "is_decoder": false,
11
- "cross_attention_hidden_size": null,
12
- "add_cross_attention": false,
13
- "tie_encoder_decoder": false,
14
- "architectures": [
15
- "KonkanGPT"
16
- ],
17
- "finetuning_task": null,
18
- "id2label": {
19
- "0": "LABEL_0",
20
- "1": "LABEL_1"
21
- },
22
- "label2id": {
23
- "LABEL_0": 0,
24
- "LABEL_1": 1
25
  },
26
- "task_specific_params": null,
27
- "problem_type": null,
28
- "tokenizer_class": null,
29
- "prefix": null,
30
- "bos_token_id": null,
31
- "pad_token_id": null,
32
- "eos_token_id": null,
33
- "sep_token_id": null,
34
- "decoder_start_token_id": null,
35
- "max_length": 20,
36
- "min_length": 0,
37
- "do_sample": false,
38
- "early_stopping": false,
39
- "num_beams": 1,
40
- "temperature": 1.0,
41
- "top_k": 50,
42
- "top_p": 1.0,
43
- "typical_p": 1.0,
44
- "repetition_penalty": 1.0,
45
- "length_penalty": 1.0,
46
- "no_repeat_ngram_size": 0,
47
- "encoder_no_repeat_ngram_size": 0,
48
- "bad_words_ids": null,
49
- "num_return_sequences": 1,
50
- "output_scores": false,
51
- "return_dict_in_generate": false,
52
- "forced_bos_token_id": null,
53
- "forced_eos_token_id": null,
54
- "remove_invalid_values": false,
55
- "exponential_decay_length_penalty": null,
56
- "suppress_tokens": null,
57
- "begin_suppress_tokens": null,
58
- "num_beam_groups": 1,
59
- "diversity_penalty": 0.0,
60
- "_name_or_path": "",
61
- "transformers_version": "4.57.1",
62
- "tf_legacy_loss": false,
63
- "use_bfloat16": false,
64
  "vocab_size": 32000,
65
  "d_model": 768,
66
  "n_layers": 12,
@@ -68,10 +13,25 @@
68
  "d_ff": 3072,
69
  "max_len": 1024,
70
  "dropout": 0.1,
71
- "model_type": "konkangpt",
72
- "output_attentions": false,
73
- "auto_map": {
74
- "AutoConfig": "configuration_konkan.KonkanSmallConfig",
75
- "AutoModelForCausalLM": "modeling_konkan.KonkanGPT"
76
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  }
 
1
  {
2
+ "model_type": "konkangpt",
3
+ "_name_or_path": "omdeep22/Gonyai-v1",
4
+ "architectures": ["KonkanGPT"],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_konkan.KonkanSmallConfig",
7
+ "AutoModelForCausalLM": "modeling_konkan.KonkanGPT"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "vocab_size": 32000,
10
  "d_model": 768,
11
  "n_layers": 12,
 
13
  "d_ff": 3072,
14
  "max_len": 1024,
15
  "dropout": 0.1,
16
+ "hidden_size": 768,
17
+ "num_hidden_layers": 12,
18
+ "num_attention_heads": 12,
19
+ "intermediate_size": 3072,
20
+ "max_position_embeddings": 1024,
21
+ "bos_token_id": 0,
22
+ "pad_token_id": 1,
23
+ "eos_token_id": 2,
24
+ "is_decoder": true,
25
+ "is_encoder_decoder": false,
26
+ "use_cache": false,
27
+ "tie_word_embeddings": true,
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.38.0",
30
+ "max_length": 1024,
31
+ "do_sample": true,
32
+ "temperature": 0.7,
33
+ "top_k": 50,
34
+ "top_p": 0.9,
35
+ "repetition_penalty": 1.0,
36
+ "tokenizer_class": "PreTrainedTokenizerFast"
37
  }