Commit ·
d6dbe55
1
Parent(s): 679d9f0
Upload LanceAI
Browse files
- generation_config.json +7 -0
- lance_ai_model.py +30 -1
generation_config.json
CHANGED
|
@@ -1,4 +1,11 @@
|
|
| 1 |
{
|
| 2 |
"_from_model_config": true,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"transformers_version": "4.51.3"
|
| 4 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"_from_model_config": true,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"max_new_tokens": 150,
|
| 5 |
+
"no_repeat_ngram_size": 3,
|
| 6 |
+
"repetition_penalty": 1.3,
|
| 7 |
+
"temperature": 0.8,
|
| 8 |
+
"top_k": 40,
|
| 9 |
+
"top_p": 0.9,
|
| 10 |
"transformers_version": "4.51.3"
|
| 11 |
}
|
lance_ai_model.py
CHANGED
|
@@ -34,9 +34,19 @@ class LanceAI(PreTrainedModel, GenerationMixin):
|
|
| 34 |
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
|
| 35 |
self.loss_fct = nn.CrossEntropyLoss(ignore_index=-100)
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
self.init_weights()
|
| 38 |
|
| 39 |
-
def forward(self, input_ids=None, attention_mask=None, labels=None, inputs_embeds=None, return_dict=True, use_cache=False):
|
| 40 |
embeddings = self.embedding(input_ids) if inputs_embeds is None else inputs_embeds
|
| 41 |
encoder_output = self.encoder(embeddings)
|
| 42 |
decoder_output = self.decoder(embeddings, encoder_output)
|
|
@@ -55,6 +65,25 @@ class LanceAI(PreTrainedModel, GenerationMixin):
|
|
| 55 |
|
| 56 |
return (loss, logits) if loss is not None else logits
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
# Register with Hugging Face
|
| 59 |
CONFIG_MAPPING.register("lance_ai", LanceAIConfig)
|
| 60 |
MODEL_FOR_CAUSAL_LM_MAPPING.register(LanceAIConfig, LanceAI)
|
|
|
|
| 34 |
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
|
| 35 |
self.loss_fct = nn.CrossEntropyLoss(ignore_index=-100)
|
| 36 |
|
| 37 |
+
# Set generation config defaults for more natural responses
|
| 38 |
+
self.generation_config.max_new_tokens = 150
|
| 39 |
+
self.generation_config.temperature = 0.8
|
| 40 |
+
self.generation_config.top_k = 40
|
| 41 |
+
self.generation_config.top_p = 0.9
|
| 42 |
+
self.generation_config.do_sample = True
|
| 43 |
+
self.generation_config.repetition_penalty = 1.3
|
| 44 |
+
self.generation_config.no_repeat_ngram_size = 3
|
| 45 |
+
self.generation_config.length_penalty = 1.0
|
| 46 |
+
|
| 47 |
self.init_weights()
|
| 48 |
|
| 49 |
+
def forward(self, input_ids=None, attention_mask=None, labels=None, inputs_embeds=None, return_dict=True, use_cache=False, **kwargs):
|
| 50 |
embeddings = self.embedding(input_ids) if inputs_embeds is None else inputs_embeds
|
| 51 |
encoder_output = self.encoder(embeddings)
|
| 52 |
decoder_output = self.decoder(embeddings, encoder_output)
|
|
|
|
| 65 |
|
| 66 |
return (loss, logits) if loss is not None else logits
|
| 67 |
|
| 68 |
+
def prepare_inputs_for_generation(self, input_ids, past_key_values=None, attention_mask=None, **kwargs):
    """Assemble the dict of model inputs for one generation step.

    Args:
        input_ids: Token-id tensor indexed as ``[:, -1]``, so it needs at
            least two dimensions (presumably ``(batch, seq_len)`` -- confirm
            against the caller).
        past_key_values: Optional cache object. It is only tested for
            truthiness here and passed through unchanged.
        attention_mask: Optional mask, passed through unchanged.
        **kwargs: Any further entries, copied into the result as-is.

    Returns:
        A dict with the keys ``"input_ids"``, ``"attention_mask"`` and
        ``"past_key_values"``, followed by every entry of ``kwargs``.
    """
    # With a truthy cache only the newest token is fed to the model;
    # otherwise the full sequence is forwarded untouched.
    step_ids = input_ids[:, -1].unsqueeze(-1) if past_key_values else input_ids

    model_inputs = {
        "input_ids": step_ids,
        "attention_mask": attention_mask,
        "past_key_values": past_key_values,
    }
    model_inputs.update(kwargs)
    return model_inputs
|
| 79 |
+
|
| 80 |
+
def _reorder_cache(self, past_key_values, beam_idx):
    """Reorder every cached tensor along dim 0 according to ``beam_idx``.

    Args:
        past_key_values: Iterable of per-layer iterables of tensors. Each
            tensor is indexed along dim 0 (presumably the batch * beam
            axis -- confirm against the cache layout the model produces).
        beam_idx: 1-D integer tensor of the dim-0 indices to keep, in the
            order they should appear in the result.

    Returns:
        A tuple with one inner tuple per layer, holding the reordered
        tensors. An empty ``past_key_values`` yields ``()``.
    """
    return tuple(
        tuple(
            # index_select needs the index on the same device as the tensor
            # it selects from; the cached states may live on another device
            # than beam_idx (e.g. a model sharded across devices).
            past_state.index_select(0, beam_idx.to(past_state.device))
            for past_state in layer_past
        )
        for layer_past in past_key_values
    )
|
| 86 |
+
|
| 87 |
# Register the LanceAI config (model type "lance_ai") and model class in the transformers auto-mappings
|
| 88 |
CONFIG_MAPPING.register("lance_ai", LanceAIConfig)
|
| 89 |
MODEL_FOR_CAUSAL_LM_MAPPING.register(LanceAIConfig, LanceAI)
|