moelanoby committed on
Commit
5168ee0
·
verified ·
1 Parent(s): c794919

Direct upload

Browse files
Files changed (2) hide show
  1. bucket_memory_model.py +28 -7
  2. model.safetensors +2 -2
bucket_memory_model.py CHANGED
@@ -243,22 +243,26 @@ class BucketMemoryTransformerLayer(nn.Module):
243
  return x
244
 
245
 
246
-
247
  # Updated model with HuggingFace compatibility
248
  class BucketMemoryModel(PreTrainedModel):
249
- config_class = BucketMemoryConfig # Add this line
250
  base_model_prefix = "bucket-memory-model2"
251
  def __init__(self, config, adapter_kwargs=None):
252
  super().__init__(config)
253
  self.d_model = config.d_model
254
  self.token_embedding = nn.Embedding(config.vocab_size, config.d_model)
 
 
 
 
 
 
 
 
255
  self.pos_encoding = nn.Parameter(torch.zeros(1, config.max_seq_length, config.d_model))
256
  self._init_positional_encoding(config.max_seq_length, config.d_model)
257
 
258
- # Use config.num_attention_heads if available, otherwise calculate
259
- num_heads = getattr(config, 'num_attention_heads', config.d_model // 64)
260
- num_heads = max(1, num_heads) # Ensure at least 1 head
261
-
262
  self.layers = nn.ModuleList([
263
  BucketMemoryTransformerLayer(
264
  d_model=config.d_model,
@@ -287,10 +291,11 @@ class BucketMemoryModel(PreTrainedModel):
287
  def forward(self, input_ids, attention_mask=None, labels=None):
288
  batch_size, seq_len = input_ids.size()
289
  x = self.token_embedding(input_ids) * np.sqrt(self.d_model)
 
 
290
  x = x + self.pos_encoding[:, :seq_len]
291
  x = self.dropout(x)
292
 
293
- # Process through transformer layers
294
  for layer in self.layers:
295
  x = layer(x, attention_mask)
296
 
@@ -303,6 +308,22 @@ class BucketMemoryModel(PreTrainedModel):
303
  return type('ModelOutput', (), {'loss': loss, 'logits': logits})
304
  return logits
305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  AutoConfig.register("bucket-memory-model3", BucketMemoryConfig)
307
  AutoModel.register(BucketMemoryConfig, BucketMemoryModel)
308
  BucketMemoryConfig.register_for_auto_class()
 
243
  return x
244
 
245
 
 
246
  # Updated model with HuggingFace compatibility
247
  class BucketMemoryModel(PreTrainedModel):
248
+ config_class = BucketMemoryConfig
249
  base_model_prefix = "bucket-memory-model2"
250
  def __init__(self, config, adapter_kwargs=None):
251
  super().__init__(config)
252
  self.d_model = config.d_model
253
  self.token_embedding = nn.Embedding(config.vocab_size, config.d_model)
254
+
255
+ # TAPE-style dynamic position encoding
256
+ self.tape_position_encoder = nn.Sequential(
257
+ nn.Linear(config.d_model, config.d_model),
258
+ nn.ReLU(),
259
+ nn.Linear(config.d_model, config.d_model)
260
+ )
261
+
262
  self.pos_encoding = nn.Parameter(torch.zeros(1, config.max_seq_length, config.d_model))
263
  self._init_positional_encoding(config.max_seq_length, config.d_model)
264
 
265
+ num_heads = max(1, getattr(config, 'num_attention_heads', config.d_model // 64))
 
 
 
266
  self.layers = nn.ModuleList([
267
  BucketMemoryTransformerLayer(
268
  d_model=config.d_model,
 
291
  def forward(self, input_ids, attention_mask=None, labels=None):
292
  batch_size, seq_len = input_ids.size()
293
  x = self.token_embedding(input_ids) * np.sqrt(self.d_model)
294
+ tape_pos = self.tape_position_encoder(x)
295
+ x = x + tape_pos
296
  x = x + self.pos_encoding[:, :seq_len]
297
  x = self.dropout(x)
298
 
 
299
  for layer in self.layers:
300
  x = layer(x, attention_mask)
301
 
 
308
  return type('ModelOutput', (), {'loss': loss, 'logits': logits})
309
  return logits
310
 
311
def generate(self, input_ids, max_length=50):
    """Greedy autoregressive decoding.

    Repeatedly runs ``self.forward`` on the tokens generated so far and
    appends the argmax token, stopping early when every sequence in the
    batch emits the EOS token (if the config defines one).

    Args:
        input_ids: ``(batch, seq)`` LongTensor prompt.
        max_length: maximum number of NEW tokens to append — NOTE(review):
            this is a budget of generated tokens, not a total-length cap
            as in HuggingFace's ``generate``; kept for backward compat.

    Returns:
        ``(batch, seq + k)`` LongTensor with ``k <= max_length`` appended
        tokens (prompt included).
    """
    generated = input_ids
    # Tolerate configs without an eos_token_id instead of raising
    # AttributeError mid-generation (the original accessed it directly).
    eos_id = getattr(self.config, 'eos_token_id', None)
    # Inference only — no autograd graph needed for greedy decoding.
    with torch.no_grad():
        for _ in range(max_length):
            out = self.forward(generated)
            # forward may return a raw logits tensor or a ModelOutput-style
            # object carrying a .logits attribute; accept both.
            logits = out.logits if hasattr(out, 'logits') else out
            next_token_logits = logits[:, -1, :]
            next_ids = torch.argmax(next_token_logits, dim=-1, keepdim=True)
            generated = torch.cat((generated, next_ids), dim=1)
            # Original used next_token_id.item(), which raises for
            # batch > 1; stop only when ALL sequences produced EOS.
            if eos_id is not None and bool((next_ids == eos_id).all()):
                break
    return generated
327
  AutoConfig.register("bucket-memory-model3", BucketMemoryConfig)
328
  AutoModel.register(BucketMemoryConfig, BucketMemoryModel)
329
  BucketMemoryConfig.register_for_auto_class()
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc67a1cb0105fc1bcabf585d7b9d60aed7c787486008f53d73e90a12e5a71762
3
- size 410490444
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eac09ba151cbcef373c847cabad46af2a3cde38d98403438e949c4b88f8ae061
3
+ size 412592100