sds
Browse files
model.py
CHANGED
|
@@ -107,16 +107,55 @@ class Block(nn.Module):
|
|
| 107 |
x = x + self.mlp(self.ln_2(x))
|
| 108 |
return x
|
| 109 |
|
|
|
|
| 110 |
@dataclass
|
| 111 |
class GPTConfig(PretrainedConfig):
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
class GPT(PreTrainedModel):
|
| 121 |
|
| 122 |
def __init__(self, config):
|
|
|
|
| 107 |
x = x + self.mlp(self.ln_2(x))
|
| 108 |
return x
|
| 109 |
|
| 110 |
+
|
| 111 |
class GPTConfig(PretrainedConfig):
    """Configuration class for the GPT model.

    Combines transformers-style settings (``hidden_size``,
    ``num_attention_heads``, ...) with nanoGPT-style settings
    (``n_layer``, ``n_head``, ``n_embd``, ...) and a few training
    hyper-parameters (``batch_size``, ``max_iters``, ...).

    NOTE(review): the original code decorated this class with
    ``@dataclass`` even though it has no annotated class-level fields and
    defines ``__init__`` by hand. ``dataclass`` would still inject
    ``__repr__``/``__eq__``, shadowing the ones inherited from
    ``PretrainedConfig`` that transformers uses to render/compare
    configs. The decorator is removed here as a vestige of the original
    nanoGPT dataclass-based config.
    """

    def __init__(self,
                 model_type="GPT",
                 auto_map=None,
                 hidden_size=4,
                 num_attention_heads=2,
                 num_hidden_layers=2,
                 hidden_dropout_prob=0.1,
                 batch_size=60,
                 max_iters=200,
                 eval_interval=100,
                 learning_rate=0.001,
                 device="cpu",
                 block_size: int = 1024,
                 vocab_size: int = 50304,  # GPT-2 vocab_size of 50257, padded up to nearest multiple of 64 for efficiency
                 n_layer: int = 12,
                 n_head: int = 12,
                 n_embd: int = 768,
                 dropout: float = 0.0,
                 bias: bool = True,  # True: bias in Linears and LayerNorms, like GPT-2. False: a bit better and faster
                 **kwargs) -> None:
        """Build the config.

        Args:
            model_type: Model identifier string stored on the instance.
            auto_map: Mapping from transformers Auto* class names to the
                local implementation classes. Defaults to the standard
                ``model.GPTConfig`` / ``model.GPT`` mapping when ``None``.
            hidden_size / num_attention_heads / num_hidden_layers /
                hidden_dropout_prob: transformers-style model dimensions.
            batch_size / max_iters / eval_interval / learning_rate /
                device: training loop hyper-parameters.
            block_size: Maximum sequence length (context window).
            vocab_size: Token vocabulary size.
            n_layer / n_head / n_embd: nanoGPT-style model dimensions.
            dropout: Dropout probability.
            bias: Whether Linear/LayerNorm layers carry a bias term.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``.
        """
        super().__init__(**kwargs)
        # Fix: the original used a mutable dict literal as the default
        # value for ``auto_map``, which is shared across every instance
        # created with the default. Build a fresh dict per instance.
        if auto_map is None:
            auto_map = {
                "AutoConfig": "model.GPTConfig",
                "AutoModel": "model.GPT",
                "AutoModelForCausalLM": "model.GPT",
                "AutoModelForQuestionAnswering": "model.GPT",
            }
        self.model_type = model_type
        self.auto_map = auto_map
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.hidden_dropout_prob = hidden_dropout_prob
        self.block_size = block_size
        self.batch_size = batch_size
        self.max_iters = max_iters
        self.eval_interval = eval_interval
        self.learning_rate = learning_rate
        self.device = device
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_embd = n_embd
        self.dropout = dropout
        self.bias = bias
|
| 158 |
+
|
| 159 |
class GPT(PreTrainedModel):
|
| 160 |
|
| 161 |
def __init__(self, config):
|