flpelerin committed on
Commit
fd0332a
·
1 Parent(s): 5a8f44b

Update 3 files

Browse files

- /trainer.py
- /dataset.py
- /trainer.cli.py

Files changed (3) hide show
  1. dataset.py +15 -0
  2. trainer.cli.py +3 -0
  3. trainer.py +0 -5
dataset.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+
3
+ from init import Config
4
+
5
+
6
+ class Dataset:
7
+ def __init__(self, config: Config):
8
+ self.__dict__ = dict(config.__dict__)
9
+
10
+ self.dataset = load_dataset(self.remote_path)
11
+ self.text = ''.join(s for s in self.dataset['train']['text']).encode('ascii', 'ignore').decode('ascii')
12
+
13
+
14
+ def Batch(self, ids):
15
+ pass
trainer.cli.py CHANGED
@@ -23,5 +23,8 @@ if __name__ == '__main__':
23
 
24
  config = ConfigParser(args.config_path).config
25
 
 
 
 
26
 
27
  trainer = Trainer(config)
 
23
 
24
  config = ConfigParser(args.config_path).config
25
 
26
+ dataset = Dataset(config.dataset)
27
+
28
+ #tokenizer = Tokenizer()
29
 
30
  trainer = Trainer(config)
trainer.py CHANGED
@@ -8,12 +8,7 @@ class Trainer:
8
  def __init__(self, config: Config):
9
  self.__dict__ = dict(config.__dict__)
10
 
11
- #print(f"self.dict: {self.__dict__}")
12
- #print(f"locals: {locals()}")
13
-
14
  #self.wandb = Wandb(config.wandb)
15
 
16
- #print(f"model config: {self.model}")
17
- #print(f"config.params: {self.model.params}")
18
  self.model = Model(config.model)
19
 
 
8
  def __init__(self, config: Config):
9
  self.__dict__ = dict(config.__dict__)
10
 
 
 
 
11
  #self.wandb = Wandb(config.wandb)
12
 
 
 
13
  self.model = Model(config.model)
14