WCNegentropy committed on
Commit
6985b50
·
verified ·
1 Parent(s): a8a2807

Remove nested directory: BitTransformerLM/build_full_bits.py

Browse files
Files changed (1) hide show
  1. BitTransformerLM/build_full_bits.py +0 -23
BitTransformerLM/build_full_bits.py DELETED
@@ -1,23 +0,0 @@
1
- import pathlib
2
- import torch
3
- from datasets import load_dataset
4
-
5
- TXT_MB = 100
6
- OUT = pathlib.Path('full_bits.pt')
7
-
8
-
9
- def build_bits(out: pathlib.Path = OUT, txt_mb: int = TXT_MB) -> None:
10
- ds = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train')
11
- buf = bytearray()
12
- for line in ds['text']:
13
- buf.extend(line.encode() + b"\n")
14
- if len(buf) >= txt_mb * 2 ** 20:
15
- break
16
- bits = []
17
- for byte in buf:
18
- bits.extend(int(b) for b in f'{byte:08b}')
19
- tensor = torch.tensor(bits, dtype=torch.uint8)
20
- torch.save(tensor, out)
21
-
22
- if __name__ == '__main__':
23
- build_bits()