m4vic committed on
Commit
24dbc4d
·
verified ·
1 Parent(s): aad0166

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +32 -0
README.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # MiniGPT (WikiText-103)
3
+
4
+ This is a **MiniGPT** model built from scratch in PyTorch and trained on the WikiText-103 dataset.
5
+
6
+ ## Files
7
+ - `mini_gpt_best.pt` → model checkpoint
8
+ - `config.json` → model configuration
9
+ - `vocab.json` → tokenizer vocabulary
10
+
11
+ ## Training
12
+ - Epochs: 5
13
+ - Sequence length: 128
14
+ - Train PPL: 1.18
15
+ - Validation PPL: 1.17
16
+
17
+ ## Usage
18
+ ```python
19
+ import torch
20
+ from model import MiniGPT # your model definition
21
+ import json
22
+
23
+ # load vocab
24
+ with open("vocab.json") as f:
25
+     vocab = json.load(f)
26
+ inv_vocab = {idx: word for word, idx in vocab.items()}
27
+
28
+ # load model
29
+ model = MiniGPT(**json.load(open("config.json")))
30
+ model.load_state_dict(torch.load("mini_gpt_best.pt"))
31
+ model.eval()
32
+ ```