Update README.md
Browse files
README.md
CHANGED
|
@@ -41,14 +41,22 @@ Loading the Model and Tokenizer
|
|
| 41 |
```python
|
| 42 |
import torch
|
| 43 |
from transformers import GPT2TokenizerFast
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
from huggingface_hub import snapshot_download
|
| 45 |
|
| 46 |
# Download the model from Hugging Face Hub
|
| 47 |
model_path = snapshot_download(repo_id="liminerity/tiny-epstein-100m")
|
| 48 |
|
| 49 |
-
|
| 50 |
-
tokenizer =
|
| 51 |
-
|
| 52 |
# ------------------------------------------------------------------------------
|
| 53 |
# Configuration (scaled to ~150M for L4 GPU)
|
| 54 |
# ------------------------------------------------------------------------------
|
|
@@ -263,27 +271,23 @@ class TinyAya(nn.Module):
|
|
| 263 |
next_token = torch.multinomial(probs, num_samples=1)
|
| 264 |
input_ids = torch.cat([input_ids, next_token], dim=-1)
|
| 265 |
return input_ids
|
| 266 |
-
|
| 267 |
-
tokenizer.pad_token = tokenizer.eos_token
|
| 268 |
model = TinyAya(ModelConfig())
|
| 269 |
state_dict = torch.load(os.path.join(model_path, "pytorch_model.bin"), map_location="cpu")
|
| 270 |
model.load_state_dict(state_dict)
|
| 271 |
model.eval()
|
|
|
|
| 272 |
```
|
| 273 |
|
| 274 |
Text Generation Example
|
| 275 |
|
| 276 |
```python
|
| 277 |
-
prompt = "
|
| 278 |
-
|
| 279 |
with torch.no_grad():
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
temperature=0.8,
|
| 284 |
-
do_sample=True
|
| 285 |
-
)
|
| 286 |
-
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
| 287 |
```
|
| 288 |
|
| 289 |
Training Details
|
|
|
|
| 41 |
```python
|
| 42 |
import torch
|
| 43 |
from transformers import GPT2TokenizerFast
|
| 44 |
+
import os
|
| 45 |
+
import torch.nn as nn
|
| 46 |
+
import torch.nn.functional as F
|
| 47 |
+
from torch.utils.data import DataLoader
|
| 48 |
+
from transformers import AutoTokenizer
|
| 49 |
+
from datasets import load_dataset, concatenate_datasets, Dataset
|
| 50 |
+
from tqdm import tqdm
|
| 51 |
+
import math
|
| 52 |
+
from huggingface_hub import hf_hub_download
|
| 53 |
from huggingface_hub import snapshot_download
|
| 54 |
|
| 55 |
# Download the model from Hugging Face Hub
|
| 56 |
model_path = snapshot_download(repo_id="liminerity/tiny-epstein-100m")
|
| 57 |
|
| 58 |
+
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
|
| 59 |
+
tokenizer.pad_token = tokenizer.eos_token
|
|
|
|
| 60 |
# ------------------------------------------------------------------------------
|
| 61 |
# Configuration (scaled to ~150M for L4 GPU)
|
| 62 |
# ------------------------------------------------------------------------------
|
|
|
|
| 271 |
next_token = torch.multinomial(probs, num_samples=1)
|
| 272 |
input_ids = torch.cat([input_ids, next_token], dim=-1)
|
| 273 |
return input_ids
|
| 274 |
+
|
|
|
|
| 275 |
model = TinyAya(ModelConfig())
|
| 276 |
state_dict = torch.load(os.path.join(model_path, "pytorch_model.bin"), map_location="cpu")
|
| 277 |
model.load_state_dict(state_dict)
|
| 278 |
model.eval()
|
| 279 |
+
# Pick the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The weights were loaded with map_location="cpu"; move the model to the same
# device the prompt tensors are sent to, so generation does not mix devices.
model.to(device)
|
| 280 |
```
|
| 281 |
|
| 282 |
Text Generation Example
|
| 283 |
|
| 284 |
```python
|
| 285 |
+
prompt = """Was Jeffrey a good guy?"""
|
| 286 |
+
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
|
| 287 |
with torch.no_grad():
|
| 288 |
+
output = model.generate(input_ids, max_new_tokens=50, temperature=0.8)
|
| 289 |
+
print("Generated text:")
|
| 290 |
+
print(tokenizer.decode(output[0]))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
```
|
| 292 |
|
| 293 |
Training Details
|