# PoetryGPT2 - a GPT-2 LoRA fine-tune on 28k poems
PoetryGPT2 is a fine-tuned version of GPT-2 focused on poetry.
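For quick, non-interactive use, a minimal loading-and-sampling sketch (the hub id is taken from the inference script below, and the sampling values mirror that script's defaults):

```python
# Minimal sketch: load the merged model from the Hub and sample one poem.
# Sampling values mirror the defaults of the inference script further down.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Harley-ml/gpt2-poetry")
model = AutoModelForCausalLM.from_pretrained("Harley-ml/gpt2-poetry")
model.eval()

inputs = tokenizer("The Sunshine", return_tensors="pt")
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        top_k=40,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,  # standard GPT-2 fallback if no pad token
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```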
## Dataset
The dataset contains ~28,000 poems, totaling ~10.4M tokens and ~37 MB of raw (un-tokenized) text:
| Key | Value |
|---|---|
| Poems | 28,082 |
| Size (in bytes) | 37,060,055 |
| Tokens | 10,431,999 |
| Words | 6,383,094 |
| Characters | 35,728,573 |
| Unique Words | 147,090 |
| Avg. Tokens Per Poem | 215 |
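The counts in the table could be reproduced with a short script along these lines (a sketch, assuming the poems are available as a list of strings; the GPT-2 tokenizer is implied by the base model):

```python
# Sketch: recompute the dataset statistics above.
# ASSUMPTION: `poems` is a list of raw poem strings; exact byte counts depend
# on how the poems are joined on disk.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")

def dataset_stats(poems):
    text = "\n".join(poems)
    words = text.split()
    token_counts = [len(tokenizer(p)["input_ids"]) for p in poems]
    return {
        "Poems": len(poems),
        "Size (in bytes)": len(text.encode("utf-8")),
        "Tokens": sum(token_counts),
        "Words": len(words),
        "Characters": len(text),
        "Unique Words": len(set(words)),
        "Avg. Tokens Per Poem": sum(token_counts) // len(poems),
    }
```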
## Training Setup
PoetryGPT2 was trained for 4 epochs using LoRA fine-tuning (the adapter was later merged into the base model) with a batch size of 6 and 3 gradient accumulation steps, for an effective batch size of 18. The validation split was 5% of the data (~1 MB, 200-400K tokens). Training sequences were capped at 512 tokens, so outputs longer than 256-512 tokens may become incoherent, lose context, and derail.
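A minimal sketch of that setup using the `peft` and `transformers` APIs. The LoRA rank, alpha, dropout, and target modules below are assumptions, not the values actually used; only the epoch count, batch size, accumulation steps, and 512-token cap come from this card:

```python
# Sketch of the training setup described above.
# ASSUMPTIONS: r, lora_alpha, lora_dropout, and target_modules are illustrative;
# epochs / batch size / accumulation / sequence length are from the card.
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, TrainingArguments

base = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
lora_config = LoraConfig(
    r=16,                       # assumed rank
    lora_alpha=32,              # assumed scaling
    lora_dropout=0.05,          # assumed dropout
    target_modules=["c_attn"],  # GPT-2's fused attention projection (assumed target)
    task_type="CAUSAL_LM",
)
model = get_peft_model(base, lora_config)

args = TrainingArguments(
    output_dir="poetry-gpt2-lora",
    num_train_epochs=4,             # from the card
    per_device_train_batch_size=6,  # from the card
    gradient_accumulation_steps=3,  # from the card -> effective batch size 18
    fp16=True,                      # half precision fits the T4 used for training
)
# ...train with transformers.Trainer on 512-token chunks of poems,
# then model.merge_and_unload() to fold the adapter into the base weights.
```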
## Hardware
PoetryGPT2 was trained on Google Colaboratory with a single NVIDIA Tesla T4 GPU (15 GB of VRAM) and 12.7 GB of system RAM.
## Training Results
| step | train_loss | val_loss | train_ppl | val_ppl |
|---|---|---|---|---|
| 380 | 3.860600 | 3.673387 | ~47.5 | ~39.4 |
| 1520 | 3.729200 | 3.619360 | ~41.6 | ~37.3 |
| 3800 | 3.652100 | 3.585514 | ~38.6 | ~36.0 |
| 6080 | 3.607800 | 3.570159 | ~36.9 | ~35.5 |
| 8360 | 3.592500 | 3.564851 | ~36.3 | ~35.3 |
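The perplexity columns follow directly from the loss: PPL = exp(cross-entropy loss). For example, for the final checkpoint:

```python
import math

# Step 8360: perplexity is the exponential of the cross-entropy loss.
print(math.exp(3.592500))  # ~36.3 (train)
print(math.exp(3.564851))  # ~35.3 (val)
```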
## Generation Examples
**Prompt:** The Sunshine

**Output:**
By D. G. Campbell
It's hard to tell how it got here.
I'm thinking of the city, I think about its streets and its hills; all those things: mountains on a cliff.
But I never imagined it—at least not in my mind.
(For example at the end there was no sun.) How did this come about?
‘Well we're just doing what we can do to help out the poor and the sick,
and that is to make money for the government,’ “just by helping us get ahead.”
'We have to be better than everybody else.'
Then again, why would they want to see him alive? Why are they so scared?
And then I remember the way he talked to me when I left the house—
and one day after that I started running into him.
(He said he couldn't believe I had run away. I thought he'd been mad. Then he found his phone.) . . .
In the morning he came home and told me how I should stay with him,
and later took off his clothes, and went back to sleep.
**Prompt:** Lonely

**Output:**
By JohnBishopWatsonChamberschwyd
Nay, so it seemed,
But now the world, with its strange sights and sounds of wind And moon-beams,
Dances on the distant sky, as if in a dream.
## Limitations
- The model produces made-up author names.
- It loses context and derails after ~256 tokens.
- Output formatting sometimes contains odd artifacts.
- It can occasionally generate empty output or runs of repeated spaces; these can be papered over in post-processing (see the sketch below).
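A minimal post-processing sketch for the formatting issues above (a hypothetical helper, not part of the model or training code):

```python
import re

def clean_generation(text: str) -> str:
    """Hypothetical cleanup for the artifacts noted above: collapse runs of
    spaces/tabs, trim trailing whitespace, and cap consecutive blank lines."""
    text = re.sub(r"[ \t]{2,}", " ", text)          # collapse repeated spaces/tabs
    text = "\n".join(line.rstrip() for line in text.splitlines())
    text = re.sub(r"\n{3,}", "\n\n", text).strip()  # at most one blank line in a row
    return text
```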
## Inference Script
```python
# inference_repl.py
# REPL: enter a prompt and get streamed generation (no chat history preserved).
import sys
import threading
from pathlib import Path

import torch
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
)

# CONFIG -- change these constants before running the script
MODEL_NAME = "Harley-ml/gpt2-poetry"
AUTHOR = "PoetryGPT2"
MAX_NEW_TOKENS = 256
TEMPERATURE = 0.7
TOP_P = 0.95
TOP_K = 40
REPETITION_PENALTY = 1.1
DO_SAMPLE = True
# load tokenizer (local folder if present, otherwise the hub)
def load_tokenizer():
    model_path = Path(MODEL_NAME)
    if model_path.exists() and (
        (model_path / "tokenizer.json").exists()
        or (model_path / "tokenizer_config.json").exists()
    ):
        print(f"Loading tokenizer from local folder: {model_path}")
        tk = AutoTokenizer.from_pretrained(str(model_path), use_fast=True)
    else:
        print(f"Loading tokenizer from model hub: {MODEL_NAME}")
        tk = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
    if tk.pad_token_id is None:
        # ensure a pad token exists (the training script added <|pad|> if needed)
        tk.add_special_tokens({"pad_token": "<|pad|>"})
    return tk
# load model
def load_model(tokenizer):
    # use half precision only when a CUDA device is available
    use_cuda = torch.cuda.is_available()
    dtype = torch.float16 if use_cuda else torch.float32
    print("Loading model (with adjusted config.vocab_size)...")
    config = AutoConfig.from_pretrained(MODEL_NAME)
    config.vocab_size = len(tokenizer)  # force the model to expect the tokenizer's vocab size
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        config=config,
        device_map="auto",
        torch_dtype=dtype,
        trust_remote_code=True,
    )
    # resize embeddings in case the tokenizer added tokens (e.g. <|pad|>)
    model.resize_token_embeddings(len(tokenizer))
    model.eval()
    return model

# Prompt wrapper: identity for now, but a single place to add templating later
def make_input_text(user_text: str) -> str:
    return f"{user_text}"
def stream_generate(model, tokenizer, prompt_text: str, gen_kwargs):
    """
    Stream generation to stdout using TextIteratorStreamer.
    Blocks until generation completes.
    """
    # tokenize
    input_text = make_input_text(prompt_text)
    inputs = tokenizer(input_text, return_tensors="pt")
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)
    # the streamer yields decoded text chunks as they are generated
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # run generate in a background thread (generate is blocking)
    def _gen():
        with torch.no_grad():
            try:
                model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    streamer=streamer,
                    **gen_kwargs,
                )
            except Exception as e:
                # make sure the iterator terminates, then report the failure
                streamer.end()
                print(f"\nError In Generation: {e}", file=sys.stderr)

    gen_thread = threading.Thread(target=_gen)
    gen_thread.start()

    # read decoded chunks as they arrive and print them (flush so the user sees streaming)
    out_text = ""
    try:
        for new_text in streamer:
            print(new_text, end="", flush=True)
            out_text += new_text
    except GeneratorExit:
        pass
    gen_thread.join()
    print()  # newline after generation finishes
    return out_text
def repl_loop(model, tokenizer):
    print("\n--- Inference REPL (no chat history). Type Ctrl-C or '/exit' to quit. ---")
    print(
        "Quick commands:\n"
        "  /exit          -> quit\n"
        "  /params        -> show generation params\n"
        "  /set key=value -> set a param (max_new_tokens, temperature, top_p, "
        "top_k, repetition_penalty, do_sample)\n"
    )
    # mutable generation params, seeded from the CONFIG constants
    gen_params = {
        "max_new_tokens": MAX_NEW_TOKENS,
        "temperature": TEMPERATURE,
        "top_p": TOP_P,
        "top_k": TOP_K,
        "repetition_penalty": REPETITION_PENALTY,
        "do_sample": DO_SAMPLE,
        "pad_token_id": tokenizer.pad_token_id,
        # ensure eos_token_id exists; fall back to sep_token_id if unset
        "eos_token_id": (tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.sep_token_id),
    }
    while True:
        try:
            user = input("\n>>> ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nExiting.")
            break
        if not user:
            continue
        if user.lower() in ["/exit", "exit", "quit"]:
            print("Bye.")
            break
        if user.lower() == "/params":
            print("Current generation params:")
            for k, v in gen_params.items():
                print(f"  {k}: {v}")
            continue
        if user.startswith("/set "):
            # simple parser: /set key=value
            try:
                body = user[len("/set "):].strip()
                k, v = body.split("=", 1)
                k = k.strip()
                v = v.strip()
                if k not in gen_params:
                    print(f"Unknown param: {k}")
                    continue
                # cast to the type of the current value (bool before int: bool is an int subclass)
                if isinstance(gen_params[k], bool):
                    val = v.lower() in ("1", "true", "yes", "y")
                elif isinstance(gen_params[k], int):
                    val = int(v)
                elif isinstance(gen_params[k], float):
                    val = float(v)
                else:
                    val = v
                gen_params[k] = val
                print(f"Set {k} = {val}")
            except Exception as e:
                print(f"Failed to parse set command: {e}")
            continue
        # normal prompt: stream generation
        print("[streaming output below]\n")
        try:
            # copy gen_params so generate() receives a plain kwargs dict
            kwargs = dict(gen_params)
            # coerce types defensively in case /set left odd values behind
            kwargs["do_sample"] = bool(kwargs.get("do_sample", False))
            kwargs["max_new_tokens"] = int(kwargs.get("max_new_tokens", 256))
            _ = stream_generate(model, tokenizer, user, gen_kwargs=kwargs)
        except Exception as e:
            print(f"[ERROR] generation failed: {e}", file=sys.stderr)
def main():
    tokenizer = load_tokenizer()
    model = load_model(tokenizer)
    try:
        repl_loop(model, tokenizer)
    finally:
        # attempt clean-up: free the model and any cached CUDA memory
        try:
            del model
            torch.cuda.empty_cache()
        except Exception:
            pass

if __name__ == "__main__":
    main()
```
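To try it, save the script as `inference_repl.py` and run `python inference_repl.py`. At the `>>>` prompt, enter a poem title or opening line; `/params` shows the current sampling settings, and a command such as `/set temperature=0.9` adjusts one on the fly.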
## Model tree for Harley-ml/gpt2-poetry

Base model: openai-community/gpt2