# MiniGPT / train_custom.py
# Hugging Face Hub upload by CreatedNull -- commit 4de3b20 (verified):
# "Upload folder using huggingface_hub"
import torch
from dataset import MiniBPETokenizr, ChatDataset, train,SimpleTokenizr
from model import MiniGPT
import json
# Training entry script: fit a tokenizer on the chat corpus, build the
# dataset and a fresh MiniGPT model, then run the project's training loop.
# Everything runs at module level, so importing this file starts training.

# Corpus used both to fit the tokenizer and to build the ChatDataset.
DATA_PATH = "./customchatbot-v1/data/filtered_data.jsonl"

# Load and prepare
# Each non-blank line is parsed as a JSON object and its "text" field is
# collected; a line without a "text" key raises KeyError.
with open(DATA_PATH, "r", encoding="utf-8") as f:
    texts = [json.loads(line)["text"] for line in f if line.strip()]

tokenizer = SimpleTokenizr()
tokenizer.train(texts)

# Checkpoint location; only used by the optional resume line below.
ch_path = "./customchatbot-v1/trained-mini-gpt/checkpoint-mini-gpt.pth"

dataset = ChatDataset(DATA_PATH, tokenizer)

# The model's vocabulary size is taken from the fitted tokenizer.
model = MiniGPT(vocab_size=len(tokenizer))
# NOTE(review): reset_params() is project-defined; presumably it
# re-initialises the weights -- confirm in model.py.
model.reset_params()
# To resume from the checkpoint instead of training from scratch, uncomment:
#model.load_state_dict(torch.load(ch_path))

# Train
# NOTE(review): "filepathh" is kept exactly as written; it is presumably the
# keyword name declared by dataset.train -- confirm before renaming. Also note
# that this passes merged_data.jsonl while the tokenizer and dataset above
# were built from filtered_data.jsonl -- verify that this is intentional.
train(model, dataset, tokenizer, epochs=3, filepathh="./customchatbot-v1/data/merged_data.jsonl")