MiniGPT / datasetgen.py
CreatedNull's picture
Upload folder using huggingface_hub
4de3b20 verified
raw
history blame
467 Bytes
from datasets import load_dataset
import json
import re
from tqdm import tqdm
from filter import filterdata
# Dataset-generation script for MiniGPT: fetch the "train" split of the
# prompt corpus, walk it record by record, then pass the collected
# conversation records to filterdata().
dataset_name = "fka/awesome-chatgpt-prompts"
ds = load_dataset(dataset_name, split="train")

convo = []   # records handed to filterdata() at the end of the script
buffer = {}  # only read by the disabled append below; never filled here

print("getting data...")
progress = tqdm(ds)
for entry in progress:
    # Dump the raw record to stdout so its fields can be inspected.
    print(entry)
    # NOTE(review): nothing is appended to `convo` in this loop, so the list
    # is still empty when it reaches filterdata(). The append below is
    # disabled and reads buffer['user'] / buffer['assistant'], which this
    # script never sets -- decide how a dataset record maps onto a
    # user/assistant turn before re-enabling it.
    # convo.append({"text": f"^User: {buffer['user']}\nMiniGPT:{buffer['assistant']} <END>"})

print(f"Got {len(convo)} pairs/amount of q&a")
print("Filtering data...")
filterdata(convo)