Spaces:
Sleeping
Sleeping
File size: 896 Bytes
11e7313 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
from datasets import load_dataset
import datasets
from huggingface_hub import hf_hub_download
from PIL import Image
import torch
import requests
import os
class Preprocessing():
    """Load the COCO-Caption dataset and stream (image, caption) pairs.

    The class holds no instance state; the methods are simply grouped here.
    """

    def __init__(self):
        pass

    def load_dataset(self, split, cache_dir="D:/Java Projects/Findr/Server/datasets"):
        """Load one split of ``lmms-lab/COCO-Caption`` and drop incomplete rows.

        Args:
            split: Split name forwarded to ``datasets.load_dataset``
                (the script entry point uses ``'val'``).
            cache_dir: Directory forwarded to ``datasets.load_dataset`` as its
                cache location. Defaults to the path this project originally
                hard-coded, so existing callers are unaffected; pass another
                directory to run on a different machine.

        Returns:
            The loaded dataset restricted to rows that have an image, a
            question id and at least one answer (caption).
        """
        # NOTE(review): huggingface_hub appears to read this variable when it
        # is first imported, which has already happened by the time this
        # method runs -- confirm the setting takes effect here, and if not,
        # move it above the imports at the top of the file.
        os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "10500"

        dataset = load_dataset(
            "lmms-lab/COCO-Caption",
            split=split,
            cache_dir=cache_dir,
        )

        def has_image_id_and_caption(row):
            # A usable row needs an image, a question id and >= 1 answer.
            return (
                row['image'] is not None
                and row['question_id'] is not None
                and len(row['answer']) > 0
            )

        return dataset.filter(has_image_id_and_caption)

    def image_caption_pairs(self, ds):
        """Yield an ``(image, caption)`` tuple for every usable row of ``ds``.

        Each row must provide an ``'image'`` entry exposing ``convert`` (a
        Pillow image in practice) and an ``'answer'`` entry holding a sequence
        of caption strings. The image is converted to RGB and one caption is
        picked at random and stripped of surrounding whitespace.

        Rows whose image is ``None`` or whose answer list is missing/empty are
        skipped instead of aborting the whole stream; this mirrors the filter
        applied in ``load_dataset`` so the generator is also safe on datasets
        that were not loaded through it.

        Args:
            ds: Iterable of row mappings as described above.

        Yields:
            ``(img, cap)`` where ``img`` is the RGB-converted image and
            ``cap`` is the chosen, stripped caption.
        """
        import random

        for data in ds:
            image = data['image']
            captions = data['answer']
            # random.choice raises IndexError on an empty sequence and a None
            # image has no .convert(); skip such rows rather than crash.
            if image is None or captions is None or len(captions) == 0:
                continue
            img: Image.Image = image.convert('RGB')
            cap = random.choice(captions).strip()
            # Per-sample trace output retained from the original behaviour.
            print(img, cap)
            yield img, cap
if __name__ == "__main__":
    # Script entry point: load the filtered 'val' split of the dataset.
    # The returned dataset is not used here; the call is made only for its
    # loading side effects.
    Preprocessing().load_dataset('val')