Spaces:
Sleeping
Sleeping
| from datasets import load_dataset | |
| import datasets | |
| from huggingface_hub import hf_hub_download | |
| from PIL import Image | |
| import torch | |
| import requests | |
| import os | |
class Preprocessing():
    """Load the COCO-Caption dataset and stream (image, caption) pairs from it.

    The class keeps no instance state; the methods are grouped here only for
    convenience.
    """

    # Hub repository handed to ``datasets.load_dataset``.
    DATASET_NAME = "lmms-lab/COCO-Caption"
    # Cache directory used when the caller does not pass one explicitly.
    DEFAULT_CACHE_DIR = "D:/Java Projects/Findr/Server/datasets"
    # Value written to the HF_HUB_DOWNLOAD_TIMEOUT environment variable.
    DOWNLOAD_TIMEOUT = "10500"

    def __init__(self):
        pass

    @staticmethod
    def _is_usable(row):
        """Return True when *row* has an image, a question id and a non-empty answer."""
        answer = row["answer"]
        return (
            row["image"] is not None
            and row["question_id"] is not None
            # Guard against None before len(); a missing answer list means
            # "drop the row", not "crash the whole filter pass".
            and answer is not None
            and len(answer) > 0
        )

    def load_dataset(self, split, cache_dir=DEFAULT_CACHE_DIR):
        """Load one split of the dataset and drop rows that cannot be used.

        Args:
            split: Split name forwarded to ``datasets.load_dataset``.
            cache_dir: Directory forwarded as ``cache_dir``. Defaults to
                ``DEFAULT_CACHE_DIR``; pass another path to run on a machine
                where that location does not exist.

        Returns:
            The loaded split with every row rejected by ``_is_usable`` removed.
        """
        # NOTE(review): huggingface_hub appears to read this variable when it
        # is imported, so setting it here (after the module-level import) may
        # have no effect -- confirm, and export it before start-up if needed.
        os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = self.DOWNLOAD_TIMEOUT
        # Inside a method the bare name resolves to the module-level
        # ``datasets.load_dataset`` import, not to this method.
        dataset = load_dataset(self.DATASET_NAME, split=split, cache_dir=cache_dir)
        return dataset.filter(self._is_usable)

    def image_caption_pairs(self, ds, *, verbose=True):
        """Yield ``(rgb_image, caption)`` for each usable row of *ds*.

        One caption is picked at random per row and stripped of surrounding
        whitespace. Rows whose ``image`` is None or whose ``answer`` is None or
        empty are skipped, mirroring the filter applied in ``load_dataset``, so
        an unfiltered dataset can be passed in as well.

        Args:
            ds: Iterable of rows exposing ``'image'`` and ``'answer'`` keys
                (presumably a PIL image and a list of caption strings --
                verify against the dataset card).
            verbose: When True (the default) each pair is printed before it
                is yielded.

        Yields:
            Tuples of the image converted to RGB and the chosen caption.
        """
        import random

        for data in ds:
            image, captions = data['image'], data['answer']
            if image is None or captions is None or len(captions) == 0:
                continue
            img: Image.Image = image.convert('RGB')
            cap = random.choice(captions).strip()
            if verbose:
                print(img, cap)
            yield img, cap
if __name__ == "__main__":
    # Script entry point: load and filter the 'val' split. The returned
    # dataset is not used any further here.
    Preprocessing().load_dataset("val")