File size: 504 Bytes
379b35c
 
 
 
 
dcc8a5e
379b35c
 
 
dcc8a5e
379b35c
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import pandas as pd
import json
import os


# Build a JSON manifest from the image/text CSV: one record per CSV row with
# a fixed "<image>" query, the row's text as the response, and the full path
# to the row's image file.
dataset = pd.read_csv('data_80k/data.csv')
labels = dataset['image_file']
text = dataset['text']
images_path = '/kaggle/input/hindi-english-images/data_80k/output_images/'

# Pair the two columns positionally instead of looking rows up by index label
# (`text[i]`): this stays correct if the frame is ever filtered or re-indexed,
# and iterating a Series yields native Python scalars that json can serialise.
# NOTE(review): an empty CSV cell is read as NaN and would be written as the
# non-standard `NaN` token -- confirm the CSV has no missing values.
json_data = [
    {
        "query": "<image>",
        "response": caption,
        "images": [os.path.join(images_path, image_file)],
    }
    for image_file, caption in zip(labels, text)
]

# json.dump escapes non-ASCII characters by default, so the file content is
# plain ASCII; the explicit encoding just removes any dependence on the
# platform's default text encoding.
with open('dataset.json', 'w', encoding='utf-8') as f:
    json.dump(json_data, f)