vam
committed on
Upload preprocess.py
Browse files- preprocess.py +22 -0
preprocess.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
# Read the annotation JSON. The loop below treats every column of the frame as
# one entry and looks up that entry's "filename" and "file_attributes" rows.
df = pd.read_json("/kaggle/input/50memejson/data_set_50.json")

# Containers filled by the loop (only entries with non-empty attributes):
meme_attribute = {}  # short name -> label text with commas removed
meme_filename = []   # original filename, in column order
meme_list = []       # short name, in the same order as meme_filename

for entry in df.columns:
    raw_filename = df.loc["filename", entry]
    # Short name: the filename with every ".jpg" and "High-Quality-" substring
    # removed. Computed before the skip check, as in the original ordering.
    short_name = raw_filename.replace(".jpg", "").replace("High-Quality-", "")

    attributes = df.loc["file_attributes", entry]
    if attributes == {}:
        # Nothing to record for an entry whose attributes are an empty dict.
        continue

    # Trim surrounding whitespace, then surrounding double quotes, then drop
    # all commas from the label text.
    label = attributes['image_label'].strip().strip('"').replace(',', '')

    meme_attribute[short_name] = label
    meme_filename.append(raw_filename)
    meme_list.append(short_name)
|