# EmoVitt / emo / all.py
# manhteky123's picture
# Upload 23 files
# 7b34c4b verified
import json
import os
import re
import shutil
import random
# Every section of this script appends its training records to this list.
out = []

# reasoning
# Files are read from ./emo/reasoning/<class_name>/<file>. Each file is split
# into "Question: ... Answer: ..." pairs and every pair becomes one record.
folder_path_reasoning = './emo/reasoning/'
filelist_reasoning = os.listdir(folder_path_reasoning)

# Case-insensitive. An answer stops right before the next "Question:" or
# "Answer:" marker, or at the end of the text. Only groups 1 and 2 are used.
pattern = r"(?i)Question\s*:(.*?)\s*Answer\s*:(.*?)(?=\s*(Question\s*:|Answer\s*:|$))"
qa_regex = re.compile(pattern, re.DOTALL)

for class_name in filelist_reasoning:
    path = os.path.join(folder_path_reasoning, class_name)
    for name in os.listdir(path):
        with open(folder_path_reasoning + class_name + '/' + name, 'r', encoding='utf-8') as file:
            text = file.read()

        for found in qa_regex.finditer(text):
            # One human turn followed by one gpt turn per record.
            turns = [
                {"from": "human", "value": found.group(1).strip()},
                {"from": "gpt", "value": found.group(2).strip()},
            ]
            out.append({
                # First five characters of the second '_'-separated field.
                "id": name.split('_')[1][:5],
                # File name up to its first '.', with a .jpg suffix.
                "image": name.split('.')[0] + '.jpg',
                'conversations': turns,
            })
# conversation
# Files are read from ./emo/conversation/<class_name>/<file>. Each file is
# split into question/answer pairs, one record per pair is appended to `out`,
# and the matching image is copied into ./emo/image/train_image.
folder_path = './emo/conversation/'
train_image_dir = './emo/image/train_image'

# shutil.copy() treats a non-existent destination as a file name, so without
# this directory every copy would overwrite one file called "train_image".
os.makedirs(train_image_dir, exist_ok=True)

filelist = os.listdir(folder_path)

# Headers may be numbered ("Question 1:") and may carry a "Complex " prefix.
# The lookahead accepts the same header forms as the leading part of the
# pattern, so a numbered header ends the previous answer instead of being
# swallowed into it. Matching is case-insensitive.
pattern = (
    r"(?i)Question\s*\d*:(.*?)\s*Answer\s*\d*:(.*?)\s*"
    r"(?=(?:Complex\s+)?Question\s*\d*:|$)"
)
qa_regex = re.compile(pattern, re.DOTALL)

for class_name in filelist:
    path = os.path.join(folder_path, class_name)
    for name in os.listdir(path):
        with open(folder_path + class_name + '/' + name, 'r', encoding='utf-8') as file:
            text = file.read()

        matches = qa_regex.findall(text)
        if not matches:
            # No parsable pair: emit no record and copy no image, the same
            # way the reasoning section treats such files.
            continue

        record_id = name.split('_')[1][:5]
        image_name = name.split('.')[0] + '.jpg'
        for question, answer in matches:
            conversations = [
                {"from": "human", "value": question.strip()},
                {"from": "gpt", "value": answer.strip()},
            ]
            out.append({"id": record_id, "image": image_name, 'conversations': conversations})

        # splitext() drops the real extension whatever its length; slicing
        # off three characters only worked for three-letter extensions.
        source_image = './emo/image/' + class_name + '/' + os.path.splitext(name)[0] + '.jpg'
        shutil.copy(source_image, train_image_dir)
##### classification
# Loads ./emo/train.json, groups its entries by emotion label, and for each
# label turns entries SAMPLE_START..SAMPLE_STOP-1 into a single-turn
# "Predict emotion" record appended to `out`. Each selected image is copied
# into ./emo/image/train_image.
with open('./emo/train.json', 'r') as json_file:
    json_data = json.load(json_file)

emo = ['amusement', 'anger', 'awe', 'contentment', 'disgust', 'excitement', 'fear', 'sadness']

# Half-open index range taken from every label's list.
SAMPLE_START = 1000
SAMPLE_STOP = 5600

train_image_dir = './emo/image/train_image'
# shutil.copy() treats a non-existent destination as a file name, so without
# this directory every copy would overwrite one file called "train_image".
os.makedirs(train_image_dir, exist_ok=True)

# Runtime text sent as the human turn; the adjacent literals concatenate to
# the same string as the backslash-continued literal in the source.
# NOTE(review): the source as seen had no leading whitespace on the
# continuation lines - confirm the intended prompt contains none.
prompt = (
    "Please select the emotion closest to the image from the following options:"
    "amusement, "
    "anger, "
    "awe, "
    "contentment, "
    "disgust, "
    "excitement, "
    "fear and sadness "
    "(Do not provide answers outside of the candidates options.) "
    "Please answer in the following format: Predict emotion:"
)

# item[0] is the label; item[1] is a '/'-separated path whose third component
# is the file name. The last four characters (the extension) are dropped.
# Entries with any other label are ignored.
stems_by_emotion = {label: [] for label in emo}
for item in json_data:
    category = item[0]
    if category in stems_by_emotion:
        stems_by_emotion[category].append(item[1].split('/')[2][:-4])

all_data = [stems_by_emotion[label] for label in emo]

# Fail before copying anything if a label cannot fill the requested range;
# indexing past the end would otherwise raise midway through the copies.
for label, stems in zip(emo, all_data):
    if len(stems) < SAMPLE_STOP:
        raise IndexError(
            "label %r has %d entries, but entries %d..%d are required"
            % (label, len(stems), SAMPLE_START, SAMPLE_STOP - 1)
        )

for label, stems in zip(emo, all_data):
    for stem in stems[SAMPLE_START:SAMPLE_STOP]:
        word = [
            {
                "from": "human",
                "value": prompt
            },
            {
                "from": "gpt",
                "value": 'Predict emotion: ' + label
            }
        ]
        # The record id is the last five characters of the file stem.
        temp = {'id': stem[-5:], 'image': stem + '.jpg', 'conversations': word}
        out.append(temp)
        shutil.copy('./emo/image/' + label + '/' + stem + '.jpg', train_image_dir)
#####
# Shuffle the collected records in place, then serialise them with a
# two-space indent.
# NOTE: the destination is the same './emo/train.json' path that the
# classification step loads, so that file is replaced by the records in `out`.
random.shuffle(out)

output_path = './emo/train.json'
with open(output_path, 'w') as handle:
    json.dump(out, handle, indent=2)