# util.py — moral-emotion prompt/collation utilities
# (provenance: kimyeonz, "Update util.py", commit 69de832, verified)
import torch
from qwen_vl_utils import process_vision_info
# Per-category decision thresholds for Korean-language inputs.
# NOTE(review): presumably tuned on a validation split — values differ per
# category and per language; confirm provenance before editing.
KOREAN_MORAL_EMOTION_THRESHOLDS = {
    'other_condemning': 0.75,
    'other_praising': 0.22,
    'other_suffering': 0.56,
    'self_conscious': 0.26,
    'non_moral_emotion': 0.19,
    'neutral': 0.55
}
# Per-category decision thresholds for English-language inputs.
ENGLISH_MORAL_EMOTION_THRESHOLDS = {
    'other_condemning': 0.63,
    'other_praising': 0.54,
    'other_suffering': 0.52,
    'self_conscious': 0.17,
    'non_moral_emotion': 0.24,
    'neutral': 0.59
}
# Maps internal snake_case category keys to the display names used in prompts
# and as keys of the two description dicts below.
MORAL_EMOTION_MAPPING = {
    'other_condemning': 'Other-condemning',
    'other_praising': 'Other-praising',
    'other_suffering': 'Other-suffering',
    'self_conscious': 'Self-conscious',
    'non_moral_emotion': 'Non-moral emotion',
    'neutral': 'Neutral'
}
# Korean one-line definitions of each category, interpolated into the Korean
# system prompt by get_prompt(). Runtime strings — do not translate.
KOREAN_EMOTION_DESCRIPTION = {
    'Other-condemning': 'λΆ„λ…Έ, κ²½λ©Έ, 혐였 λ“±κ³Ό 같이 타인을 λΉ„λ‚œν•˜λŠ” 감정',
    'Other-praising': '감탄, 감사, 경외감 λ“±κ³Ό 같이 타인을 μΉ­μ°¬ν•˜λŠ” 감정',
    'Other-suffering': 'μ—°λ―Ό, 동정 λ“±κ³Ό 같이 타인을 κ³΅κ°ν•˜λŠ” 감정',
    'Self-conscious': 'μˆ˜μΉ˜μ‹¬, 죄책감, λ‹Ήν˜Ήκ° λ“±κ³Ό 같이 μžμ‹ μ„ λΆ€μ •μ μœΌλ‘œ ν‰κ°€ν•˜λŠ” 감정',
    'Non-moral emotion': '두렀움, 놀라움, 기쁨, λ‚™κ΄€μ£Όμ˜ λ“±κ³Ό 같이 감정이 μžˆμ§€λ§Œ λ‹€λ₯Έ 도덕감정 기쀀에 μ†ν•˜μ§€ μ•ŠλŠ” 감정',
    'Neutral': '감정이 μ—†κ±°λ‚˜ 거의 μ—†λŠ” 쀑립적인 μΉ΄ν…Œκ³ λ¦¬'
}
# English one-line definitions of each category, interpolated into the English
# system prompt by get_prompt().
ENGLISH_EMOTION_DESCRIPTION = {
    'Other-condemning': 'Emotions that condemn others, such as anger, contempt, or disgust.',
    'Other-praising': 'Emotions that praise others, such as admiration, gratitude, or awe.',
    'Other-suffering': 'Emotions of empathy for the suffering of others, such as compassion, or sympathy.',
    'Self-conscious': 'Emotions that negatively evaluate oneself, such as shame, guilt, or embarrassment.',
    'Non-moral emotion': 'Emotions that are emotional but not one of the other emotions, such as fear, surprise, joy, optimism, etc.',
    'Neutral': 'A neutral category with no or few emotions.'
}
def get_prompt(emotion, korean=True):
    """Build the (system_prompt, user_message) pair for one binary moral-emotion check.

    Args:
        emotion: internal category key (e.g. 'other_praising'); converted to its
            display name via MORAL_EMOTION_MAPPING.
        korean: if True, return the Korean prompt pair; otherwise English.

    Returns:
        (system_prompt, message_gpt): fully rendered strings. The message ends
        with a literal answer-slot example ("{λ‹΅}" / "{Answer}") showing the
        expected response format.

    Raises:
        KeyError: if `emotion` is not a key of MORAL_EMOTION_MAPPING.
    """
    emotion = MORAL_EMOTION_MAPPING[emotion]
    if korean:
        system_prompt = f"""
당신은 β€˜λ„λ•κ°μ •β€™ 뢄야에 λŒ€ν•œ μ „λ¬Έκ°€μž…λ‹ˆλ‹€. μ£Όμ–΄μ§€λŠ” ν…μŠ€νŠΈμ™€ 이미지에 λ‚˜νƒ€λ‚˜λŠ” 도덕감정이 {emotion} μΈμ§€μ˜ μ—¬λΆ€λ₯Ό λΆ„λ₯˜ν•˜μ„Έμš”. {emotion} 은 λ‹€μŒκ³Ό 같이 μ •μ˜ ν•  수 μžˆμŠ΅λ‹ˆλ‹€.
1. {emotion}: {KOREAN_EMOTION_DESCRIPTION[emotion]}
μ£Όμ–΄μ§€λŠ” 데이터가 {emotion} 의 도덕감정을 λ‚˜νƒ€λ‚΄λŠ”μ§€ True ν˜Ήμ€ False 둜 λΆ„λ₯˜ν•˜κ³ , 닡을 ν•  λ•ŒλŠ” λ‹Ήμ‹ μ˜ 좔가적인 μ„€λͺ… 없이 True ν˜Ήμ€ False 둜만 λ‹΅ν•˜μ„Έμš”.
"""
        # BUG FIX: this was a plain (non-f) string, so the literal text
        # "{emotion}" was sent to the model — format_data() concatenates the
        # message directly and never calls .format() on it. Rendered as an
        # f-string; "{{λ‹΅}}" is escaped so the answer-slot example stays the
        # literal "{λ‹΅}" in the output.
        message_gpt = f"""
μ£Όμ–΄μ§„ Headline Text와 Image에 λͺ¨λ‘μ—μ„œ λ‚˜νƒ€λ‚œ 도덕감정은 {emotion} 이야?
좔가적인 μ„€λͺ… 없이 도덕감정이 {emotion} 인지 μ—¬λΆ€λ§Œ True ν˜Ήμ€ False 둜 λ‹€μŒκ³Ό 같이 λ‹΅λ³€ν•΄μ€˜.
{{λ‹΅}}
"""
        return system_prompt, message_gpt
    system_prompt = f"""
You are an AI expert in 'moral emotions'. Classify whether the moral emotion in the given text and image is {emotion}. {emotion} can be defined as follows.
1. {emotion}: {ENGLISH_EMOTION_DESCRIPTION[emotion]}
Classify the given data as True or False to indicate the moral emotion of {emotion} and answer only True or False without your further explanation.
"""
    # Same fix as the Korean branch; "{{Answer}}" escaped to stay literal.
    message_gpt = f"""
Is the moral emotion shown in both the given headline text and image {emotion}?
Without further explanation, just answer True or False whether the moral emotion is {emotion}.
{{Answer}}
"""
    return system_prompt, message_gpt
def format_data(sample, target_emotion, korean=True):
    """Convert one raw sample into a chat-formatted record with a binary label.

    Builds a system turn and a user turn (headline text + thumbnail image)
    from the prompts returned by get_prompt, and labels the sample 'True'
    when its human gold label matches `target_emotion`, 'False' otherwise.
    """
    system_text, user_text = get_prompt(target_emotion, korean)

    system_turn = {
        "role": "system",
        "content": [{"type": "text", "text": system_text}],
    }
    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": user_text + '\n' + sample["title"]},
            {"type": "image", "image": sample["thumbnail"]},
        ],
    }

    is_match = sample['human_gold_label'] == target_emotion
    return {
        "messages": [system_turn, user_turn],
        "id": sample['id'],
        "label": 'True' if is_match else 'False',
    }
def collate_fn(examples, processor, device, label2id, train=True):
    """Collate chat-formatted samples into a model-ready batch on `device`.

    Renders each sample's messages through the processor's chat template,
    extracts the image inputs, tokenizes everything as one padded batch,
    and attaches the sample ids. In training mode, also attaches a float16
    label tensor of shape (batch, 1) built from label2id.
    """
    rendered_texts = []
    images = []
    sample_ids = []
    for ex in examples:
        msgs = ex["messages"]
        rendered_texts.append(processor.apply_chat_template(msgs, tokenize=False))
        images.append(process_vision_info(msgs)[0])
        sample_ids.append(ex['id'])

    # Tokenize text and process images together into padded tensors.
    batch = processor(text=rendered_texts, images=images, return_tensors="pt", padding=True)
    batch = {key: tensor.to(device) for key, tensor in batch.items()}
    batch['ids'] = sample_ids

    if not train:
        return batch

    # (batch, 1) float16 labels — NOTE(review): presumably consumed by a
    # binary loss under half precision; confirm against the training loop.
    class_ids = [int(label2id[ex['label']]) for ex in examples]
    batch["labels"] = torch.tensor(
        class_ids,
        dtype=torch.float16,
        device=device,
    ).unsqueeze(1)
    return batch