File size: 572 Bytes
5f2a5b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import pandas as pd

# Build a binary-classification dataset from the multi-class training file.
# Rows labelled CONFLICTUAL become the positive class (label 1); rows labelled
# UNHARMFUL_PROFANITY and NONE each supply up to half as many negatives
# (label 0). The result is written as a ';'-separated CSV.
df = pd.read_csv('x_sensitive_train_v2.csv')

# .copy() yields an independent frame, so relabelling never touches `df`.
new_df = df[df['label'] == 'CONFLICTUAL'].copy()
new_df['label'] = 1

# Despite its name, this is the number of POSITIVE rows; the negative classes
# are sized against it.
negative_size = len(new_df)

# Quota of rows taken from each negative source class.
# NOTE: floor division means an odd positive count produces one fewer negative
# than positives, and a source class with fewer rows than the quota silently
# contributes only what it has.
half_size = negative_size // 2

# Take the first `half_size` matching rows in file order (no shuffling).
# The explicit .copy() matches the positive-class selection above and avoids
# pandas' SettingWithCopyWarning when the label column is overwritten.
unharmful_sample = df[df['label'] == 'UNHARMFUL_PROFANITY'].iloc[:half_size].copy()
unharmful_sample['label'] = 0
none_sample = df[df['label'] == "NONE"].iloc[:half_size].copy()
none_sample['label'] = 0

# Stack positives first, then the two negative groups; original row indices
# are kept by concat but dropped on write (index=False).
new_df = pd.concat([new_df, unharmful_sample, none_sample])
new_df = new_df.rename(columns={"span": "span_text", "original_index": "id"})

new_df.to_csv('binary_classification.csv', index=False, sep=';')