Spaces:
Configuration error
Configuration error
File size: 3,453 Bytes
9e2ba5f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import pandas as pd
import config
# (tweet text, corrected label) pairs, listed in the order the overrides are
# applied. Each text is compared for exact equality against the text column,
# so the literals must not be normalised or re-encoded.
# NOTE(review): several entries contain mojibake (e.g. "donÛªt"); confirm the
# literals still match the dataset byte-for-byte, including any non-printing
# characters that an editor or copy/paste may have dropped.
_RELABELED_TWEETS = (
    ('like for the music video I want some real action shit like burning buildings and police chases not some weak ben winston shit', 0),
    ('Hellfire is surrounded by desires so be careful and donÛªt let your desires control you! #Afterlife', 0),
    ('To fight bioterrorism sir.', 0),
    ('.POTUS #StrategicPatience is a strategy for #Genocide; refugees; IDP Internally displaced people; horror; etc. https://t.co/rqWuoy1fm4', 1),
    ('CLEARED:incident with injury:I-495 inner loop Exit 31 - MD 97/Georgia Ave Silver Spring', 1),
    ('#foodscare #offers2go #NestleIndia slips into loss after #Magginoodle #ban unsafe and hazardous for #humanconsumption', 0),
    ('In #islam saving a person is equal in reward to saving all humans! Islam is the opposite of terrorism!', 0),
    ('Who is bringing the tornadoes and floods. Who is bringing the climate change. God is after America He is plaguing her\n \n#FARRAKHAN #QUOTE', 1),
    ('RT NotExplained: The only known image of infamous hijacker D.B. Cooper. http://t.co/JlzK2HdeTG', 1),
    ("Mmmmmm I'm burning.... I'm burning buildings I'm building.... Oooooohhhh oooh ooh...", 0),
    ("wowo--=== 12000 Nigerian refugees repatriated from Cameroon", 0),
    ("He came to a land which was engulfed in tribal war and turned it into a land of peace i.e. Madinah. #ProphetMuhammad #islam", 0),
    ("Hellfire! We donÛªt even want to think about it or mention it so letÛªs not do anything that leads to it #islam!", 0),
    ("The Prophet (peace be upon him) said 'Save yourself from Hellfire even if it is by giving half a date in charity.'", 0),
    ("Caution: breathing may be hazardous to your health.", 1),
    ("I Pledge Allegiance To The P.O.P.E. And The Burning Buildings of Epic City. ??????", 0),
    ("#Allah describes piling up #wealth thinking it would last #forever as the description of the people of #Hellfire in Surah Humaza. #Reflect", 0),
    ("that horrible sinking feeling when youÛªve been at home on your phone for a while and you realise its been on 3G this whole time", 0),
)


def relabel_target(df: pd.DataFrame) -> pd.DataFrame:
    """
    Relabel duplicate tweets that are mislabelled in the training dataset.

    The ``config.TARGET`` column is copied into ``config.RELABELED_TARGET``
    and every row whose ``config.TEXT`` value exactly equals one of the
    tweets in ``_RELABELED_TWEETS`` gets the corrected label written to the
    new column. The original ``config.TARGET`` column is left untouched.

    The dataframe is modified in place (the relabelled column is added to the
    object passed in) and the same object is returned for convenience.

    :param df: A pandas dataframe with ``config.TEXT`` and ``config.TARGET``
        columns
    :return: df, with the ``config.RELABELED_TARGET`` column added
    """
    # Start from the original labels so rows without an override keep theirs.
    df[config.RELABELED_TARGET] = df[config.TARGET].copy()

    # The text column is never written to below, so look it up only once.
    text_column = df[config.TEXT]

    # Exact string match: all duplicates of a tweet receive the same label.
    for tweet, label in _RELABELED_TWEETS:
        df.loc[text_column == tweet, config.RELABELED_TARGET] = label

    return df