# NishX17's picture
# Upload 31 files
# 5d2030a verified
# Raw
# History Blame Contribute Delete
# 5.86 kB
from typing import List, Dict, Tuple
"""
Video Face Manipulation Detection Through Ensemble of CNNs
Image and Sound Processing Lab - Politecnico di Milano
Nicolò Bonettini
Edoardo Daniele Cannas
Sara Mandelli
Luca Bondi
Paolo Bestagini
"""
import numpy as np
import pandas as pd
# Dataset/split identifiers understood by this module. The FF++ variants with
# an '-<N>fpv' suffix use N frames per video (N = 5, 10, 15, 20, 25).
available_datasets = [
    'dfdc-35-5-10',
    'ff-c23-720-140-140',
    *('ff-c23-720-140-140-{}fpv'.format(frames) for frames in range(5, 30, 5)),
    'celebdf',  # just for convenience, not used in the original paper
]
def load_df(dfdc_df_path: str, ffpp_df_path: str, dfdc_faces_dir: str, ffpp_faces_dir: str,
            dataset: str) -> Tuple[pd.DataFrame, str]:
    """
    Load the pickled faces DataFrame matching a dataset name, together with its faces root.

    The dataset family is chosen from the prefix of ``dataset``: names starting
    with 'dfdc' use the DFDC path/root, names starting with 'ff-' use the FF++
    path/root. Any other name (including 'celebdf', for which this function
    receives no path) is rejected.

    :param dfdc_df_path: str, path to the pickled DataFrame for DFDC
    :param ffpp_df_path: str, path to the pickled DataFrame for FF++
    :param dfdc_faces_dir: str, root directory returned for DFDC datasets
    :param ffpp_faces_dir: str, root directory returned for FF++ datasets
    :param dataset: str, dataset name used to pick which DataFrame to load
    :return: (DataFrame, root) tuple for the selected dataset family
    :raises NotImplementedError: if ``dataset`` starts with neither 'dfdc' nor 'ff-'
    """
    if dataset.startswith('dfdc'):
        df_path, root = dfdc_df_path, dfdc_faces_dir
    elif dataset.startswith('ff-'):
        df_path, root = ffpp_df_path, ffpp_faces_dir
    else:
        raise NotImplementedError('Unknown dataset: {}'.format(dataset))
    # NOTE(review): unpickling can execute arbitrary code; only point this at
    # DataFrame files you produced yourself or otherwise trust.
    df = pd.read_pickle(df_path)
    return df, root
def _rows_for_originals(df: pd.DataFrame, originals) -> pd.DataFrame:
    """Return rows whose 'original' is in ``originals``, followed by rows whose 'video' is."""
    return pd.concat((df[df['original'].isin(originals)], df[df['video'].isin(originals)]), axis=0)


def _split_dfdc(df: pd.DataFrame, split: str) -> pd.DataFrame:
    """Select the rows of a DFDC split by the value of the 'folder' column."""
    folders_by_split = {
        'train': range(35),
        'val': range(35, 40),
        'test': range(40, 50),
    }
    if split not in folders_by_split:
        raise NotImplementedError('Unknown split: {}'.format(split))
    return df[df['folder'].isin(folders_by_split[split])]


def _split_ffpp(df: pd.DataFrame, dataset: str, split: str) -> pd.DataFrame:
    """Select the rows of an FF++ split (720/140/rest originals), optionally sub-sampled per video."""
    # Save random state
    st0 = np.random.get_state()
    try:
        # Set seed for this selection only
        np.random.seed(41)
        # Split on original videos; the second token of the name (e.g. 'c23')
        # is matched against the 'quality' column.
        crf = dataset.split('-')[1]
        random_youtube_videos = np.random.permutation(
            df[(df['source'] == 'youtube') & (df['quality'] == crf)]['video'].unique())
        originals_by_split = {
            'train': random_youtube_videos[:720],
            'val': random_youtube_videos[720:720 + 140],
            'test': random_youtube_videos[720 + 140:],
        }
        if split not in originals_by_split:
            raise NotImplementedError('Unknown split: {}'.format(split))
        split_df = _rows_for_originals(df, originals_by_split[split])

        if dataset.endswith('fpv'):
            # '<...>-<N>fpv' -> N rows sampled per video, without replacement.
            fpv = int(dataset.rsplit('-', 1)[1][:-3])
            idxs = [
                np.random.choice(split_df[split_df['video'] == video].index, fpv, replace=False)
                for video in split_df['video'].unique()
            ]
            # No videos means split_df is already empty; np.concatenate([]) would raise.
            if idxs:
                split_df = split_df.loc[np.concatenate(idxs)]
        return split_df
    finally:
        # Restore random state, also when an exception is propagating, so the
        # caller's global RNG is never left seeded by this function.
        np.random.set_state(st0)


def _split_celebdf(df: pd.DataFrame, split: str) -> pd.DataFrame:
    """Select the rows of a Celeb-DF split (600 real train/val videos for train, rest for val)."""
    seed = 41
    num_real_train = 600
    # Save random state
    st0 = np.random.get_state()
    try:
        # Set seed for this selection only
        np.random.seed(seed)
        # Split on original videos: real (label False) videos outside the test set
        random_train_val_real_videos = np.random.permutation(
            df[(df['label'] == False) & (df['test'] == False)]['video'].unique())  # noqa: E712
        if split == 'train':
            return _rows_for_originals(df, random_train_val_real_videos[:num_real_train])
        if split == 'val':
            return _rows_for_originals(df, random_train_val_real_videos[num_real_train:])
        if split == 'test':
            return df[df['test'] == True]  # noqa: E712
        raise NotImplementedError('Unknown split: {}'.format(split))
    finally:
        # Restore random state, also when an exception is propagating
        np.random.set_state(st0)


def get_split_df(df: pd.DataFrame, dataset: str, split: str) -> pd.DataFrame:
    """
    Return the rows of ``df`` that belong to the requested split of a dataset.

    Supported datasets:
      * 'dfdc-35-5-10': split on the 'folder' column (0-34 train, 35-39 val,
        40-49 test).
      * names starting with 'ff-c23-720-140-140': the unique 'video' values of
        rows with source 'youtube' and matching 'quality' are permuted with a
        fixed seed (41); the first 720 go to train, the next 140 to val, the
        rest to test. A split contains every row whose 'original' or 'video'
        is one of its originals. With an '-<N>fpv' suffix, N rows are sampled
        per video without replacement.
      * 'celebdf': the unique 'video' values of rows with label False and test
        False are permuted with seed 41; the first 600 define train, the rest
        val; the test split is every row with test True.

    The global NumPy random state is saved before seeding and restored before
    returning or raising.

    :param df: DataFrame with the columns required by the chosen dataset
    :param dataset: str, dataset name
    :param split: str, one of 'train', 'val', 'test'
    :return: DataFrame with the selected rows
    :raises NotImplementedError: on unknown dataset or split
    :raises ValueError: propagated from np.random.choice when a video has fewer
        than N rows for an '-<N>fpv' dataset
    """
    if dataset == 'dfdc-35-5-10':
        return _split_dfdc(df, split)
    if dataset.startswith('ff-c23-720-140-140'):
        return _split_ffpp(df, dataset, split)
    if dataset == 'celebdf':
        return _split_celebdf(df, split)
    raise NotImplementedError('Unknown dataset: {}'.format(dataset))
def make_splits(dfdc_df: str, ffpp_df: str, dfdc_dir: str, ffpp_dir: str, dbs: Dict[str, List[str]]) -> Dict[str, Dict[str, Tuple[pd.DataFrame, str]]]:
    """
    Build the requested splits and return, for each, its DataFrame and faces root.

    :param dfdc_df: str, path to the DataFrame containing info on the faces extracted from the DFDC dataset with extract_faces.py
    :param ffpp_df: str, path to the DataFrame containing info on the faces extracted from the FF++ dataset with extract_faces.py
    :param dfdc_dir: str, path to the directory containing the faces extracted from the DFDC dataset with extract_faces.py
    :param ffpp_dir: str, path to the directory containing the faces extracted from the FF++ dataset with extract_faces.py
    :param dbs: {split_name: [dataset_name1, dataset_name2, ...]}
        Example:
        {'train': ['dfdc-35-5-10'], 'val': ['dfdc-35-5-10']}
    :return: nested dictionary {split_name: {dataset_name: (split DataFrame, faces root)}}
        Example:
        {'train': {'dfdc-35-5-10': (dfdc_train_df, 'path/to/dir/of/DFDC/faces')}}
    """
    # Each dataset name is loaded once and reused for every split that asks for it.
    loaded = {}
    result = {}
    for split_name, dataset_names in dbs.items():
        per_dataset = dict()
        result[split_name] = per_dataset
        for dataset_name in dataset_names:
            try:
                cached = loaded[dataset_name]
            except KeyError:
                cached = load_df(dfdc_df, ffpp_df, dfdc_dir, ffpp_dir, dataset_name)
                loaded[dataset_name] = cached
            full_df, root = cached
            per_dataset[dataset_name] = (
                get_split_df(df=full_df, dataset=dataset_name, split=split_name),
                root,
            )
    return result