Analysis_System / utils /data_loader.py
kankur0007's picture
Add application file
4475241
from glob import glob
import pandas as pd
def load_subtitles_dataset(dataset_path):
    """Load every ``*.ass`` subtitle file in a directory into a DataFrame.

    For each file, the first 27 lines are discarded (presumably the ASS
    header/style section -- confirm against the actual files). Every
    remaining line is split on commas and only the part from the 10th
    comma-separated field onward is kept, so commas inside the spoken
    text survive. The literal ``\\N`` marker is replaced by a space and
    the lines of one file are joined with single spaces; line terminators
    from the file are retained in the resulting script.

    The episode number is taken from the file name: the text after the
    last ``-`` and before the first following ``.`` (e.g. ``show-12.ass``
    gives 12; a name without ``-`` such as ``12.ass`` also gives 12).

    Args:
        dataset_path: Directory that contains the ``.ass`` files.

    Returns:
        A ``pandas.DataFrame`` with the columns ``episode`` (int) and
        ``script`` (str), one row per file, ordered by episode number.
        The frame is empty (but keeps both columns) when no file matches.

    Raises:
        ValueError: If an episode number cannot be parsed from a file name.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    import os

    header_line_count = 27  # leading lines dropped from every file
    text_field_index = 9    # first comma-separated field kept on each line

    records = []
    # glob() yields paths in arbitrary order; sort so the result is
    # reproducible from run to run and across platforms.
    for path in sorted(glob(dataset_path + '/*.ass')):
        # An explicit encoding avoids decoding with the platform default.
        with open(path, 'r', encoding='utf-8') as file:
            lines = file.readlines()[header_line_count:]

        texts = [
            ",".join(line.split(',')[text_field_index:]).replace('\\N', ' ')
            for line in lines
        ]

        # Parse the episode from the file name only, so that '-' or '.'
        # characters in the directory part cannot corrupt the number.
        file_name = os.path.basename(path)
        episode = int(file_name.split('-')[-1].split('.')[0].strip())

        records.append((episode, " ".join(texts)))

    # Stable sort: files sharing an episode number keep their path order.
    records.sort(key=lambda record: record[0])

    return pd.DataFrame.from_dict({
        "episode": [episode for episode, _ in records],
        "script": [script for _, script in records],
    })