Spaces:
Sleeping
Sleeping
| from glob import glob | |
| import pandas as pd | |
def load_subtitles_dataset(dataset_path, header_lines=27, encoding="utf-8"):
    """Load every ``.ass`` subtitle file in a directory into a DataFrame.

    For each file the first ``header_lines`` lines are discarded, the first
    nine comma-separated fields of every remaining line are dropped, the
    literal ``\\N`` marker is replaced by a space, and the resulting pieces
    are joined with single spaces into one script string.

    The episode number is the integer found after the last ``-`` in the file
    name (or the whole stem when there is no ``-``), up to the first ``.``.

    Args:
        dataset_path: Directory that directly contains the ``.ass`` files.
        header_lines: Number of leading lines to skip in each file. Defaults
            to 27 (presumably the length of the ASS header section in the
            target dataset -- confirm for other sources).
        encoding: Text encoding used to read the files. Explicit so that the
            result does not depend on the platform's locale default.

    Returns:
        A DataFrame with columns ``episode`` (int) and ``script`` (str), one
        row per subtitle file, ordered by sorted file path.

    Raises:
        ValueError: If an episode number cannot be parsed from a file name.
    """
    import os  # local import: keeps the block self-contained

    # Sort so the row order is deterministic; glob() order is arbitrary.
    subtitle_paths = sorted(glob(os.path.join(dataset_path, "*.ass")))

    scripts = []
    episode_num = []

    for path in subtitle_paths:
        with open(path, "r", encoding=encoding) as file:
            lines = file.readlines()

        # Skip the header block that precedes the dialogue lines.
        lines = lines[header_lines:]
        # Keep only the text field (index 9 onward); re-join with "," because
        # the dialogue text itself may contain commas.
        lines = [",".join(line.split(",")[9:]) for line in lines]
        # "\N" is a line-break marker inside the subtitle text.
        lines = [line.replace("\\N", " ") for line in lines]
        script = " ".join(lines)

        # Parse from the file name only, so "-" or "." characters in parent
        # directory names cannot corrupt the episode number.
        file_name = os.path.basename(path)
        episode = int(file_name.split("-")[-1].split(".")[0].strip())

        scripts.append(script)
        episode_num.append(episode)

    df = pd.DataFrame.from_dict({"episode": episode_num, "script": scripts})
    return df