Spaces:
Sleeping
Sleeping
File size: 1,548 Bytes
2fbb982 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import pandas as pd
import json
# Path of the captions store (one JSON record per line); it is read by
# get_captions_by_video_id and rewritten in place by save_dataframe.
FILE_PATH = "Resources/captions.jsonl"
def get_captions_by_video_id(video_id):
    """Return the captions of one video as a three-column table.

    Loads every record from the JSON-Lines file at ``FILE_PATH`` and keeps
    the rows whose ``file`` field equals ``video_id``.

    Args:
        video_id: Value compared against each record's ``file`` field.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Start``, ``Text`` and
        ``End`` (taken from ``start_time``, ``text`` and ``end_time``).
        Rows keep the index labels they had in the full file; the frame is
        empty when no record matches.

    Raises:
        KeyError: If the loaded data has no ``file``, ``start_time``,
            ``text`` or ``end_time`` column.
    """
    # Decode explicitly as UTF-8 so caption text is read the same way on
    # every platform instead of with the locale's default codec.
    with open(FILE_PATH, encoding="utf-8") as file:
        # read_json's default date conversion treats labels ending in
        # "_time" as date-like; switch it off so start_time / end_time are
        # always returned as the plain numbers stored in the file.
        captions = pd.read_json(file, lines=True, convert_dates=False)

    matches = captions.loc[
        captions["file"] == video_id,
        ["start_time", "text", "end_time"],
    ]
    return matches.rename(
        columns={"start_time": "Start", "text": "Text", "end_time": "End"}
    )
def save_dataframe(df, video_id, user):
    """Write the edited captions of one video back to the captions file.

    Every record in the JSON-Lines file at ``FILE_PATH`` is loaded and split
    into the records whose ``file`` equals ``video_id`` and all the others.
    The matching records get their ``start_time``, ``text`` and ``end_time``
    replaced row by row from ``df`` and their ``user_id`` set to ``user``.
    The file is then rewritten with the untouched records first and the
    updated ones after them; only the fields listed in ``cols`` are kept.

    Args:
        df: Table with ``Start``, ``Text`` and ``End`` columns holding one
            row per stored caption of the video, in file order. Rows are
            matched by position, so the index labels of ``df`` are ignored.
        video_id: Value compared against each record's ``file`` field.
        user: Value stored in ``user_id`` of every updated record.

    Returns:
        ``"Save successful!"`` once the file has been rewritten, or
        ``"Save failed: Incorrect input format"`` (file left untouched) when
        a ``Start``/``End`` cell cannot be converted to ``float`` or when the
        number of rows in ``df`` differs from the number of stored captions
        for ``video_id``.

    Raises:
        KeyError: If ``df`` lacks a ``Start``, ``Text`` or ``End`` column, or
            a stored record has no ``file`` field.
    """
    cols = ["clean_text", "start_time", "user_id", "signer", "file", "end_time", "url", "text"]
    other_captions_data = []
    new_captions_data = []
    with open(FILE_PATH, encoding="utf-8") as f:
        for line in f:
            # A blank line is not a JSON document; skip it rather than let
            # json.loads raise and abort the whole save.
            if not line.strip():
                continue
            caption = json.loads(line)
            if caption["file"] == video_id:
                new_captions_data.append(caption)
            else:
                other_captions_data.append(caption)

    # Validate and convert the edited values before touching any data.
    # float() raises ValueError for non-numeric text and TypeError for an
    # empty (None) cell; both mean the table was filled in incorrectly.
    try:
        starts = [float(value) for value in df["Start"]]
        ends = [float(value) for value in df["End"]]
    except (ValueError, TypeError):
        return "Save failed: Incorrect input format"
    texts = list(df["Text"])

    # Rows are paired with stored captions by position. A different row
    # count cannot be paired unambiguously, so refuse it instead of
    # silently dropping rows or writing null fields.
    if len(starts) != len(new_captions_data):
        return "Save failed: Incorrect input format"

    other_captions = pd.DataFrame(data=other_captions_data, columns=cols)
    new_captions = pd.DataFrame(data=new_captions_data, columns=cols)
    # Plain lists are assigned positionally; assigning the Series from df
    # would align on index labels and yield NaN wherever labels differ.
    new_captions["start_time"] = starts
    new_captions["text"] = texts
    new_captions["end_time"] = ends
    new_captions["user_id"] = user

    all_captions = pd.concat([other_captions, new_captions], ignore_index=True)
    all_captions.to_json(FILE_PATH, orient="records", lines=True)
    return "Save successful!"
|