Spaces:
Sleeping
Sleeping
Edited the output format of the transcript sorter
Browse files
app.py
CHANGED
|
@@ -6,7 +6,9 @@ from pathlib import Path
|
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
|
| 9 |
-
from utlis import HHMMSS_to_sec, convert_video_format, molly_xlsx_to_table,
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
def delete_files(files):
|
|
|
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
|
| 9 |
+
from utlis import (HHMMSS_to_sec, convert_video_format, molly_xlsx_to_table,
|
| 10 |
+
sort_transcript, table_to_ELAN_tsv, trim_media,
|
| 11 |
+
xlsx_to_table)
|
| 12 |
|
| 13 |
|
| 14 |
def delete_files(files):
|
utlis.py
CHANGED
|
@@ -4,9 +4,9 @@ import re
|
|
| 4 |
import subprocess
|
| 5 |
from pathlib import Path
|
| 6 |
|
|
|
|
| 7 |
import pandas as pd
|
| 8 |
|
| 9 |
-
import gradio as gr
|
| 10 |
|
| 11 |
def sort_transcript(file_path, save_path):
|
| 12 |
"""
|
|
@@ -33,14 +33,16 @@ def sort_transcript(file_path, save_path):
|
|
| 33 |
table.columns = map(str.lower, table.columns)
|
| 34 |
|
| 35 |
# select and reorder the desired columns
|
| 36 |
-
table = table[['speaker', '
|
| 37 |
-
|
|
|
|
| 38 |
# extract the start time from the 'start' column
|
| 39 |
table['start_time'] = table['start'].str.split('.', expand=True)[0]
|
| 40 |
-
|
| 41 |
# sort by start_time
|
| 42 |
sorted_table = table.sort_values('start_time')
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
print("saved sorted transcript to", save_path)
|
| 45 |
return save_path
|
| 46 |
|
|
|
|
| 4 |
import subprocess
|
| 5 |
from pathlib import Path
|
| 6 |
|
| 7 |
+
import gradio as gr
|
| 8 |
import pandas as pd
|
| 9 |
|
|
|
|
| 10 |
|
| 11 |
def sort_transcript(file_path, save_path):
|
| 12 |
"""
|
|
|
|
| 33 |
table.columns = map(str.lower, table.columns)
|
| 34 |
|
| 35 |
# select and reorder the desired columns
|
| 36 |
+
table = table[['speaker', 'start', 'end', 'transcript']]
|
| 37 |
+
# rename the 'transcript' column to 'utterance'
|
| 38 |
+
table = table.rename(columns={'transcript': 'utterance'})
|
| 39 |
# extract the start time from the 'start' column
|
| 40 |
table['start_time'] = table['start'].str.split('.', expand=True)[0]
|
|
|
|
| 41 |
# sort by start_time
|
| 42 |
sorted_table = table.sort_values('start_time')
|
| 43 |
+
# drop the 'start_time' column
|
| 44 |
+
sorted_table = sorted_table.drop(columns=['start_time'])
|
| 45 |
+
sorted_table.to_csv(save_path, sep='\t', index=False, header=False)
|
| 46 |
print("saved sorted transcript to", save_path)
|
| 47 |
return save_path
|
| 48 |
|