Spaces:

levicu
/

transcriber_tools

Sleeping

Ericwang committed on Apr 19, 2023

Commit

1b4658a

1 Parent(s): d186f96

edited the output format for transcript sorter

Files changed (2) hide show

app.py CHANGED Viewed

@@ -6,7 +6,9 @@ from pathlib import Path
 import gradio as gr
-from utlis import HHMMSS_to_sec, convert_video_format, molly_xlsx_to_table, xlsx_to_table, table_to_ELAN_tsv, trim_media, sort_transcript
 def delete_files(files):

 import gradio as gr
+from utlis import (HHMMSS_to_sec, convert_video_format, molly_xlsx_to_table,
+                   sort_transcript, table_to_ELAN_tsv, trim_media,
+                   xlsx_to_table)
 def delete_files(files):

utlis.py CHANGED Viewed

@@ -4,9 +4,9 @@ import re
 import subprocess
 from pathlib import Path
 import pandas as pd
-import gradio as gr
 def sort_transcript(file_path, save_path):
     """
@@ -33,14 +33,16 @@ def sort_transcript(file_path, save_path):
     table.columns = map(str.lower, table.columns)
     # select and reorder the desired columns
-    table = table[['speaker', 'transcript', 'start', 'end']]
     # extract the start time from the 'start' column
     table['start_time'] = table['start'].str.split('.', expand=True)[0]
     # sort by start_time
     sorted_table = table.sort_values('start_time')
-    sorted_table.to_csv(save_path, sep='\t', index=False)
     print("saved sorted transcript to", save_path)
     return save_path

 import subprocess
 from pathlib import Path
+import gradio as gr
 import pandas as pd
 def sort_transcript(file_path, save_path):
     """
     table.columns = map(str.lower, table.columns)
     # select and reorder the desired columns
+    table = table[['speaker', 'start', 'end', 'transcript']]
+    # rename the 'transcript' column to 'utterance'
+    table = table.rename(columns={'transcript': 'utterance'})
     # extract the start time from the 'start' column
     table['start_time'] = table['start'].str.split('.', expand=True)[0]
     # sort by start_time
     sorted_table = table.sort_values('start_time')
+    # drop the 'start_time' column
+    sorted_table = sorted_table.drop(columns=['start_time'])
+    sorted_table.to_csv(save_path, sep='\t', index=False, header=False)
     print("saved sorted transcript to", save_path)
     return save_path