Ericwang committed on
Commit
1b4658a
·
1 Parent(s): d186f96

edited the output format for transcript sorter

Browse files
Files changed (2) hide show
  1. app.py +3 -1
  2. utlis.py +7 -5
app.py CHANGED
@@ -6,7 +6,9 @@ from pathlib import Path
6
 
7
  import gradio as gr
8
 
9
- from utlis import HHMMSS_to_sec, convert_video_format, molly_xlsx_to_table, xlsx_to_table, table_to_ELAN_tsv, trim_media, sort_transcript
 
 
10
 
11
 
12
  def delete_files(files):
 
6
 
7
  import gradio as gr
8
 
9
+ from utlis import (HHMMSS_to_sec, convert_video_format, molly_xlsx_to_table,
10
+ sort_transcript, table_to_ELAN_tsv, trim_media,
11
+ xlsx_to_table)
12
 
13
 
14
  def delete_files(files):
utlis.py CHANGED
@@ -4,9 +4,9 @@ import re
4
  import subprocess
5
  from pathlib import Path
6
 
 
7
  import pandas as pd
8
 
9
- import gradio as gr
10
 
11
  def sort_transcript(file_path, save_path):
12
  """
@@ -33,14 +33,16 @@ def sort_transcript(file_path, save_path):
33
  table.columns = map(str.lower, table.columns)
34
 
35
  # select and reorder the desired columns
36
- table = table[['speaker', 'transcript', 'start', 'end']]
37
-
 
38
  # extract the start time from the 'start' column
39
  table['start_time'] = table['start'].str.split('.', expand=True)[0]
40
-
41
  # sort by start_time
42
  sorted_table = table.sort_values('start_time')
43
- sorted_table.to_csv(save_path, sep='\t', index=False)
 
 
44
  print("saved sorted transcript to", save_path)
45
  return save_path
46
 
 
4
  import subprocess
5
  from pathlib import Path
6
 
7
+ import gradio as gr
8
  import pandas as pd
9
 
 
10
 
11
  def sort_transcript(file_path, save_path):
12
  """
 
33
  table.columns = map(str.lower, table.columns)
34
 
35
  # select and reorder the desired columns
36
+ table = table[['speaker', 'start', 'end', 'transcript']]
37
+ # rename the 'transcript' column to 'utterance'
38
+ table = table.rename(columns={'transcript': 'utterance'})
39
  # extract the start time from the 'start' column
40
  table['start_time'] = table['start'].str.split('.', expand=True)[0]
 
41
  # sort by start_time
42
  sorted_table = table.sort_values('start_time')
43
+ # drop the 'start_time' column
44
+ sorted_table = sorted_table.drop(columns=['start_time'])
45
+ sorted_table.to_csv(save_path, sep='\t', index=False, header=False)
46
  print("saved sorted transcript to", save_path)
47
  return save_path
48