Ericwang committed on
Commit
b905ddd
·
1 Parent(s): 0eb9c19

modified the code to report errors

Browse files
Files changed (1) hide show
  1. utlis.py +146 -128
utlis.py CHANGED
@@ -6,118 +6,128 @@ from pathlib import Path
6
 
7
  import pandas as pd
8
 
 
 
9
 
10
  def convert_video_format(media_in, media_out):
11
- WAV_CHANNELS = 1
12
- WAV_SAMPLE_RATE = 16000
13
-
14
- if not isinstance(media_in, (str, Path)):
15
- raise TypeError("media_in must be a string or a PathLike object")
16
-
17
- if not isinstance(media_out, (str, Path)):
18
- raise TypeError("media_out must be a string or a PathLike object")
19
- ext = Path(media_out).suffix
20
- if ext == '.wav':
21
- # convert to wav with standard format for audio models
22
- print(f'...generating {media_out}...')
23
- subprocess.run(f'ffmpeg -y -i "{media_in}" -acodec pcm_s16le -ac {WAV_CHANNELS} -ar {WAV_SAMPLE_RATE} "{media_out}" -hide_banner -loglevel warning', check=True, shell=True)
24
- return media_out
25
-
26
- else:
27
- print(f'...Using ffmpeg to convert {media_in} to {media_out}...')
28
- print(f'...generating {media_out}...')
29
-
30
- subprocess.run(['ffmpeg',
31
- '-y',
32
- '-i',
33
- media_in,
34
- '-c',
35
- 'copy',
36
- media_out,
37
- '-hide_banner',
38
- '-loglevel',
39
- 'warning'
40
- ], check=True)
41
- return media_out
 
 
 
42
 
43
  def trim_media(media_in, media_out, start, end):
44
-
45
- # options for writing out audio if converting
46
- WAV_CHANNELS = 1
47
- WAV_SAMPLE_RATE = 16000
48
-
49
- media_type = Path(media_in).suffix
50
- ext = Path(media_out).suffix
51
- print("EXT", ext)
52
- if isinstance(start, str):
53
- start_sec = HHMMSS_to_sec(start)
54
- else:
55
- start_sec = float(start)
56
- if isinstance(end, str):
57
- end_sec = HHMMSS_to_sec(end)
58
- else:
59
- end_sec = float(end)
60
-
61
- if ext == '.wav':
62
- # convert to wav with standard format for audio models
63
- print(f'...Using ffmpeg to trim video from {start} to {end} \n and convert to {WAV_SAMPLE_RATE}Hz WAV with {WAV_CHANNELS} channels...')
64
- print(f'...generating {media_out}...')
 
 
 
 
 
 
 
 
 
65
 
66
- subprocess.run(f'ffmpeg -y -i "{media_in}" -ss {start_sec} -to {end_sec} -acodec pcm_s16le -ac {WAV_CHANNELS} -ar {WAV_SAMPLE_RATE} "{media_out}" -hide_banner -loglevel warning', check=True, shell=True)
67
-
68
- print(f'...done trimming and converting to {media_out}...')
69
- return media_out
70
-
71
- else:
72
- print(f'...Using ffmpeg to trim video from {start_sec} to {end_sec}...')
73
- print(f'...generating {media_out}...')
74
-
75
- subprocess.run(['ffmpeg',
76
- '-y',
77
- '-i',
78
- media_in,
79
- '-ss',
80
- f'{start_sec}',
81
- '-to',
82
- f'{end_sec}',
83
- '-c',
84
- 'copy',
85
- media_out,
86
- '-hide_banner',
87
- '-loglevel',
88
- 'warning'
89
- ], check=True)
90
- return media_out
91
 
92
  def HHMMSS_to_sec(time_str):
93
  """Get Seconds from timestamp string with milliseconds."""
94
- if not time_str:
95
- return None
96
- if time_str.count(':')==2:
97
- h, m, s = time_str.split(':')
98
- elif time_str.count(':')==3:
99
- # weird timestamps where there is a field followign seconds delimited by colon
100
- h, m, s, u = time_str.split(':')
101
- # determine whether ms field is in tenths or hundredths or thousandths by countng how many digits
102
- if len(u)==1:
103
- print('Weird time format detected - HH:MM:SS:tenths - please verify this is how you want the time interpreted')
104
- ms = float(u)/10
105
- elif len(u)==2: # hundredths
106
- ms = float(u)/100
107
- elif len(u)==3: # hundredths
108
- ms = float(u)/1000
 
 
 
 
 
 
 
 
 
109
  else:
110
  print(f'input string format not supported: {time_str}')
111
  return None
112
- s = int(s)+ms
113
- elif time_str.count(':')==1:
114
- # print('missing HH from timestamp, assuming MM:SS')
115
- m, s = time_str.split(':')
116
- h=0
117
- else:
118
- print(f'input string format not supported: {time_str}')
119
- return None
120
- return int(h) * 3600 + int(m) * 60 + float(s)
121
 
122
  def molly_xlsx_to_table(xl_file):
123
  # contractor transcribers provide an xlsx with the following columns
@@ -141,35 +151,43 @@ def molly_xlsx_to_table(xl_file):
141
  return table
142
 
143
  def xlsx_to_table(xl_file):
144
- # read the first sheet of the Excel file into a DataFrame
145
- print(f'...reading {xl_file}...')
146
- table = pd.read_excel(xl_file, sheet_name=0)
147
- print(f'...done reading {xl_file}...')
148
- # extract start and end time from the Timecode column
149
- print(f'...splitting Timecode column into start and end time...')
150
- timecodes = table['Timecode'].str.split(' - ', expand=True)
151
- table['start_time'] = timecodes[0]
152
- table['end_time'] = timecodes[1]
153
- print(f'...done splitting Timecode column into start and end time...')
154
- # convert start and end time to seconds using the HHMMSS_to_sec function
155
- print(f'...converting start and end time to seconds...')
156
- table['start_sec'] = table['start_time'].apply(HHMMSS_to_sec)
157
- table['end_sec'] = table['end_time'].apply(HHMMSS_to_sec)
158
- print(f'...done converting start and end time to seconds...')
159
- # drop unnecessary columns
160
- print(f'...dropping unnecessary columns...')
161
- table.drop(['Timecode', 'Annotations', 'Error Type', 'Duration'], axis=1, inplace=True)
162
-
163
- # rename columns
164
- print(f'...renaming columns...')
165
- table.rename(columns={'#': 'uttID', 'Speaker': 'speaker', 'Dialogue': 'transcript'}, inplace=True)
166
-
167
- # reorder columns
168
- print(f'...reordering columns...')
169
- table = table[['uttID', 'speaker', 'transcript', 'start_sec', 'end_sec']]
170
 
171
- return table
 
 
 
 
 
 
172
 
 
 
 
173
 
174
  def table_to_ELAN_tsv(table:pd.DataFrame, path:str):
175
  # write table to tsv compatible with ELAN import
 
6
 
7
  import pandas as pd
8
 
9
+ import gradio as gr
10
+
11
 
def convert_video_format(media_in, media_out):
    """Convert ``media_in`` to ``media_out`` with ffmpeg.

    When ``media_out`` ends in ``.wav`` the audio is re-encoded as 16-bit PCM,
    mono, 16 kHz (the format expected by the audio models). For any other
    extension the streams are copied into the new container without
    re-encoding.

    Args:
        media_in: Path (``str`` or ``pathlib.Path``) of the input media file.
        media_out: Path (``str`` or ``pathlib.Path``) of the file to write.

    Returns:
        ``media_out``, unchanged, once ffmpeg has finished successfully.

    Raises:
        gr.Error: If an argument has the wrong type or ffmpeg fails. The
            original exception is chained as ``__cause__``.
    """
    WAV_CHANNELS = 1
    WAV_SAMPLE_RATE = 16000

    try:
        if not isinstance(media_in, (str, Path)):
            raise TypeError("media_in must be a string or a PathLike object")

        if not isinstance(media_out, (str, Path)):
            raise TypeError("media_out must be a string or a PathLike object")

        if Path(media_out).suffix == '.wav':
            # convert to wav with standard format for audio models
            print(f'...generating {media_out}...')
            codec_args = ['-acodec', 'pcm_s16le',
                          '-ac', str(WAV_CHANNELS),
                          '-ar', str(WAV_SAMPLE_RATE)]
        else:
            print(f'...Using ffmpeg to convert {media_in} to {media_out}...')
            print(f'...generating {media_out}...')
            codec_args = ['-c', 'copy']

        # Pass an argument list (no shell) so paths containing quotes, spaces
        # or shell metacharacters reach ffmpeg verbatim.
        subprocess.run(['ffmpeg', '-y',
                        '-i', str(media_in),
                        *codec_args,
                        str(media_out),
                        '-hide_banner',
                        '-loglevel', 'warning'],
                       check=True)
        return media_out
    except Exception as e:
        # gr.Error is an exception: it has to be raised to be reported;
        # merely constructing it discards the failure.
        raise gr.Error(f"Error converting video format: {e}") from e
47
 
def trim_media(media_in, media_out, start, end):
    """Cut the ``start``..``end`` interval out of ``media_in`` with ffmpeg.

    When ``media_out`` ends in ``.wav`` the trimmed audio is re-encoded as
    16-bit PCM, mono, 16 kHz. For any other extension the streams are copied
    without re-encoding.

    Args:
        media_in: Path of the input media file.
        media_out: Path of the trimmed file to write.
        start: Start of the interval, either a timestamp string understood by
            ``HHMMSS_to_sec`` or a number of seconds.
        end: End of the interval, in the same forms as ``start``.

    Returns:
        ``media_out``, unchanged, once ffmpeg has finished successfully.

    Raises:
        gr.Error: If a time cannot be parsed or ffmpeg fails. The original
            exception is chained as ``__cause__``.
    """
    # options for writing out audio if converting
    WAV_CHANNELS = 1
    WAV_SAMPLE_RATE = 16000

    try:
        ext = Path(media_out).suffix
        print("EXT", ext)

        start_sec = HHMMSS_to_sec(start) if isinstance(start, str) else float(start)
        end_sec = HHMMSS_to_sec(end) if isinstance(end, str) else float(end)
        # HHMMSS_to_sec signals an unparseable timestamp with None; stop here
        # instead of handing the literal text "None" to ffmpeg.
        if start_sec is None or end_sec is None:
            raise ValueError(
                f'could not parse trim interval: start={start!r}, end={end!r}')

        if ext == '.wav':
            # convert to wav with standard format for audio models
            print(f'...Using ffmpeg to trim video from {start} to {end} \n and convert to {WAV_SAMPLE_RATE}Hz WAV with {WAV_CHANNELS} channels...')
            codec_args = ['-acodec', 'pcm_s16le',
                          '-ac', str(WAV_CHANNELS),
                          '-ar', str(WAV_SAMPLE_RATE)]
        else:
            print(f'...Using ffmpeg to trim video from {start_sec} to {end_sec}...')
            codec_args = ['-c', 'copy']
        print(f'...generating {media_out}...')

        # Pass an argument list (no shell) so paths containing quotes, spaces
        # or shell metacharacters reach ffmpeg verbatim.
        subprocess.run(['ffmpeg', '-y',
                        '-i', str(media_in),
                        '-ss', f'{start_sec}',
                        '-to', f'{end_sec}',
                        *codec_args,
                        str(media_out),
                        '-hide_banner',
                        '-loglevel', 'warning'],
                       check=True)

        if ext == '.wav':
            print(f'...done trimming and converting to {media_out}...')
        return media_out
    except Exception as e:
        # gr.Error is an exception: it has to be raised to be reported;
        # merely constructing it discards the failure.
        raise gr.Error(f"Error trimming media: {e}") from e
 
 
 
 
 
 
 
98
 
def HHMMSS_to_sec(time_str):
    """Get seconds from a colon-delimited timestamp string.

    Supported layouts:

    * ``MM:SS`` - hours are taken to be 0.
    * ``HH:MM:SS`` - the seconds field may carry a decimal fraction.
    * ``HH:MM:SS:F`` - ``F`` is a 1-3 digit fraction of a second, read as
      tenths, hundredths or thousandths according to its number of digits.

    Args:
        time_str: The timestamp string to convert.

    Returns:
        The time in seconds as a ``float``, or ``None`` when ``time_str`` is
        empty, is not a string, or is not in a supported layout. Conversion is
        best-effort: a diagnostic is printed and ``None`` returned rather than
        an exception being raised.
    """
    try:
        if not time_str:
            return None

        fields = time_str.split(':')
        if len(fields) == 2:
            # missing HH from timestamp, assuming MM:SS
            h = 0
            m, s = fields
        elif len(fields) == 3:
            h, m, s = fields
        elif len(fields) == 4:
            # weird timestamps where a field following the seconds is
            # delimited by a colon
            h, m, s, u = fields
            if not 1 <= len(u) <= 3:
                print(f'input string format not supported: {time_str}')
                return None
            if len(u) == 1:
                print('Weird time format detected - HH:MM:SS:tenths - please verify this is how you want the time interpreted')
            # 1 digit = tenths, 2 digits = hundredths, 3 digits = thousandths
            s = int(s) + float(u) / 10 ** len(u)
        else:
            print(f'input string format not supported: {time_str}')
            return None
        return int(h) * 3600 + int(m) * 60 + float(s)
    except (AttributeError, TypeError, ValueError) as e:
        # Non-string input (e.g. NaN from an empty spreadsheet cell) or
        # non-numeric fields: report it and fall back to None like the other
        # unsupported-format branches.
        print(f'Error converting time to seconds: {e}')
        return None
 
 
 
 
 
 
131
 
132
  def molly_xlsx_to_table(xl_file):
133
  # contractor transcribers provide an xlsx with the following columns
 
151
  return table
152
 
def xlsx_to_table(xl_file):
    """Read a transcript spreadsheet into a table of utterances with times in seconds.

    The first sheet of ``xl_file`` is read. Column names are matched
    case-insensitively; the sheet must provide ``#``, ``Speaker``,
    ``Dialogue`` and ``Timecode`` columns, where each ``Timecode`` cell holds
    ``"<start> - <end>"`` timestamps in a form ``HHMMSS_to_sec`` understands.

    Args:
        xl_file: Path or file-like object accepted by ``pandas.read_excel``.

    Returns:
        A DataFrame with the columns ``uttID``, ``speaker``, ``transcript``,
        ``start_sec`` and ``end_sec``, in that order.

    Raises:
        gr.Error: If the file cannot be read or does not have the expected
            columns. The original exception is chained as ``__cause__``.
    """
    try:
        # read the first sheet of the Excel file into a DataFrame
        print(f'...reading {xl_file}...')
        table = pd.read_excel(xl_file, sheet_name=0)
        print(f'...done reading {xl_file}...')

        # convert column names to lowercase; str() guards against non-string
        # headers (e.g. a numeric column title), which str.lower would reject
        table.columns = [str(col).lower() for col in table.columns]

        # extract start and end time from the Timecode column
        print(f'...splitting Timecode column into start and end time...')
        timecodes = table['timecode'].str.split(' - ', expand=True)
        table['start_time'] = timecodes[0]
        table['end_time'] = timecodes[1]
        print(f'...done splitting Timecode column into start and end time...')

        # convert start and end time to seconds using the HHMMSS_to_sec function
        print(f'...converting start and end time to seconds...')
        table['start_sec'] = table['start_time'].apply(HHMMSS_to_sec)
        table['end_sec'] = table['end_time'].apply(HHMMSS_to_sec)
        print(f'...done converting start and end time to seconds...')

        # drop unnecessary columns; these are optional in the sheet, so a
        # missing one is not an error
        print(f'...dropping unnecessary columns...')
        table.drop(['timecode', 'annotations', 'error type', 'duration'],
                   axis=1, inplace=True, errors='ignore')

        # rename columns ('speaker' already has its final name after lowercasing)
        print(f'...renaming columns...')
        table.rename(columns={'#': 'uttID', 'dialogue': 'transcript'}, inplace=True)

        # reorder columns
        print(f'...reordering columns...')
        table = table[['uttID', 'speaker', 'transcript', 'start_sec', 'end_sec']]

        return table
    except Exception as e:
        # gr.Error is an exception: it has to be raised to be reported;
        # merely constructing it discards the failure.
        raise gr.Error(f'Error converting {xl_file}: {e}') from e
191
 
192
  def table_to_ELAN_tsv(table:pd.DataFrame, path:str):
193
  # write table to tsv compatible with ELAN import