Spaces:

levicu
/

transcriber_tools

Sleeping

App Files Files Community

Ericwang committed on Apr 14, 2023

Commit

b905ddd

1 Parent(s): 0eb9c19

modified the code to report errors

Browse files

Files changed (1) hide show

utlis.py +146 -128

utlis.py CHANGED Viewed

@@ -6,118 +6,128 @@ from pathlib import Path
 import pandas as pd
 def convert_video_format(media_in, media_out):
-    WAV_CHANNELS = 1
-    WAV_SAMPLE_RATE = 16000
-    if not isinstance(media_in, (str, Path)):
-        raise TypeError("media_in must be a string or a PathLike object")
-    if not isinstance(media_out, (str, Path)):
-        raise TypeError("media_out must be a string or a PathLike object")
-    ext = Path(media_out).suffix
-    if ext == '.wav':
-        # convert to wav with standard format for audio models
-        print(f'...generating {media_out}...')
-        subprocess.run(f'ffmpeg -y -i "{media_in}" -acodec pcm_s16le -ac {WAV_CHANNELS} -ar {WAV_SAMPLE_RATE} "{media_out}" -hide_banner -loglevel warning', check=True, shell=True)
-        return media_out
-    else:
-        print(f'...Using ffmpeg to convert {media_in} to {media_out}...')
-        print(f'...generating {media_out}...')
-        subprocess.run(['ffmpeg',
-                         '-y',
-                         '-i',
-                         media_in,
-                         '-c',
-                         'copy',
-                         media_out,
-                         '-hide_banner',
-                         '-loglevel',
-                         'warning'
-                        ], check=True)
-        return media_out
 def trim_media(media_in, media_out, start, end):
-    # options for writing out audio if converting
-    WAV_CHANNELS = 1
-    WAV_SAMPLE_RATE = 16000
-    media_type = Path(media_in).suffix
-    ext = Path(media_out).suffix
-    print("EXT", ext)
-    if isinstance(start, str):
-        start_sec = HHMMSS_to_sec(start)
-    else:
-        start_sec = float(start)
-    if isinstance(end, str):
-        end_sec = HHMMSS_to_sec(end)
-    else:
-        end_sec = float(end)
-    if ext == '.wav':
-        # convert to wav with standard format for audio models
-        print(f'...Using ffmpeg to trim video from {start} to {end} \n   and convert to {WAV_SAMPLE_RATE}Hz WAV with {WAV_CHANNELS} channels...')
-        print(f'...generating {media_out}...')
-        subprocess.run(f'ffmpeg -y -i "{media_in}" -ss {start_sec} -to {end_sec} -acodec pcm_s16le -ac {WAV_CHANNELS} -ar {WAV_SAMPLE_RATE} "{media_out}" -hide_banner -loglevel warning', check=True, shell=True)
-        print(f'...done trimming and converting to {media_out}...')
-        return media_out
-    else:
-        print(f'...Using ffmpeg to trim video from {start_sec} to {end_sec}...')
-        print(f'...generating {media_out}...')
-        subprocess.run(['ffmpeg',
-                         '-y',
-                         '-i',
-                         media_in,
-                         '-ss',
-                         f'{start_sec}',
-                         '-to',
-                         f'{end_sec}',
-                         '-c',
-                         'copy',
-                         media_out,
-                         '-hide_banner',
-                         '-loglevel',
-                         'warning'
-                        ], check=True)
-        return media_out
 def HHMMSS_to_sec(time_str):
     """Get Seconds from timestamp string with milliseconds."""
-    if not time_str:
-        return None
-    if time_str.count(':')==2:
-        h, m, s = time_str.split(':')
-    elif time_str.count(':')==3:
-    # weird timestamps where there is a field following seconds delimited by colon
-        h, m, s, u = time_str.split(':')
-        # determine whether ms field is in tenths or hundredths or thousandths by counting how many digits
-        if len(u)==1:
-            print('Weird time format detected - HH:MM:SS:tenths - please verify this is how you want the time interpreted')
-            ms = float(u)/10
-        elif len(u)==2: # hundredths
-            ms = float(u)/100
-        elif len(u)==3: # thousandths
-            ms = float(u)/1000
         else:
             print(f'input string format not supported: {time_str}')
             return None
-        s = int(s)+ms
-    elif time_str.count(':')==1:
-        # print('missing HH from timestamp, assuming MM:SS')
-        m, s = time_str.split(':')
-        h=0
-    else:
-        print(f'input string format not supported: {time_str}')
-        return None
-    return int(h) * 3600 + int(m) * 60 + float(s)
 def molly_xlsx_to_table(xl_file):
     # contractor transcribers provide an xlsx with the following columns
@@ -141,35 +151,43 @@ def molly_xlsx_to_table(xl_file):
     return table
 def xlsx_to_table(xl_file):
-    # read the first sheet of the Excel file into a DataFrame
-    print(f'...reading {xl_file}...')
-    table = pd.read_excel(xl_file, sheet_name=0)
-    print(f'...done reading {xl_file}...')
-    # extract start and end time from the Timecode column
-    print(f'...splitting Timecode column into start and end time...')
-    timecodes = table['Timecode'].str.split(' - ', expand=True)
-    table['start_time'] = timecodes[0]
-    table['end_time'] = timecodes[1]
-    print(f'...done splitting Timecode column into start and end time...')
-    # convert start and end time to seconds using the HHMMSS_to_sec function
-    print(f'...converting start and end time to seconds...')
-    table['start_sec'] = table['start_time'].apply(HHMMSS_to_sec)
-    table['end_sec'] = table['end_time'].apply(HHMMSS_to_sec)
-    print(f'...done converting start and end time to seconds...')
-    # drop unnecessary columns
-    print(f'...dropping unnecessary columns...')
-    table.drop(['Timecode', 'Annotations', 'Error Type', 'Duration'], axis=1, inplace=True)
-    # rename columns
-    print(f'...renaming columns...')
-    table.rename(columns={'#': 'uttID', 'Speaker': 'speaker', 'Dialogue': 'transcript'}, inplace=True)
-    # reorder columns
-    print(f'...reordering columns...')
-    table = table[['uttID', 'speaker', 'transcript', 'start_sec', 'end_sec']]
-    return table
 def table_to_ELAN_tsv(table:pd.DataFrame, path:str):
     # write table to tsv compatible with ELAN import

 import pandas as pd
+import gradio as gr
 def convert_video_format(media_in, media_out):
+    try:
+        WAV_CHANNELS = 1
+        WAV_SAMPLE_RATE = 16000
+        if not isinstance(media_in, (str, Path)):
+            raise TypeError("media_in must be a string or a PathLike object")
+        if not isinstance(media_out, (str, Path)):
+            raise TypeError("media_out must be a string or a PathLike object")
+        ext = Path(media_out).suffix
+        if ext == '.wav':
+            # convert to wav with standard format for audio models
+            print(f'...generating {media_out}...')
+            subprocess.run(f'ffmpeg -y -i "{media_in}" -acodec pcm_s16le -ac {WAV_CHANNELS} -ar {WAV_SAMPLE_RATE} "{media_out}" -hide_banner -loglevel warning', check=True, shell=True)
+            return media_out
+        else:
+            print(f'...Using ffmpeg to convert {media_in} to {media_out}...')
+            print(f'...generating {media_out}...')
+            subprocess.run(['ffmpeg',
+                            '-y',
+                            '-i',
+                            media_in,
+                            '-c',
+                            'copy',
+                            media_out,
+                            '-hide_banner',
+                            '-loglevel',
+                            'warning'
+                            ], check=True)
+            return media_out
+    except Exception as e:
+        gr.Error(f"Error converting video format: {e}")
 def trim_media(media_in, media_out, start, end):
+    try:
+        # options for writing out audio if converting
+        WAV_CHANNELS = 1
+        WAV_SAMPLE_RATE = 16000
+        media_type = Path(media_in).suffix
+        ext = Path(media_out).suffix
+        print("EXT", ext)
+        if isinstance(start, str):
+            start_sec = HHMMSS_to_sec(start)
+        else:
+            start_sec = float(start)
+        if isinstance(end, str):
+            end_sec = HHMMSS_to_sec(end)
+        else:
+            end_sec = float(end)
+        if ext == '.wav':
+            # convert to wav with standard format for audio models
+            print(f'...Using ffmpeg to trim video from {start} to {end} \n   and convert to {WAV_SAMPLE_RATE}Hz WAV with {WAV_CHANNELS} channels...')
+            print(f'...generating {media_out}...')
+            subprocess.run(f'ffmpeg -y -i "{media_in}" -ss {start_sec} -to {end_sec} -acodec pcm_s16le -ac {WAV_CHANNELS} -ar {WAV_SAMPLE_RATE} "{media_out}" -hide_banner -loglevel warning', check=True, shell=True)
+            print(f'...done trimming and converting to {media_out}...')
+            return media_out
+        else:
+            print(f'...Using ffmpeg to trim video from {start_sec} to {end_sec}...')
+            print(f'...generating {media_out}...')
+            subprocess.run(['ffmpeg',
+                            '-y',
+                            '-i',
+                            media_in,
+                            '-ss',
+                            f'{start_sec}',
+                            '-to',
+                            f'{end_sec}',
+                            '-c',
+                            'copy',
+                            media_out,
+                            '-hide_banner',
+                            '-loglevel',
+                            'warning'
+                            ], check=True)
+            return media_out
+    except Exception as e:
+        gr.Error(f"Error trimming media: {e}")
 def HHMMSS_to_sec(time_str):
     """Get Seconds from timestamp string with milliseconds."""
+    try:
+        if not time_str:
+            return None
+        if time_str.count(':')==2:
+            h, m, s = time_str.split(':')
+        elif time_str.count(':')==3:
+        # weird timestamps where there is a field following seconds delimited by colon
+            h, m, s, u = time_str.split(':')
+            # determine whether ms field is in tenths or hundredths or thousandths by counting how many digits
+            if len(u)==1:
+                print('Weird time format detected - HH:MM:SS:tenths - please verify this is how you want the time interpreted')
+                ms = float(u)/10
+            elif len(u)==2: # hundredths
+                ms = float(u)/100
+            elif len(u)==3: # thousandths
+                ms = float(u)/1000
+            else:
+                print(f'input string format not supported: {time_str}')
+                return None
+            s = int(s)+ms
+        elif time_str.count(':')==1:
+            # print('missing HH from timestamp, assuming MM:SS')
+            m, s = time_str.split(':')
+            h=0
         else:
             print(f'input string format not supported: {time_str}')
             return None
+        return int(h) * 3600 + int(m) * 60 + float(s)
+    except Exception as e:
+        gr.Error(f"Error converting time to seconds: {e}")
 def molly_xlsx_to_table(xl_file):
     # contractor transcribers provide an xlsx with the following columns
     return table
 def xlsx_to_table(xl_file):
+    try:
+        # read the first sheet of the Excel file into a DataFrame
+        print(f'...reading {xl_file}...')
+        table = pd.read_excel(xl_file, sheet_name=0)
+        print(f'...done reading {xl_file}...')
+        # convert column names to lowercase
+        table.columns = map(str.lower, table.columns)
+        # extract start and end time from the Timecode column
+        print(f'...splitting Timecode column into start and end time...')
+        timecodes = table['timecode'].str.split(' - ', expand=True)
+        table['start_time'] = timecodes[0]
+        table['end_time'] = timecodes[1]
+        print(f'...done splitting Timecode column into start and end time...')
+        # convert start and end time to seconds using the HHMMSS_to_sec function
+        print(f'...converting start and end time to seconds...')
+        table['start_sec'] = table['start_time'].apply(HHMMSS_to_sec)
+        table['end_sec'] = table['end_time'].apply(HHMMSS_to_sec)
+        print(f'...done converting start and end time to seconds...')
+        # drop unnecessary columns
+        print(f'...dropping unnecessary columns...')
+        table.drop(['timecode', 'annotations', 'error type', 'duration'], axis=1, inplace=True)
+        # rename columns
+        print(f'...renaming columns...')
+        table.rename(columns={'#': 'uttID', 'speaker': 'speaker', 'dialogue': 'transcript'}, inplace=True)
+        # reorder columns
+        print(f'...reordering columns...')
+        table = table[['uttID', 'speaker', 'transcript', 'start_sec', 'end_sec']]
+        return table
+    except Exception as e:
+        gr.Error(f'Error converting {xl_file}: {e}')
 def table_to_ELAN_tsv(table:pd.DataFrame, path:str):
     # write table to tsv compatible with ELAN import