File size: 25,233 Bytes
ecfddf0
d40a36c
3d64725
 
 
af858e3
8059bbb
 
d9fb961
5d0f90f
 
d9fb961
 
3d64725
8059bbb
3d64725
 
 
 
 
 
d971130
3d64725
c4ab5b6
d40a36c
d186f96
d40a36c
 
 
d186f96
d40a36c
c4ab5b6
d40a36c
 
 
a80ecb7
d40a36c
0eb9c19
a80ecb7
d40a36c
c4ab5b6
 
d40a36c
c4ab5b6
d971130
daa6f25
 
 
c4ab5b6
d40a36c
 
 
 
d971130
daa6f25
 
 
c4ab5b6
d40a36c
 
 
 
 
c4ab5b6
d40a36c
c4ab5b6
d40a36c
 
af858e3
 
 
c4ab5b6
 
af858e3
 
 
 
d971130
c4ab5b6
af858e3
 
 
a80ecb7
af858e3
 
 
daa6f25
 
 
af858e3
 
 
 
 
daa6f25
 
 
af858e3
c4ab5b6
daa6f25
af858e3
 
daa6f25
af858e3
040d75f
af858e3
 
 
c29e499
c4ab5b6
c29e499
 
d40a36c
a80ecb7
d40a36c
 
 
c29e499
d40a36c
daa6f25
 
 
d40a36c
 
 
 
3d64725
c4ab5b6
d40a36c
3d64725
 
d40a36c
c29e499
040d75f
c29e499
8059bbb
ecfddf0
c4ab5b6
8059bbb
d40a36c
a80ecb7
d40a36c
 
8059bbb
d40a36c
daa6f25
 
 
d40a36c
c4ab5b6
3d64725
 
 
ecfddf0
 
040d75f
ecfddf0
 
 
c4ab5b6
ecfddf0
d40a36c
a80ecb7
4d1a4b3
 
d40a36c
 
daa6f25
 
 
c4ab5b6
3d64725
c4ab5b6
3d64725
 
 
8059bbb
 
040d75f
8059bbb
 
d9fb961
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d0f90f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9fb961
 
 
5d0f90f
d9fb961
 
 
 
 
5d0f90f
d9fb961
 
 
 
 
5d0f90f
d9fb961
 
 
 
 
5d0f90f
d9fb961
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97631c5
 
 
 
 
a80ecb7
97631c5
c477ae4
97631c5
 
 
 
109738c
97631c5
 
c477ae4
97631c5
 
 
 
 
af858e3
 
 
 
 
f2bd641
c477ae4
834eb54
af858e3
 
834eb54
f2bd641
c29e499
 
 
 
 
 
 
 
 
 
c477ae4
7191918
 
 
 
 
 
 
 
 
c29e499
d9fb961
 
 
 
 
 
 
 
 
 
 
55f82cb
d9fb961
 
565db3d
d9fb961
 
 
 
 
 
 
 
 
 
 
 
 
565db3d
d9fb961
 
 
 
 
 
 
 
 
 
 
565db3d
d9fb961
 
 
 
 
 
 
 
 
 
 
565db3d
d9fb961
 
 
 
 
 
 
 
 
 
 
 
565db3d
d9fb961
 
 
 
 
 
 
 
 
 
 
 
 
 
565db3d
d9fb961
 
 
5d0f90f
 
 
 
 
 
 
 
 
 
 
 
 
d9fb961
 
 
 
 
 
 
 
 
 
5d0f90f
d9fb961
 
 
 
 
 
 
 
 
55f82cb
 
d9fb961
5d0f90f
55f82cb
 
 
 
d9fb961
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
import os
import re
import threading
import time
from pathlib import Path
import random
import gradio as gr

from utils import (HHMMSS_to_sec,  convert_and_trim_video,
                   table_to_ELAN_tsv, parse_label_csv,
                   xlsx_to_table, merge_ellipsis,
                   convert_transcript_for_TM, convert_transcript_for_annotation,
                   table_to_ELAN_tsv, ELAN_to_labels_csv, deidentify_speaker)


def delete_files(files):
    """Wait 300 seconds, then remove every path listed in ``files``.

    Paths that no longer exist are reported on stdout and skipped.

    NOTE: another function named ``delete_files`` (taking three arguments) is
    defined further down in this module.  Because that definition runs later,
    it is the one the module-level name refers to once the file has loaded.

    Args:
        files: Iterable of filesystem paths to delete.
    """
    # Give the caller time to hand the files back to the user first.
    time.sleep(300)
    for doomed_path in files:
        try:
            os.remove(doomed_path)
        except FileNotFoundError:
            print(f"File {doomed_path} not found for deletion.")
    print("...files deleted")

def set_output_file(input_file, output_format, folder, insert_string = 'trimmed'):
    """Build the output path for a processed upload, creating ``folder`` if needed.

    The output name is ``<base>_<insert_string>.<output_format>`` where
    ``<base>`` is the uploaded file's name up to its first dot.

    Args:
        input_file: Object whose ``.name`` attribute is the uploaded file path.
        output_format: Extension (without dot) for the output file.
        folder: Directory the output file should live in.
        insert_string: Tag inserted between the base name and the extension.

    Returns:
        The output path as a string.
    """
    # exist_ok avoids the check-then-create race when two requests arrive
    # at the same time and both find the folder missing.
    os.makedirs(folder, exist_ok=True)
    base_name = Path(input_file.name).stem.partition('.')[0]
    output_file = os.path.join(folder, f"{base_name}_{insert_string}.{output_format}")
    print(f"...set output file: {output_file}")
    return output_file

def trim_video_helper(input_file, output_file, start_time, end_time):
    """Fill in default trim bounds and run the trim.

    Defaults:
        * blank start -> start at 0;
        * blank end   -> 300 seconds after the start.

    A supplied end time is always respected, including when the start time is
    left blank (previously a blank start silently replaced the end with 300).

    Args:
        input_file: Object whose ``.name`` attribute is the source video path.
        output_file: Destination path for the trimmed video.
        start_time: Start bound as entered by the caller, or a falsy value.
        end_time: End bound as entered by the caller, or a falsy value.

    Returns:
        Whatever ``convert_and_trim_video`` returns on success, or ``None``
        when it returns a falsy value (treated as an FFMPEG failure).
    """
    if not start_time:
        start_time = 0
        if not end_time:
            end_time = 300
    elif not end_time:
        end_time = 300 + HHMMSS_to_sec(time_str=start_time)
    print("...start time (s): ", start_time)
    print("...end time (s): ", end_time)
    # Trim the video
    print("...start trimming")
    output_file = convert_and_trim_video(input_file.name, output_file, start_time, end_time)
    if not output_file:
        print("...trimming failed due to FFMPEG error")
        return None
    print("...finished trimming")
    return output_file

def convert_video_helper(input_file, output_file, output_format):
    """Convert a whole video (no trimming) and report the outcome on stdout.

    Args:
        input_file: Object whose ``.name`` attribute is the source video path.
        output_file: Destination path for the converted video.
        output_format: Accepted for signature compatibility; not used here.

    Returns:
        The value returned by ``convert_and_trim_video`` when it is truthy,
        otherwise ``None`` (treated as an FFMPEG failure).
    """
    converted_path = convert_and_trim_video(input_file.name, output_file)
    if converted_path:
        print("...finished converting")
        return converted_path
    print("...converting failed due to FFMPEG error") 
    return None

def convert_transcript_helper(input_transcript, output_transcript):
    """Parse a spreadsheet transcript and write it out as an ELAN tsv.

    Args:
        input_transcript: Path of the spreadsheet transcript to read.
        output_transcript: Path the tsv should be written to.

    Returns:
        Whatever ``table_to_ELAN_tsv`` returns for the written file.
    """
    parsed_table = xlsx_to_table(xl_file=input_transcript)
    print("...parsed transcript to table")
    tsv_result = table_to_ELAN_tsv(parsed_table, output_transcript)
    print("...finished converting transcript")
    return tsv_result



def trim_video_vtr(input_file, output_format):
    """Cut a randomly positioned 10-minute clip out of an uploaded video.

    The start second is drawn from 300..900 (inclusive) and the clip runs for
    600 seconds from there.  The chosen start and end seconds are written, one
    per line, to a text file next to the clip.

    Args:
        input_file: Uploaded file object; only its ``.name`` path is used.
        output_format: Extension for the trimmed file (for example ``"mp4"``).

    Returns:
        A ``(trimmed_file_path, time_file_path)`` tuple.

    Raises:
        gr.Error: If any step fails.  ``gr.Error`` only reaches the user when
            it is raised; merely constructing it (as before) does nothing, and
            returning a single error string did not match the two outputs.
    """
    print(f"BEGIN TASK: trimming {input_file} to 10-minute interval with random start time")
    try:
        # randomly select start time
        start_time = random.randint(300, 900)
        end_time = start_time + 600  # since 10 minutes
        audio_base_name = input_file.name.split("/")[-1].split(".")[-2]
        print("...audio_base_name: ", audio_base_name)

        # set output file
        insert_string = f"start{start_time}_end{end_time}"
        output_folder = f"{os.getcwd()}/results/"
        output_file = set_output_file(input_file, output_format, output_folder, insert_string)

        # Trim the video
        output_file = trim_video_helper(input_file, output_file, start_time, end_time)
        if not output_file:
            raise RuntimeError("FFMPEG failed to trim the video.")

        # Write the start/end record only once the clip exists, so a failed
        # trim does not leave a stray file that nothing ever cleans up.
        time_file = f"{os.getcwd()}/results/{audio_base_name}_start_end_time.txt"
        print("time_file: ", time_file)
        with open(time_file, "w") as f:
            f.write(f"{start_time}\n")
            f.write(f"{end_time}\n")

        # Clean up in the background so this request can return the files first.
        print("Done trimming. Deleting files...")
        path_to_delete = [input_file.name, output_file, time_file]
        threading.Thread(target=delete_files, args=(path_to_delete,)).start()

        return output_file, time_file
    except Exception as e:
        raise gr.Error(f"Error: {e}") from e


def trim_video_wt(input_file, input_transcript, output_format, start_time, end_time):
    """Trim a video and convert its spreadsheet transcript to an ELAN tsv.

    Args:
        input_file: Uploaded video object; only its ``.name`` path is used.
        input_transcript: Uploaded transcript object; only ``.name`` is used.
        output_format: Extension for the trimmed video.
        start_time: Start bound as entered by the user (may be blank).
        end_time: End bound as entered by the user (may be blank).

    Returns:
        A ``(trimmed_video_path, converted_transcript_path)`` tuple.

    Raises:
        gr.Error: If any step fails.  The error is raised (not just
            constructed) so the UI shows it, instead of returning a single
            string where two output files are expected.
    """
    print(f"BEGIN TASK: trimming {input_file} with transcript {input_transcript} from {start_time} to {end_time}")
    try:
        # set output files
        output_folder = f"{os.getcwd()}/results/"
        output_file = set_output_file(input_file, output_format, output_folder)
        output_transcript = set_output_file(input_transcript, "tsv", output_folder)

        # Trim the video
        output_file = trim_video_helper(input_file, output_file, start_time, end_time)
        if not output_file:
            raise RuntimeError("FFMPEG failed to trim the video.")

        # convert transcript
        output_transcript = convert_transcript_helper(input_transcript.name, output_transcript)

        # Clean up uploads and outputs in the background so this request can
        # return the files first.
        print("Done trimming. Deleting files...")
        path_to_delete = [input_file.name, input_transcript.name, output_file, output_transcript]
        threading.Thread(target=delete_files, args=(path_to_delete,)).start()

        return output_file, output_transcript
    except Exception as e:
        raise gr.Error(f"Error: {e}") from e

def trim_video(input_file, output_format, start_time, end_time):
    """Trim an uploaded video to the requested interval.

    Args:
        input_file: Uploaded video object; only its ``.name`` path is used.
        output_format: Extension for the trimmed video.
        start_time: Start bound as entered by the user (may be blank).
        end_time: End bound as entered by the user (may be blank).

    Returns:
        Path of the trimmed video.

    Raises:
        gr.Error: If any step fails.  The error is raised so the UI displays
            it; previously it was only constructed and an error string was
            returned into a file output.
    """
    print(f"\nBEGIN TASK: trimming {input_file} from {start_time} to {end_time}")
    try:
        # Set output file
        output_folder = f"{os.getcwd()}/results/"
        output_file = set_output_file(input_file, output_format, output_folder)

        # Trim the video
        output_file = trim_video_helper(input_file, output_file, start_time, end_time)
        if not output_file:
            raise RuntimeError("FFMPEG failed to trim the video.")

        # Clean up in the background so this request can return the file first.
        print("Done trimming. Deleting files...")
        path_to_delete = [input_file.name, output_file]
        threading.Thread(target=delete_files, args=(path_to_delete,)).start()

        return output_file
    except Exception as e:
        raise gr.Error(f"Error: {e}") from e

def convert_video(input_file, output_format):
    """Convert an uploaded video to another container/format.

    Args:
        input_file: Uploaded video object; only its ``.name`` path is used.
        output_format: Extension for the converted video.

    Returns:
        Path of the converted video.

    Raises:
        gr.Error: If any step fails.  The error is raised so the UI displays
            it; previously it was only constructed and an error string was
            returned into a file output.
    """
    print(f"\nBEGIN TASK: converting {input_file} to {output_format}")
    try:
        # Set output file
        output_folder = f"{os.getcwd()}/results/"
        output_file = set_output_file(input_file, output_format, output_folder,
                                      insert_string='converted')
        # Convert video
        output_file = convert_video_helper(input_file, output_file, output_format)
        if not output_file:
            raise RuntimeError("FFMPEG failed to convert the video.")
        print(f"...created output file: {output_file}")

        # Clean up in the background so this request can return the file first.
        print("Done converting. Deleting files...")
        path_to_delete = [input_file.name, output_file]
        threading.Thread(target=delete_files, args=(path_to_delete,)).start()

        return output_file
    except Exception as e:
        raise gr.Error(f"Error: {e}") from e

def delete_files(output_filepath_list, trans_log_filepath_list=None, global_log_filepath=None, delay=None):
    """Delete output files, per-transcript logs and the global log.

    This definition rebinds the module-level name ``delete_files``, so it is
    also what the video helpers reach when they start a cleanup thread with a
    single list of paths.  With three required parameters those threads died
    with ``TypeError``; the extra parameters are therefore optional.

    Args:
        output_filepath_list: Paths to delete (may be empty or ``None``).
        trans_log_filepath_list: Optional list of log paths to delete.
        global_log_filepath: Optional single log path to delete.
        delay: Seconds to wait before deleting.  When omitted, a call that
            passes only the first list waits 300 seconds (the behaviour the
            single-list callers were written against) and a call that passes
            the log arguments deletes immediately, as before.

    Paths that do not exist are skipped silently.
    """
    single_list_call = trans_log_filepath_list is None and global_log_filepath is None
    if delay is None:
        delay = 300 if single_list_call else 0
    if delay:
        time.sleep(delay)

    doomed_paths = list(output_filepath_list or [])
    doomed_paths.extend(trans_log_filepath_list or [])
    if global_log_filepath:
        doomed_paths.append(global_log_filepath)

    for doomed_path in doomed_paths:
        try:
            os.remove(doomed_path)
        except FileNotFoundError:
            pass
    print("Files deleted")

def delete_files_thread(output_filepath_list, trans_log_filepath_list, global_log_filepath):
    """Thread target: pause 20 seconds, then hand all paths to ``delete_files``.

    Args:
        output_filepath_list: Output file paths to remove.
        trans_log_filepath_list: Per-transcript log paths to remove.
        global_log_filepath: Global log path to remove.
    """
    print("Thread started")
    cleanup_args = (output_filepath_list, trans_log_filepath_list, global_log_filepath)
    time.sleep(20)
    delete_files(*cleanup_args)

def convert_xlsx_to_TMxlsx(input_file_list):
    """Run the TM transcript conversion on the uploaded files and schedule cleanup.

    Args:
        input_file_list: Uploaded files; each item's ``.name`` is its path.

    Returns:
        ``(output_paths, transcript_log_paths, global_log_path, error_text)``;
        ``error_text`` is ``"No errors found."`` when the converter reports
        nothing.
    """
    uploaded_paths = [uploaded.name for uploaded in input_file_list]
    conversion = convert_transcript_for_TM(file_list=uploaded_paths)
    output_filepath_list, trans_log_filepath_list, error_check, global_transfer_log_path = conversion
    error_check = error_check or "No errors found."

    # Remove the generated files in the background after a short delay.
    cleanup_args = (output_filepath_list, trans_log_filepath_list, global_transfer_log_path)
    threading.Thread(target=delete_files_thread, args=cleanup_args).start()

    return output_filepath_list, trans_log_filepath_list, global_transfer_log_path, error_check

def convert_for_annotation(input_file_list, annotation_scheme):
    """Convert each transcript for annotation under the chosen scheme.

    Args:
        input_file_list: Transcript paths to convert, processed in order.
        annotation_scheme: Scheme value forwarded unchanged to
            ``convert_transcript_for_annotation``.

    Returns:
        List with one converter result per input, in input order.
    """
    def _convert_one(transcript_path):
        print("start converting transcript")    
        converted = convert_transcript_for_annotation(file=transcript_path, annotation_scheme=annotation_scheme)
        print("finished converting transcript to xlsx for annotation")
        return converted

    return [_convert_one(transcript_path) for transcript_path in input_file_list]


def convert_xlsx_to_ELANtsv(input_file_list):
    """Convert spreadsheet transcripts to ELAN-compatible tsv files.

    Args:
        input_file_list: Transcript paths to convert (may be empty or ``None``).

    Returns:
        List with one ``table_to_ELAN_tsv`` result per input, in input order.
    """
    if not input_file_list:
        return []

    # ``old_xlsx_to_table`` is not among the names this module imports from
    # ``utils``, so calling it directly raised NameError.
    # NOTE(review): assumes ``utils`` may still export the legacy parser; when
    # it does not, fall back to ``xlsx_to_table``, which the module does import
    # and already uses for the same xlsx -> table -> ELAN tsv path. Confirm
    # which parser is intended for this tab.
    try:
        from utils import old_xlsx_to_table as to_table
    except ImportError:
        to_table = xlsx_to_table

    output_files = []
    for input_transcript in input_file_list:
        print("start converting transcript")
        table = to_table(xl_file=input_transcript)
        print("finished converting transcript to table")
        # Swap only the real extension; str.replace would touch any ".xlsx"
        # inside the path and leave other extensions mapped onto the input.
        output_transcript = os.path.splitext(input_transcript)[0] + '.tsv'
        output_file = table_to_ELAN_tsv(table, output_transcript)
        print("saved table to tsv")
        output_files.append(output_file)
    return output_files

def sort_and_merge(input_file_list, merge_on_ellipsis=False):
    """Sort transcripts by ``start_sec`` and optionally merge on ellipsis.

    Each supported input is loaded into a table, sorted, optionally passed
    through ``merge_ellipsis``, and written as a csv next to the input.

    Output naming (always with a ``.csv`` extension):
        * no merge: ``<stem>_sorted.csv``
        * merge:    ``seg_labels``/``seglabels`` in the stem becomes
          ``utt_labels``; otherwise ``utt_labels_<stem>.csv``

    The name is built from the input's stem rather than by string
    replacement, so ``.xls``, ``.tsv`` and ``.txt`` inputs are no longer
    overwritten by (or given the extension of) their own output.

    Args:
        input_file_list: Transcript paths (may be empty or ``None``).
        merge_on_ellipsis: Whether to merge segments with ``merge_ellipsis``.

    Returns:
        List of written csv paths; unsupported inputs are skipped.
    """
    output_files = []
    for input_transcript in input_file_list or []:
        root, ext = os.path.splitext(input_transcript)
        ext = ext.lower()
        if ext in ('.xlsx', '.xls'):
            print("...input is xlsx")
            table = xlsx_to_table(xl_file=input_transcript)
        elif ext in ('.csv', '.txt', '.tsv'):
            print("...input is csv, txt, or tsv")
            table = parse_label_csv(input_transcript)
        else:
            print(f"...input {input_transcript} is not a supported file type")
            continue

        table = table.sort_values(by=['start_sec'])
        # Work on the file stem only so directory names are never rewritten.
        out_dir, stem = os.path.split(root)
        if merge_on_ellipsis:
            table = merge_ellipsis(table)
            print("finished sorting and merging segments")
            if 'seg_labels' in stem:
                out_stem = stem.replace('seg_labels', 'utt_labels')
            elif 'seglabels' in stem:
                out_stem = stem.replace('seglabels', 'utt_labels')
            else:
                out_stem = f"utt_labels_{stem}"
        else:
            print("finished sorting segments")
            out_stem = f"{stem}_sorted"
        output_file = os.path.join(out_dir, f"{out_stem}.csv")

        # save to csv
        table.to_csv(output_file, index=False)
        print("saved processed transcript to csv")
        output_files.append(output_file)
    return output_files

#TODO: support sort and merge for XLSX output if this is needed

def convert_ELANtsv_to_CSV(input_file_list, merge_on_ellipsis=False):
    """Convert ELAN tab-separated exports to label csv files.

    Args:
        input_file_list: Paths of ELAN exports (may be empty or ``None``).
        merge_on_ellipsis: Forwarded to ``ELAN_to_labels_csv`` as
            ``merge_segments``.

    Returns:
        List with one ``ELAN_to_labels_csv`` result per input, in input order.
    """
    output_files = []
    for input_transcript in input_file_list or []:
        print("start converting transcript")
        # The converter is only given the input path and merge flag, so the
        # previously computed (and unused) output path has been dropped.
        output_file = ELAN_to_labels_csv(input_transcript, merge_segments=merge_on_ellipsis)
        print("finish converting transcript")
        output_files.append(output_file)
    return output_files

# TODO: XLSX to csv (seg_labels or utt_labels)
def convert_xlsx_to_csv(input_file_list, merge_on_ellipsis=False):
    """Convert spreadsheet transcripts to label csv files.

    Args:
        input_file_list: Transcript paths (may be empty or ``None``).
        merge_on_ellipsis: Forwarded to the converter as ``merge_segments``.

    Returns:
        List with one converter result per input, in input order.

    Raises:
        ImportError: If ``utils`` does not provide ``old_xlsx_to_labels_csv``.
    """
    if not input_file_list:
        return []

    # This name is not among those the module imports from ``utils``, so the
    # call below used to fail with NameError on the first file.
    # NOTE(review): assumes ``utils`` exports ``old_xlsx_to_labels_csv`` — confirm.
    from utils import old_xlsx_to_labels_csv

    output_files = []
    for input_transcript in input_file_list:
        output_file = old_xlsx_to_labels_csv(input_transcript, merge_segments=merge_on_ellipsis)
        output_files.append(output_file)
    return output_files
    
def deidentify_transcripts(input_file_list, who='student'):
    """Replace speaker labels in each transcript and save a ``_deidentified`` copy.

    Files are read according to their extension (Excel for ``.xlsx``/``.xls``,
    comma-separated for ``.csv``, tab-separated for ``.tsv``/``.txt``), passed
    through ``deidentify_speaker`` and written back in the same format as
    ``<name>_deidentified<ext>``.

    Args:
        input_file_list: Transcript paths (may be empty or ``None``).
        who: Which speakers to deidentify; forwarded to ``deidentify_speaker``.
            A falsy value (no choice made) falls back to ``'student'``.

    Returns:
        List of written file paths.  Unsupported files, and files for which
        ``deidentify_speaker`` raises ``ValueError``, are skipped with a
        warning.
    """
    # pandas is not imported at module level, so ``pd`` was an undefined name.
    import pandas as pd

    who = who or 'student'
    output_files = []
    for file in input_file_list or []:
        basename = os.path.basename(file)
        root, suffix = os.path.splitext(file)
        kind = suffix.lower()
        if kind in ('.xlsx', '.xls'):
            df = pd.read_excel(file)
        elif kind == '.csv':
            df = pd.read_csv(file)
        elif kind in ('.tsv', '.txt'):
            df = pd.read_csv(file, sep='\t')
        else:
            gr.Warning("File type not supported (must be .xlsx, .xls, .csv, .tsv, or .txt)")
            # Skip the file: falling through would use an unbound ``df`` or
            # silently reprocess the previous file's table.
            continue
        try:
            df = deidentify_speaker(df, who=who)
        except ValueError as e:
            gr.Warning(f"{e}: {basename} ")
            continue
        # Build the name from the split path so only the real extension moves.
        output_file = f"{root}_deidentified{suffix}"
        if kind in ('.xlsx', '.xls'):
            df.to_excel(output_file, index=False)
        elif kind == '.csv':
            df.to_csv(output_file, index=False)
        else:
            df.to_csv(output_file, sep='\t', index=False)
        output_files.append(output_file)
    return output_files



###### GRADIO INTERFACE ######

# Video trimmer tab: upload, output format, optional start/end bounds.
input_file = gr.File(label="Select video file")
output_format = gr.Dropdown(
    label="Select output format",
    choices=["mkv", "MOV", "mp4", "wav"],
    value="mp4",
)
start_time = gr.Textbox(
    label="Start time (in seconds or HH:MM:SS). Leave blank to start at beginning."
)
end_time = gr.Textbox(
    label="End time (in seconds or HH:MM:SS). Leave blank to trim a 5-minute interval since start."
)
output_file = gr.File(label="Download trimmed file")
interface = gr.Interface(
    fn=trim_video,
    inputs=[input_file, output_format, start_time, end_time],
    outputs=output_file,
    title="Video Trimmer",
    description="Trim a video file to a specific time interval. Please wait for the file to upload before clicking the 'Submit' button.",
    flagging_mode="never",
)

# Video converter tab: upload and target format in, converted file out.
input_file_c = gr.File(label="Select video file")
output_format_c = gr.Dropdown(
    label="Select output format",
    choices=["mkv", "MOV", "mp4", "wav"],
    value="mp4",
)
output_file_c = gr.File(label="Download converted file")

interface_c = gr.Interface(
    fn=convert_video,
    inputs=[input_file_c, output_format_c],
    outputs=output_file_c,
    title="Video Converter",
    description="Convert a video file to a different format. Please wait for the file to upload before clicking the 'Submit' button.",
    flagging_mode="never",
)




# gr components for video trimmer with random start
# Inputs: the uploaded video and the desired output format.
input_file_vtr = gr.File(label="Select video file")
output_format_vtr = gr.Dropdown(choices=["mkv", "MOV", "mp4", "wav"], label="Select output format", value="mp4", )
# Outputs: the trimmed clip plus the text file recording the chosen start/end
# seconds (trim_video_vtr returns them in this order).
output_file_vtr = gr.File(label="Download trimmed file")
log_file_vtr = gr.File(label="Download log file")
# The description is a single string literal continued with backslashes, so the
# indentation on its continuation lines is part of the displayed text.
interface_vtr = gr.Interface(fn=trim_video_vtr, inputs=[input_file_vtr, output_format_vtr], 
                            outputs=[output_file_vtr, log_file_vtr], flagging_mode="never", 
                            title="Video Trimmer with Random Start Time", 
                            description="This app trims a 10-minute interval from a video file. \
                                        The start time is randomly selected between 5 and 15 minutes. \
                                        The log file contains the start time and end time of the trimmed video.",
                                        )

# gr components for video trimmer with transcript
# Inputs: video, its transcript, output format, and optional start/end bounds
# (passed to trim_video_wt in this order).
input_file_wt = gr.File(label="Select video file")
input_transcript_wt = gr.File(label="Transcript of the video")
output_format_wt = gr.Dropdown(choices=["mkv", "MOV", "mp4", "wav"], label="Select output format", value="mp4", )
start_time_wt = gr.Textbox(label="Start time (in seconds or HH:MM:SS). Leave blank to start at beginning.")
end_time_wt = gr.Textbox(label="End time (in seconds or HH:MM:SS). Leave blank to trim a  5-minute interval since start.")
# Outputs: the trimmed video and the converted transcript.
output_file_wt = gr.File(label="Download trimmed file")
output_transcript_wt = gr.File(label="Download trimmed transcript")
# The description is one string literal continued with backslashes; the leading
# whitespace on each continuation line is part of the displayed text.
interface_wt = gr.Interface(fn=trim_video_wt, inputs=[input_file_wt, input_transcript_wt, output_format_wt, start_time_wt, end_time_wt], 
                            outputs=[output_file_wt, output_transcript_wt], title="Video Trimmer with transcript converted", flagging_mode="never",
                            description="Trim a video file to a specific time interval with transcript format converted. Please wait for the file to upload before clicking the 'Submit' button. \n\
                                        This transcript should be .xlsx files from Happyscribe (an external transcription service). The columns in the file are as follows: \n\
                                        `#`: an integer index over utterances. \n\
                                        `Timecode`: a string in the format `HH:MM:SS:ss - HH:MM:SS:ss` representing the start and end time of the utterance. \n\
                                        `Duration`: a string in the format `HH:MM:SS:ss` representing the duration of the utterance. \n\
                                        `Speaker`: a string representing the speaker of the utterance. \n\
                                        `Dialogue`: a string representing the text of the utterance. \n\
                                        `Annotations`: a string that may be blank, representing any annotations for the utterance. \n\
                                        `Error Type`: a string that may be blank, representing any errors in the transcription of the utterance. ")


#### TRANSCRIPT COMPONENTS ####
# TM converter tab: transcripts in; converted files, logs and an error summary out.
input_xlsx = gr.Files(
    label="Input XLSX or CSV transcript file",
    type="filepath",
    file_types=[".xlsx", ".csv"],
)
output_xlsx_tm = gr.Files(
    label="Output XLSX file",
    type="filepath",
    file_types=[".xlsx"],
)
process_log_tm = gr.File(
    label="Process Log",
    type="filepath",
    file_types=[".log", ".txt"],
)
global_transfer_log_tm = gr.File(
    label="Global transfer log",
    type="filepath",
    file_types=[".log", ".txt"],
)
error_check_tm = gr.Textbox(label="Error Check", type="text")
interface_tm = gr.Interface(
    fn=convert_xlsx_to_TMxlsx,
    inputs=input_xlsx,
    outputs=[output_xlsx_tm, process_log_tm, global_transfer_log_tm, error_check_tm],
    title="transcript-->XLSX+TM",
    description="Converts XLSX or csv transcript to XLSX+TM transcript with prefilled dropdown for talkmoves",
    live=False,
    flagging_mode="never",
)

# XLSX -> ELAN tab: spreadsheet transcripts in, ELAN-compatible tsv files out.
input_x2e = gr.Files(
    label="Input XLSX or CSV transcript file",
    type="filepath",
    file_types=[".xlsx", ".csv"],
)
output_x2e = gr.Files(
    label="Output ELAN-compatible tsv file",
    type="filepath",
    file_types=[".tsv", '.txt'],
)
# Disabled extra outputs, kept for reference:
# process_log_x2e = gr.File(label="Process Log", type="filepath", file_types=[".log", ".txt"] )
# global_transfer_log_x2e = gr.File(label="Global transfer log", type="filepath", file_types=[".log", ".txt"])
# error_check_x2e = gr.Textbox(label="Error Check", type="text")
interface_x2e = gr.Interface(
    fn=convert_xlsx_to_ELANtsv,  # TODO: swap out for correct fn
    inputs=input_x2e,
    outputs=output_x2e,
    title="XLSX-->ELAN",
    description="Converts XLSX transcript to ELAN-compatible tsv file",
    live=False,
    flagging_mode="never",
)

# ELAN -> CSV tab: ELAN exports plus a merge toggle in, csv files out.
input_e2c = gr.Files(
    label="Input ELAN-compatible tsv file",
    type="filepath",
    file_types=[".tsv", '.txt'],
)
merge_e2c = gr.Checkbox(label="Merge segments on ellipsis?")
output_e2c = gr.Files(
    label="Output CSV file",
    type="filepath",
    file_types=[".csv"],
)
interface_e2c = gr.Interface(
    fn=convert_ELANtsv_to_CSV,  # TODO: swap out for correct fn
    inputs=[input_e2c, merge_e2c],
    outputs=[output_e2c],
    title="ELAN-->CSV",
    description="Converts ELAN-exported file (.txt or .tsv, tab separated values) to standardized CSV file with rows sorted by segment start time. Optionally merges segments on ellipsis.",
    live=False,
    flagging_mode="never",
)

# XLSX -> CSV tab: spreadsheet transcripts plus a merge toggle in, csv files out.
input_x2c = gr.Files(
    label="Input XLSX file",
    type="filepath",
    file_types=[".xlsx", ".csv"],
)
merge_x2c = gr.Checkbox(label="Merge segments on ellipsis?")
output_x2c = gr.Files(
    label="Output CSV file",
    type="filepath",
    file_types=[".csv"],
)
interface_x2c = gr.Interface(
    fn=convert_xlsx_to_csv,  # TODO: swap out for correct fn
    inputs=[input_x2c, merge_x2c],
    outputs=[output_x2c],
    title="XLSX-->CSV",
    description="Converts old version XLSX transcript (with a single Timecode column) to standardized CSV file with rows sorted by segment start time. Optionally merges segments on ellipsis.",
    live=False,
    flagging_mode="never",
)

# Annotation tab: csv transcripts plus an annotation scheme in, xlsx files out.
input_c2a = gr.Files(
    label="Input CSV file",
    type="filepath",
    file_types=[".csv"],
)
annotation_scheme_c2a = gr.Radio(
    label="Annotation Scheme",
    choices=[("CPS", "CPS"), ("TalkMove", "TM"), ("None", None)],
)
output_c2a = gr.Files(
    label="Output XLSX file",
    type="filepath",
    file_types=[".xlsx"],
)
interface_c2a = gr.Interface(
    fn=convert_for_annotation,  # TODO: swap out for correct fn
    inputs=[input_c2a, annotation_scheme_c2a],
    outputs=[output_c2a],
    title="CSV-->XLSX+annotation",
    description="Converts CSV file to XLSX file for annotation (added columns for CPS or TM or None)",
    live=False,
    flagging_mode="never",
    # submit_btn="Convert"
)

# Deidentification tab: transcripts plus a "who" choice in, deidentified copies out.
input_di = gr.Files(
    label="Input transcript file",
    type="filepath",
    file_types=[".xlsx", ".xls", ".csv", ".tsv", ".txt"],
)
who_di = gr.Radio(
    label="Who to deidentify",
    choices=[("student", "student"), ("all", "all")],
)
output_di = gr.Files(
    label="Output deidentified transcript file",
    type="filepath",
    file_types=[".xlsx", ".xls", ".csv", ".tsv", ".txt"],
)
interface_di = gr.Interface(
    fn=deidentify_transcripts,
    inputs=[input_di, who_di],
    outputs=[output_di],
    title="Deidentify",
    description="Deidentify speaker labels in a transcript. Compatible with .xlsx, .xls, .csv, .tsv, .txt files with a column containing speaker labels. Will not work if speaker column is missing a header. Speaker names or IDs will be replaced with a deidentified label numbered in order of appearance. Choose whether to deidentify just students or all speakers.",
    live=False,
    flagging_mode="never",
)


# Sort+Merge tab: transcripts plus a merge toggle in, sorted/merged csv files out.
input_file_s = gr.Files(
    label="Select transcript files",
    type="filepath",
    file_types=[".csv", ".xlsx", ".xls", ".tsv", ".txt"],
)
merge_s = gr.Checkbox(label="Merge segments on ellipsis?")
output_file_s = gr.Files(
    label="Download sorted/merged transcript as .csv",
    type="filepath",
    file_types=[".csv"],
)
interface_s = gr.Interface(
    fn=sort_and_merge,
    inputs=[input_file_s, merge_s],
    outputs=output_file_s,
    title="Sort+Merge",
    description="Sort a transcript file by time, and optionally merge partial utterances on ellipsis. Output is a .csv file in standard format.",
    live=False,
    flagging_mode="never",
)



######## LAUNCH APP ########
# Combine every interface into one tabbed app. The two lists are parallel:
# the n-th label below names the tab for the n-th interface.
# NOTE(review): the tab labels look mis-encoded (emoji and arrow characters
# that appear to have been decoded with the wrong charset) — confirm the
# file's encoding and restore the intended characters.
demo = gr.TabbedInterface(
    [
    interface_e2c, 
    interface_x2e, 
    interface_x2c,
    interface_c2a,
    interface_tm, 
    interface_di,
    interface_s,
    interface_c, 
    interface,  
    interface_vtr, 
    interface_wt
    ], 
    [
    "๐Ÿ“โ†’๐Ÿ—’๏ธ ELANโ†’CSV",
    "โŽโ†’๐Ÿ“ XLSXโ†’ELAN",
    "โŽโ†’๐Ÿ—’๏ธ XLSXโ†’CSV", 
    "๐Ÿ—’๏ธโ†’โŽโ˜ท CSVโ†’XLSX",
    "๐Ÿ—’๏ธโ†’โŽ๐Ÿ’ฌ CSVโ†’XLSX+TM",
    "๐Ÿ—’๏ธโ†’๐Ÿฅท๐Ÿป Deidentify",
    "๐Ÿ—’๏ธ๐Ÿ”€๐Ÿ—’๏ธ Sort+Merge",
    "๐ŸŽฅโ†’๐Ÿ“ฝ Convert",
    "๐ŸŽฅโœ‚๏ธ Trim",
    "๐ŸŽฅโœ‚๏ธ๐ŸŽฒ Trim Random",
    "๐ŸŽฅ๐Ÿ—’๏ธโœ‚๏ธ Trim + Transcript"
    ]
)

# Runs at import time: listens on all network interfaces, port 7860, and
# asks Gradio to display errors (show_error=True).
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)