| | import os |
| | from preprocess import process_audio_file |
| | from pause import annotate_pauses |
| | from repetition import annotate_repetitions |
| | from syllable import annotate_syllables |
| | from fillerword import annotate_fillerwords |
| | from mispronunciation import annotate_mispronunciation |
| |
|
| | from feature_extraction import feature_extraction |
| |
|
| |
|
| | from annotation import annotate_transcript |
| |
|
def main(
    input_audio_file="/home/easgrad/shuweiho/workspace/volen/SATE_docker_test/input/454.mp3",
    device="cuda",
    pause_threshold=0.3,
    num_speakers=2,
):
    """Run the annotation pipeline on a single audio file.

    The audio file is first handed to ``process_audio_file``, which returns a
    session id.  That id is then passed, in order, to the pause, repetition,
    syllable and filler-word annotators, and finally to
    ``annotate_transcript``, whose return value is printed.

    All parameters default to the values that used to be hard-coded in the
    body, so calling ``main()`` with no arguments behaves as before.

    Args:
        input_audio_file: Path of the audio file to process.  The default is
            a machine-specific absolute path; pass your own path when running
            elsewhere.
        device: Forwarded to ``process_audio_file`` as ``device``
            (presumably a compute device name such as ``"cuda"`` or ``"cpu"``
            -- confirm against ``preprocess.process_audio_file``).
        pause_threshold: Forwarded to ``annotate_pauses`` as its second
            argument (presumably a minimum pause duration in seconds --
            confirm against ``pause.annotate_pauses``).
        num_speakers: Forwarded to ``process_audio_file`` as
            ``num_speakers``.
    """
    print("Start init...")

    session_id = process_audio_file(
        input_audio_file, num_speakers=num_speakers, device=device
    )

    # Each annotator receives only the session id (plus the threshold for
    # pauses); the call order of the original script is kept unchanged.
    annotate_pauses(session_id, pause_threshold)
    annotate_repetitions(session_id)
    annotate_syllables(session_id)
    annotate_fillerwords(session_id)

    output_annotation = annotate_transcript(session_id)
    print(f"Done: {output_annotation}")
| |
|
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
| |
|