Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import json | |
| from faster_whisper import WhisperModel # Assuming you have installed this library | |
def _make_subtitle_line(words):
    """Bundle a non-empty list of word dicts into one subtitle-line dict."""
    return {
        "word": " ".join(item["word"] for item in words),
        "start": words[0]["start"],
        "end": words[-1]["end"],
        "textcontents": words,
    }


def split_text_into_lines(data, max_chars, max_duration, max_gap):
    """Group word-level timestamps into subtitle lines.

    Words are accumulated in order into the current line. A line is closed
    when any of these happens:

    * the pause between the previous word's ``end`` and the next word's
      ``start`` is greater than ``max_gap`` -- the line is closed *before*
      the next word is added, so no line spans the silence;
    * the summed word durations (``end - start``, pauses not counted) in the
      line exceed ``max_duration``;
    * the space-joined text of the line is longer than ``max_chars``.

    The two size limits are soft: the word that crosses the limit stays in
    the line it overflowed, and the next word starts a fresh line.

    Args:
        data: Sequence of dicts, each with ``"word"``, ``"start"`` and
            ``"end"`` keys, in playback order.
        max_chars: Character threshold for the joined line text.
        max_duration: Threshold for the summed word durations of a line, in
            the same unit as ``start``/``end``.
        max_gap: Largest pause between consecutive words that may stay inside
            one line, in the same unit as ``start``/``end``.

    Returns:
        A list of dicts with keys ``"word"`` (joined text), ``"start"``
        (first word's start), ``"end"`` (last word's end) and
        ``"textcontents"`` (the list of word dicts in the line). Empty input
        yields an empty list.
    """
    subtitles = []
    line = []
    line_duration = 0
    line_chars = 0

    for word_data in data:
        start = word_data["start"]
        end = word_data["end"]

        # A long silence belongs between two lines: close the open line
        # first so its end time does not stretch across the pause.
        if line and start - line[-1]["end"] > max_gap:
            subtitles.append(_make_subtitle_line(line))
            line = []
            line_duration = 0
            line_chars = 0

        # Running length of " ".join(...): one separator per word after the
        # first, which avoids re-joining the whole line for every word.
        if line:
            line_chars += 1
        line_chars += len(word_data["word"])
        line.append(word_data)
        line_duration += end - start

        if line_duration > max_duration or line_chars > max_chars:
            subtitles.append(_make_subtitle_line(line))
            line = []
            line_duration = 0
            line_chars = 0

    # Whatever is still open when the words run out is the final line.
    if line:
        subtitles.append(_make_subtitle_line(line))

    return subtitles
# Loaded models keyed by size name, so the weights are read once per process
# instead of on every transcription request.
_WHISPER_MODELS = {}


def _get_whisper_model(model_size):
    """Return the WhisperModel for model_size, constructing it on first use."""
    model = _WHISPER_MODELS.get(model_size)
    if model is None:
        model = WhisperModel(model_size)
        _WHISPER_MODELS[model_size] = model
    return model


def transcribe_audio(audiofilename, max_chars, max_duration, max_gap):
    """Transcribe an audio file and return line-level subtitles.

    Runs the "medium" Whisper model with word timestamps enabled, flattens
    every segment's words into one list of ``{"word", "start", "end"}``
    dicts, and hands that list to ``split_text_into_lines``.

    Args:
        audiofilename: Audio input passed straight to ``model.transcribe``
            (the UI supplies a file path).
        max_chars: Forwarded to ``split_text_into_lines``.
        max_duration: Forwarded to ``split_text_into_lines``.
        max_gap: Forwarded to ``split_text_into_lines``.

    Returns:
        The list of subtitle-line dicts produced by
        ``split_text_into_lines``.
    """
    model = _get_whisper_model("medium")

    # transcribe() hands back a lazy iterable of segments; the decoding work
    # actually runs while the comprehension below consumes it.
    segments, _info = model.transcribe(audiofilename, word_timestamps=True)

    wordlevel_info = [
        {"word": word.word, "start": word.start, "end": word.end}
        for segment in segments
        for word in segment.words
    ]

    return split_text_into_lines(wordlevel_info, max_chars, max_duration, max_gap)
def audio_transcription(audiofile, max_chars, max_duration, max_gap):
    """Transcribe audiofile and return the subtitle lines as a JSON string.

    All four arguments are forwarded unchanged to ``transcribe_audio``; its
    result is serialized with a 4-space indent.
    """
    subtitle_lines = transcribe_audio(audiofile, max_chars, max_duration, max_gap)
    return json.dumps(subtitle_lines, indent=4)
# Web UI: one uploaded audio file plus the three line-splitting thresholds go
# in; the JSON text returned by audio_transcription comes out.
iface = gr.Interface(
    fn=audio_transcription,
    inputs=[
        gr.Audio(sources="upload", type="filepath"),
        gr.Number(label="MaxChars"),
        gr.Number(label="MaxDuration"),
        gr.Number(label="MaxGap"),
    ],
    outputs="text",
    description="Upload an audio file and get its transcription in JSON format.",
)

# Start serving the interface as soon as the script is run.
iface.launch()