import whisper
import numpy as np
from pydub import AudioSegment
from deep_translator import GoogleTranslator, detection
import os
# Supported-language map fetched once at import time from deep_translator;
# shape is {lowercase language name: iso code}, e.g. {"english": "en"}.
available_languages = GoogleTranslator().get_supported_languages(as_dict=True)

# Title-cased name -> code ("English" -> "en") and the reverse code -> name map.
formatted_languages = {lang_name.title(): lang_code for lang_name, lang_code in available_languages.items()}
formatted_codes = {lang_code: lang_name.title() for lang_name, lang_code in available_languages.items()}

# API key for deep_translator's detection service; None when the env var is unset.
lang_detect_key = os.getenv("detect_language_api_key")
def audio_to_numpy(audio_file_input):
    """Decode an audio file into a mono 16 kHz float32 waveform.

    The integer PCM samples are divided by the maximum value of their
    sample type, scaling them to roughly [-1.0, 1.0] — the array form
    that Whisper's transcribe() consumes.
    """
    segment = AudioSegment.from_file(audio_file_input).set_channels(1).set_frame_rate(16000)
    raw_samples = segment.get_array_of_samples()
    peak = np.iinfo(segment.array_type).max
    return np.array(raw_samples, dtype=np.float32) / peak
def src_audio_to_eng_translator(audio_file_input, model_size = "turbo", target_lang = "English"):
    """Transcribe an audio file with Whisper, detect its language, and translate it.

    Parameters:
        audio_file_input: path or file-like object accepted by pydub's
            AudioSegment.from_file.
        model_size: Whisper model name passed to whisper.load_model
            (default "turbo").
        target_lang: title-cased target language name (default "English");
            unknown names fall back to the 'en' code.

    Returns:
        (input_text, translated_text, src_lang) — the raw transcription,
        its translation, and the detected source language name (or the
        fallback message when the detected code is not in the table).
    """
    audio_data = audio_to_numpy(audio_file_input)
    model = whisper.load_model(model_size)
    result = model.transcribe(audio_data)
    input_text = result["text"]

    # single_detection returns a language *code* (e.g. "en"), so the readable
    # name must come from the code->name table. BUG FIX: the original looked
    # the code up in formatted_languages (name -> code), which could never
    # match and always yielded the fallback string.
    src_lang_code = detection.single_detection(input_text, api_key = lang_detect_key)
    src_lang = formatted_codes.get(src_lang_code, 'Source language not detected')

    # Map the requested language name to its code; default to English.
    target_lang_code = formatted_languages.get(target_lang, 'en')
    translated_text = GoogleTranslator(source='auto', target=target_lang_code).translate(input_text)
    return input_text, translated_text, src_lang