Update app.py
Browse files
app.py
CHANGED
|
@@ -14,15 +14,17 @@ import gradio as gr
|
|
| 14 |
import concurrent.futures
import assemblyai as aai

# SECURITY FIX: an AssemblyAI API key was hard-coded here. Any key committed
# to source control is public and must be revoked/rotated immediately.
# Load it from the environment like the other service keys below.
aai.settings.api_key = os.getenv('ASSEMBLYAI_API_KEY')
transcriber = aai.Transcriber()

# Service credentials are read from the environment, never hard-coded.
AI71_API_KEY = os.getenv('AI71_API_KEY')
XI_API_KEY = os.getenv('ELEVEN_LABS_API_KEY')
client = ElevenLabs(api_key=XI_API_KEY)

# Tokenizer for the M2M100 multilingual translation model.
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")

# transcriber = whisper.load_model("turbo")
|
| 27 |
|
| 28 |
language_codes = {"English":"en", "Hindi":"hi", "Portuguese":"pt", "Chinese":"zh", "Spanish":"es",
|
|
@@ -162,10 +164,13 @@ def summarize(meeting_texts=meeting_texts):
|
|
| 162 |
# Placeholder function for speech to text conversion
def speech_to_text(video):
    """Transcribe *video* and return the transcript text.

    Delegates to the module-level AssemblyAI ``transcriber``; ``video`` is
    passed straight to ``Transcriber.transcribe``, so it is presumably a local
    file path or URL the SDK accepts — TODO confirm against the caller.
    """
    print('Started transcribing')
    # Blocking call: returns only once AssemblyAI has finished the job.
    transcript = transcriber.transcribe(video).text
    print('transcript:', transcript)
    return transcript
|
| 171 |
|
|
@@ -173,7 +178,7 @@ def speech_to_text(video):
|
|
| 173 |
def translate_text(text, source_language,target_language):
|
| 174 |
tokenizer.src_lang = source_language
|
| 175 |
encoded_ln = tokenizer(text, return_tensors="pt")
|
| 176 |
-
generated_tokens =
|
| 177 |
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
| 178 |
print('translated_text:', translated_text)
|
| 179 |
return translated_text
|
|
|
|
| 14 |
import concurrent.futures
|
| 15 |
import assemblyai as aai
|
| 16 |
|
| 17 |
+
# aai.settings.api_key = "d5b107f34d534b4ebdfbd869f8408f92"
|
| 18 |
+
# transcriber = aai.Transcriber()
|
| 19 |
|
| 20 |
AI71_API_KEY = os.getenv('AI71_API_KEY')
|
| 21 |
XI_API_KEY = os.getenv('ELEVEN_LABS_API_KEY')
|
| 22 |
client = ElevenLabs(api_key=XI_API_KEY)
|
| 23 |
|
| 24 |
+
translator = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
|
| 25 |
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
|
| 26 |
+
|
| 27 |
+
transcriber = gr.load("models/openai/whisper-large-v3-turbo")
|
| 28 |
# transcriber = whisper.load_model("turbo")
|
| 29 |
|
| 30 |
language_codes = {"English":"en", "Hindi":"hi", "Portuguese":"pt", "Chinese":"zh", "Spanish":"es",
|
|
|
|
| 164 |
# Placeholder function for speech to text conversion
def speech_to_text(video):
    """Transcribe the audio track of *video* and return the transcript text.

    Extracts the audio with pydub, writes it to a temporary WAV file, and
    feeds that file to the module-level ``transcriber`` (a gradio-loaded
    Whisper model).
    """
    import os
    import tempfile

    print('Started transcribing')
    # Assumes the uploaded container is mp4 — TODO confirm with the UI input.
    audio = AudioSegment.from_file(video, format="mp4")
    # Use a unique temp file instead of a fixed 'temp.wav' so concurrent
    # requests cannot clobber each other's audio; clean it up afterwards.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    try:
        tmp.close()
        audio.export(tmp.name, format="wav")
        raw = transcriber(tmp.name)
        # The hosted model appears to return a string such as "{'text': ' ...'}"
        # — TODO confirm. Take the text between the FIRST and LAST quote so a
        # transcript containing apostrophes is not truncated (the previous
        # split("'")[1] stopped at the first internal apostrophe), and fall
        # back to the raw string when no quotes are present.
        if "'" in raw:
            transcript = raw.split("'", 1)[1].rsplit("'", 1)[0].strip()
        else:
            transcript = raw.strip()
    finally:
        os.remove(tmp.name)
    print('transcript:', transcript)
    return transcript
|
| 176 |
|
|
|
|
| 178 |
def translate_text(text, source_language,target_language):
    """Translate *text* from *source_language* to *target_language*.

    Uses the module-level M2M100 ``translator`` model and ``tokenizer``;
    language arguments are M2M100 language codes (e.g. "en", "hi").
    """
    # Tell the tokenizer which language the input is in before encoding.
    tokenizer.src_lang = source_language
    model_inputs = tokenizer(text, return_tensors="pt")
    # Forcing the BOS token steers generation into the target language.
    output_ids = translator.generate(
        **model_inputs,
        forced_bos_token_id=tokenizer.get_lang_id(target_language),
    )
    result = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
    print('translated_text:', result)
    return result
|