Spaces:

Manikeerthan01
/

mini3

Sleeping

App Files Files Community

Manikeerthan01 committed on Aug 29, 2024

Commit

e6e0737

verified ·

1 Parent(s): 014d4b0

Create app.py

Browse files

Files changed (1) hide show

app.py +110 -0

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+from IPython.display import HTML, Javascript
+from google.colab.output import eval_js
+import base64
+import time
+import torch
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+from googletrans import Translator
+from pydub import AudioSegment
+import io
+def record():
+    js = Javascript("""
+    async function recordAudio() {
+        const div = document.createElement('div');
+        const audio = document.createElement('audio');
+        const strtButton = document.createElement('button');
+        const stopButton = document.createElement('button');
+        strtButton.textContent = 'Start Recording';
+        stopButton.textContent = 'Stop Recording';
+        document.body.appendChild(div);
+        div.appendChild(strtButton);
+        div.appendChild(audio);
+        const stream = await navigator.mediaDevices.getUserMedia({audio:true});
+        let recorder = new MediaRecorder(stream);
+        audio.style.display = 'block';
+        audio.srcObject = stream;
+        audio.controls = true;
+        audio.muted = true;
+        await new Promise((resolve) => strtButton.onclick = resolve);
+        strtButton.replaceWith(stopButton);
+        recorder.start();
+        await new Promise((resolve) => stopButton.onclick = resolve);
+        recorder.stop();
+        let recData = await new Promise((resolve) => recorder.ondataavailable = resolve);
+        let arrBuff = await recData.data.arrayBuffer();
+        stream.getAudioTracks()[0].stop();
+        div.remove();
+        let binaryString = '';
+        let bytes = new Uint8Array(arrBuff);
+        bytes.forEach((byte) => { binaryString += String.fromCharCode(byte); });
+        const url = URL.createObjectURL(recData.data);
+        const player = document.createElement('audio');
+        player.controls = true;
+        player.src = url;
+        document.body.appendChild(player);
+        return btoa(binaryString);
+    }""")
+    display(js)
+    output = eval_js('recordAudio({})')
+    # Generate a unique filename using the current timestamp
+    filename = f"audio_{int(time.time())}.wav"
+    with open(filename, 'wb') as file:
+        binary = base64.b64decode(output)
+        file.write(binary)
+    print('Recording saved to:', file.name)
+    return filename
+def transcribe_and_translate(audio_filename, target_language=None):
+    # Load the processor and model from Hugging Face's transformers library
+    processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
+    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
+    # Load the audio file
+    audio = AudioSegment.from_wav(audio_filename)
+    audio = audio.set_channels(1).set_frame_rate(16000)
+    audio = io.BytesIO()
+    audio.export(audio, format="wav")
+    audio = torch.FloatTensor(audio.getvalue()).unsqueeze(0)
+    # Process the audio and perform transcription
+    inputs = processor(audio, return_tensors="pt").input_values
+    with torch.no_grad():
+        logits = model(input_values=inputs).logits
+    transcription = processor.batch_decode(logits.numpy())
+    print("Transcription:", transcription[0])
+    # Translate the transcription if a target language is provided
+    if target_language:
+        translator = Translator()
+        translation = translator.translate(transcription[0], dest=target_language)
+        print(f"Translation to {target_language}: {translation.text}")
+        return transcription[0], translation.text
+    else:
+        return transcription[0], None
+def main():
+    ad = record()
+    # Prompt the user for a target language
+    target_language = input("Enter the target language code (e.g., 'es' for Spanish, 'fr' for French, etc.), or press Enter to skip translation: ")
+    # Transcribe and optionally translate
+    transcribe_and_translate(ad, target_language if target_language else None)
+if __name__ == "__main__":
+    main()