Manikeerthan01 commited on
Commit
e6e0737
·
verified ·
1 Parent(s): 014d4b0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -0
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ from IPython.display import HTML, Javascript
4
+ from google.colab.output import eval_js
5
+ import base64
6
+ import time
7
+ import torch
8
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
9
+ from googletrans import Translator
10
+ from pydub import AudioSegment
11
+ import io
12
+
13
def record():
    """Record audio from the browser microphone (Google Colab) and save it to disk.

    Displays a Start/Stop recording widget via injected JavaScript, captures the
    microphone stream, and receives the recorded bytes back as base64 through
    ``eval_js``.

    Returns:
        str: The filename the recording was written to (``audio_<timestamp>.wav``).

    NOTE(review): MediaRecorder typically emits WebM/Opus, so the saved file is
    likely not a real WAV despite the extension — confirm before feeding it to
    strict WAV readers.
    """
    # BUG FIX: `display` was never imported at module level, so calling it
    # raised NameError outside a live notebook namespace. Import it locally
    # (IPython is already a dependency of this file).
    from IPython.display import display

    js = Javascript("""
  async function recordAudio() {
  const div = document.createElement('div');
  const audio = document.createElement('audio');
  const strtButton = document.createElement('button');
  const stopButton = document.createElement('button');

  strtButton.textContent = 'Start Recording';
  stopButton.textContent = 'Stop Recording';

  document.body.appendChild(div);
  div.appendChild(strtButton);
  div.appendChild(audio);

  const stream = await navigator.mediaDevices.getUserMedia({audio:true});
  let recorder = new MediaRecorder(stream);
  audio.style.display = 'block';
  audio.srcObject = stream;
  audio.controls = true;
  audio.muted = true;

  await new Promise((resolve) => strtButton.onclick = resolve);
  strtButton.replaceWith(stopButton);
  recorder.start();

  await new Promise((resolve) => stopButton.onclick = resolve);
  recorder.stop();
  let recData = await new Promise((resolve) => recorder.ondataavailable = resolve);
  let arrBuff = await recData.data.arrayBuffer();
  stream.getAudioTracks()[0].stop();
  div.remove();

  let binaryString = '';
  let bytes = new Uint8Array(arrBuff);
  bytes.forEach((byte) => { binaryString += String.fromCharCode(byte); });

  const url = URL.createObjectURL(recData.data);
  const player = document.createElement('audio');
  player.controls = true;
  player.src = url;
  document.body.appendChild(player);

  return btoa(binaryString);
  }""")
    display(js)
    # eval_js blocks until the JS promise resolves with the base64 payload.
    output = eval_js('recordAudio({})')

    # Generate a unique filename using the current timestamp
    filename = f"audio_{int(time.time())}.wav"

    with open(filename, 'wb') as file:
        binary = base64.b64decode(output)
        file.write(binary)

    # BUG FIX: original printed `file.name` after the `with` block closed the
    # handle; use the local `filename` (same value, no closed-file access).
    print('Recording saved to:', filename)
    return filename
70
+
71
def transcribe_and_translate(audio_filename, target_language=None):
    """Transcribe a WAV file with Whisper and optionally translate the text.

    Args:
        audio_filename: Path to a WAV file on disk.
        target_language: Optional googletrans language code (e.g. ``'es'``).
            When falsy, translation is skipped.

    Returns:
        tuple[str, str | None]: ``(transcription, translation)`` — the
        translation element is ``None`` when no target language was given.
    """
    # Load the processor and model from Hugging Face's transformers library
    processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")

    # Load the audio file and normalize to mono 16 kHz (Whisper's expected rate).
    audio = AudioSegment.from_wav(audio_filename)
    audio = audio.set_channels(1).set_frame_rate(16000)

    # BUG FIX: the original rebound `audio` to an empty io.BytesIO() and then
    # called .export() on that BytesIO (AttributeError), and passed raw WAV
    # bytes to torch.FloatTensor. Instead, extract the PCM samples and scale
    # them to the float range [-1.0, 1.0] that Whisper's processor expects.
    samples = torch.tensor(audio.get_array_of_samples(), dtype=torch.float32)
    samples = samples / float(1 << (8 * audio.sample_width - 1))

    # BUG FIX: Whisper is an encoder-decoder model; transcription is done via
    # generate() over input_features and decoding the predicted token ids —
    # not by argmax-free batch_decode of raw logits (that is the CTC/Wav2Vec2
    # pattern, which does not apply here).
    inputs = processor(samples.numpy(), sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        predicted_ids = model.generate(inputs.input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

    print("Transcription:", transcription[0])

    # Translate the transcription if a target language is provided
    if target_language:
        translator = Translator()
        translation = translator.translate(transcription[0], dest=target_language)
        print(f"Translation to {target_language}: {translation.text}")
        return transcription[0], translation.text
    else:
        return transcription[0], None
99
+
100
def main():
    """Entry point: record from the mic, then transcribe and optionally translate."""
    recorded_path = record()

    # Ask which language (if any) the transcription should be translated to.
    language_code = input("Enter the target language code (e.g., 'es' for Spanish, 'fr' for French, etc.), or press Enter to skip translation: ")

    # An empty answer means "no translation"; pass None in that case.
    transcribe_and_translate(recorded_path, language_code or None)

if __name__ == "__main__":
    main()