from IPython.display import display, Javascript
from google.colab.output import eval_js
import base64
import time

import numpy as np
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from googletrans import Translator
from pydub import AudioSegment
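# Setup sketch (assumption): a stock Colab runtime usually ships torch and
# transformers preinstalled, but googletrans and pydub do not, and pydub
# needs ffmpeg to decode the browser's WebM recording. A minimal install
# cell might look like:
#   !pip install transformers googletrans==3.1.0a0 pydub
#   !apt-get -y install ffmpeg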
def record():
    js = Javascript("""
        async function recordAudio() {
          const div = document.createElement('div');
          const audio = document.createElement('audio');
          const startButton = document.createElement('button');
          const stopButton = document.createElement('button');
          startButton.textContent = 'Start Recording';
          stopButton.textContent = 'Stop Recording';
          document.body.appendChild(div);
          div.appendChild(startButton);
          div.appendChild(audio);
          const stream = await navigator.mediaDevices.getUserMedia({audio: true});
          let recorder = new MediaRecorder(stream);
          audio.style.display = 'block';
          audio.srcObject = stream;
          audio.controls = true;
          audio.muted = true;
          await new Promise((resolve) => startButton.onclick = resolve);
          startButton.replaceWith(stopButton);
          recorder.start();
          await new Promise((resolve) => stopButton.onclick = resolve);
          recorder.stop();
          let recData = await new Promise((resolve) => recorder.ondataavailable = resolve);
          let arrBuff = await recData.data.arrayBuffer();
          stream.getAudioTracks()[0].stop();
          div.remove();
          // Base64-encode the recording so eval_js can return it to Python
          let binaryString = '';
          let bytes = new Uint8Array(arrBuff);
          bytes.forEach((byte) => { binaryString += String.fromCharCode(byte); });
          // Leave a playback widget for the finished recording
          const url = URL.createObjectURL(recData.data);
          const player = document.createElement('audio');
          player.controls = true;
          player.src = url;
          document.body.appendChild(player);
          return btoa(binaryString);
        }""")
    display(js)
    output = eval_js('recordAudio()')
    # MediaRecorder typically emits WebM/Opus rather than WAV, so save the
    # bytes with a matching extension; the timestamp keeps the name unique
    filename = f"audio_{int(time.time())}.webm"
    with open(filename, 'wb') as file:
        binary = base64.b64decode(output)
        file.write(binary)
    print('Recording saved to:', filename)
    return filename
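# Sanity-check sketch (hypothetical filename): pydub can confirm the saved
# clip decodes correctly before handing it to Whisper, e.g.
#   clip = AudioSegment.from_file("audio_1700000000.webm")
#   print(len(clip) / 1000.0, "seconds at", clip.frame_rate, "Hz")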
def transcribe_and_translate(audio_filename, target_language=None):
    # Load the processor and model from Hugging Face's transformers library
    processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
    # Load the recording (from_file lets ffmpeg detect the container format)
    # and resample to the mono 16 kHz audio Whisper expects
    audio = AudioSegment.from_file(audio_filename)
    audio = audio.set_channels(1).set_frame_rate(16000)
    # Convert the raw integer samples to a float32 waveform in [-1, 1]
    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
    samples /= float(1 << (8 * audio.sample_width - 1))
    # Process the audio and perform transcription; Whisper is a seq2seq model,
    # so decoding goes through generate() rather than raw logits
    inputs = processor(samples, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        predicted_ids = model.generate(inputs.input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    print("Transcription:", transcription[0])
    # Translate the transcription if a target language is provided
    if target_language:
        translator = Translator()
        translation = translator.translate(transcription[0], dest=target_language)
        print(f"Translation to {target_language}: {translation.text}")
        return transcription[0], translation.text
    return transcription[0], None
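# Usage sketch (hypothetical filename): the function can also be called
# directly on an existing recording, e.g.
#   text, translated = transcribe_and_translate("audio_1700000000.webm", "es")
# Passing target_language=None skips googletrans and returns (text, None).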
def main():
    audio_file = record()
    # Prompt the user for a target language
    target_language = input("Enter the target language code (e.g., 'es' for Spanish, 'fr' for French), or press Enter to skip translation: ")
    # Transcribe and optionally translate
    transcribe_and_translate(audio_file, target_language or None)

if __name__ == "__main__":
    main()