# GlobalVoice1 / app.py
# (Hugging Face Spaces page residue below, commented out so the file parses:
#  author caption "cyanab's picture", commit message "updated key secret",
#  commit hash 9fc877a)
#imports
import gradio as gr
import wave
import requests
import moviepy.editor as mp
import openai
import ffmpeg
import os
# Set OpenAI and ELEVENLABS API keys
openai.api_key = os.getenv('openai')
ELEVENLABS_API_KEY = os.getenv('elev_labs')
import wave
import json
#@title forked 61
def extract_audio(input_video):
    """Translate the audio track of a video and return the translated audio.

    Pipeline:
      1. Extract the audio track from *input_video* (a path to a video file).
      2. Transcribe it with OpenAI Whisper.
      3. Translate the transcript to English with a GPT completion.
      4. Synthesize the translation with the ElevenLabs TTS API.
      5. Write the synthesized audio back onto the video as
         "video_combined.mp4".

    Returns the path to the translated audio file ("translated_audio.mp3"),
    or None if the TTS request failed.
    """
    video = mp.VideoFileClip(input_video)
    try:
        # Step 1: dump the audio track to a wav file for Whisper.
        extracted_audio = "audio.wav"
        video.audio.write_audiofile(extracted_audio)

        # Step 2: transcribe. Whisper returns an object with a "text" field;
        # the previous code interpolated the whole response object into the
        # prompt, polluting it with JSON wrapper noise.
        with open(extracted_audio, "rb") as f:
            transcription = openai.Audio.transcribe("whisper-1", f)
        source_text = transcription["text"]

        # Step 3: translate to English.
        target_language = 'English'
        response = openai.Completion.create(
            engine="text-davinci-003",
            prompt=f"Translate the following text to {target_language}: {source_text}",
            temperature=0.5,
            max_tokens=100,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=None)
        translated_text = response.choices[0].text.strip()

        # Step 4: text-to-speech via ElevenLabs. The response body is MP3
        # ('accept: audio/mpeg'), so it must be saved as-is — the old code
        # wrapped the MP3 bytes in a WAV header, producing an unplayable file.
        # Dropped the 'language'/'speaker' fields: they are not part of the
        # ElevenLabs request schema, and 'fr-FR' contradicted the English
        # translation above.
        headers = {
            'accept': 'audio/mpeg',
            'xi-api-key': ELEVENLABS_API_KEY,
            'Content-Type': 'application/json',
        }
        data = {'text': translated_text}
        tts_response = requests.post(
            'https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM',
            headers=headers, data=json.dumps(data))
        if tts_response.status_code != 200:
            # Report the failure and signal it to the caller instead of
            # returning a path to a file that was never written.
            print(f"Request failed with status code {tts_response.status_code}")
            return None

        translated_audio = "translated_audio.mp3"
        with open(translated_audio, "wb") as f:
            f.write(tts_response.content)

        # Step 5: remux the translated audio onto the original video.
        # The old code passed the None return value of writeframes() to
        # set_audio(); use a real AudioFileClip instead.
        new_audio = mp.AudioFileClip(translated_audio)
        try:
            final_video = video.set_audio(new_audio)
            final_video.write_videofile("video_combined.mp4")
        finally:
            new_audio.close()

        return translated_audio
    finally:
        # Release the ffmpeg reader resources held by the clip.
        video.close()
# Gradio
# Gradio UI: upload a video, receive the translated audio track.
# extract_audio returns a path to an audio file on disk, so the output
# component must use type='filepath' — the original type='numpy' expected
# a (sample_rate, ndarray) tuple and could not render the returned path.
demo = gr.Interface(fn=extract_audio,
                    inputs=gr.inputs.Video(),
                    outputs=gr.outputs.Audio(type='filepath'))
demo.launch()