#imports
import gradio as gr
import wave
import requests
import moviepy.editor as mp
import openai
import ffmpeg
import os

# Credentials are read from the process environment: the 'openai' variable
# feeds the OpenAI client and 'elev_labs' is kept for the ElevenLabs request.
# Either value is None when the corresponding variable is not set.
openai.api_key = os.environ.get('openai')
ELEVENLABS_API_KEY = os.environ.get('elev_labs')

import wave
import json
#@title forked 61 
def _transcribe_audio(audio_path):
    """Return the Whisper transcript text for the audio file at *audio_path*."""
    with open(audio_path, "rb") as audio_file:
        transcription = openai.Audio.transcribe("whisper-1", audio_file)
    # The API returns a response object; only its "text" field is the
    # transcript. Interpolating the whole object would put JSON in the prompt.
    return transcription["text"]


def _translate_text(text, target_language):
    """Return *text* translated to *target_language* by the completion model."""
    # NOTE(review): max_tokens=100 caps the length of the translation, so long
    # transcripts are presumably cut short -- confirm whether that is intended.
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=f"Translate the following text to {target_language}: {text}",
        temperature=0.5,
        max_tokens=100,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None)
    return response.choices[0].text.strip()


def _synthesize_speech(text, output_path):
    """Fetch ElevenLabs speech for *text* and save the raw bytes to *output_path*.

    Raises:
        RuntimeError: if the service answers with any status other than 200.
    """
    headers = {
        'accept': 'audio/mpeg',
        'xi-api-key': ELEVENLABS_API_KEY,
        'Content-Type': 'application/json',
    }
    # NOTE(review): 'language' is 'fr-FR' although the text was translated to
    # English -- confirm the endpoint actually honors these two fields.
    payload = {
        'text': text,
        'language': 'fr-FR',
        'speaker': 'female'
    }
    response = requests.post(
        'https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM',
        headers=headers,
        json=payload,
        # Without a timeout a stalled connection would block the UI forever.
        timeout=120)
    if response.status_code != 200:
        # Fail loudly instead of returning a path to a missing or stale file.
        raise RuntimeError(
            f"Request failed with status code {response.status_code}")
    # The body is requested as audio/mpeg, so it is stored verbatim; it is
    # compressed audio, not PCM frames that could be fed to the wave module.
    with open(output_path, "wb") as speech_file:
        speech_file.write(response.content)


def extract_audio(input_video):  # input_video='video_path+name.mp4'
    """Transcribe, translate and re-voice the audio track of a video.

    The audio track is written to ``audio.wav``, transcribed with Whisper,
    translated to English, and sent to ElevenLabs for speech synthesis. The
    synthesized speech is saved as ``translated_audio.mp3``, converted to
    ``translated_audio.wav``, and muxed with the original picture into
    ``video_combined.mp4``. All files are written relative to the current
    working directory.

    Args:
        input_video: Path of the video file to process.

    Returns:
        The path ``'translated_audio.wav'`` of the translated speech.

    Raises:
        ValueError: if the video has no audio track.
        RuntimeError: if the text-to-speech request does not return 200.
    """
    extracted_audio = "audio.wav"
    speech_mp3 = "translated_audio.mp3"
    translated_audio = "translated_audio.wav"
    target_language = 'English'

    video = mp.VideoFileClip(input_video)
    try:
        if video.audio is None:
            raise ValueError("Input video has no audio track to translate")
        video.audio.write_audiofile(extracted_audio)

        transcript = _transcribe_audio(extracted_audio)
        translated_text = _translate_text(transcript, target_language)
        _synthesize_speech(translated_text, speech_mp3)

        # Decode the downloaded speech through MoviePy so that the returned
        # WAV is valid and the combined video gets a real audio clip.
        speech = mp.AudioFileClip(speech_mp3)
        try:
            speech.write_audiofile(translated_audio)
            final_video = video.set_audio(speech)
            final_video.write_videofile("video_combined.mp4")
        finally:
            speech.close()
    finally:
        # Release the reader held by the clip even if a step failed.
        video.close()
    return translated_audio


# Gradio UI: a single video input is handed to extract_audio and its result
# is shown in a single audio output.
# NOTE(review): extract_audio returns a file path string while the output is
# declared with type='numpy' -- confirm the installed Gradio version accepts it.
demo = gr.Interface(
    fn=extract_audio,
    inputs=gr.inputs.Video(),
    outputs=gr.outputs.Audio(type='numpy'),
)

# Alternative invocations used during development:
#   demo.launch(debug=True)
#   demo.launch(share=True, debug=True)
demo.launch()