| | import os |
| | import shutil |
| | from gradio_client import Client |
| | from langchain.llms import OpenAI |
| | from langchain.chains import ConversationChain |
| | from langchain.memory import ConversationBufferMemory |
| | from transformers import MusicgenForConditionalGeneration |
| | import torch |
| | from transformers import AutoProcessor |
| | import scipy |
| | import gradio as gr |
| | import colorama |
| | from pydub import AudioSegment |
| | from colorama import Fore |
| | import subprocess |
| |
|
| | import re |
| |
|
def clean_string(string):
    """Strip every character that is not an ASCII letter, digit or dot.

    Used to sanitize YouTube titles into safe filenames.
    """
    kept = re.findall(r"[a-zA-Z0-9.]", string)
    return "".join(kept)
| |
|
def rename_file(video_path):
    """Rename the uploaded video (located in the cwd) to ``input.mp4``.

    BUG FIX: the original indexed ``video_path.split("/")[2]``, which only
    works for paths with exactly two leading components (e.g.
    ``/content/x.mp4``) and raises IndexError otherwise;
    ``os.path.basename`` handles any path shape.

    Parameters
    ----------
    video_path : str
        Path of the uploaded video; only its basename is used, and that
        file is assumed to live in the current working directory.
    """
    uploaded_filename = os.path.basename(video_path)
    os.rename(uploaded_filename, "input.mp4")
| |
|
| |
|
| |
|
def making_dir():
    """Collect the extracted ``frames_*`` images into a single folder.

    Returns
    -------
    list
        ``[sorted_filenames, folder_path]`` where filenames are ordered by
        their numeric frame index (``frames_2`` before ``frames_10``).
    """
    # BUG FIX: the original created "fotopastas" relative to the cwd but
    # then listed the hard-coded '/content/fotopastas' — those only match
    # when the cwd is /content (Colab). Use one path for both operations.
    pasta = os.path.abspath("fotopastas")
    os.makedirs(pasta, exist_ok=True)  # race-safe, replaces exists() check

    image_files = [file for file in os.listdir() if file.startswith("frames_")]
    for image in image_files:
        shutil.move(image, os.path.join(pasta, image))

    extensoes_de_imagem = ('.jpg', '.png', '.jpeg')

    # Sort numerically on the NNN of frames_NNN.ext, not lexically.
    arquivos_ordenados = sorted(
        [arquivo for arquivo in os.listdir(pasta)
         if arquivo.lower().endswith(extensoes_de_imagem)],
        key=lambda arquivo: int(arquivo.split("_")[1].split(".")[0])
    )

    return [arquivos_ordenados, pasta]
| |
|
def frame_list(video_path, seconds):
    """Extract one frame every `seconds` seconds and caption each frame.

    Renames the uploaded video to ``input.mp4``, extracts JPEG frames with
    ffmpeg, moves them into a folder via ``making_dir()``, then sends each
    frame to the CLIP Interrogator HF Space for a caption.

    Parameters
    ----------
    video_path : str
        Path of the uploaded video.
    seconds : int or str
        Interval, in seconds, between extracted frames.

    Returns
    -------
    list[str]
        The first comma-separated clause of each frame's caption, in
        chronological order.
    """
    rename_file(video_path)

    # One JPEG every `seconds` seconds; -q:v 2 keeps JPEG quality high.
    command = [
        'ffmpeg',
        '-i', 'input.mp4',
        '-vf', f'fps=1/{seconds}',
        '-q:v', '2',
        'frames_%03d.jpg'
    ]
    # BUG FIX: failures were silently ignored, leaving zero frames and a
    # confusing crash later; check=True raises CalledProcessError instead.
    subprocess.run(command, check=True)

    elements = making_dir()

    from gradio_client import Client

    client = Client("https://fffiloni-clip-interrogator-2.hf.space/")

    finalList = []

    for arquivo in elements[0]:
        caminho_arquivo = os.path.join(elements[1], arquivo)
        result = client.predict(
            caminho_arquivo,
            "best",  # interrogation mode expected by the Space
            8,       # numeric setting the /clipi2 endpoint expects — TODO confirm meaning
            api_name="/clipi2"
        )
        # Keep only the plain-text items that precede the first JSON-ish
        # entry; "{" marks metadata rather than a caption.
        newList = []
        for item in result:
            if isinstance(item, str) and "{" in item:
                break
            newList.append(item)

        finalList.append(newList[0] if newList else "")

    # Keep just the first comma-separated clause of every caption.
    resultList = [description.split(',')[0] for description in finalList]
    print(resultList)
    return resultList
| |
|
| |
|
def langchain_handle_text(text):
    """Create a melody description for `text` via a two-turn GPT-3.5 chat.

    Parameters
    ----------
    text : str
        The user's narrative text.

    Returns
    -------
    str
        The summarized melody description produced by the second turn.
    """
    print(Fore.CYAN + "to no lang")
    # SECURITY: placeholder key committed to source — move it out of the
    # repo. BUG FIX: setdefault keeps a real OPENAI_API_KEY already set in
    # the environment instead of clobbering it with the placeholder.
    os.environ.setdefault("OPENAI_API_KEY", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
    llm = OpenAI(temperature=0.3, model_name="gpt-3.5-turbo")
    conversation = ConversationChain(
        llm=llm,
        verbose=True,
        # Buffer memory so the second prompt can refer to the first answer.
        memory=ConversationBufferMemory()
    )

    # Turn 1: compose; turn 2: summarize into the final description.
    conversation.predict(input=f"Given a text and you being an internationally renowned melodist, create a melody description with instruments and necessary transitions according to the context of the text. The text:{text}")
    output = conversation.predict(input="Summarize the melody without removing the necessary instruments and transitions. the otuput should be : the melody begins...")
    print(output)

    return output
| |
|
| |
|
def eleven_labs(prompt):
    """Synthesize `prompt` with the ElevenLabs TTS API into narracao.mp3.

    Parameters
    ----------
    prompt : str
        Text to narrate.

    Raises
    ------
    requests.HTTPError
        If the API returns a non-2xx status.
    """
    import requests

    CHUNK_SIZE = 1024
    # Voice id 21m00Tcm4TlvDq8ikWAM — presumably a premade voice; TODO confirm.
    url = "https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM"

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        # SECURITY: placeholder API key committed to source — move out of the repo.
        "xi-api-key": "xxxxxxxxxxxxxxxxxxxxxxxxx"
    }

    data = {
        "text": prompt,
        "model_id": "eleven_multilingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    response = requests.post(url, json=data, headers=headers)
    # BUG FIX: the original printed response.text unconditionally, decoding
    # the binary MP3 payload as text; only surface the body (an error
    # message) when the request failed, then raise.
    if not response.ok:
        print(response.text)
        response.raise_for_status()
    with open('narracao.mp3', 'wb') as f:
        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            if chunk:
                f.write(chunk)
| |
|
| |
|
def check_duration():
    """Return the longer duration, in seconds, of audio.mp3 vs narracao.mp3.

    Returns
    -------
    float
        max(duration of the melody, duration of the narration).
    """
    melody = AudioSegment.from_file("audio.mp3", format="mp3")
    narration = AudioSegment.from_file("narracao.mp3", format="mp3")

    # len(AudioSegment) is the duration in milliseconds.
    melody_seconds = len(melody) / 1000
    narration_seconds = len(narration) / 1000

    print(f"A duração do áudio é de {melody_seconds} segundos.")
    print(f"A duração do áudio é de {narration_seconds} segundos.")

    # IDIOM: max() replaces the original manual if/else comparison.
    return max(melody_seconds, narration_seconds)
| |
|
| |
|
def merge_audio_text():
    """Mix the generated melody (audio_1.wav) with the narration (narracao.mp3).

    Returns
    -------
    str
        Path of the mixed file. NOTE(review): hard-coded to /content —
        assumes the Colab cwd; confirm outside Colab.

    Raises
    ------
    subprocess.CalledProcessError
        If any ffmpeg invocation fails (check=True).
    """
    # wav -> mp3 (stereo, 44.1 kHz, 192 kbps).
    subprocess.run(['ffmpeg', '-y', '-i', 'audio_1.wav', '-vn', '-ar', '44100',
                    '-ac', '2', '-b:a', '192k', 'audio.mp3'], check=True)
    duration = check_duration()
    # Loop the melody so it lasts as long as the longer of the two tracks.
    # BUG FIX: added '-y' — without it ffmpeg prompts for confirmation (and
    # stalls the script) when audio_loop.mp3 / output.mp3 already exist
    # from a previous run; check=True makes failures raise loudly.
    subprocess.run(['ffmpeg', '-y', '-stream_loop', '-1', '-i', 'audio.mp3',
                    '-t', str(duration), '-c:a', 'libmp3lame',
                    'audio_loop.mp3'], check=True)
    # Mix narration over the looped melody; output length follows the first
    # input (the narration).
    subprocess.run(['ffmpeg', '-y', '-i', 'narracao.mp3', '-i', 'audio_loop.mp3',
                    '-filter_complex',
                    'amix=inputs=2:duration=first:dropout_transition=2',
                    'output.mp3'], check=True)
    audio_final = '/content/output.mp3'
    return audio_final
| |
|
| |
|
def langchain_handle(description):
    """Turn a list of frame captions into a single melody description.

    Parameters
    ----------
    description : list[str]
        Short per-frame captions produced by ``frame_list``.

    Returns
    -------
    str
        The final summarized melody description.
    """
    print(Fore.CYAN + "to no lang")
    # SECURITY: placeholder key committed to source — move it out of the
    # repo. BUG FIX: setdefault keeps a real OPENAI_API_KEY already set in
    # the environment instead of clobbering it with the placeholder.
    os.environ.setdefault("OPENAI_API_KEY", "xxxxxxxxxxxxxxxxxxxxxxxxxxxx")
    llm = OpenAI(temperature=0.3, model_name="gpt-3.5-turbo")
    conversation = ConversationChain(
        llm=llm,
        verbose=True,
        # Buffer memory links the three prompt turns together.
        memory=ConversationBufferMemory()
    )

    # Three-turn chain: compose, merge scenes, then summarize.
    conversation.predict(input=f"given a list of phrases and you being a world-renowned melodist, create a melody based on the context generated by the phrases on the list, reporting the necessary instruments and their transitions. The list:{description}")
    conversation.predict(input="put the intro and all the scenes together in one phrase. Give me the output star with: the melody begins ")
    y = conversation.predict(input='Summarize the and starts with: the melody begins')
    print(y)
    return y
| |
|
def music_gen(description):
    """Generate audio from a text description with MusicGen-small.

    Parameters
    ----------
    description : str
        Text prompt describing the melody to generate.

    Returns
    -------
    str
        Absolute path of the WAV file that was written.
    """
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model.to(device)
    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")

    inputs = processor(
        text=[f"{description}"],
        padding=True,
        return_tensors="pt",
    )
    print('antes do sampling')
    sampling_rate = model.config.audio_encoder.sampling_rate
    print('depois do sampling')

    # max_new_tokens=1503 fixes the clip length — presumably ~30 s; TODO confirm.
    audio_values = model.generate(**inputs.to(device), do_sample=True,
                                  guidance_scale=3, max_new_tokens=1503)

    print('vou salvar o audio')

    nome = 'audio_1.wav'
    # First batch item, first channel, moved to CPU for scipy.
    scipy.io.wavfile.write(nome, rate=sampling_rate,
                           data=audio_values[0, 0].cpu().numpy())

    # BUG FIX: return the path of the file actually written (cwd-relative)
    # instead of the hard-coded '/content/audio_1.wav', which is wrong
    # whenever the cwd is not /content.
    return os.path.abspath(nome)
| |
|
| |
|
def merge_audio_video():
    """Mux the generated melody onto input.mp4, producing output.mp4.

    Raises
    ------
    subprocess.CalledProcessError
        If either ffmpeg invocation fails. BUG FIX: the original ignored
        subprocess failures, silently leaving a stale or missing output.mp4.
    """
    # wav -> mp3 (stereo, 44.1 kHz, 192 kbps).
    subprocess.run(['ffmpeg', '-y', '-i', 'audio_1.wav', '-vn', '-ar', '44100',
                    '-ac', '2', '-b:a', '192k', 'audio.mp3'], check=True)

    # NOTE(review): '-c:a copy' stores MP3 audio inside an MP4 container,
    # which some players reject — consider re-encoding with '-c:a aac'.
    # Left as-is to preserve current output.
    subprocess.run(['ffmpeg', '-y', '-i', 'input.mp4', '-i', 'audio.mp3',
                    '-c:v', 'copy', '-c:a', 'copy', 'output.mp4'], check=True)
| |
|
| |
|
def handle_text(text):
    """Build a narrated sound clip for `text`.

    Generates a melody from an LLM-written description (audio_1.wav),
    narrates the text with ElevenLabs (narracao.mp3), and mixes both.

    Returns the path of the mixed mp3 from merge_audio_text().
    """
    melody_description = langchain_handle_text(text)
    music_gen(melody_description)  # writes audio_1.wav
    eleven_labs(text)              # writes narracao.mp3
    return merge_audio_text()
| |
|
| | import gradio as gr |
| | from pytube import YouTube |
def download_youtube_video(youtube_link, seconds):
    """Download a YouTube video and replace its soundtrack with a generated melody.

    Pipeline: download -> extract frames every `seconds` seconds -> caption
    frames -> LLM melody description -> MusicGen audio -> mux onto video.

    Parameters
    ----------
    youtube_link : str
        URL of the YouTube video.
    seconds : int or str
        Interval between extracted frames.

    Returns
    -------
    str
        Path of the muxed video. NOTE(review): hard-coded to /content —
        assumes the Colab cwd; confirm outside Colab.
    """
    yt = YouTube(youtube_link)

    # BUG FIX: .first() returns None when no 720p video-only stream exists,
    # which crashed on .download(); fall back to the highest-resolution
    # video-only stream in that case.
    video_stream = yt.streams.filter(resolution='720p', only_video=True).first()
    if video_stream is None:
        video_stream = (yt.streams.filter(only_video=True)
                        .order_by('resolution').desc().first())

    # Sanitize the title so it is a safe filename.
    yt.title = clean_string(yt.title)

    video_stream.download(output_path='/content', filename=f'{yt.title}.mp4')

    video_path = f"/content/{yt.title}.mp4"
    print(video_path)
    print(yt.length)
    description = frame_list(video_path, seconds)

    final_description = langchain_handle(description)
    audio_path = music_gen(final_description)
    merge_audio_video()
    new_video_path = '/content/output.mp4'
    return new_video_path
| |
|
| |
|
| |
|
| |
|
| |
|
# Tab 1: YouTube link -> video with a generated soundtrack.
iface_1 = gr.Interface(
    fn=download_youtube_video,
    inputs=[
        gr.Textbox(label="Enter YouTube Video Link"),
        gr.Dropdown(
            ["5", "3", "1"],
            label="Seconds",
            info="Extract an image every chosen number of seconds",
        ),
    ],
    outputs="video",
)

# Tab 2: free text -> narrated sound clip.
iface_2 = gr.Interface(
    fn=handle_text,
    inputs=gr.Textbox(label="Enter a Text"),
    outputs="audio",
)

demo = gr.TabbedInterface([iface_1, iface_2],
                          ["video-to-SoundClip", "video-to-NarrativeText"])
demo.launch(share=True, debug=True, enable_queue=True)
| |
|