yasmws commited on
Commit
9843f33
·
1 Parent(s): 308533e

Upload s0HEGKH4.txt

Browse files
Files changed (1) hide show
  1. s0HEGKH4.txt +302 -0
s0HEGKH4.txt ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from gradio_client import Client
4
+ from langchain.llms import OpenAI
5
+ from langchain.chains import ConversationChain
6
+ from langchain.memory import ConversationBufferMemory
7
+ from transformers import MusicgenForConditionalGeneration
8
+ import torch
9
+ from transformers import AutoProcessor
10
+ import scipy
11
+ import gradio as gr
12
+ import colorama
13
+ from pydub import AudioSegment
14
+ from colorama import Fore
15
+ import subprocess
16
+
17
+ import re
18
+
19
def clean_string(string):
    """Strip every character that is not an ASCII letter, digit, or dot.

    Used to sanitize YouTube titles before they become file names.
    """
    # Collect only the allowed characters [a-zA-Z0-9.] and glue them back.
    allowed = re.compile(r'[a-zA-Z0-9.]')
    return ''.join(allowed.findall(string))
23
+
24
def rename_file(video_path):
    """Rename the uploaded video, which sits in the current working
    directory under the basename of *video_path*, to ``input.mp4``.

    The rename happens relative to the current working directory, matching
    how the ffmpeg steps later look up ``input.mp4``.
    """
    # BUG FIX: the original used video_path.split("/")[2], which only yields
    # the file name for paths with exactly two leading components (e.g.
    # "/content/x.mp4"); deeper paths (such as gradio upload temp dirs)
    # returned an intermediate directory name instead. basename is correct
    # for any path depth.
    uploaded_filename = os.path.basename(video_path)
    os.rename(uploaded_filename, "input.mp4")
29
+
30
+
31
+
32
def making_dir():
    """Gather extracted frame images into the ``fotopastas`` folder.

    Moves every ``frames_*`` file from the current directory into
    ``fotopastas`` and returns ``[sorted_filenames, folder_path]``, where the
    file names are ordered by their numeric index (frames_001, frames_002, …).
    """
    # Folder that will hold every extracted video frame.
    if not os.path.exists("fotopastas"):
        os.makedirs("fotopastas")
    image_files = [file for file in os.listdir() if file.startswith("frames_")]
    for image in image_files:
        shutil.move(image, os.path.join("fotopastas", image))

    # BUG FIX: the folder path was hardcoded to '/content/fotopastas', which
    # only works when the script runs from /content (Google Colab). Resolve
    # the folder we actually created above instead, so the function works
    # from any working directory.
    pasta = os.path.abspath("fotopastas")

    # Image extensions we are willing to process.
    extensoes_de_imagem = ['.jpg', '.png', '.jpeg']

    # Sort frames_NNN.ext files numerically by NNN (lexicographic order would
    # misplace frames_010 before frames_002 without the zero padding).
    arquivos_ordenados = sorted(
        [arquivo for arquivo in os.listdir(pasta) if any(arquivo.lower().endswith(ext) for ext in extensoes_de_imagem)],
        key=lambda arquivo: int(arquivo.split("_")[1].split(".")[0])
    )

    return [arquivos_ordenados, pasta]
54
+
55
def frame_list(video_path, seconds):
    """Extract one frame every *seconds* seconds from the video and caption each.

    The video is renamed to input.mp4, split into frames with ffmpeg, and each
    frame is sent to the CLIP Interrogator Space; the first comma-separated
    clause of every caption is returned as a list of short scene descriptions.
    """
    rename_file(video_path)

    # Equivalent to: ffmpeg -i input.mp4 -vf "fps=1/$seconds" -q:v 2 frames_%03d.jpg
    subprocess.run([
        'ffmpeg',
        '-i', 'input.mp4',
        '-vf', f'fps=1/{seconds}',
        '-q:v', '2',
        'frames_%03d.jpg',
    ])

    # Folder with every frame of the video, in order.
    frames, folder = making_dir()

    from gradio_client import Client

    # Remote CLIP Interrogator Space used to caption each frame.
    client = Client("https://fffiloni-clip-interrogator-2.hf.space/")

    captions = []
    for frame_name in frames:
        frame_path = os.path.join(folder, frame_name)
        prediction = client.predict(
            frame_path,
            "best",
            8,
            api_name="/clipi2"
        )
        # Keep result items up to (but excluding) the first string containing
        # "{" — everything after that is structured metadata, not a caption.
        kept = []
        for piece in prediction:
            if isinstance(piece, str) and "{" in piece:
                break
            kept.append(piece)
        captions.append(kept[0] if kept else "")

    # Keep only the first comma-separated clause of each caption.
    summary = [caption.split(',')[0] for caption in captions]
    print(summary)
    return summary
106
+
107
+
108
def langchain_handle_text(text):
    """Ask GPT-3.5 (via a LangChain conversation) for a melody description
    that fits *text*, and return the summarized description.

    Raises RuntimeError when no OpenAI API key is configured.
    """
    print(Fore.CYAN + "to no lang")
    # SECURITY FIX: an OpenAI API key was hardcoded here, which means it is
    # published with the source (it must be rotated). Require the key from
    # the environment instead of embedding it.
    if not os.environ.get("OPENAI_API_KEY"):
        raise RuntimeError("Set the OPENAI_API_KEY environment variable before calling langchain_handle_text")
    llm = OpenAI(temperature=0.3, model_name="gpt-3.5-turbo")
    conversation = ConversationChain(
        llm=llm,
        verbose=True,
        memory=ConversationBufferMemory()
    )

    # First turn primes the model with the full brief; the second turn asks
    # for the condensed version the downstream MusicGen prompt needs.
    conversation.predict(input=f"Given a text and you being an internationally renowned melodist, create a melody description with instruments and necessary transitions according to the context of the text. The text:{text}")
    output = conversation.predict(input="Summarize the melody without removing the necessary instruments and transitions. the otuput should be : the melody begins...")
    print(output)

    return output
125
+
126
+
127
def eleven_labs(prompt):
    """Synthesize *prompt* as speech with the ElevenLabs TTS API, writing the
    MP3 result to ``narracao.mp3`` in the current directory.

    Raises RuntimeError when no API key is configured, and an HTTPError when
    the API call fails.
    """
    import requests

    CHUNK_SIZE = 1024
    # Voice id 21m00Tcm4TlvDq8ikWAM is baked into the URL path.
    url = "https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM"

    # SECURITY FIX: the xi-api-key was hardcoded here and is therefore leaked
    # with the source (rotate it). Read the key from the environment instead.
    api_key = os.environ.get("ELEVEN_API_KEY")
    if not api_key:
        raise RuntimeError("Set the ELEVEN_API_KEY environment variable before calling eleven_labs")

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": api_key
    }

    data = {
        "text": prompt,
        "model_id": "eleven_multilingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    response = requests.post(url, json=data, headers=headers)
    # BUG FIX: the original printed response.text (decoding the whole binary
    # audio payload as text) and never checked the status, silently writing
    # an error JSON into narracao.mp3 on failure. Fail loudly instead.
    response.raise_for_status()
    with open('narracao.mp3', 'wb') as f:
        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            if chunk:
                f.write(chunk)
155
+
156
+
157
def check_duration():
    """Return the longer duration, in seconds, of audio.mp3 vs narracao.mp3.

    Both files are expected in the current directory; used to decide how long
    the background-music loop must run to cover the narration.
    """
    # Load both audio files; pydub reports length in milliseconds.
    music = AudioSegment.from_file("audio.mp3", format="mp3")
    narration = AudioSegment.from_file("narracao.mp3", format="mp3")

    # Convert milliseconds to seconds.
    music_seconds = len(music) / 1000
    narration_seconds = len(narration) / 1000

    print(f"A duração do áudio é de {music_seconds} segundos.")
    print(f"A duração do áudio é de {narration_seconds} segundos.")

    return max(music_seconds, narration_seconds)
178
+
179
+
180
def merge_audio_text():
    """Mix the narration with a looped music bed into output.mp3.

    Expects audio_1.wav (generated music) and narracao.mp3 (narration) in the
    current directory; returns the path of the mixed file.
    """
    # ffmpeg -y -i audio_1.wav -vn -ar 44100 -ac 2 -b:a 192k audio.mp3
    subprocess.run(['ffmpeg', '-y', '-i', 'audio_1.wav', '-vn', '-ar', '44100', '-ac', '2', '-b:a', '192k', 'audio.mp3'])

    # Loop the music for as long as the longer of the two tracks lasts.
    longest = check_duration()
    # ffmpeg -stream_loop -1 -i audio.mp3 -t "$duration" -c:a libmp3lame audio_loop.mp3
    subprocess.run(['ffmpeg', '-stream_loop', '-1', '-i', 'audio.mp3', '-t', str(longest), '-c:a', 'libmp3lame', 'audio_loop.mp3'])

    # Mix narration over the loop; output length follows the first input.
    # ffmpeg -i narracao.mp3 -i audio_loop.mp3 -filter_complex amix=inputs=2:duration=first:dropout_transition=2 output.mp3
    subprocess.run(['ffmpeg', '-i', 'narracao.mp3', '-i', 'audio_loop.mp3', '-filter_complex', 'amix=inputs=2:duration=first:dropout_transition=2', 'output.mp3'])

    return '/content/output.mp3'
192
+
193
+
194
def langchain_handle(description):
    """Turn a list of frame descriptions into one summarized melody prompt
    using GPT-3.5 via a LangChain conversation, and return the summary.

    Raises RuntimeError when no OpenAI API key is configured.
    """
    print(Fore.CYAN + "to no lang")
    # SECURITY FIX: an OpenAI API key was hardcoded here, which means it is
    # published with the source (it must be rotated). Require the key from
    # the environment instead of embedding it.
    if not os.environ.get("OPENAI_API_KEY"):
        raise RuntimeError("Set the OPENAI_API_KEY environment variable before calling langchain_handle")
    llm = OpenAI(temperature=0.3, model_name="gpt-3.5-turbo")
    conversation = ConversationChain(
        llm=llm,
        verbose=True,
        memory=ConversationBufferMemory()
    )

    # Three turns: draft the melody, merge intro + scenes into one phrase,
    # then produce the final one-line summary for MusicGen.
    conversation.predict(input=f"given a list of phrases and you being a world-renowned melodist, create a melody based on the context generated by the phrases on the list, reporting the necessary instruments and their transitions. The list:{description}")
    conversation.predict(input="put the intro and all the scenes together in one phrase. Give me the output star with: the melody begins ")
    y = conversation.predict(input='Summarize the and starts with: the melody begins')
    print(y)
    return y
211
+
212
def music_gen(description):
    """Generate audio for *description* with MusicGen and save it as
    audio_1.wav; returns the (Colab-style) absolute path of the file.
    """
    # BUG FIX: the file only does `import scipy` at the top level, but that
    # does not import the scipy.io.wavfile submodule — accessing
    # scipy.io.wavfile.write would raise AttributeError. Import it explicitly.
    import scipy.io.wavfile

    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model.to(device)
    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")

    inputs = processor(
        text=[f"{description}"],
        padding=True,
        return_tensors="pt",
    )
    print('antes do sampling')
    sampling_rate = model.config.audio_encoder.sampling_rate
    print('depois do sampling')

    # max_new_tokens=1503 yields roughly 30 s of audio for musicgen-small.
    # NOTE(review): that duration is assumed, not verified here.
    audio_values = model.generate(**inputs.to(device), do_sample=True, guidance_scale=3, max_new_tokens=1503)

    print('vou salvar o audio')

    nome = 'audio_1.wav'
    scipy.io.wavfile.write(nome, rate=sampling_rate, data=audio_values[0, 0].cpu().numpy())

    return "/content/audio_1.wav"
236
+
237
+
238
def merge_audio_video():
    """Mux the generated soundtrack (audio_1.wav) onto input.mp4, producing
    output.mp4 in the current directory.
    """
    # Re-encode the raw wav to mp3 (44.1 kHz stereo, 192 kbps).
    subprocess.run(['ffmpeg', '-y', '-i', 'audio_1.wav', '-vn', '-ar', '44100', '-ac', '2', '-b:a', '192k', 'audio.mp3'])

    # BUG FIX: the original used `-c:a copy`, which stores the MP3 stream
    # directly inside the MP4 container — a combination many players reject.
    # Re-encode the audio to AAC (the standard codec for MP4) instead, and
    # add `-shortest` so the output ends with the shorter of the two streams
    # rather than padding the longer one.
    subprocess.run(['ffmpeg', '-y', '-i', 'input.mp4', '-i', 'audio.mp3', '-c:v', 'copy', '-c:a', 'aac', '-shortest', 'output.mp4'])
248
+
249
+
250
def handle_text(text):
    """Full text-to-soundtrack pipeline for the second Gradio tab.

    Generates a melody description from *text*, renders it with MusicGen
    (writes audio_1.wav), narrates the text (writes narracao.mp3), and
    returns the path of the mixed output audio.
    """
    melody_description = langchain_handle_text(text)
    music_gen(melody_description)  # side effect: writes audio_1.wav
    eleven_labs(text)              # side effect: writes narracao.mp3
    return merge_audio_text()
256
+
257
+ import gradio as gr
258
+ from pytube import YouTube
259
def download_youtube_video(youtube_link, seconds):
    """Download a YouTube video, caption its frames, generate a matching
    melody, and mux the melody back onto the video.

    Args:
        youtube_link: URL of the video to process.
        seconds: interval (as a string from the UI dropdown) between
            extracted frames.

    Returns:
        Path of the re-scored video (output.mp4).
    """
    # Create a YouTube object for the provided link.
    yt = YouTube(youtube_link)

    # ROBUSTNESS FIX: the original unconditionally took the 720p stream and
    # crashed with AttributeError when no 720p rendition exists. Fall back to
    # the highest-resolution video-only stream in that case.
    video_stream = yt.streams.filter(resolution='720p', only_video=True).first()
    if video_stream is None:
        video_stream = yt.streams.filter(only_video=True).order_by('resolution').desc().first()
    if video_stream is None:
        raise RuntimeError(f"No downloadable video stream found for {youtube_link}")

    # Sanitize the title so it is safe to use as a file name.
    yt.title = clean_string(yt.title)
    # Download the video.
    video_stream.download(output_path='/content', filename=f'{yt.title}.mp4')

    video_path = f"/content/{yt.title}.mp4"
    print(video_path)
    print(yt.length)

    # Frame captions -> melody prompt -> rendered audio -> muxed video.
    description = frame_list(video_path, seconds)
    final_description = langchain_handle(description)
    music_gen(final_description)  # side effect: writes audio_1.wav used below
    merge_audio_video()
    return '/content/output.mp4'
281
+
282
+
283
+
284
+
285
+
286
# Tab 1: YouTube link + frame-sampling interval -> video re-scored with a
# generated melody (download_youtube_video).
iface_1 = gr.Interface(
    download_youtube_video,
    [gr.Textbox(label="Enter YouTube Video Link"),
    gr.Dropdown( ["5", "3", "1"], label="Seconds", info="Extract an image every chosen number of seconds")],
    "video",

)
# Tab 2: free text -> narrated audio clip with background melody (handle_text).
iface_2 = gr.Interface(
    handle_text,
    gr.Textbox(label="Enter a Text"),
    "audio"
)


# iface_1.launch(share = True,debug=True,enable_queue=True)
# Combine both interfaces as tabs and launch the app with a public share link.
# NOTE(review): enable_queue= was removed in newer Gradio releases — confirm
# the pinned Gradio version accepts it.
demo = gr.TabbedInterface([iface_1, iface_2], ["video-to-SoundClip", "video-to-NarrativeText"])
demo.launch(share=True,debug=True,enable_queue=True)