# -*- coding: utf-8 -*-
"""Youtube Pipeline

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1tHUaxj9C_-nAr4AUMg8MHCwvUNPISNm2
"""

# Commented out IPython magic to ensure Python compatibility.
# %%capture
# !pip install transformers accelerate scipy ftfy
# !pip install diffusers==0.11.1
# !pip install torch torchvision imageio
# !pip install yt_dlp gradio streamlit

# # !streamlit run youtube_pipeline.py & npx localtunnel --port 8501

import torch
import yt_dlp as youtube_dl
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import tempfile
import os

MODEL_NAME = "openai/whisper-large-v3"
BATCH_SIZE = 8
YT_LENGTH_LIMIT_S = 3600  # limit YouTube files to 1 hour

device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)


def transcribe_audio(inputs, task):
    if inputs is None:
        raise ValueError("No audio file submitted! Please provide an audio file.")
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return text


def download_yt_audio(yt_url, filename):
    info_loader = youtube_dl.YoutubeDL()
    try:
        info = info_loader.extract_info(yt_url, download=False)
    except youtube_dl.utils.DownloadError as err:
        raise ValueError(str(err))

    file_length = info["duration"]
    if file_length > YT_LENGTH_LIMIT_S:
        raise ValueError(
            f"Maximum YouTube length is {YT_LENGTH_LIMIT_S} seconds, got a {file_length}-second video."
        )

    ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([yt_url])
        except youtube_dl.utils.ExtractorError as err:
            raise ValueError(str(err))


def yt_transcribe(yt_url, task):
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "video.mp4")
        download_yt_audio(yt_url, filepath)
        with open(filepath, "rb") as f:
            inputs = f.read()

    inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
    inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}

    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return text
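# A minimal usage sketch for transcribe_audio (not in the original notebook):
# the transformers ASR pipeline also accepts a plain file path and decodes it
# via ffmpeg, so a local file can be transcribed without the YouTube step.
# "sample.wav" is a hypothetical path used only for illustration.
#
# local_text = transcribe_audio("sample.wav", "transcribe")
# print(local_text)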
def main():
    print("Whisper Large V3: Transcribe YouTube Video\n")
    yt_url = "https://www.youtube.com/watch?v=Vd-YX40Zz0Q"

    # task = input("Select Task ('transcribe' or 'translate'): ")
    # if task.lower() not in ['transcribe', 'translate']:
    #     print("Invalid task! Please choose either 'transcribe' or 'translate'.")
    #     return

    try:
        global text
        text = yt_transcribe(yt_url, "transcribe")
        print("\nTranscription:\n", text)
    except Exception as e:
        print("\nAn error occurred:", str(e))


if __name__ == "__main__":
    main()

print(text)

from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")


def save_to_file(output_text, filename='script.txt'):
    with open(filename, 'w') as f:
        f.write(output_text)


def format_prompt(message, history):
    prompt = ""
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response} "
    prompt += f"[INST] {message} [/INST]"
    return prompt


def generate(prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(prompt, history)

    # Stream tokens from the Inference API and concatenate them into one string.
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
    return output


def main():
    history = []  # You may add functionality to maintain history here
    max_new_tokens = 768
    prompt = "Summarize this transcription into a video summary - " + text
    print("Generating A Script For you...")
    global qt
    qt = generate(prompt, history, 0.90, max_new_tokens, 0.90, 1.20)
    print(qt)
    # save_to_file(output_text)
    # print("Script saved.")


if __name__ == "__main__":
    main()


def main():
    history = []  # You may add functionality to maintain history here
    max_new_tokens = 768
    prompt = ("Give me a new script for a YouTube video. The script should be "
              "inspired by this YouTube video transcription: " + qt)
    print("Generating A Script For you...")
    global output_text
    output_text = generate(prompt, history, 0.90, max_new_tokens, 0.90, 1.20)
    print(output_text[:-4])  # drop the trailing "</s>" end-of-sequence marker
    save_to_file(output_text)
    print("Script saved.")


if __name__ == "__main__":
    main()
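# A hedged multi-turn sketch (assumption, not original code): format_prompt
# folds prior (user, assistant) turns into Mistral's [INST] template, so a
# follow-up request can build on an earlier exchange. The example strings and
# parameter values below are hypothetical.
#
# example_history = [("Summarize the video.", qt)]
# follow_up = generate("Now compress that summary to one sentence.", example_history, 0.7, 64, 0.9, 1.1)
# print(follow_up)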
prompt += f"[INST] {message} [/INST]" return prompt def generate(prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0): temperature = float(temperature) if temperature < 1e-2: temperature = 1e-2 top_p = float(top_p) generate_kwargs = dict( temperature=temperature, max_new_tokens=max_new_tokens, top_p=top_p, repetition_penalty=repetition_penalty, do_sample=True, seed=42, ) formatted_prompt = format_prompt(prompt, history) stream = client.text_generation( formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False ) output = "" for response in stream: output += response.token.text return output def main(): history = [] # You may add functionality to maintain history here max_new_tokens =128 prompt = "Generate a prompt for stable diffusion to generate a youtube thumbnail for the script.I ONLY WANT A PROMPT (SO I CAN GIVE AN AI TO GENERATE A THUMBNAIL)The script is as follows"+output_text global popo popo = generate(prompt, history, 0.90, max_new_tokens, 0.90, 1.20) print(popo[:-4]) if __name__ == "__main__": main() """Youtube Pipeline Hugging Face-Stable Diffusion """ import torch from diffusers import StableDiffusionPipeline pipe1 = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16) pipe1 = pipe1.to("cuda") # prompt = "a cat running on a treadmill in a rich house" image = pipe1(popo).images[0] # image here is in [PIL format](https://pillow.readthedocs.io/en/stable/) # Now to display an image you can either save it such as: image.save("thumbnail.png") # or if you're in a google colab you can directly display it with image """Mid journey """ import requests API_URL = "https://api-inference.huggingface.co/models/Kvikontent/midjourney-v6" headers = {"Authorization": "Bearer HUGGINGFACE_API_TOKEN"} def query(payload): response = requests.post(API_URL, headers=headers, json=payload) return response.content image_bytes = query({ "inputs": popo, }) # You can access the image with PIL.Image for example import io from PIL import Image image = Image.open(io.BytesIO(image_bytes))