# -*- coding: utf-8 -*-
"""Youtube Pipeline
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1tHUaxj9C_-nAr4AUMg8MHCwvUNPISNm2
"""
# Commented out IPython magic to ensure Python compatibility.
# %%capture
# !pip install streamlit
# !pip install diffusers==0.11.1
# !pip install transformers scipy ftfy accelerate
# !pip install torch torchvision imageio
# !pip install gradio
# !pip install yt_dlp
#
# !streamlit run youtube_pipeline.py & npx localtunnel --port 8501
import torch
import yt_dlp as youtube_dl
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import tempfile
import os
MODEL_NAME = "openai/whisper-large-v3"
BATCH_SIZE = 8
YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
device = 0 if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
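# A minimal usage sketch (not part of the original flow): the same pipeline can
# transcribe a local audio file directly, e.g.
#
#     result = pipe("sample.wav", batch_size=BATCH_SIZE, return_timestamps=True)
#     print(result["text"])
#
# "sample.wav" is a hypothetical path used only for illustration.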
# Helper for transcribing a raw audio input; defined here, though the YouTube
# flow below calls the pipeline directly inside yt_transcribe.
def transcribe_audio(inputs, task):
    if inputs is None:
        raise ValueError("No audio file submitted! Please provide an audio file.")
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return text
def download_yt_audio(yt_url, filename):
    info_loader = youtube_dl.YoutubeDL()
    try:
        info = info_loader.extract_info(yt_url, download=False)
    except youtube_dl.utils.DownloadError as err:
        raise ValueError(str(err))
    file_length = info["duration"]
    if file_length > YT_LENGTH_LIMIT_S:
        raise ValueError(f"Maximum YouTube length is {YT_LENGTH_LIMIT_S} seconds, got a {file_length}-second video.")
    ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([yt_url])
        except youtube_dl.utils.ExtractorError as err:
            raise ValueError(str(err))
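# Note: the format string above still fetches a video stream even though only the
# audio is transcribed. An audio-only variant (a sketch, assuming the source
# exposes an m4a audio stream) would be:
#
#     ydl_opts = {"outtmpl": filename, "format": "bestaudio[ext=m4a]/bestaudio/best"}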
def yt_transcribe(yt_url, task):
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "video.mp4")
        download_yt_audio(yt_url, filepath)
        with open(filepath, "rb") as f:
            inputs = f.read()
    inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
    inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return text
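# If per-segment timestamps are wanted instead of plain text, the same call
# exposes them under "chunks" (a sketch of the transformers ASR pipeline output
# when return_timestamps=True):
#
#     result = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)
#     for chunk in result["chunks"]:
#         print(chunk["timestamp"], chunk["text"])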
def main():
    print("Whisper Large V3: Transcribe YouTube Video\n")
    yt_url = "https://www.youtube.com/watch?v=Vd-YX40Zz0Q"
    # task = input("Select Task ('transcribe' or 'translate'): ")
    # if task.lower() not in ['transcribe', 'translate']:
    #     print("Invalid task! Please choose either 'transcribe' or 'translate'.")
    #     return
    try:
        global text
        text = yt_transcribe(yt_url, "transcribe")
        print("\nTranscription:\n", text)
    except Exception as e:
        print("\nAn error occurred:", str(e))
if __name__ == "__main__":
    main()
from huggingface_hub import InferenceClient
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
def save_to_file(output_text, filename='script.txt'):
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(output_text)
def format_prompt(message, history):
    prompt = ""
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt
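# Worked example of the Mistral [INST] format produced above, for a one-turn history:
#
#     format_prompt("Summarize it", [("Hi", "Hello!")])
#     # -> '[INST] Hi [/INST] Hello!</s> [INST] Summarize it [/INST]'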
def generate(prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
    formatted_prompt = format_prompt(prompt, history)
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
    return output
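# Non-streaming sketch: with the default stream=False (and no details),
# text_generation returns the generated string directly, which avoids
# assembling tokens by hand:
#
#     output = client.text_generation(formatted_prompt, max_new_tokens=256)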
# Step 1 (LLM): summarize the transcription.
def main():
    history = []  # You may add functionality to maintain history here
    max_new_tokens = 768
    prompt = "Summarize this transcription into a video summary: " + text
    print("Generating a summary for you...")
    global qt
    qt = generate(prompt, history, 0.90, max_new_tokens, 0.90, 1.20)
    print(qt)
    # save_to_file(output_text)
    # print("Script saved")
if __name__ == "__main__":
    main()
# Step 2 (LLM): generate a new video script from the summary above.
def main():
    history = []  # You may add functionality to maintain history here
    max_new_tokens = 768
    prompt = "Give me a new script for a YouTube video. The script should be inspired by this summary of a YouTube video: " + qt
    print("Generating a script for you...")
    global output_text
    output_text = generate(prompt, history, 0.90, max_new_tokens, 0.90, 1.20)
    print(output_text.removesuffix("</s>"))  # drop the trailing end-of-sequence token, if present
    save_to_file(output_text)
    print("Script saved")
if __name__ == "__main__":
    main()
# Step 3 (LLM): ask for a Stable Diffusion prompt for the thumbnail.
def main():
    history = []  # You may add functionality to maintain history here
    max_new_tokens = 128
    prompt = ("Generate a prompt for Stable Diffusion to generate a YouTube thumbnail "
              "for the script. I ONLY WANT A PROMPT (so I can give it to an AI to "
              "generate a thumbnail). The script is as follows: ") + output_text
    global popo
    popo = generate(prompt, history, 0.90, max_new_tokens, 0.90, 1.20).removesuffix("</s>")
    print(popo)
if __name__ == "__main__":
    main()
"""Youtube Pipeline
Hugging Face-Stable Diffusion
"""
import torch
from diffusers import StableDiffusionPipeline
pipe1 = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
pipe1 = pipe1.to("cuda")
# prompt = "a cat running on a treadmill in a rich house"
image = pipe1(popo).images[0]  # a PIL Image (https://pillow.readthedocs.io/en/stable/)
# Now to display the image you can either save it:
image.save("thumbnail.png")
# or, in a Google Colab notebook, display it inline with the bare expression
# below (a no-op when run as a plain script):
image
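# For reproducible thumbnails, a fixed-seed generator can be passed in
# (a sketch; the seed value 42 is arbitrary):
#
#     generator = torch.Generator("cuda").manual_seed(42)
#     image = pipe1(popo, generator=generator).images[0]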
"""Mid journey
"""
import requests
API_URL = "https://api-inference.huggingface.co/models/Kvikontent/midjourney-v6"
headers = {"Authorization": "Bearer HUGGINGFACE_API_TOKEN"}  # replace HUGGINGFACE_API_TOKEN with your own Hugging Face token
def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.content
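# On failure (e.g. the model is still loading) the Inference API returns a JSON
# error body rather than image bytes, so a hedged variant checks the HTTP status
# first ("query_checked" is an illustrative name, not part of the original):
def query_checked(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    response.raise_for_status()  # surface HTTP errors instead of treating JSON as an image
    return response.content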
image_bytes = query({
    "inputs": popo,
})
# You can access the image with PIL.Image for example
import io
from PIL import Image
image = Image.open(io.BytesIO(image_bytes))
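# Save the second thumbnail under its own name so the Stable Diffusion output
# above is not overwritten ("thumbnail_midjourney.png" is an assumed filename):
image.save("thumbnail_midjourney.png")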