import gc
import os
import os.path as osp
from functools import lru_cache

import accelerate
import gradio as gr
import torch
import whisperx
from datasets import load_dataset
from mlxtend.file_io import find_files
from mlxtend.utils import Counter
from pytube import YouTube
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
| |
|
| | |
def URLToText(URL):
    """Download a YouTube video's audio track and transcribe it with Whisper.

    Parameters
    ----------
    URL : str
        Full YouTube video URL.

    Returns
    -------
    str
        The transcribed text of the video's audio.

    Raises
    ------
    ValueError
        If the video exposes no audio-only stream.
    """
    yt = YouTube(URL)

    # Pick the first audio-only stream; .first() returns None when there is
    # none, which previously surfaced as a confusing AttributeError.
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream available for URL: {URL}")

    # Download into the working directory, then normalize the filename
    # (spaces stripped, .mp3 extension) so the ASR pipeline accepts it.
    out_file = stream.download(output_path='.')
    base, _ext = os.path.splitext(out_file)
    audio_path = base.replace(" ", "") + '.mp3'
    # os.replace is portable: unlike os.rename it overwrites an existing
    # target on Windows instead of raising.
    os.replace(out_file, audio_path)

    try:
        result = _asr_pipeline()(audio_path)
        return result["text"]
    finally:
        # Don't leak downloaded audio files on a long-running server.
        try:
            os.remove(audio_path)
        except OSError:
            pass


@lru_cache(maxsize=1)
def _asr_pipeline():
    """Build the Whisper ASR pipeline once and reuse it across requests.

    The original code reloaded the multi-gigabyte model on every call;
    caching the pipeline makes all calls after the first fast.
    """
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    # fp16 only on GPU; fp32 keeps CPU inference numerically safe.
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    model_id = "openai/whisper-medium"
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
    )
    model.to(device)

    processor = AutoProcessor.from_pretrained(model_id)

    return pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=30,  # long-form audio is chunked into 30 s windows
        batch_size=16,
        return_timestamps=True,
        torch_dtype=torch_dtype,
        device=device,
    )
| |
|
| | |
# Launch the web UI. Gradio 3+ removed the gr.inputs / gr.outputs namespaces;
# components are constructed directly (gr.Textbox), so the old
# gr.inputs.Textbox call would raise AttributeError on current Gradio.
gr.Interface(
    fn=URLToText,
    inputs=gr.Textbox(label="Video URL"),
    outputs=gr.Textbox(label="Transcripción"),
).launch(share=False)