# video-extractor / app.py
# Gradio app: transcribe a video's audio track with Whisper large-v3.
# (revision cda531f — "change to old pipeline", by leeboykt)
import os

import gradio as gr
import torch
from moviepy.editor import VideoFileClip
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# ---------------------------------------------------------------------------
# Model setup: load Whisper large-v3 once at import time and wrap it in a
# transformers ASR pipeline that is shared by every request.
# ---------------------------------------------------------------------------
if torch.cuda.is_available():
    device = "cuda:0"
    torch_dtype = torch.float16  # half precision on GPU
else:
    device = "cpu"
    torch_dtype = torch.float32

model_id = "openai/whisper-large-v3"

processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    use_safetensors=True,
)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=400,  # keep generation bounded per chunk
    chunk_length_s=30,  # Whisper's native 30-second window
    batch_size=16,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)
def transcribe_video(video_path):
    """Transcribe the audio track of a video file using the Whisper pipeline.

    Args:
        video_path: Path to the uploaded video file (any container moviepy
            can read — not just MP4).

    Returns:
        The transcribed text, or an error-message string if extraction or
        transcription fails.
    """
    audio_path = None
    try:
        # Derive the audio path from the actual extension instead of
        # assuming the upload is an .mp4 (str.replace was a no-op otherwise).
        base, _ext = os.path.splitext(video_path)
        audio_path = base + ".mp3"

        # Extract the audio track; always close the clip so the file
        # handle (and ffmpeg subprocess) is released.
        video = VideoFileClip(video_path)
        try:
            video.audio.write_audiofile(audio_path)
        finally:
            video.close()

        # Run ASR. The pipeline returns a dict; the Gradio output is
        # "text", so return only the transcript string.
        result = pipe(audio_path)
        return result["text"]
    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        return f"An error occurred: {e}"
    finally:
        # Clean up the temporary audio file.
        if audio_path and os.path.exists(audio_path):
            os.remove(audio_path)
# Wire the transcription function into a minimal Gradio UI:
# one video input, one text output.
iface = gr.Interface(
    fn=transcribe_video,
    inputs=gr.Video(label="Upload Video"),
    outputs="text",
    description="Upload a video to transcribe its audio content.",
    title="Video Transcription with Whisper",
)

# Start the web server (blocking call).
iface.launch()