Agents_Course_Final_Assignment_Evaluator

Paused

Michele De Stefano

Adapted the code so that it can run locally

1b8aef5 10 months ago

1.41 kB

	import importlib.resources
	import json

	import torch

	from pathlib import Path
	from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

	from question_retriever import get_question
	from tools.data_helpers import get_file_path

	# Location of the importable ``data`` package, obtained from
	# ``importlib.resources.files`` and converted to a ``pathlib.Path``.
	# NOTE(review): not referenced anywhere in the visible part of this file.
	__resources_path = Path(str(importlib.resources.files("data")))


	def test_whisper() -> None:
	    """Transcribe one question's audio attachment with Whisper and print it.

	    The question is fetched by a fixed task id through ``get_question`` (its
	    return value is parsed as JSON), the ``file_name`` entry is resolved with
	    ``get_file_path``, and the result is passed to a Hugging Face
	    ``automatic-speech-recognition`` pipeline built around the
	    ``openai/whisper-large-v3-turbo`` checkpoint. The ``"text"`` entry of the
	    pipeline output is printed; nothing is asserted or returned.
	    """
	    question_payload = json.loads(
	        get_question(task_id="1f975693-876d-457b-a649-393859e79bf3")
	    )
	    # Presumably a local path to the audio attachment -- confirm against
	    # tools.data_helpers.get_file_path.
	    audio_path = get_file_path(file_name=question_payload["file_name"])

	    # CUDA use is hard-coded off here, so the CPU / float32 branch is always
	    # taken; replace the literal with torch.cuda.is_available() to let the
	    # "cuda:0" / float16 branch be selected when a GPU is present.
	    use_cuda = False
	    if use_cuda:
	        target_device, dtype = "cuda:0", torch.float16
	    else:
	        target_device, dtype = "cpu", torch.float32

	    checkpoint = "openai/whisper-large-v3-turbo"
	    whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
	        checkpoint,
	        torch_dtype=dtype,
	        low_cpu_mem_usage=True,
	        use_safetensors=True,
	    )
	    whisper_model.to(target_device)
	    whisper_processor = AutoProcessor.from_pretrained(checkpoint)

	    asr = pipeline(
	        "automatic-speech-recognition",
	        model=whisper_model,
	        tokenizer=whisper_processor.tokenizer,
	        feature_extractor=whisper_processor.feature_extractor,
	        torch_dtype=dtype,
	        device=target_device,
	    )

	    # "return_timestamps" is forwarded to generation via generate_kwargs.
	    transcription = asr(audio_path, generate_kwargs={"return_timestamps": True})

	    print(transcription["text"])