Spaces:

Boadiwaa
/

App-Transcriber

Build error

App Files Files Community

App-Transcriber / app.py

Boadiwaa

Update app.py

09df261 verified almost 2 years ago

raw

history blame contribute delete

2.75 kB

	import gradio as gr
	import base64
	import os
	import torch
	import numpy as np
	#import ast
	#import librosa
	from transformers import (
	AutomaticSpeechRecognitionPipeline,
	WhisperForConditionalGeneration,
	WhisperTokenizer,
	WhisperProcessor,
	)
	from peft import PeftModel, PeftConfig
	# Model assembly: base Whisper checkpoint + LoRA adapter -> ASR pipeline.
	peft_model_id = "Boadiwaa/LORA-colab-Distil-Whisper-medium2"
	task = "transcribe"

	# The adapter's config names the base checkpoint it is applied to.
	peft_config = PeftConfig.from_pretrained(peft_model_id)
	_base_checkpoint = peft_config.base_model_name_or_path

	# Load the base model first, then wrap it with the adapter weights.
	model = PeftModel.from_pretrained(
	    WhisperForConditionalGeneration.from_pretrained(
	        _base_checkpoint,
	        device_map="auto",
	    ),
	    peft_model_id,
	)

	# Tokenizer and processor are both loaded from the base checkpoint.
	tokenizer = WhisperTokenizer.from_pretrained(_base_checkpoint, task=task)
	processor = WhisperProcessor.from_pretrained(_base_checkpoint, task=task)
	feature_extractor = processor.feature_extractor

	# Speech-recognition pipeline used by the request handler.
	pipe = AutomaticSpeechRecognitionPipeline(
	    model=model,
	    tokenizer=tokenizer,
	    feature_extractor=feature_extractor,
	)

	#api_key = os.getenv("HF_API_TOKEN")

	def transcribe(data):
	    """Transcribe base64-encoded audio and return the recognised text.

	    The payload is decoded, written to a temporary file, and that file's
	    path is handed to the module-level ASR pipeline ``pipe``.

	    Args:
	        data: Base64-encoded audio file contents, in any form accepted by
	            ``base64.b64decode``.

	    Returns:
	        The ``"text"`` entry of the pipeline's result.

	    Raises:
	        binascii.Error: If ``data`` is not valid base64.
	    """
	    # Imported here so the function is self-contained; the file's import
	    # header does not provide ``tempfile``.
	    import tempfile

	    audio_bytes = base64.b64decode(data)

	    # One temporary file per call: a single fixed path would let
	    # concurrent requests overwrite each other's audio.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
	        audio_file.write(audio_bytes)
	        audio_path = audio_file.name
	    print("Success")

	    try:
	        with torch.cuda.amp.autocast():
	            return pipe(audio_path, max_new_tokens=255)["text"]
	    finally:
	        # Always remove the upload, even when inference raises.
	        os.remove(audio_path)

	#hf_writer = gr.HuggingFaceDatasetSaver(hf_token = api_key,dataset_name="interaction-log2")
	# Gradio interface: passes the submitted input to `transcribe` and shows
	# the returned value as text.
	# NOTE(review): the article text below describes recording from a
	# microphone, but the input component is gr.JSON — presumably a separate
	# client sends the audio as base64; confirm against the caller.
	demo = gr.Interface(
	fn=transcribe,
	# Input arrives through a JSON component; output uses the "text" shortcut.
	inputs=gr.JSON(),
	outputs="text",
	title="Transcriber for Ghanaian-accented speech (English)",
	description="Realtime demo for Ghanaian-accented speech recognition (in English).",
	# Consent notice and usage instructions passed as the interface's article.
	article = """
	By using this app you consent to your voice being used to train the underlying open-source model further.

	INSTRUCTIONS FOR USE:
	1. Click on record and speak into your microphone
	2. Click on stop and submit after you are done speaking.
	3. Speech input should not exceed 40s for optimal results.
	4. Please wait a few secs after input to see your results.
	NB: You might see "no microphone detected" when you first open the app, CONSIDER THAT A MICROPHONE TEST, record anyway and submit. You might see an Error in the output. Now delete the input by clicking the 'x' at the top and record your main input.
	The app should run seamlessly in the subsequent inputs.
	"""
	)

	# Launch exactly once, and only when executed as a script. Previously the
	# app launched unconditionally and then again under this guard, so an
	# import started a server and a script run launched a second time
	# without the original options.
	if __name__ == "__main__":
	    demo.launch(share=True, show_error=True)