# App-Transcriber / app.py
# Boadiwaa's picture
# Update app.py
# 09df261 verified
import base64
import os
import tempfile

import gradio as gr
import numpy as np
import torch
#import ast
#import librosa
from peft import PeftModel, PeftConfig
from transformers import (
    AutomaticSpeechRecognitionPipeline,
    WhisperForConditionalGeneration,
    WhisperTokenizer,
    WhisperProcessor,
)
# Adapter repository to load and the task handed to the Whisper
# tokenizer/processor.
peft_model_id = "Boadiwaa/LORA-colab-Distil-Whisper-medium2"
task = "transcribe"

# The adapter config records which base checkpoint to load underneath it.
peft_config = PeftConfig.from_pretrained(peft_model_id)
_base_checkpoint = peft_config.base_model_name_or_path

# Load the base Whisper model and wrap it with the adapter in one step.
model = PeftModel.from_pretrained(
    WhisperForConditionalGeneration.from_pretrained(
        _base_checkpoint,
        device_map="auto",
    ),
    peft_model_id,
)

# Text and audio pre/post-processing components come from the base checkpoint.
tokenizer = WhisperTokenizer.from_pretrained(_base_checkpoint, task=task)
processor = WhisperProcessor.from_pretrained(_base_checkpoint, task=task)
feature_extractor = processor.feature_extractor

# Speech-recognition pipeline used by the request handler.
pipe = AutomaticSpeechRecognitionPipeline(
    model=model,
    tokenizer=tokenizer,
    feature_extractor=feature_extractor,
)
#api_key = os.getenv("HF_API_TOKEN")
def transcribe(data):
    """Transcribe base64-encoded audio and return the recognised text.

    Args:
        data: Base64 text (``str`` or bytes-like) holding the contents of an
            audio file.

    Returns:
        The ``"text"`` entry of the result returned by the module-level
        ``pipe`` for the decoded audio.

    Raises:
        ValueError: If ``data`` is empty or decodes to zero bytes.
        binascii.Error: If ``data`` is incorrectly padded base64.
    """
    if not data:
        raise ValueError("No audio received: expected base64-encoded audio data.")

    audio_bytes = base64.b64decode(data)
    if not audio_bytes:
        raise ValueError("Decoded audio is empty: expected base64-encoded audio data.")

    # Each request gets its own scratch file. A single shared "audio.wav" in
    # the working directory could be overwritten by another request between
    # the write and the pipeline call.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    try:
        with os.fdopen(fd, "wb") as audio_file:
            audio_file.write(audio_bytes)
        print("Success")

        with torch.cuda.amp.autocast():
            text = pipe(audio_path, max_new_tokens=255)["text"]
    finally:
        # Remove the scratch file whether or not transcription succeeded.
        os.remove(audio_path)
    return text
#hf_writer = gr.HuggingFaceDatasetSaver(hf_token = api_key,dataset_name="interaction-log2")
# Long-form text passed as the interface's ``article``: consent notice plus
# usage instructions for end users.
_ARTICLE = """
By using this app you consent to your voice being used to train the underlying open-source model further.
INSTRUCTIONS FOR USE:
1. Click on record and speak into your microphone
2. Click on stop and submit after you are done speaking.
3. Speech input should not exceed 40s for optimal results.
4. Please wait a few secs after input to see your results.
NB: You might see "no microphone detected" when you first open the app, CONSIDER THAT A MICROPHONE TEST, record anyway and submit. You might see an Error in the output. Now delete the input by clicking the 'x' at the top and record your main input.
The app should run seamlessly in the subsequent inputs.
"""

# Gradio interface: a JSON input is handed to ``transcribe`` and the returned
# string is shown as text.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.JSON(),
    outputs="text",
    title="Transcriber for Ghanaian-accented speech (English)",
    description="Realtime demo for Ghanaian-accented speech recognition (in English).",
    article=_ARTICLE,
)
# Launch exactly once, and only when this file is run as a script. Previously
# the server was started unconditionally (so importing the module launched
# it) and then launched a second time under the __main__ guard.
if __name__ == "__main__":
    demo.launch(share=True, show_error=True)