File size: 4,123 Bytes
0656ae3 3fcafd6 0656ae3 c0cf34e 0656ae3 7517e30 3fcafd6 7517e30 212cf45 698260d 7517e30 0656ae3 698260d 7517e30 698260d 212cf45 c0cf34e 6d4b56b c0cf34e 698260d 96bf3b4 698260d a621d2a 5863cc6 212cf45 5863cc6 212cf45 6dda1e7 c0cf34e 6dda1e7 c0cf34e 46828da 5863cc6 a621d2a 698260d 0656ae3 0a82cc9 a621d2a 0656ae3 6d4b56b 0656ae3 a621d2a c13a7b8 0656ae3 46828da 0656ae3 a621d2a 46828da 0656ae3 46828da c13a7b8 f080822 bb2dd4c 2ebe77e fb90234 2ebe77e 96463c9 2ebe77e c13a7b8 96463c9 46828da a621d2a 7ffe4bb a621d2a 7ffe4bb c13a7b8 96463c9 bb2dd4c 96463c9 46828da a621d2a 7ffe4bb a621d2a 7ffe4bb fb90234 46828da 0656ae3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | import gradio as gr
import torch
import random
from difflib import Differ
from transformers import pipeline
# Checkpoints compared by the demo: the stock Whisper model and a fine-tuned one.
BASE_MODEL_NAME = "openai/whisper-tiny"
FT_MODEL_NAME = "sotirios-slv/whisper-tiny-au-en"

# Batch size handed to each pipeline call, and the pipeline task identifier.
BATCH_SIZE = 8
TASK = "automatic-speech-recognition"

# Use CUDA device 0 when available, otherwise run on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = 0

# Keyword arguments shared by both pipelines; only the checkpoint differs.
_PIPELINE_OPTIONS = {
    "task": TASK,
    "chunk_length_s": 30,
    "device": device,
}

pipe = pipeline(model=BASE_MODEL_NAME, **_PIPELINE_OPTIONS)
ft_pipe = pipeline(model=FT_MODEL_NAME, **_PIPELINE_OPTIONS)
def diff_texts(text1, text2):
    """Return a case-insensitive, character-level diff of two strings.

    Both strings are case-folded and handed to ``difflib.Differ.compare`` as
    sequences, so every element being compared is a single character.

    Args:
        text1: Reference text.
        text2: Text compared against the reference.

    Returns:
        A list of ``(character, marker)`` tuples in diff order. ``marker`` is
        the first character of the ``Differ`` output line (for example ``"-"``
        for text only in ``text1`` and ``"+"`` for text only in ``text2``), or
        ``None`` when the character is common to both.
    """
    reference = text1.casefold()
    candidate = text2.casefold()

    highlighted = []
    for entry in Differ().compare(reference, candidate):
        # Differ lines look like "<marker> <payload>": marker at index 0,
        # payload from index 2 onwards.
        marker = entry[0]
        fragment = entry[2:]
        highlighted.append((fragment, None if marker == " " else marker))
    return highlighted
def transcribe(inputs, task='transcribe'):
    """Transcribe audio with both models and diff each result against the quote.

    Args:
        inputs: Audio input forwarded unchanged to both pipelines.
        task: Generation task forwarded to the pipelines via ``generate_kwargs``.

    Returns:
        A 4-tuple ``(base_text, base_diff, ft_text, ft_diff)``: the transcript
        string of each model followed by its ``diff_texts`` result against the
        module-level ``selected_quote``. If transcription itself fails, the
        error message is returned in both text slots with empty diffs, so the
        tuple always has four elements.

    Raises:
        gr.Error: If no audio was submitted, or no quote has been picked yet.
    """
    print(f"selected_quote: {selected_quote}")

    # Validate before entering the try block: raised inside it, these
    # user-facing errors would be swallowed by the generic handler below.
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    if not isinstance(selected_quote, str):
        # Until select_quote() has run, the global is not a quote string and
        # cannot be diffed against a transcript.
        raise gr.Error("No quote selected! Please pick a quote before transcribing.")

    try:
        base_result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task, "language": "en"})
        ft_result = ft_pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task, "language": "en"})
        # Return the transcript strings rather than the whole result mappings.
        base_text = base_result['text']
        ft_text = ft_result['text']
        return (
            base_text,
            diff_texts(selected_quote, base_text),
            ft_text,
            diff_texts(selected_quote, ft_text),
        )
    except Exception as e:
        print(f"Error - {e}")
        # Keep the best-effort behaviour of reporting the failure in the
        # output, but as text and with one value per returned slot.
        message = f"Error - {e}"
        return message, [], message, []
def select_quote():
    """Pick a random quote, store it globally and return it as Markdown.

    Side effect: rebinds the module-level ``selected_quote`` to the chosen
    quote string.

    Returns:
        The chosen quote formatted as an italic, double-quoted level-2
        Markdown heading.
    """
    global selected_quote

    quote_pool = (
        'The hours of labor reduced to eight, leave to artisans, tradesmen, and other dwellers in towns a vary large portion of the remainder of the twenty-four virtually unoccupied.',
        'How is this leisure to be disposed of? In the public-house? the singing hall? the dancing-saloon? which hold out seductions somewhat more dangerous, methinks, to honest labor than those presented by a library;',
        'We may well rejoice, then, when we see a room such as this filled with attentive and reflective readers.',
        'The insinuation of the waste of time in the perusal of unprofitable, trashy books must be met also by the enquiry — What does the expression mean?',
        "Men's minds are not cast in one mould — what charms one may repel another — nor is one man's mind at all times in the same frame.",
    )

    # randint is inclusive at both ends, hence the "- 1" on the upper bound.
    last_index = len(quote_pool) - 1
    chosen_index = random.randint(0, last_index)
    selected_quote = quote_pool[chosen_index]
    return '## *"' + selected_quote + '"*'
# Introductory Markdown passed to gr.Markdown in the UI: title, provenance of
# the quotes and a link to the full address. The empty first and last entries
# reproduce the leading and trailing newline of the text.
descriptive_markdown = "\n".join(
    [
        "",
        "# Barryoke",
        "Quotes taken from an address given by Sir Redmond Barry on the opening of the free public library of Ballarat East.",
        "See the full address here - [https://latrobejournal.slv.vic.gov.au/latrobejournal/issue/latrobe-26/t1-g-t3.html](https://latrobejournal.slv.vic.gov.au/latrobejournal/issue/latrobe-26/t1-g-t3.html)",
        "",
    ]
)
# Build the Gradio UI: a quote picker, a microphone recorder, and for each
# model a transcript box next to a highlighted diff against the chosen quote.
with gr.Blocks() as demo:
    # NOTE(review): select_quote() rebinds this module-level name to a plain
    # string, so this State object is never passed to any event listener.
    # Confirm whether per-session state was intended here.
    selected_quote = gr.State([])

    gr.Markdown(descriptive_markdown)

    rand_btn = gr.Button("Pick a quote")
    quote = gr.Markdown()
    rand_btn.click(fn=select_quote, inputs=[], outputs=quote)

    # No trailing comma here: it previously turned mf_input into a one-element
    # tuple instead of the Audio component itself.
    mf_input = gr.Audio(sources=["microphone"], type="filepath")
    btn = gr.Button("Transcribe")

    gr.Markdown(f'### Output transcribed using {BASE_MODEL_NAME}')
    with gr.Row():
        out_1 = gr.Textbox(label='Whisper transcription')
        diff_out_1 = gr.HighlightedText(
            label="WhisperDiff",
            combine_adjacent=True,
            show_legend=True,
            color_map={"+": "green", "-": "blue"},
        )

    gr.Markdown(f'### Output transcribed using {FT_MODEL_NAME}')
    with gr.Row():
        out_2 = gr.Textbox(label='Finetune transcription')
        diff_out_2 = gr.HighlightedText(
            label="FinetuneDiff",
            combine_adjacent=True,
            show_legend=True,
            color_map={"+": "green", "-": "blue"},
        )

    # transcribe feeds these four outputs in this order.
    btn.click(
        fn=transcribe,
        inputs=mf_input,
        outputs=[out_1, diff_out_1, out_2, diff_out_2],
    )

demo.launch()
|