# %% [markdown]
# # IELTS Speaking Evaluation, The Application
#
# IELTS is a popular test
# %%
# setup package install
# import os
# os.system("pip install faster-whisper gradio hf_xet")
# %% [markdown]
# First, we import the relevant packages.
# %%
from faster_whisper import WhisperModel, BatchedInferencePipeline
import gradio as gr
# %% [markdown]
# Next, we initialize the model. The `hf_xet` package installed above allows faster downloads. Since IELTS is in English, we use the `.en` model, which is optimized for English-only audio.
# %%
# Setting up the model takes a while.
# On Google Colab, change the device to "cuda".
model = WhisperModel(
    "small.en",
    device="cpu",
    compute_type="int8",
    cpu_threads=4,
    num_workers=2,
)

# Batched wrapper around the model; this is the object used for transcription.
batched_model = BatchedInferencePipeline(model=model)
# %% [markdown]
# Now we write a simple audio recorder in Gradio.
# %%
from typing import Any
from google import genai
from google.genai import types
import os

# SECURITY: the API key must never be committed to source. Read it from the
# environment instead (e.g. a Space secret named GEMINI_API_KEY). The key that
# was previously hard-coded here is public and should be revoked.
# NOTE(review): when the variable is unset this passes api_key=None, and the
# SDK is expected to fall back to its own environment lookup or raise.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
def transcribe(audio: str) -> tuple[str, str | None]:
segments, info = batched_model.transcribe(
audio,
language="en", beam_size=5, batch_size=12)
segments = list(segments)
result = []
for segment in segments:
result.append(segment.text)
transcript = "\n".join(result)
response = client.models.generate_content(
model="gemini-1.5-flash-8b",
contents=["Rate this speaking exercise under IELTS speaking rubrics", transcript]
)
return (transcript, response.text)
# type="filepath" makes the audio component return the path to the audio file.
audio_input = gr.Audio(type="filepath")
output_text = gr.Textbox(label="Transcript")
ai_output_text = gr.Textbox(label="AI response")

# One audio input feeds `transcribe`; its two return values fill the two
# text boxes in order.
iface = gr.Interface(
    title="IELTS Speaking App",
    description="IELTS speaking app with AI test",
    fn=transcribe,
    inputs=[audio_input],
    outputs=[output_text, ai_output_text],
    # live=True
)

iface.launch(debug=True)
# %%