IELTS_test / app.py
duc
Update app.py
1509b66 verified
# %% [markdown]
# # IELTS Speaking Evaluation, The Application
#
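# IELTS is a popular English-proficiency test. This app records a spoken answer, transcribes it, and asks an AI model to rate it against the IELTS speaking rubrics.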
# %%
# setup package install
# import os
# os.system("pip install faster-whisper gradio hf_xet")
# %% [markdown]
# First, we import the relevant packages.
# %%
from faster_whisper import WhisperModel, BatchedInferencePipeline
import gradio as gr
# %% [markdown]
# Next, we initialize the model. The `hf_xet` package above comes into play, allowing faster downloads. Since IELTS is in English, we use the `.en` model, which is optimized for English-only transcription.
# %%
# Setting up the model takes a while.
# Runs on CPU here; change the device to cuda on Google Colab.
model = WhisperModel(
    "small.en",
    num_workers=2,
    cpu_threads=4,
    compute_type="int8",
    device="cpu",
)
# Batched wrapper around the model; this is what `transcribe` calls.
batched_model = BatchedInferencePipeline(model=model)
# %% [markdown]
# Now we write a simple audio recorder in Gradio, together with the function that transcribes and rates each recording.
# %%
from typing import Any
from google import genai
from google.genai import types
import os

# Read the Gemini API key from the environment instead of embedding it in the
# source: a key committed to a repository is readable by anyone who can see the
# file. Any key that was previously committed here should be treated as
# compromised and rotated.
_api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not _api_key:
    raise RuntimeError(
        "Gemini API key not found: set the GEMINI_API_KEY (or GOOGLE_API_KEY) "
        "environment variable before starting the app."
    )
client = genai.Client(api_key=_api_key)
def transcribe(audio: str) -> tuple[str, str | None]:
segments, info = batched_model.transcribe(
audio,
language="en", beam_size=5, batch_size=12)
segments = list(segments)
result = []
for segment in segments:
result.append(segment.text)
transcript = "\n".join(result)
response = client.models.generate_content(
model="gemini-1.5-flash-8b",
contents=["Rate this speaking exercise under IELTS speaking rubrics", transcript]
)
return (transcript, response.text)
# Input widget: type="filepath" hands the recording to the callback as a path
# to the audio file.
audio_input = gr.Audio(type="filepath")

# Output widgets: the transcript and the AI response.
output_text = gr.Textbox(label="Transcript")
ai_output_text = gr.Textbox(label="AI response")

# Wire the widgets to `transcribe`. The live=True option is currently disabled.
iface = gr.Interface(
    title="IELTS Speaking App",
    description="IELTS speaking app with AI test",
    fn=transcribe,
    inputs=[audio_input],
    outputs=[output_text, ai_output_text],
)

iface.launch(debug=True)
# %%