Spaces:
Sleeping
Sleeping
| # %% [markdown] | |
| # # IELTS Speaking Evaluation, The Application | |
| # | |
| # IELTS is a popular test | |
| # %% | |
| # setup package install | |
| # import os | |
| # os.system("pip install faster-whisper gradio hf_xet") | |
| # %% [markdown] | |
| # First, we import the relevant packages. | |
| # %% | |
| from faster_whisper import WhisperModel, BatchedInferencePipeline | |
| import gradio as gr | |
| # %% [markdown] | |
| # Next, we initialize the model. The `hf_xet` package installed above allows faster model downloads. Since IELTS is in English, we use the `.en` model, which is optimized for English-only transcription. | |
| # %% | |
# Loading the model takes a while the first time it runs.
# Switch `device` to "cuda" when running on Google Colab.
whisper_settings = {
    "device": "cpu",
    "compute_type": "int8",
    "cpu_threads": 4,
    "num_workers": 2,
}
model = WhisperModel("small.en", **whisper_settings)

# Wrap the model in the batched pipeline that `transcribe` calls.
batched_model = BatchedInferencePipeline(model=model)
| # %% [markdown] | |
| # Now we build a simple audio recorder in Gradio. Each recording is transcribed and the transcript is sent to Gemini for an IELTS-style evaluation. | |
| # %% | |
| from typing import Any | |
| from google import genai | |
| from google.genai import types | |
import os

# SECURITY: API keys must never be committed to source control. The key is
# read from the environment instead (for example, from a secret configured
# in the hosting platform). Any key that was previously hard-coded in this
# file has been exposed publicly and should be revoked and rotated.
_gemini_api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    # Fail fast at startup with an actionable message rather than on the
    # first user request.
    raise RuntimeError(
        "Gemini API key not found: set the GEMINI_API_KEY (or GOOGLE_API_KEY) "
        "environment variable before starting the app."
    )
client = genai.Client(api_key=_gemini_api_key)
| def transcribe(audio: str) -> tuple[str, str | None]: | |
| segments, info = batched_model.transcribe( | |
| audio, | |
| language="en", beam_size=5, batch_size=12) | |
| segments = list(segments) | |
| result = [] | |
| for segment in segments: | |
| result.append(segment.text) | |
| transcript = "\n".join(result) | |
| response = client.models.generate_content( | |
| model="gemini-1.5-flash-8b", | |
| contents=["Rate this speaking exercise under IELTS speaking rubrics", transcript] | |
| ) | |
| return (transcript, response.text) | |
# UI components. type="filepath" makes the audio component hand `transcribe`
# the path to the recorded file; the two text boxes receive the two values
# that `transcribe` returns, in order.
audio_input = gr.Audio(type="filepath")
output_text = gr.Textbox(label="Transcript")
ai_output_text = gr.Textbox(label="AI response")

# Wire the components to the handler and start the app.
iface = gr.Interface(
    fn=transcribe,
    inputs=[audio_input],
    outputs=[output_text, ai_output_text],
    title="IELTS Speaking App",
    description="IELTS speaking app with AI test",
)
iface.launch(debug=True)
| # %% | |