import os
import tempfile
import wave

import gradio as gr
import numpy as np
import requests
import torch
from gtts import gTTS
from transformers import pipeline

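# Assumed runtime dependencies (this file does not pin versions):
#   gradio, gtts, numpy, requests, torch, transformers
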
# Use the GPU with float16 when available; fall back to CPU with float32.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

STT_MODEL_ID = "openai/whisper-small"
TTS_MODEL_ID = "microsoft/speecht5_tts"

# Models are loaded lazily on first use to keep startup fast.
speech_recognizer = None

# Placeholders for a local SpeechT5 TTS pipeline; the current build uses gTTS instead.
tts_processor = None
tts_model = None

models_loaded = False

# Running user/assistant history for the conversation tab.
conversation = []

HF_API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")

headers = {
    "Authorization": f"Bearer {HF_API_TOKEN}",
    "Content-Type": "application/json"
}

articulation_exercises = {
    "title": "Articulation Assessment",
    "instructions": "Record the child pronouncing each target word. The system will analyze pronunciation accuracy.",
    "words": [
        {
            "word": "Sun",
            "target_sound": "s",
            "position": "initial",
            "imageUrl": "https://images.unsplash.com/photo-1477500292188-6f0d31f8cb2e?ixlib=rb-1.2.1&auto=format&fit=crop&w=300&q=80"
        },
        {
            "word": "Mouse",
            "target_sound": "s",
            "position": "final",
            "imageUrl": "https://images.unsplash.com/photo-1425082661705-1834bfd09dca?ixlib=rb-1.2.1&auto=format&fit=crop&w=300&q=80"
        },
        {
            "word": "Pencil",
            "target_sound": "s",
            "position": "medial",
            "imageUrl": "https://images.unsplash.com/photo-1583485088034-697b5bc54ccd?ixlib=rb-1.2.1&auto=format&fit=crop&w=300&q=80"
        },
        {
            "word": "Tree",
            "target_sound": "tr",
            "position": "initial",
            "imageUrl": "https://images.unsplash.com/photo-1502082553048-f009c37129b9?ixlib=rb-1.2.1&auto=format&fit=crop&w=300&q=80"
        },
        {
            "word": "Blue",
            "target_sound": "bl",
            "position": "initial",
            "imageUrl": "https://images.unsplash.com/photo-1557180295-76eee20ae8aa?ixlib=rb-1.2.1&auto=format&fit=crop&w=300&q=80"
        }
    ]
}

language_exercises = {
    "title": "Language Assessment",
    "instructions": "Assess receptive and expressive language skills with these tasks. Record the child's response to each prompt.",
    "tasks": [
        {
            "prompt": "Point to the item that you eat with.",
            "type": "following_directions",
            "options": ["Fork", "Book", "Shoe", "Car"],
            "correct": "Fork"
        },
        {
            "prompt": "What is the opposite of hot?",
            "type": "vocabulary",
            "correct": "Cold"
        },
        {
            "prompt": "Make a sentence using the word 'happy'.",
            "type": "sentence_formation",
            "evaluation": "subjective"
        }
    ]
}

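# New exercise sets can follow the same shape: a title, instructions, and a list of items.
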
# Per-session assessment state.
current_assessment = None
current_item_index = 0
assessment_results = []


def load_models():
    """Load speech models on first use."""
    global speech_recognizer, tts_processor, tts_model, models_loaded

    try:
        if speech_recognizer is None:
            speech_recognizer = pipeline(
                "automatic-speech-recognition",
                model=STT_MODEL_ID,
                torch_dtype=torch_dtype,
                device=device,
            )
            print("Speech recognition model loaded")

        models_loaded = True
        return "Models loaded successfully"
    except Exception as e:
        print(f"Error loading models: {e}")
        return f"Error loading models: {e}"


def get_ai_response(user_text, context=None):
    """Get an AI response from the Hugging Face Inference API."""
    if not user_text:
        return "I couldn't understand what you said. Could you try again?"

    conversation.append({"role": "user", "content": user_text})

    system_prompt = (
        "You are a speech therapy assistant for the CASL 2 assessment tool. "
        "Provide helpful, supportive feedback for speech exercises."
    )
    if context:
        system_prompt += f" Current context: {context}"

    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(conversation)

    try:
        if not HF_API_TOKEN:
            response_text = "Please add a Hugging Face API token in the Space settings to enable AI responses."
        else:
            # The text-generation Inference API expects a single string prompt,
            # not a list of chat messages, so flatten the history first.
            prompt = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
            prompt += "\nassistant:"

            payload = {
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 100,
                    "temperature": 0.7,
                    "top_p": 0.9,
                    "return_full_text": False  # return only the new tokens, not the prompt
                }
            }

            response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=60)

            if response.status_code == 200:
                response_text = response.json()[0]["generated_text"].strip()
            else:
                response_text = f"I'm having trouble connecting to my language model. Error: {response.status_code}"
    except Exception as e:
        response_text = f"An error occurred: {str(e)}"

    conversation.append({"role": "assistant", "content": response_text})

    return response_text


def text_to_speech(text):
    """Convert text to speech using gTTS."""
    try:
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp:
            filename = temp.name

        tts = gTTS(text=text, lang="en", slow=False)
        tts.save(filename)

        return filename
    except Exception as e:
        print(f"TTS Error: {e}")
        return None


def speech_to_text(audio):
    """Convert speech to text using the Whisper model."""
    if audio is None:
        return None

    if not models_loaded:
        load_models()

    sample_rate, audio_data = audio

    # Gradio delivers int16 PCM by default; only rescale floating-point audio,
    # since multiplying int16 samples by 32767 would overflow.
    if np.issubdtype(audio_data.dtype, np.floating):
        audio_data = (audio_data * 32767).astype(np.int16)

    # Downmix stereo recordings to mono before writing the WAV file.
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)
    audio_data = audio_data.astype(np.int16)

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_path = temp_file.name

    try:
        with wave.open(temp_path, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(sample_rate)
            wf.writeframes(audio_data.tobytes())

        result = speech_recognizer(temp_path)
        return result["text"]
    except Exception as e:
        print(f"STT Error: {e}")
        return None
    finally:
        if os.path.exists(temp_path):
            os.unlink(temp_path)


def format_conversation():
    """Format the conversation history for display."""
    result = ""
    for msg in conversation:
        if msg["role"] != "system":
            prefix = "User: " if msg["role"] == "user" else "Assistant: "
            result += f"{prefix}{msg['content']}\n\n"
    return result


def analyze_speech(text, target):
    """Rough pronunciation check for the demo.

    Looks for the target word in the transcript and returns a mock accuracy
    score; a production system would use phoneme-level scoring instead.
    """
    if not text or not target:
        return 0

    if target.lower() in text.lower():
        accuracy = np.random.uniform(70, 100)
    else:
        accuracy = np.random.uniform(0, 70)

    return accuracy


def process_assessment_audio(audio, assessment_type, item_index):
    """Process recorded audio for the current assessment item."""
    global assessment_results

    if audio is None:
        return None, "No audio detected. Please try again.", item_index, None

    transcript = speech_to_text(audio)

    if not transcript:
        return None, "I couldn't understand the speech. Please try again.", item_index, None

    if assessment_type == "articulation":
        current_word = articulation_exercises["words"][item_index]
        target_word = current_word["word"]
        accuracy = analyze_speech(transcript, target_word)

        result = {
            "word": target_word,
            "target_sound": current_word["target_sound"],
            "position": current_word["position"],
            "transcript": transcript,
            "accuracy": accuracy,
            "passed": accuracy > 70
        }

        assessment_results.append(result)

        context = (
            f"Assessment: Articulation. Target word: {target_word} with "
            f"{current_word['target_sound']} sound in {current_word['position']} position. "
            f"User said: {transcript}. Accuracy: {accuracy:.1f}%."
        )
        feedback = get_ai_response(transcript, context)

        # Wrap around to the first item after the last word.
        next_index = item_index + 1
        if next_index >= len(articulation_exercises["words"]):
            next_index = 0

        result_display = f"""
**Word**: {target_word}

**Transcript**: {transcript}

**Accuracy**: {accuracy:.1f}%

**Result**: {"PASSED" if accuracy > 70 else "NEEDS PRACTICE"}

{feedback}
"""

        response_audio = text_to_speech(feedback)
        next_image = articulation_exercises["words"][next_index]["imageUrl"]
        return response_audio, result_display, next_index, next_image

    elif assessment_type == "language":
        current_task = language_exercises["tasks"][item_index]

        result = {
            "prompt": current_task["prompt"],
            "type": current_task["type"],
            "response": transcript,
        }

        assessment_results.append(result)

        context = f"Assessment: Language. Task: {current_task['prompt']}. User said: {transcript}."
        feedback = get_ai_response(transcript, context)

        # Wrap around to the first task after the last one.
        next_index = item_index + 1
        if next_index >= len(language_exercises["tasks"]):
            next_index = 0

        result_display = f"""
**Prompt**: {current_task['prompt']}

**Response**: {transcript}

{feedback}
"""

        response_audio = text_to_speech(feedback)
        return response_audio, result_display, next_index, None

    return None, "Unknown assessment type", item_index, None


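# All assessment handlers return (feedback_audio, result_markdown, next_index, image)
# so the articulation and language tabs can share the same event wiring.
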
def init_articulation_assessment():
    """Initialize the articulation assessment."""
    global current_assessment, current_item_index, assessment_results
    current_assessment = "articulation"
    current_item_index = 0
    assessment_results = []

    if not models_loaded:
        load_models()

    instructions = articulation_exercises["instructions"]
    first_word = articulation_exercises["words"][0]["word"]
    message = f"{instructions}\n\nFirst word: {first_word}"

    audio_response = text_to_speech(message)
    current_image = articulation_exercises["words"][0]["imageUrl"]

    return audio_response, message, current_image, 0


def init_language_assessment():
    """Initialize the language assessment."""
    global current_assessment, current_item_index, assessment_results
    current_assessment = "language"
    current_item_index = 0
    assessment_results = []

    if not models_loaded:
        load_models()

    instructions = language_exercises["instructions"]
    first_prompt = language_exercises["tasks"][0]["prompt"]
    message = f"{instructions}\n\nFirst task: {first_prompt}"

    audio_response = text_to_speech(message)

    return audio_response, message, None, 0


def update_art_item_indicator(idx):
    """Update the articulation item indicator."""
    return f"{idx+1}/{len(articulation_exercises['words'])}"


def update_lang_item_indicator(idx):
    """Update the language item indicator."""
    return f"{idx+1}/{len(language_exercises['tasks'])}"


def navigate_articulation(direction, current_idx):
    """Navigate through articulation items."""
    if direction == "prev":
        new_idx = max(0, current_idx - 1)
    else:
        new_idx = min(len(articulation_exercises["words"]) - 1, current_idx + 1)

    current_word = articulation_exercises["words"][new_idx]
    message = f"Current word: {current_word['word']}"
    current_image = current_word["imageUrl"]

    return update_art_item_indicator(new_idx), message, current_image, new_idx


def navigate_language(direction, current_idx):
    """Navigate through language items."""
    if direction == "prev":
        new_idx = max(0, current_idx - 1)
    else:
        new_idx = min(len(language_exercises["tasks"]) - 1, current_idx + 1)

    current_task = language_exercises["tasks"][new_idx]
    message = f"Current task: {current_task['prompt']}"

    return update_lang_item_indicator(new_idx), message, new_idx


def process_conversation_audio(audio):
    """Process recorded audio for conversation mode."""
    if audio is None:
        return None, "No audio detected. Please try again."

    if not models_loaded:
        load_models()

    transcript = speech_to_text(audio)

    if not transcript:
        return None, format_conversation() + "\nI couldn't understand your speech. Please try again."

    response = get_ai_response(transcript)

    audio_file = text_to_speech(response)

    return audio_file, format_conversation()


def initialize_conversation():
    """Initialize the conversation with a welcome message."""
    global conversation
    conversation = []

    if not models_loaded:
        load_models()

    welcome = "Hello! I'm your CASL 2 speech therapy assistant. How can I help you today?"
    conversation.append({"role": "assistant", "content": welcome})

    welcome_audio = text_to_speech(welcome)

    return welcome_audio, format_conversation()


def get_status():
    """Get the current status of the app."""
    if models_loaded:
        return "Models loaded and ready. The app is working in speech-to-speech mode."
    else:
        return "Models will be loaded on first use. This may take a moment when you first record audio."


custom_css = """
:root {
    --primary: #4a6fa5;
    --secondary: #6b96c3;
    --accent: #ff7e5f;
    --light: #f9f9f9;
    --dark: #333;
    --success: #4caf50;
    --warning: #ff9800;
    --error: #f44336;
}

.gradio-container {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    max-width: 1200px;
    margin: auto;
}

.app-header {
    background-color: var(--primary);
    color: white;
    padding: 1rem;
    border-radius: 8px 8px 0 0;
    margin-bottom: 1rem;
}

.tab-nav {
    margin-bottom: 1rem;
}

.input-panel {
    background-color: white;
    border-radius: 8px;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.08);
    padding: 1rem;
    margin-bottom: 1rem;
}

.output-panel {
    background-color: white;
    border-radius: 8px;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.08);
    padding: 1rem;
}

button.primary {
    background-color: var(--primary);
    color: white;
}

button.secondary {
    background-color: var(--secondary);
    color: white;
}

.image-display {
    display: flex;
    justify-content: center;
    margin: 1rem 0;
}

.image-display img {
    max-width: 300px;
    border-radius: 8px;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
}

.status-bar {
    margin-top: 1rem;
    padding: 0.5rem;
    background-color: #f5f5f5;
    border-radius: 4px;
    font-size: 0.9rem;
    color: #666;
}
"""

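# Build the Gradio interface.
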
with gr.Blocks(title="CASL 2 - Speech Therapy Assessment", css=custom_css) as demo:
    # Shared state: index of the current assessment item.
    current_item_idx = gr.State(0)

    with gr.Column(elem_classes="app-header"):
        gr.Markdown("# CASL 2 - Speech Therapy Assessment")
        gr.Markdown("An interactive tool for speech therapists to assess and treat speech disorders")

    status_box = gr.Textbox(label="Status", value=get_status(), interactive=False, elem_classes="status-bar")

    with gr.Tabs() as tabs:

        with gr.TabItem("Conversation Assistant", elem_classes="tab-nav"):
            gr.Markdown("### General Conversation Mode")
            gr.Markdown("Have a natural conversation with the AI assistant for general questions and guidance")

            with gr.Row():
                with gr.Column(scale=1, elem_classes="input-panel"):
                    conv_start_button = gr.Button("Start Conversation", variant="primary")

                    conv_audio_input = gr.Audio(
                        label="🎤 SPEAK HERE",
                        type="numpy",
                        sources=["microphone"],
                        elem_id="conv_mic"
                    )

                with gr.Column(scale=2, elem_classes="output-panel"):
                    conv_display = gr.Textbox(
                        label="Conversation History",
                        lines=12,
                        value=""
                    )

                    conv_audio_output = gr.Audio(
                        label="AI Response",
                        type="filepath",
                        autoplay=True
                    )

        with gr.TabItem("Articulation Assessment", elem_classes="tab-nav"):
            gr.Markdown("### Articulation Assessment")
            gr.Markdown("Evaluate production of speech sounds in various positions within words")

            with gr.Row():
                with gr.Column(scale=1, elem_classes="input-panel"):
                    art_start_button = gr.Button("Start Assessment", variant="primary")

                    art_current_display = gr.Textbox(
                        label="Current Task",
                        lines=3
                    )

                    art_image = gr.Image(
                        label="Word Image",
                        type="filepath",
                        elem_classes="image-display"
                    )

                    art_audio_input = gr.Audio(
                        label="🎤 RECORD RESPONSE",
                        type="numpy",
                        sources=["microphone"],
                        elem_id="art_mic"
                    )

                    with gr.Row():
                        art_prev_button = gr.Button("◀ Previous")
                        art_item_indicator = gr.Textbox(label="Item", value="1/5", interactive=False)
                        art_next_button = gr.Button("Next ▶")

                with gr.Column(scale=2, elem_classes="output-panel"):
                    art_result_display = gr.Markdown(
                        value="Start the assessment to see results."
                    )

                    art_audio_output = gr.Audio(
                        label="Speech Therapist Feedback",
                        type="filepath",
                        autoplay=True
                    )

        with gr.TabItem("Language Assessment", elem_classes="tab-nav"):
            gr.Markdown("### Language Assessment")
            gr.Markdown("Evaluate receptive and expressive language skills including vocabulary and grammar")

            with gr.Row():
                with gr.Column(scale=1, elem_classes="input-panel"):
                    lang_start_button = gr.Button("Start Assessment", variant="primary")

                    lang_current_display = gr.Textbox(
                        label="Current Task",
                        lines=3
                    )

                    lang_audio_input = gr.Audio(
                        label="🎤 RECORD RESPONSE",
                        type="numpy",
                        sources=["microphone"],
                        elem_id="lang_mic"
                    )

                    with gr.Row():
                        lang_prev_button = gr.Button("◀ Previous")
                        lang_item_indicator = gr.Textbox(label="Item", value="1/3", interactive=False)
                        lang_next_button = gr.Button("Next ▶")

                with gr.Column(scale=2, elem_classes="output-panel"):
                    lang_result_display = gr.Markdown(
                        value="Start the assessment to see results."
                    )

                    lang_audio_output = gr.Audio(
                        label="Speech Therapist Feedback",
                        type="filepath",
                        autoplay=True
                    )

    with gr.Accordion("How to use CASL 2", open=True):
        gr.Markdown("""
        ## CASL 2 Speech Therapy Assessment Tool

        This application provides three main functions:

        ### 1. Conversation Assistant
        - General conversation with an AI assistant
        - Ask questions about speech therapy, techniques, or general information
        - Get guidance on using the assessment tools

        ### 2. Articulation Assessment
        - Evaluate speech sound production
        - Record the patient pronouncing target words
        - Get automatic analysis and therapist feedback
        - Track progress over time

        ### 3. Language Assessment
        - Evaluate receptive and expressive language skills
        - Test vocabulary, following directions, and sentence formation
        - Record responses and get professional feedback

        **For therapists**: Use these tools during your sessions to supplement your professional assessment.

        **Privacy Note**: All audio recordings are processed securely and are not stored permanently.

        **Technical Note**: The first time you record audio, the app will load speech models, which may take a moment.
        """)

    # Conversation tab events.
    conv_start_button.click(
        fn=initialize_conversation,
        outputs=[conv_audio_output, conv_display]
    )

    conv_audio_input.change(
        fn=process_conversation_audio,
        inputs=[conv_audio_input],
        outputs=[conv_audio_output, conv_display]
    )

    # Articulation assessment events. gr.State carries the constant
    # assessment-type / direction arguments to the shared handlers.
    art_start_button.click(
        fn=init_articulation_assessment,
        outputs=[art_audio_output, art_current_display, art_image, current_item_idx]
    )

    art_audio_input.change(
        fn=process_assessment_audio,
        inputs=[art_audio_input, gr.State("articulation"), current_item_idx],
        outputs=[art_audio_output, art_result_display, current_item_idx, art_image]
    )

    art_next_button.click(
        fn=navigate_articulation,
        inputs=[gr.State("next"), current_item_idx],
        outputs=[art_item_indicator, art_current_display, art_image, current_item_idx]
    )

    art_prev_button.click(
        fn=navigate_articulation,
        inputs=[gr.State("prev"), current_item_idx],
        outputs=[art_item_indicator, art_current_display, art_image, current_item_idx]
    )

    # Language assessment events. A hidden image absorbs the unused
    # image slot returned by the shared assessment handlers.
    lang_image_placeholder = gr.Image(visible=False)

    lang_start_button.click(
        fn=init_language_assessment,
        outputs=[lang_audio_output, lang_current_display, lang_image_placeholder, current_item_idx]
    )

    lang_audio_input.change(
        fn=process_assessment_audio,
        inputs=[lang_audio_input, gr.State("language"), current_item_idx],
        outputs=[lang_audio_output, lang_result_display, current_item_idx, lang_image_placeholder]
    )

    lang_next_button.click(
        fn=navigate_language,
        inputs=[gr.State("next"), current_item_idx],
        outputs=[lang_item_indicator, lang_current_display, current_item_idx]
    )

    lang_prev_button.click(
        fn=navigate_language,
        inputs=[gr.State("prev"), current_item_idx],
        outputs=[lang_item_indicator, lang_current_display, current_item_idx]
    )


if __name__ == "__main__":
    demo.launch()