# PANINI-LLM / app.py
# (from Hugging Face Space: st192011 — "Update app.py", commit 8f4da15, verified)
import os
import asyncio
import edge_tts
import librosa
import torch
import numpy as np
import pandas as pd
import re
import gradio as gr
from phonemizer import phonemize
from transformers import pipeline
from huggingface_hub import InferenceClient
# --- AUTHENTICATION ---
# Space secret; None when unset (requests then run unauthenticated/rate-limited).
HF_TOKEN = os.getenv("HF_TOKEN")
# --- CONFIGURATION ---
# We use 3B to 9B models because they are the most stable on the free Inference API.
# Display label shown in the dropdown -> Hugging Face model repo id.
LLM_MODELS = {
    "Llama 3.2 3B (Fastest)": "meta-llama/Llama-3.2-3B-Instruct",
    "Qwen 2.5 7B (Most Accurate)": "Qwen/Qwen2.5-7B-Instruct",
    "Gemma 2 9B (Excellent English)": "google/gemma-2-9b-it"
}
# Display label -> {"code": BCP-47 locale, "ipa": phonemizer/espeak language id,
#                   "voice": edge-tts neural voice name}.
LANGUAGES = {
    "English (US)": {"code": "en-US", "ipa": "en-us", "voice": "en-US-ChristopherNeural"},
    "German": {"code": "de-DE", "ipa": "de", "voice": "de-DE-ConradNeural"},
    "French": {"code": "fr-FR", "ipa": "fr-fr", "voice": "fr-FR-HenriNeural"},
    "Spanish": {"code": "es-ES", "ipa": "es", "voice": "es-ES-AlvaroNeural"},
    "Chinese (Mandarin)": {"code": "zh-CN", "ipa": "cmn", "voice": "zh-CN-XiaoxiaoNeural"}
}
# Load ASR model (Whisper Tiny for CPU efficiency)
print("Loading Whisper ASR...")
# device=-1 pins inference to CPU (no GPU on the free Space tier).
asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)
# --- FUNCTIONS ---
def get_llm_response(model_id, system_prompt, user_prompt):
    """Query a hosted chat model and return its reply text.

    Args:
        model_id: Fully-qualified Hugging Face model repo id.
        system_prompt: System-role instruction for the model.
        user_prompt: User-role message content.

    Returns:
        The assistant's reply string, or a human-readable note when the
        request fails. Errors are reported, never raised, so the Gradio
        UI always receives a displayable string.
    """
    try:
        # Client construction lives inside the try so that a bad model id
        # or token also surfaces as a friendly message rather than an
        # uncaught exception (previously only the request itself was guarded).
        # Fixed: Removed the 'provider' argument to prevent TypeError.
        client = InferenceClient(model=model_id, token=HF_TOKEN)
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        output = client.chat_completion(
            messages,
            max_tokens=500,
            stream=False
        )
        return output.choices[0].message.content
    except Exception as e:
        err = str(e)
        # HTTP 503 = the serverless Inference API is cold-starting the model.
        if "503" in err:
            return "⏳ The model is currently loading on Hugging Face servers. Please wait 30 seconds and try again."
        return f"PANINI LLM Note: {err}"
def generate_curriculum(model_name, language, topic):
    """Produce a short lesson plan for *topic* in *language* using the selected LLM."""
    system_prompt = f"You are PANINI LLM, a world-class {language} teacher. Create a focused lesson plan."
    user_prompt = f"Topic: {topic}. Provide 5 useful words/phrases in {language} with English translations, then give one expert learning tip."
    # Resolve the dropdown label to its repo id and delegate to the shared client helper.
    return get_llm_response(LLM_MODELS[model_name], system_prompt, user_prompt)
async def play_target_audio(text, lang_name, output_path="target.mp3"):
    """Synthesize *text* with the language's neural TTS voice and save it as mp3.

    Args:
        text: Phrase to speak; falsy input short-circuits to None.
        lang_name: Key into LANGUAGES selecting the edge-tts voice.
        output_path: Destination mp3 file. Parameterized (backward-compatible
            default keeps the old behavior) so concurrent callers on a shared
            Space can write distinct files instead of clobbering "target.mp3".

    Returns:
        The saved file path, or None when no text was provided.
    """
    if not text:
        return None
    voice = LANGUAGES[lang_name]["voice"]
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_path)
    return output_path
def analyze_speech(model_name, lang_name, target_text, audio_path):
    """Transcribe the learner's recording, phonemize both texts, and ask the LLM for feedback.

    Args:
        model_name: Dropdown label resolved via LLM_MODELS.
        lang_name: Dropdown label resolved via LANGUAGES (for the IPA backend).
        target_text: The phrase the learner attempted to say.
        audio_path: Filesystem path to the learner's recording.

    Returns:
        A 3-tuple of (ASR transcript, learner IPA string, coach feedback text);
        placeholder strings when either input is missing.
    """
    if not audio_path or not target_text:
        return "Incomplete data.", "", "Please provide both text and recording."
    # 1. ASR transcription (Whisper tiny pipeline loaded at module import).
    asr_res = asr_pipe(audio_path)["text"].strip()
    # 2. Linguistic IPA layer.
    ipa_code = LANGUAGES[lang_name]["ipa"]
    try:
        # Requires espeak-ng installed via packages.txt
        target_ipa = phonemize(target_text, language=ipa_code, backend='espeak', strip=True)
        user_ipa = phonemize(asr_res, language=ipa_code, backend='espeak', strip=True)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still
        # propagate; phonemizer failures (e.g. missing espeak-ng) degrade gracefully.
        target_ipa = "IPA Unavailable"
        user_ipa = "IPA Unavailable"
    # 3. LLM anatomical feedback.
    model_id = LLM_MODELS[model_name]
    system_prompt = "You are a professional Speech-Language Pathologist. Compare the student's pronunciation to the target using IPA."
    user_prompt = (
        f"Target: '{target_text}' (IPA: /{target_ipa}/). "
        f"Student: '{asr_res}' (IPA: /{user_ipa}/). "
        f"Identify the primary phonetic error and give 1 specific anatomical tip (tongue/lip placement) in English."
    )
    feedback = get_llm_response(model_id, system_prompt, user_prompt)
    return asr_res, f"/{user_ipa}/", feedback
# --- UI DESIGN ---
# Two-tab layout: Step 1 generates a lesson plan; Step 2 records the learner,
# plays a native-voice reference, and returns phonetic feedback.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate"), css=".gradio-container {max-width: 950px !important}") as demo:
    gr.HTML("<h1 style='text-align: center; color: #1e40af;'>πŸŽ™οΈ PANINI LLM</h1>")
    gr.HTML("<p style='text-align: center; margin-top: -10px;'>Intelligent Multi-Model Language Tutoring</p>")
    with gr.Tab("Step 1: Curriculum Creation"):
        with gr.Row():
            llm_choice = gr.Dropdown(list(LLM_MODELS.keys()), label="Select AI Teacher (LLM)", value="Qwen 2.5 7B (Most Accurate)")
            lang_choice = gr.Dropdown(list(LANGUAGES.keys()), label="Language", value="English (US)")
        topic_input = gr.Textbox(label="Lesson Topic", placeholder="e.g., Ordering Food, Job Interview, Airport Travel")
        btn_gen = gr.Button("πŸ“š Build My Lesson", variant="primary")
        curr_output = gr.Markdown("---")
    with gr.Tab("Step 2: Pronunciation Practice"):
        with gr.Row():
            target_word = gr.Textbox(label="Word/Phrase to Practice", placeholder="Copy a phrase from Step 1 here")
            btn_tts = gr.Button("πŸ”Š Play Native AI", scale=0)
        audio_ref = gr.Audio(label="Teacher Reference", type="filepath")
        with gr.Row():
            # type="filepath" hands a temp-file path to analyze_speech/asr_pipe.
            audio_user = gr.Audio(label="Your Voice Recording", sources=["microphone"], type="filepath")
        btn_analyze = gr.Button("πŸš€ Analyze My Accent", variant="primary")
        with gr.Row():
            out_transcript = gr.Textbox(label="AI Heard")
            out_ipa = gr.Textbox(label="Your Phonetics (IPA)")
        out_feedback = gr.Markdown("### Feedback from the AI Coach")
    # Event Wireup
    btn_gen.click(generate_curriculum, inputs=[llm_choice, lang_choice, topic_input], outputs=curr_output)
    # play_target_audio is a coroutine; asyncio.run bridges it into Gradio's sync callback.
    btn_tts.click(fn=lambda t, l: asyncio.run(play_target_audio(t, l)), inputs=[target_word, lang_choice], outputs=audio_ref)
    btn_analyze.click(analyze_speech, inputs=[llm_choice, lang_choice, target_word, audio_user], outputs=[out_transcript, out_ipa, out_feedback])
# Run app
demo.launch()