Spaces:
Sleeping
Sleeping
| import os | |
| import asyncio | |
| import edge_tts | |
| import librosa | |
| import torch | |
| import numpy as np | |
| import pandas as pd | |
| import re | |
| import gradio as gr | |
| from phonemizer import phonemize | |
| from transformers import pipeline | |
| from huggingface_hub import InferenceClient | |
# --- AUTHENTICATION ---
# Hugging Face API token, read from the environment; None when not configured.
HF_TOKEN = os.getenv("HF_TOKEN")

# --- CONFIGURATION ---
# We use 3B to 9B models because they are the most stable on the free Inference API.
# Display label -> Hugging Face model repo id.
LLM_MODELS = {
    "Llama 3.2 3B (Fastest)": "meta-llama/Llama-3.2-3B-Instruct",
    "Qwen 2.5 7B (Most Accurate)": "Qwen/Qwen2.5-7B-Instruct",
    "Gemma 2 9B (Excellent English)": "google/gemma-2-9b-it"
}

# Per-language settings:
#   "code"  - locale code (presumably BCP-47; only displayed/configured, not parsed here)
#   "ipa"   - language id passed to phonemizer's espeak backend
#   "voice" - edge-tts neural voice used for the reference audio
LANGUAGES = {
    "English (US)": {"code": "en-US", "ipa": "en-us", "voice": "en-US-ChristopherNeural"},
    "German": {"code": "de-DE", "ipa": "de", "voice": "de-DE-ConradNeural"},
    "French": {"code": "fr-FR", "ipa": "fr-fr", "voice": "fr-FR-HenriNeural"},
    "Spanish": {"code": "es-ES", "ipa": "es", "voice": "es-ES-AlvaroNeural"},
    "Chinese (Mandarin)": {"code": "zh-CN", "ipa": "cmn", "voice": "zh-CN-XiaoxiaoNeural"}
}

# Load ASR model (Whisper Tiny for CPU efficiency); device=-1 forces CPU.
print("Loading Whisper ASR...")
asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)
| # --- FUNCTIONS --- | |
def get_llm_response(model_id, system_prompt, user_prompt):
    """Send a system/user prompt pair to the hosted chat model and return its reply.

    Never raises: any failure is converted into a human-readable note so the
    Gradio UI always has text to display.
    """
    # NOTE: no 'provider' argument here — passing one caused a TypeError.
    client = InferenceClient(model=model_id, token=HF_TOKEN)
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    try:
        reply = client.chat_completion(chat, max_tokens=500, stream=False)
        return reply.choices[0].message.content
    except Exception as exc:
        reason = str(exc)
        # A 503 from the Inference API means the model is still cold-starting.
        if "503" in reason:
            return "β³ The model is currently loading on Hugging Face servers. Please wait 30 seconds and try again."
        return f"PANINI LLM Note: {reason}"
def generate_curriculum(model_name, language, topic):
    """Ask the chosen LLM to draft a focused, five-item lesson plan for *topic*."""
    system_prompt = f"You are PANINI LLM, a world-class {language} teacher. Create a focused lesson plan."
    user_prompt = f"Topic: {topic}. Provide 5 useful words/phrases in {language} with English translations, then give one expert learning tip."
    return get_llm_response(LLM_MODELS[model_name], system_prompt, user_prompt)
async def play_target_audio(text, lang_name):
    """Synthesize *text* with the language's neural voice and return the mp3 path.

    Returns None when there is nothing to speak.
    """
    if not text:
        return None
    speaker = edge_tts.Communicate(text, LANGUAGES[lang_name]["voice"])
    out_file = "target.mp3"
    await speaker.save(out_file)
    return out_file
def analyze_speech(model_name, lang_name, target_text, audio_path):
    """Transcribe the learner's recording, phonemize it, and ask the LLM for coaching.

    Args:
        model_name: key into LLM_MODELS selecting the feedback model.
        lang_name: key into LANGUAGES selecting the IPA language id.
        target_text: the phrase the learner attempted to pronounce.
        audio_path: path to the learner's recording (from the Gradio mic widget).

    Returns:
        (transcript, user IPA string, feedback text) — placeholder strings
        when inputs are missing or phonemization is unavailable.
    """
    if not audio_path or not target_text:
        return "Incomplete data.", "", "Please provide both text and recording."

    # 1. ASR Transcription
    asr_res = asr_pipe(audio_path)["text"].strip()

    # 2. Linguistic IPA Layer
    ipa_code = LANGUAGES[lang_name]["ipa"]
    try:
        # Requires espeak-ng installed via packages.txt
        target_ipa = phonemize(target_text, language=ipa_code, backend='espeak', strip=True)
        user_ipa = phonemize(asr_res, language=ipa_code, backend='espeak', strip=True)
    except Exception:
        # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt /
        # SystemExit. Phonemization failure (e.g. espeak-ng missing) is still
        # handled with a best-effort placeholder so analysis can proceed.
        target_ipa = "IPA Unavailable"
        user_ipa = "IPA Unavailable"

    # 3. LLM Anatomical Feedback
    model_id = LLM_MODELS[model_name]
    system_prompt = "You are a professional Speech-Language Pathologist. Compare the student's pronunciation to the target using IPA."
    user_prompt = (
        f"Target: '{target_text}' (IPA: /{target_ipa}/). "
        f"Student: '{asr_res}' (IPA: /{user_ipa}/). "
        f"Identify the primary phonetic error and give 1 specific anatomical tip (tongue/lip placement) in English."
    )
    feedback = get_llm_response(model_id, system_prompt, user_prompt)
    return asr_res, f"/{user_ipa}/", feedback
# --- UI DESIGN ---
# Two-tab layout: Step 1 generates a lesson plan, Step 2 records and analyzes
# the learner's pronunciation of a phrase copied from Step 1.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate"), css=".gradio-container {max-width: 950px !important}") as demo:
    gr.HTML("<h1 style='text-align: center; color: #1e40af;'>ποΈ PANINI LLM</h1>")
    gr.HTML("<p style='text-align: center; margin-top: -10px;'>Intelligent Multi-Model Language Tutoring</p>")

    with gr.Tab("Step 1: Curriculum Creation"):
        with gr.Row():
            # Dropdown values are the display keys of LLM_MODELS / LANGUAGES;
            # the handlers map them back to model ids / voice configs.
            llm_choice = gr.Dropdown(list(LLM_MODELS.keys()), label="Select AI Teacher (LLM)", value="Qwen 2.5 7B (Most Accurate)")
            lang_choice = gr.Dropdown(list(LANGUAGES.keys()), label="Language", value="English (US)")
        topic_input = gr.Textbox(label="Lesson Topic", placeholder="e.g., Ordering Food, Job Interview, Airport Travel")
        btn_gen = gr.Button("π Build My Lesson", variant="primary")
        curr_output = gr.Markdown("---")

    with gr.Tab("Step 2: Pronunciation Practice"):
        with gr.Row():
            target_word = gr.Textbox(label="Word/Phrase to Practice", placeholder="Copy a phrase from Step 1 here")
            btn_tts = gr.Button("π Play Native AI", scale=0)
        # Reference audio produced by play_target_audio (edge-tts mp3 file path).
        audio_ref = gr.Audio(label="Teacher Reference", type="filepath")
        with gr.Row():
            # type="filepath" so analyze_speech receives a path it can hand to Whisper.
            audio_user = gr.Audio(label="Your Voice Recording", sources=["microphone"], type="filepath")
        btn_analyze = gr.Button("π Analyze My Accent", variant="primary")
        with gr.Row():
            out_transcript = gr.Textbox(label="AI Heard")
            out_ipa = gr.Textbox(label="Your Phonetics (IPA)")
        out_feedback = gr.Markdown("### Feedback from the AI Coach")

    # Event Wireup
    btn_gen.click(generate_curriculum, inputs=[llm_choice, lang_choice, topic_input], outputs=curr_output)
    # play_target_audio is a coroutine; the lambda drives it to completion
    # synchronously with asyncio.run so the click handler can stay sync.
    btn_tts.click(fn=lambda t, l: asyncio.run(play_target_audio(t, l)), inputs=[target_word, lang_choice], outputs=audio_ref)
    btn_analyze.click(analyze_speech, inputs=[llm_choice, lang_choice, target_word, audio_user], outputs=[out_transcript, out_ipa, out_feedback])

# Run app
demo.launch()