Spaces:

st192011
/

PANINI-LLM

Sleeping

App Files Files Community

st192011 committed 18 days ago

Commit

8f4da15

verified ·

1 Parent(s): fd8cb7b

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -42

app.py CHANGED Viewed

@@ -14,13 +14,12 @@ from huggingface_hub import InferenceClient
 # --- AUTHENTICATION ---
 HF_TOKEN = os.getenv("HF_TOKEN")
-# --- CONFIGURATION: 2025 STABLE MODELS ---
-# These models are currently the most reliable on the Hugging Face Free Inference API.
 LLM_MODELS = {
-    "Llama 3.2 3B (Fast & Smart)": "meta-llama/Llama-3.2-3B-Instruct",
-    "Qwen 2.5 7B (Excellent Accuracy)": "Qwen/Qwen2.5-7B-Instruct",
-    "Gemma 2 9B (Google's Best)": "google/gemma-2-9b-it",
-    "Llama 3.3 70B (Powerhouse - Busy)": "meta-llama/Llama-3.3-70B-Instruct"
 }
 LANGUAGES = {
@@ -31,41 +30,38 @@ LANGUAGES = {
     "Chinese (Mandarin)": {"code": "zh-CN", "ipa": "cmn", "voice": "zh-CN-XiaoxiaoNeural"}
 }
-# Load ASR model (Whisper Tiny) - remains the same for CPU efficiency
 print("Loading Whisper ASR...")
 asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)
 # --- FUNCTIONS ---
 def get_llm_response(model_id, system_prompt, user_prompt):
     client = InferenceClient(model=model_id, token=HF_TOKEN)
     try:
-        response = ""
         messages = [
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": user_prompt}
         ]
-        # We allow the router to find the best provider automatically for better stability
         output = client.chat_completion(
             messages,
             max_tokens=500,
-            stream=False # Non-streaming is often more stable for curriculum tasks
         )
         return output.choices[0].message.content
     except Exception as e:
-        error_str = str(e)
-        if "410" in error_str:
-            return "⚠️ This model version was recently retired by the provider. Please try the 'Llama 3.2' or 'Qwen' option."
-        if "503" in error_str:
-            return "⏳ The model is currently 'waking up' or busy. Please wait 30 seconds and try again."
-        return f"System Note: {error_str}"
 def generate_curriculum(model_name, language, topic):
     model_id = LLM_MODELS[model_name]
-    system_prompt = f"You are PANINI LLM, a structured language teacher for {language}. Create a short lesson."
-    user_prompt = f"Topic: {topic}. Provide 5 words/phrases with English translations and one tip for a beginner."
     return get_llm_response(model_id, system_prompt, user_prompt)
 async def play_target_audio(text, lang_name):
@@ -78,66 +74,69 @@ async def play_target_audio(text, lang_name):
 def analyze_speech(model_name, lang_name, target_text, audio_path):
     if not audio_path or not target_text:
-        return "Incomplete data.", "", "Provide text and recording."
-    # 1. Transcription
     asr_res = asr_pipe(audio_path)["text"].strip()
-    # 2. Phonetic Data (Linguistic layer)
     ipa_code = LANGUAGES[lang_name]["ipa"]
     try:
         target_ipa = phonemize(target_text, language=ipa_code, backend='espeak', strip=True)
         user_ipa = phonemize(asr_res, language=ipa_code, backend='espeak', strip=True)
     except:
         target_ipa = "IPA Unavailable"
         user_ipa = "IPA Unavailable"
-    # 3. LLM Analysis
     model_id = LLM_MODELS[model_name]
-    system_prompt = "You are an expert Speech-Language Pathologist. Focus on anatomical advice."
     user_prompt = (
         f"Target: '{target_text}' (IPA: /{target_ipa}/). "
         f"Student: '{asr_res}' (IPA: /{user_ipa}/). "
-        f"Identify the primary error and give one tip on tongue or lip placement."
     )
     feedback = get_llm_response(model_id, system_prompt, user_prompt)
     return asr_res, f"/{user_ipa}/", feedback
-# --- UI ---
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=".gradio-container {max-width: 950px !important}") as demo:
-    gr.HTML("<h1 style='text-align: center; color: #312e81;'>🎙️ PANINI LLM</h1>")
-    gr.HTML("<p style='text-align: center; margin-top: -10px;'>Intelligent Language Pedagogy & Phonetic Analysis</p>")
-    with gr.Tab("Step 1: Curriculum"):
         with gr.Row():
-            llm_choice = gr.Dropdown(list(LLM_MODELS.keys()), label="Select AI Teacher", value="Qwen 2.5 7B (Excellent Accuracy)")
             lang_choice = gr.Dropdown(list(LANGUAGES.keys()), label="Language", value="English (US)")
-        topic_input = gr.Textbox(label="Enter Topic", placeholder="e.g. At the grocery store, Job Interview, Hobbies")
-        btn_gen = gr.Button("📚 Generate Lesson", variant="primary")
         curr_output = gr.Markdown("---")
-    with gr.Tab("Step 2: Pronunciation"):
         with gr.Row():
-            target_word = gr.Textbox(label="Practice this Phrase", placeholder="Copy a word from Step 1 or type your own")
-            btn_tts = gr.Button("🔊 Hear Native AI", scale=0)
-        audio_ref = gr.Audio(label="Model Audio", type="filepath")
         with gr.Row():
-            audio_user = gr.Audio(label="Record Your Version", sources=["microphone"], type="filepath")
-            btn_analyze = gr.Button("🚀 Analyze Accent", variant="primary")
         with gr.Row():
-            out_transcript = gr.Textbox(label="Transcription (What the AI heard)")
-            out_ipa = gr.Textbox(label="Your IPA (Phonetics)")
-        out_feedback = gr.Markdown("---")
-    # Event Wiring
     btn_gen.click(generate_curriculum, inputs=[llm_choice, lang_choice, topic_input], outputs=curr_output)
     btn_tts.click(fn=lambda t, l: asyncio.run(play_target_audio(t, l)), inputs=[target_word, lang_choice], outputs=audio_ref)
     btn_analyze.click(analyze_speech, inputs=[llm_choice, lang_choice, target_word, audio_user], outputs=[out_transcript, out_ipa, out_feedback])
 demo.launch()

 # --- AUTHENTICATION ---
 HF_TOKEN = os.getenv("HF_TOKEN")
+# --- CONFIGURATION ---
+# We use 3B to 9B models because they are the most stable on the free Inference API.
 LLM_MODELS = {
+    "Llama 3.2 3B (Fastest)": "meta-llama/Llama-3.2-3B-Instruct",
+    "Qwen 2.5 7B (Most Accurate)": "Qwen/Qwen2.5-7B-Instruct",
+    "Gemma 2 9B (Excellent English)": "google/gemma-2-9b-it"
 }
 LANGUAGES = {
     "Chinese (Mandarin)": {"code": "zh-CN", "ipa": "cmn", "voice": "zh-CN-XiaoxiaoNeural"}
 }
+# Load ASR model (Whisper Tiny for CPU efficiency)
 print("Loading Whisper ASR...")
 asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)
 # --- FUNCTIONS ---
 def get_llm_response(model_id, system_prompt, user_prompt):
+    # Fixed: Removed the 'provider' argument to prevent TypeError
     client = InferenceClient(model=model_id, token=HF_TOKEN)
     try:
         messages = [
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": user_prompt}
         ]
         output = client.chat_completion(
             messages,
             max_tokens=500,
+            stream=False
         )
         return output.choices[0].message.content
     except Exception as e:
+        err = str(e)
+        if "503" in err:
+            return "⏳ The model is currently loading on Hugging Face servers. Please wait 30 seconds and try again."
+        return f"PANINI LLM Note: {err}"
 def generate_curriculum(model_name, language, topic):
     model_id = LLM_MODELS[model_name]
+    system_prompt = f"You are PANINI LLM, a world-class {language} teacher. Create a focused lesson plan."
+    user_prompt = f"Topic: {topic}. Provide 5 useful words/phrases in {language} with English translations, then give one expert learning tip."
     return get_llm_response(model_id, system_prompt, user_prompt)
 async def play_target_audio(text, lang_name):
 def analyze_speech(model_name, lang_name, target_text, audio_path):
     if not audio_path or not target_text:
+        return "Incomplete data.", "", "Please provide both text and recording."
+    # 1. ASR Transcription
     asr_res = asr_pipe(audio_path)["text"].strip()
+    # 2. Linguistic IPA Layer
     ipa_code = LANGUAGES[lang_name]["ipa"]
     try:
+        # Requires espeak-ng installed via packages.txt
         target_ipa = phonemize(target_text, language=ipa_code, backend='espeak', strip=True)
         user_ipa = phonemize(asr_res, language=ipa_code, backend='espeak', strip=True)
     except:
         target_ipa = "IPA Unavailable"
         user_ipa = "IPA Unavailable"
+    # 3. LLM Anatomical Feedback
     model_id = LLM_MODELS[model_name]
+    system_prompt = "You are a professional Speech-Language Pathologist. Compare the student's pronunciation to the target using IPA."
     user_prompt = (
         f"Target: '{target_text}' (IPA: /{target_ipa}/). "
         f"Student: '{asr_res}' (IPA: /{user_ipa}/). "
+        f"Identify the primary phonetic error and give 1 specific anatomical tip (tongue/lip placement) in English."
     )
     feedback = get_llm_response(model_id, system_prompt, user_prompt)
     return asr_res, f"/{user_ipa}/", feedback
+# --- UI DESIGN ---
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate"), css=".gradio-container {max-width: 950px !important}") as demo:
+    gr.HTML("<h1 style='text-align: center; color: #1e40af;'>🎙️ PANINI LLM</h1>")
+    gr.HTML("<p style='text-align: center; margin-top: -10px;'>Intelligent Multi-Model Language Tutoring</p>")
+    with gr.Tab("Step 1: Curriculum Creation"):
         with gr.Row():
+            llm_choice = gr.Dropdown(list(LLM_MODELS.keys()), label="Select AI Teacher (LLM)", value="Qwen 2.5 7B (Most Accurate)")
             lang_choice = gr.Dropdown(list(LANGUAGES.keys()), label="Language", value="English (US)")
+        topic_input = gr.Textbox(label="Lesson Topic", placeholder="e.g., Ordering Food, Job Interview, Airport Travel")
+        btn_gen = gr.Button("📚 Build My Lesson", variant="primary")
         curr_output = gr.Markdown("---")
+    with gr.Tab("Step 2: Pronunciation Practice"):
         with gr.Row():
+            target_word = gr.Textbox(label="Word/Phrase to Practice", placeholder="Copy a phrase from Step 1 here")
+            btn_tts = gr.Button("🔊 Play Native AI", scale=0)
+        audio_ref = gr.Audio(label="Teacher Reference", type="filepath")
         with gr.Row():
+            audio_user = gr.Audio(label="Your Voice Recording", sources=["microphone"], type="filepath")
+            btn_analyze = gr.Button("🚀 Analyze My Accent", variant="primary")
         with gr.Row():
+            out_transcript = gr.Textbox(label="AI Heard")
+            out_ipa = gr.Textbox(label="Your Phonetics (IPA)")
+        out_feedback = gr.Markdown("### Feedback from the AI Coach")
+    # Event Wireup
     btn_gen.click(generate_curriculum, inputs=[llm_choice, lang_choice, topic_input], outputs=curr_output)
     btn_tts.click(fn=lambda t, l: asyncio.run(play_target_audio(t, l)), inputs=[target_word, lang_choice], outputs=audio_ref)
     btn_analyze.click(analyze_speech, inputs=[llm_choice, lang_choice, target_word, audio_user], outputs=[out_transcript, out_ipa, out_feedback])
+# Run app
 demo.launch()