st192011 committed on
Commit
8f4da15
·
verified ·
1 Parent(s): fd8cb7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -42
app.py CHANGED
@@ -14,13 +14,12 @@ from huggingface_hub import InferenceClient
14
  # --- AUTHENTICATION ---
15
  HF_TOKEN = os.getenv("HF_TOKEN")
16
 
17
- # --- CONFIGURATION: 2025 STABLE MODELS ---
18
- # These models are currently the most reliable on the Hugging Face Free Inference API.
19
  LLM_MODELS = {
20
- "Llama 3.2 3B (Fast & Smart)": "meta-llama/Llama-3.2-3B-Instruct",
21
- "Qwen 2.5 7B (Excellent Accuracy)": "Qwen/Qwen2.5-7B-Instruct",
22
- "Gemma 2 9B (Google's Best)": "google/gemma-2-9b-it",
23
- "Llama 3.3 70B (Powerhouse - Busy)": "meta-llama/Llama-3.3-70B-Instruct"
24
  }
25
 
26
  LANGUAGES = {
@@ -31,41 +30,38 @@ LANGUAGES = {
31
  "Chinese (Mandarin)": {"code": "zh-CN", "ipa": "cmn", "voice": "zh-CN-XiaoxiaoNeural"}
32
  }
33
 
34
- # Load ASR model (Whisper Tiny) - remains the same for CPU efficiency
35
  print("Loading Whisper ASR...")
36
  asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)
37
 
38
  # --- FUNCTIONS ---
39
 
40
  def get_llm_response(model_id, system_prompt, user_prompt):
 
41
  client = InferenceClient(model=model_id, token=HF_TOKEN)
42
  try:
43
- response = ""
44
  messages = [
45
  {"role": "system", "content": system_prompt},
46
  {"role": "user", "content": user_prompt}
47
  ]
48
 
49
- # We allow the router to find the best provider automatically for better stability
50
  output = client.chat_completion(
51
  messages,
52
  max_tokens=500,
53
- stream=False # Non-streaming is often more stable for curriculum tasks
54
  )
55
  return output.choices[0].message.content
56
 
57
  except Exception as e:
58
- error_str = str(e)
59
- if "410" in error_str:
60
- return "⚠️ This model version was recently retired by the provider. Please try the 'Llama 3.2' or 'Qwen' option."
61
- if "503" in error_str:
62
- return "⏳ The model is currently 'waking up' or busy. Please wait 30 seconds and try again."
63
- return f"System Note: {error_str}"
64
 
65
  def generate_curriculum(model_name, language, topic):
66
  model_id = LLM_MODELS[model_name]
67
- system_prompt = f"You are PANINI LLM, a structured language teacher for {language}. Create a short lesson."
68
- user_prompt = f"Topic: {topic}. Provide 5 words/phrases with English translations and one tip for a beginner."
69
  return get_llm_response(model_id, system_prompt, user_prompt)
70
 
71
  async def play_target_audio(text, lang_name):
@@ -78,66 +74,69 @@ async def play_target_audio(text, lang_name):
78
 
79
  def analyze_speech(model_name, lang_name, target_text, audio_path):
80
  if not audio_path or not target_text:
81
- return "Incomplete data.", "", "Provide text and recording."
82
 
83
- # 1. Transcription
84
  asr_res = asr_pipe(audio_path)["text"].strip()
85
 
86
- # 2. Phonetic Data (Linguistic layer)
87
  ipa_code = LANGUAGES[lang_name]["ipa"]
88
  try:
 
89
  target_ipa = phonemize(target_text, language=ipa_code, backend='espeak', strip=True)
90
  user_ipa = phonemize(asr_res, language=ipa_code, backend='espeak', strip=True)
91
  except:
92
  target_ipa = "IPA Unavailable"
93
  user_ipa = "IPA Unavailable"
94
 
95
- # 3. LLM Analysis
96
  model_id = LLM_MODELS[model_name]
97
- system_prompt = "You are an expert Speech-Language Pathologist. Focus on anatomical advice."
98
  user_prompt = (
99
  f"Target: '{target_text}' (IPA: /{target_ipa}/). "
100
  f"Student: '{asr_res}' (IPA: /{user_ipa}/). "
101
- f"Identify the primary error and give one tip on tongue or lip placement."
102
  )
103
 
104
  feedback = get_llm_response(model_id, system_prompt, user_prompt)
105
  return asr_res, f"/{user_ipa}/", feedback
106
 
107
- # --- UI ---
108
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=".gradio-container {max-width: 950px !important}") as demo:
109
- gr.HTML("<h1 style='text-align: center; color: #312e81;'>πŸŽ™οΈ PANINI LLM</h1>")
110
- gr.HTML("<p style='text-align: center; margin-top: -10px;'>Intelligent Language Pedagogy & Phonetic Analysis</p>")
 
111
 
112
- with gr.Tab("Step 1: Curriculum"):
113
  with gr.Row():
114
- llm_choice = gr.Dropdown(list(LLM_MODELS.keys()), label="Select AI Teacher", value="Qwen 2.5 7B (Excellent Accuracy)")
115
  lang_choice = gr.Dropdown(list(LANGUAGES.keys()), label="Language", value="English (US)")
116
 
117
- topic_input = gr.Textbox(label="Enter Topic", placeholder="e.g. At the grocery store, Job Interview, Hobbies")
118
- btn_gen = gr.Button("πŸ“š Generate Lesson", variant="primary")
119
  curr_output = gr.Markdown("---")
120
 
121
- with gr.Tab("Step 2: Pronunciation"):
122
  with gr.Row():
123
- target_word = gr.Textbox(label="Practice this Phrase", placeholder="Copy a word from Step 1 or type your own")
124
- btn_tts = gr.Button("πŸ”Š Hear Native AI", scale=0)
125
 
126
- audio_ref = gr.Audio(label="Model Audio", type="filepath")
127
 
128
  with gr.Row():
129
- audio_user = gr.Audio(label="Record Your Version", sources=["microphone"], type="filepath")
130
- btn_analyze = gr.Button("πŸš€ Analyze Accent", variant="primary")
131
 
132
  with gr.Row():
133
- out_transcript = gr.Textbox(label="Transcription (What the AI heard)")
134
- out_ipa = gr.Textbox(label="Your IPA (Phonetics)")
135
 
136
- out_feedback = gr.Markdown("---")
137
 
138
- # Event Wiring
139
  btn_gen.click(generate_curriculum, inputs=[llm_choice, lang_choice, topic_input], outputs=curr_output)
140
  btn_tts.click(fn=lambda t, l: asyncio.run(play_target_audio(t, l)), inputs=[target_word, lang_choice], outputs=audio_ref)
141
  btn_analyze.click(analyze_speech, inputs=[llm_choice, lang_choice, target_word, audio_user], outputs=[out_transcript, out_ipa, out_feedback])
142
 
 
143
  demo.launch()
 
14
  # --- AUTHENTICATION ---
15
  HF_TOKEN = os.getenv("HF_TOKEN")
16
 
17
# --- CONFIGURATION ---
# Display-name -> Hugging Face model id. The 3B-9B range is used because
# these sizes are the most reliably available on the free Inference API.
LLM_MODELS = {
    "Llama 3.2 3B (Fastest)": "meta-llama/Llama-3.2-3B-Instruct",
    "Qwen 2.5 7B (Most Accurate)": "Qwen/Qwen2.5-7B-Instruct",
    "Gemma 2 9B (Excellent English)": "google/gemma-2-9b-it",
}
24
 
25
  LANGUAGES = {
 
30
  "Chinese (Mandarin)": {"code": "zh-CN", "ipa": "cmn", "voice": "zh-CN-XiaoxiaoNeural"}
31
  }
32
 
33
+ # Load ASR model (Whisper Tiny for CPU efficiency)
34
  print("Loading Whisper ASR...")
35
  asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)
36
 
37
  # --- FUNCTIONS ---
38
 
39
def get_llm_response(model_id, system_prompt, user_prompt):
    """Send a system+user chat to *model_id* on the HF Inference API.

    Returns the model's reply text on success. On failure it returns a
    human-readable notice string instead of raising, so callers can render
    the result directly in the UI either way.
    """
    # NOTE: no 'provider' kwarg is passed to InferenceClient — passing one
    # caused a TypeError with the installed huggingface_hub version.
    client = InferenceClient(model=model_id, token=HF_TOKEN)
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    try:
        completion = client.chat_completion(
            chat,
            max_tokens=500,
            stream=False,
        )
        return completion.choices[0].message.content
    except Exception as exc:
        err = str(exc)
        # 503 means the model is cold-starting on the serverless API.
        if "503" in err:
            return "⏳ The model is currently loading on Hugging Face servers. Please wait 30 seconds and try again."
        return f"PANINI LLM Note: {err}"
 
 
60
 
61
def generate_curriculum(model_name, language, topic):
    """Ask the selected LLM for a short lesson plan on *topic* in *language*."""
    sys_msg = f"You are PANINI LLM, a world-class {language} teacher. Create a focused lesson plan."
    usr_msg = f"Topic: {topic}. Provide 5 useful words/phrases in {language} with English translations, then give one expert learning tip."
    return get_llm_response(LLM_MODELS[model_name], sys_msg, usr_msg)
66
 
67
  async def play_target_audio(text, lang_name):
 
74
 
75
def analyze_speech(model_name, lang_name, target_text, audio_path):
    """Transcribe the learner's recording and return pronunciation feedback.

    Args:
        model_name: key into LLM_MODELS selecting the feedback model.
        lang_name: key into LANGUAGES selecting the practice language.
        target_text: the phrase the learner attempted to say.
        audio_path: filepath of the learner's recording (from Gradio).

    Returns:
        (transcription, "/ipa/", feedback_markdown) — always a 3-tuple so it
        can be wired directly to the three Gradio outputs.
    """
    if not audio_path or not target_text:
        return "Incomplete data.", "", "Please provide both text and recording."

    # 1. ASR Transcription
    asr_res = asr_pipe(audio_path)["text"].strip()

    # 2. Linguistic IPA Layer
    ipa_code = LANGUAGES[lang_name]["ipa"]
    try:
        # Requires espeak-ng installed via packages.txt
        target_ipa = phonemize(target_text, language=ipa_code, backend='espeak', strip=True)
        user_ipa = phonemize(asr_res, language=ipa_code, backend='espeak', strip=True)
    except Exception:
        # Fixed: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit. Narrowed to Exception; the best-effort fallback for
        # a missing/failed espeak backend is preserved.
        target_ipa = "IPA Unavailable"
        user_ipa = "IPA Unavailable"

    # 3. LLM Anatomical Feedback
    model_id = LLM_MODELS[model_name]
    system_prompt = "You are a professional Speech-Language Pathologist. Compare the student's pronunciation to the target using IPA."
    user_prompt = (
        f"Target: '{target_text}' (IPA: /{target_ipa}/). "
        f"Student: '{asr_res}' (IPA: /{user_ipa}/). "
        f"Identify the primary phonetic error and give 1 specific anatomical tip (tongue/lip placement) in English."
    )

    feedback = get_llm_response(model_id, system_prompt, user_prompt)
    return asr_res, f"/{user_ipa}/", feedback
103
 
104
# --- UI DESIGN ---

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate"), css=".gradio-container {max-width: 950px !important}") as demo:
    # Page header
    gr.HTML("<h1 style='text-align: center; color: #1e40af;'>πŸŽ™οΈ PANINI LLM</h1>")
    gr.HTML("<p style='text-align: center; margin-top: -10px;'>Intelligent Multi-Model Language Tutoring</p>")

    with gr.Tab("Step 1: Curriculum Creation"):
        with gr.Row():
            teacher_dd = gr.Dropdown(list(LLM_MODELS.keys()), label="Select AI Teacher (LLM)", value="Qwen 2.5 7B (Most Accurate)")
            language_dd = gr.Dropdown(list(LANGUAGES.keys()), label="Language", value="English (US)")

        topic_box = gr.Textbox(label="Lesson Topic", placeholder="e.g., Ordering Food, Job Interview, Airport Travel")
        generate_btn = gr.Button("πŸ“š Build My Lesson", variant="primary")
        lesson_md = gr.Markdown("---")

    with gr.Tab("Step 2: Pronunciation Practice"):
        with gr.Row():
            phrase_box = gr.Textbox(label="Word/Phrase to Practice", placeholder="Copy a phrase from Step 1 here")
            listen_btn = gr.Button("πŸ”Š Play Native AI", scale=0)

        reference_audio = gr.Audio(label="Teacher Reference", type="filepath")

        with gr.Row():
            recording = gr.Audio(label="Your Voice Recording", sources=["microphone"], type="filepath")
            analyze_btn = gr.Button("πŸš€ Analyze My Accent", variant="primary")

        with gr.Row():
            heard_box = gr.Textbox(label="AI Heard")
            ipa_box = gr.Textbox(label="Your Phonetics (IPA)")

        feedback_md = gr.Markdown("### Feedback from the AI Coach")

    # Hook each button to its handler. The TTS handler is async, so it is
    # bridged with asyncio.run inside a small lambda.
    generate_btn.click(generate_curriculum, inputs=[teacher_dd, language_dd, topic_box], outputs=lesson_md)
    listen_btn.click(fn=lambda text, lang: asyncio.run(play_target_audio(text, lang)), inputs=[phrase_box, language_dd], outputs=reference_audio)
    analyze_btn.click(analyze_speech, inputs=[teacher_dd, language_dd, phrase_box, recording], outputs=[heard_box, ipa_box, feedback_md])

# Run app
demo.launch()