# PANINI-LLM / app.py
# (from Hugging Face Space: st192011 — "Update app.py", commit 8f4da15, verified)
import os
import asyncio
import edge_tts
import librosa
import torch
import numpy as np
import pandas as pd
import re
import gradio as gr
from phonemizer import phonemize
from transformers import pipeline
from huggingface_hub import InferenceClient
# --- AUTHENTICATION ---
# Space secret; None when unset (requests then run unauthenticated/rate-limited).
HF_TOKEN = os.getenv("HF_TOKEN")
# --- CONFIGURATION ---
# We use 3B to 9B models because they are the most stable on the free Inference API.
# Display label shown in the dropdown -> Hugging Face model repo id.
LLM_MODELS = {
    "Llama 3.2 3B (Fastest)": "meta-llama/Llama-3.2-3B-Instruct",
    "Qwen 2.5 7B (Most Accurate)": "Qwen/Qwen2.5-7B-Instruct",
    "Gemma 2 9B (Excellent English)": "google/gemma-2-9b-it"
}
# Display label -> {"code": BCP-47 locale, "ipa": phonemizer/espeak language id,
#                   "voice": edge-tts neural voice name}.
LANGUAGES = {
    "English (US)": {"code": "en-US", "ipa": "en-us", "voice": "en-US-ChristopherNeural"},
    "German": {"code": "de-DE", "ipa": "de", "voice": "de-DE-ConradNeural"},
    "French": {"code": "fr-FR", "ipa": "fr-fr", "voice": "fr-FR-HenriNeural"},
    "Spanish": {"code": "es-ES", "ipa": "es", "voice": "es-ES-AlvaroNeural"},
    "Chinese (Mandarin)": {"code": "zh-CN", "ipa": "cmn", "voice": "zh-CN-XiaoxiaoNeural"}
}
# Load ASR model (Whisper Tiny for CPU efficiency)
print("Loading Whisper ASR...")
# device=-1 pins inference to CPU (no GPU on the free Space tier).
asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1)
# --- FUNCTIONS ---
def get_llm_response(model_id, system_prompt, user_prompt):
    """Query a hosted chat model and return its reply text.

    Args:
        model_id: Fully-qualified Hugging Face model repo id.
        system_prompt: System-role instruction for the model.
        user_prompt: User-role message content.

    Returns:
        The assistant's reply string, or a human-readable note when the
        request fails. Errors are reported, never raised, so the Gradio
        UI always receives a displayable string.
    """
    try:
        # Client construction lives inside the try so that a bad model id
        # or token also surfaces as a friendly message rather than an
        # uncaught exception (previously only the request itself was guarded).
        # Fixed: Removed the 'provider' argument to prevent TypeError.
        client = InferenceClient(model=model_id, token=HF_TOKEN)
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        output = client.chat_completion(
            messages,
            max_tokens=500,
            stream=False
        )
        return output.choices[0].message.content
    except Exception as e:
        err = str(e)
        # HTTP 503 = the serverless Inference API is cold-starting the model.
        if "503" in err:
            return "⏳ The model is currently loading on Hugging Face servers. Please wait 30 seconds and try again."
        return f"PANINI LLM Note: {err}"
def generate_curriculum(model_name, language, topic):
    """Produce a short lesson plan for *topic* in *language* using the selected LLM."""
    system_prompt = f"You are PANINI LLM, a world-class {language} teacher. Create a focused lesson plan."
    user_prompt = f"Topic: {topic}. Provide 5 useful words/phrases in {language} with English translations, then give one expert learning tip."
    # Resolve the dropdown label to its repo id and delegate to the shared client helper.
    return get_llm_response(LLM_MODELS[model_name], system_prompt, user_prompt)
async def play_target_audio(text, lang_name, output_path="target.mp3"):
    """Synthesize *text* with the language's neural TTS voice and save it as mp3.

    Args:
        text: Phrase to speak; falsy input short-circuits to None.
        lang_name: Key into LANGUAGES selecting the edge-tts voice.
        output_path: Destination mp3 file. Parameterized (backward-compatible
            default keeps the old behavior) so concurrent callers on a shared
            Space can write distinct files instead of clobbering "target.mp3".

    Returns:
        The saved file path, or None when no text was provided.
    """
    if not text:
        return None
    voice = LANGUAGES[lang_name]["voice"]
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(output_path)
    return output_path
def analyze_speech(model_name, lang_name, target_text, audio_path):
    """Transcribe the learner's recording, phonemize both texts, and ask the LLM for feedback.

    Args:
        model_name: Dropdown label resolved via LLM_MODELS.
        lang_name: Dropdown label resolved via LANGUAGES (for the IPA backend).
        target_text: The phrase the learner attempted to say.
        audio_path: Filesystem path to the learner's recording.

    Returns:
        A 3-tuple of (ASR transcript, learner IPA string, coach feedback text);
        placeholder strings when either input is missing.
    """
    if not audio_path or not target_text:
        return "Incomplete data.", "", "Please provide both text and recording."
    # 1. ASR transcription (Whisper tiny pipeline loaded at module import).
    asr_res = asr_pipe(audio_path)["text"].strip()
    # 2. Linguistic IPA layer.
    ipa_code = LANGUAGES[lang_name]["ipa"]
    try:
        # Requires espeak-ng installed via packages.txt
        target_ipa = phonemize(target_text, language=ipa_code, backend='espeak', strip=True)
        user_ipa = phonemize(asr_res, language=ipa_code, backend='espeak', strip=True)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still
        # propagate; phonemizer failures (e.g. missing espeak-ng) degrade gracefully.
        target_ipa = "IPA Unavailable"
        user_ipa = "IPA Unavailable"
    # 3. LLM anatomical feedback.
    model_id = LLM_MODELS[model_name]
    system_prompt = "You are a professional Speech-Language Pathologist. Compare the student's pronunciation to the target using IPA."
    user_prompt = (
        f"Target: '{target_text}' (IPA: /{target_ipa}/). "
        f"Student: '{asr_res}' (IPA: /{user_ipa}/). "
        f"Identify the primary phonetic error and give 1 specific anatomical tip (tongue/lip placement) in English."
    )
    feedback = get_llm_response(model_id, system_prompt, user_prompt)
    return asr_res, f"/{user_ipa}/", feedback
# --- UI DESIGN ---
# Two-tab layout: Step 1 generates a lesson plan; Step 2 records the learner,
# plays a native-voice reference, and returns phonetic feedback.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate"), css=".gradio-container {max-width: 950px !important}") as demo:
    gr.HTML("<h1 style='text-align: center; color: #1e40af;'>πŸŽ™οΈ PANINI LLM</h1>")
    gr.HTML("<p style='text-align: center; margin-top: -10px;'>Intelligent Multi-Model Language Tutoring</p>")
    with gr.Tab("Step 1: Curriculum Creation"):
        with gr.Row():
            llm_choice = gr.Dropdown(list(LLM_MODELS.keys()), label="Select AI Teacher (LLM)", value="Qwen 2.5 7B (Most Accurate)")
            lang_choice = gr.Dropdown(list(LANGUAGES.keys()), label="Language", value="English (US)")
        topic_input = gr.Textbox(label="Lesson Topic", placeholder="e.g., Ordering Food, Job Interview, Airport Travel")
        btn_gen = gr.Button("πŸ“š Build My Lesson", variant="primary")
        curr_output = gr.Markdown("---")
    with gr.Tab("Step 2: Pronunciation Practice"):
        with gr.Row():
            target_word = gr.Textbox(label="Word/Phrase to Practice", placeholder="Copy a phrase from Step 1 here")
            btn_tts = gr.Button("πŸ”Š Play Native AI", scale=0)
        audio_ref = gr.Audio(label="Teacher Reference", type="filepath")
        with gr.Row():
            # type="filepath" hands a temp-file path to analyze_speech/asr_pipe.
            audio_user = gr.Audio(label="Your Voice Recording", sources=["microphone"], type="filepath")
        btn_analyze = gr.Button("πŸš€ Analyze My Accent", variant="primary")
        with gr.Row():
            out_transcript = gr.Textbox(label="AI Heard")
            out_ipa = gr.Textbox(label="Your Phonetics (IPA)")
        out_feedback = gr.Markdown("### Feedback from the AI Coach")
    # Event Wireup
    btn_gen.click(generate_curriculum, inputs=[llm_choice, lang_choice, topic_input], outputs=curr_output)
    # play_target_audio is a coroutine; asyncio.run bridges it into Gradio's sync callback.
    btn_tts.click(fn=lambda t, l: asyncio.run(play_target_audio(t, l)), inputs=[target_word, lang_choice], outputs=audio_ref)
    btn_analyze.click(analyze_speech, inputs=[llm_choice, lang_choice, target_word, audio_user], outputs=[out_transcript, out_ipa, out_feedback])
# Run app
demo.launch()