Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| import tempfile | |
| from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline | |
| import whisper | |
| from TTS.api import TTS | |
| from langdetect import detect | |
# Models.
# Chat model: lightweight instruction-following seq2seq model (Flan-T5 small).
chat_model_id = "google/flan-t5-small"
chat_tokenizer = AutoTokenizer.from_pretrained(chat_model_id)
chat_model = AutoModelForSeq2SeqLM.from_pretrained(chat_model_id)
# Text2text pipeline wrapping the chat model; used by chatbot_con_voz to generate replies.
chat_pipe = pipeline("text2text-generation", model=chat_model, tokenizer=chat_tokenizer)
# Text-to-speech: English Tacotron2 voice trained on LJSpeech; gpu=False forces CPU inference.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
# Speech-to-text: Whisper "base" checkpoint, used to transcribe microphone input.
whisper_model = whisper.load_model("base")
# Grammar-error-correction seq2seq model (English-only; see corregir_gramatica).
corr_model_id = "prithivida/grammar_error_correcter_v1"
corr_tokenizer = AutoTokenizer.from_pretrained(corr_model_id)
corr_model = AutoModelForSeq2SeqLM.from_pretrained(corr_model_id)
def detectar_idioma(texto):
    """Return the language code langdetect infers for *texto*.

    Falls back to "en" when detection fails (langdetect raises on empty
    or featureless input).

    Fix: the original bare ``except:`` also swallowed ``SystemExit`` and
    ``KeyboardInterrupt``; narrowed to ``except Exception``.
    """
    try:
        return detect(texto)
    except Exception:
        return "en"
def corregir_gramatica(texto, idioma):
    """Grammar-correct English text; pass other languages through unchanged.

    Returns a ``(text, language)`` tuple where *language* is the resolved
    language code ("auto" is replaced by the detected language).
    """
    resolved = detectar_idioma(texto) if idioma == "auto" else idioma
    if resolved != "en":
        # The correction model is English-only: return the input untouched.
        return texto, resolved
    encoded = corr_tokenizer.encode(texto, return_tensors="pt")
    generated = corr_model.generate(encoded, max_length=128, num_beams=5, early_stopping=True)
    corrected = corr_tokenizer.decode(generated[0], skip_special_tokens=True)
    return corrected, resolved
# Unified handler: accepts voice or text, corrects grammar, replies with TTS audio.
def chatbot_con_voz(audio_input, text_input, idioma_opcion, nivel, history=None):
    """Transcribe/accept user input, correct it, generate a reply and voice it.

    Parameters
    ----------
    audio_input : str | None
        Filepath of the recorded clip (Gradio microphone input), if any.
    text_input : str | None
        Typed message; used only when no audio was provided.
    idioma_opcion : str
        Language selection ("auto", "en" or "it").
    nivel : str
        Lesson topic injected into the chat prompt.
    history : optional
        Unused. Fix: the original signature placed ``history`` third, but
        gr.Interface supplies only the four input components positionally,
        so every call misbound its arguments; it now sits last with a default.

    Returns
    -------
    tuple
        (markdown summary, WAV filepath or None, history HTML stub).
    """
    if audio_input:
        transcription = whisper_model.transcribe(audio_input)["text"]
    elif text_input:
        transcription = text_input
    else:
        # Neither modality supplied: error message, no audio, empty history panel.
        return "β Debes ingresar texto o voz.", None, "<div id='history-output'></div>"
    texto_corregido, idioma_detectado = corregir_gramatica(transcription, idioma_opcion)
    prompt = f"Level: {nivel}\nInstruction: {texto_corregido}\nResponse:"
    resultado = chat_pipe(prompt, max_new_tokens=150)[0]["generated_text"]
    respuesta = resultado.replace(prompt, "").strip()
    # Fix: tempfile.mktemp is deprecated and race-prone; create the file safely
    # and keep it on disk (delete=False) so Gradio can serve it afterwards.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    tts.tts_to_file(text=respuesta, file_path=output_path)
    resumen = f"""π€ **You said:** {transcription}
π **Language detected:** `{idioma_detectado}`
β **Corrected:** {texto_corregido}
π§ **Response:** {respuesta}"""
    return resumen, output_path, "<div id='history-output'></div>"
# Gradio UI: the four input components map positionally onto the handler's parameters.
input_components = [
    gr.Audio(sources="microphone", type="filepath", label="ποΈ Speak now (optional)"),
    gr.Textbox(label="βοΈ Or type your message"),
    gr.Dropdown(["auto", "en", "it"], label="Language", value="auto"),
    gr.Dropdown(["Greetings", "Colors", "Daily Activities", "Verbs"], label="Level", value="Greetings"),
]
output_components = [
    gr.Textbox(label="π Transcription & Correction"),
    gr.Audio(label="π Bot Response"),
    gr.HTML(label="π Previous Corrections"),
]
iface = gr.Interface(
    fn=chatbot_con_voz,
    inputs=input_components,
    outputs=output_components,
    title="π£οΈπ Language Teacher (Voice & Text)",
    description="Speak or write in English/Italian. The bot corrects, replies with voice, and tracks your learning.",
    theme="soft",
    allow_flagging="never",
    # NOTE(review): gr.Interface's `js` expects JavaScript source, not a path —
    # confirm "static/script.js" actually loads in this Gradio version.
    js="static/script.js",
)
iface.launch()