# chatbot_english/app.py
# Author: Serefor — last change: "Update app.py" (commit 6fafff0, verified)
import gradio as gr
import torch
import tempfile
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
import whisper
from TTS.api import TTS
from langdetect import detect
# Models — all loaded once at import time (slow on first run: weights are downloaded).
chat_model_id = "google/flan-t5-small"  # small seq2seq LM used to generate chat replies
chat_tokenizer = AutoTokenizer.from_pretrained(chat_model_id)
chat_model = AutoModelForSeq2SeqLM.from_pretrained(chat_model_id)
# text2text pipeline wrapping the chat model + tokenizer
chat_pipe = pipeline("text2text-generation", model=chat_model, tokenizer=chat_tokenizer)
# English text-to-speech (CPU-only) used to voice the bot's reply
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
# Whisper "base" model for transcribing microphone audio
whisper_model = whisper.load_model("base")
# Grammar-error-correction seq2seq model (applied to English input only)
corr_model_id = "prithivida/grammar_error_correcter_v1"
corr_tokenizer = AutoTokenizer.from_pretrained(corr_model_id)
corr_model = AutoModelForSeq2SeqLM.from_pretrained(corr_model_id)
def detectar_idioma(texto):
    """Detect the language code of *texto* via langdetect.

    Returns:
        A two-letter language code (e.g. "en", "it"), or "en" as a
        fallback when detection fails (langdetect raises on empty or
        featureless text).
    """
    try:
        return detect(texto)
    # Narrowed from a bare `except:`, which also swallowed
    # SystemExit/KeyboardInterrupt and masked real bugs.
    except Exception:
        return "en"
def corregir_gramatica(texto, idioma):
    """Run grammar correction on English text; pass other languages through.

    Args:
        texto: raw user text.
        idioma: language code, or "auto" to detect it from the text.

    Returns:
        (possibly-corrected text, resolved language code).
    """
    lang = detectar_idioma(texto) if idioma == "auto" else idioma
    # Only the English correction model is loaded — other languages are untouched.
    if lang != "en":
        return texto, lang
    encoded = corr_tokenizer.encode(texto, return_tensors="pt")
    generated = corr_model.generate(
        encoded, max_length=128, num_beams=5, early_stopping=True
    )
    return corr_tokenizer.decode(generated[0], skip_special_tokens=True), lang
# 👇 Unified handler for voice and text input
def chatbot_con_voz(audio_input, text_input, idioma_opcion, nivel, history=None):
    """Transcribe/accept user input, correct it, reply with text and TTS audio.

    BUGFIX: the original signature was (audio, text, history, idioma, nivel),
    but the gr.Interface below supplies only four inputs, so the language
    dropdown was bound to `history` and `nivel` went unset (TypeError).
    `history` is now a trailing, defaulted parameter so the four Gradio
    inputs bind to the four leading parameters.

    Args:
        audio_input: microphone recording filepath (or None); takes priority.
        text_input: typed message used when no audio is given.
        idioma_opcion: "auto", "en", or "it" from the language dropdown.
        nivel: lesson topic from the level dropdown.
        history: unused placeholder, kept for backward compatibility.

    Returns:
        (markdown summary, path to the TTS wav or None, history HTML stub).
    """
    # Audio wins when both inputs are provided.
    if audio_input:
        transcription = whisper_model.transcribe(audio_input)["text"]
    elif text_input:
        transcription = text_input
    else:
        return "❗ Debes ingresar texto o voz.", None, "<div id='history-output'></div>"
    texto_corregido, idioma_detectado = corregir_gramatica(transcription, idioma_opcion)
    prompt = f"Level: {nivel}\nInstruction: {texto_corregido}\nResponse:"
    resultado = chat_pipe(prompt, max_new_tokens=150)[0]["generated_text"]
    # flan-t5 may echo the prompt; strip it from the generation.
    respuesta = resultado.replace(prompt, "").strip()
    # tempfile.mktemp is deprecated (TOCTOU race between naming and creation);
    # NamedTemporaryFile(delete=False) creates the file atomically.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    tts.tts_to_file(text=respuesta, file_path=output_path)
    resumen = f"""🎀 **You said:** {transcription}
🌍 **Language detected:** `{idioma_detectado}`
βœ… **Corrected:** {texto_corregido}
🧠 **Response:** {respuesta}"""
    return resumen, output_path, "<div id='history-output'></div>"
# Gradio interface: four inputs map positionally onto the handler's
# leading parameters (audio, text, language, level).
iface = gr.Interface(
fn=chatbot_con_voz,
inputs=[
gr.Audio(sources="microphone", type="filepath", label="πŸŽ™οΈ Speak now (optional)"),
gr.Textbox(label="✍️ Or type your message"),
gr.Dropdown(["auto", "en", "it"], label="Language", value="auto"),
gr.Dropdown(["Greetings", "Colors", "Daily Activities", "Verbs"], label="Level", value="Greetings")
],
# Outputs match the handler's 3-tuple: summary text, TTS audio path, history HTML.
outputs=[
gr.Textbox(label="πŸ“ Transcription & Correction"),
gr.Audio(label="πŸ”Š Bot Response"),
gr.HTML(label="πŸ“š Previous Corrections")
],
title="πŸ—£οΈπŸ“ Language Teacher (Voice & Text)",
description="Speak or write in English/Italian. The bot corrects, replies with voice, and tracks your learning.",
theme="soft",
# NOTE(review): `allow_flagging` is deprecated in Gradio 4+ in favor of
# `flagging_mode` — confirm against the pinned gradio version.
allow_flagging="never",
# NOTE(review): `js` expects a string of JavaScript code; passing a file
# path may not load the script — verify against the Gradio docs.
js="static/script.js"
)
iface.launch()