Spaces:
Sleeping
Sleeping
File size: 3,166 Bytes
04c8919 f257c81 caf815a 38d63ba caf815a f257c81 38d63ba caf815a f257c81 caf815a f257c81 5ef27b2 f257c81 5ef27b2 f257c81 caf815a 5ef27b2 caf815a 38d63ba caf815a 5ef27b2 38d63ba 314434e 38d63ba 5ef27b2 caf815a 5ef27b2 caf815a 5ef27b2 caf815a 5ef27b2 caf815a 5ef27b2 caf815a 38d63ba caf815a 5f04d7f caf815a 73e3f28 fbb362c caf815a fbb362c 5ef27b2 fbb362c caf815a 314434e 5ef27b2 04c8919 5ef27b2 fbb362c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | import gradio as gr
import torch
import tempfile
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
import whisper
from TTS.api import TTS
from langdetect import detect
# Models — all loaded once at import time so every Gradio request reuses them.
# Chat model: small instruction-following seq2seq model that generates replies.
chat_model_id = "google/flan-t5-small"
chat_tokenizer = AutoTokenizer.from_pretrained(chat_model_id)
chat_model = AutoModelForSeq2SeqLM.from_pretrained(chat_model_id)
chat_pipe = pipeline("text2text-generation", model=chat_model, tokenizer=chat_tokenizer)
# English text-to-speech (CPU-only) used to voice the bot's replies.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
# Speech-to-text model for transcribing microphone recordings.
whisper_model = whisper.load_model("base")
# English-only grammar-correction seq2seq model.
corr_model_id = "prithivida/grammar_error_correcter_v1"
corr_tokenizer = AutoTokenizer.from_pretrained(corr_model_id)
corr_model = AutoModelForSeq2SeqLM.from_pretrained(corr_model_id)
def detectar_idioma(texto):
    """Best-effort language detection.

    Parameters
    ----------
    texto : str
        Text whose language should be identified.

    Returns
    -------
    str
        ISO 639-1 language code reported by langdetect, or "en" as a
        fallback when detection fails (langdetect raises on empty or
        undetectable input).
    """
    try:
        return detect(texto)
    except Exception:
        # Was a bare `except:`, which would also swallow KeyboardInterrupt
        # and SystemExit; `Exception` keeps the fallback without hiding those.
        return "en"
def corregir_gramatica(texto, idioma):
    """Apply English grammar correction; pass other languages through.

    Parameters
    ----------
    texto : str
        Input text to (possibly) correct.
    idioma : str
        Language code, or "auto" to detect it from the text.

    Returns
    -------
    tuple
        (text, language) — the text is corrected only when the resolved
        language is English, otherwise it is returned unchanged.
    """
    lang = detectar_idioma(texto) if idioma == "auto" else idioma
    if lang != "en":
        # The correction model is English-only; leave other languages as-is.
        return texto, lang
    encoded = corr_tokenizer.encode(texto, return_tensors="pt")
    generated = corr_model.generate(encoded, max_length=128, num_beams=5, early_stopping=True)
    return corr_tokenizer.decode(generated[0], skip_special_tokens=True), lang
# Unified handler for the Gradio interface (voice or text in, voice out).
def chatbot_con_voz(audio_input, text_input, idioma_opcion, nivel):
    """Transcribe/accept input, correct grammar, generate and voice a reply.

    BUG FIX: the original signature had a fifth, unused `history` parameter,
    but the gr.Interface below wires only four input components (audio, text,
    language, level) — every call shifted the arguments and raised TypeError.

    Parameters
    ----------
    audio_input : str or None
        Path to the recorded audio file (gr.Audio with type="filepath").
    text_input : str
        Typed message; used only when no audio was recorded.
    idioma_opcion : str
        "auto", "en" or "it" from the language dropdown.
    nivel : str
        Lesson topic from the level dropdown, injected into the prompt.

    Returns
    -------
    tuple
        (markdown summary, path to TTS wav or None, history HTML shell).
    """
    # Prefer voice input; fall back to typed text.
    if audio_input:
        transcription = whisper_model.transcribe(audio_input)["text"]
    elif text_input:
        transcription = text_input
    else:
        return "β Debes ingresar texto o voz.", None, "<div id='history-output'></div>"
    texto_corregido, idioma_detectado = corregir_gramatica(transcription, idioma_opcion)
    prompt = f"Level: {nivel}\nInstruction: {texto_corregido}\nResponse:"
    resultado = chat_pipe(prompt, max_new_tokens=150)[0]["generated_text"]
    # flan-t5 sometimes echoes the prompt; strip it out of the reply.
    respuesta = resultado.replace(prompt, "").strip()
    # NamedTemporaryFile replaces the deprecated, race-prone tempfile.mktemp;
    # delete=False because the file must outlive this call for Gradio to serve it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    tts.tts_to_file(text=respuesta, file_path=output_path)
    resumen = f"""π€ **You said:** {transcription}
π **Language detected:** `{idioma_detectado}`
β
**Corrected:** {texto_corregido}
π§ **Response:** {respuesta}"""
    return resumen, output_path, "<div id='history-output'></div>"
# Gradio interface: widgets are named up front, then wired into the app.
input_widgets = [
    gr.Audio(sources="microphone", type="filepath", label="ποΈ Speak now (optional)"),
    gr.Textbox(label="βοΈ Or type your message"),
    gr.Dropdown(["auto", "en", "it"], label="Language", value="auto"),
    gr.Dropdown(["Greetings", "Colors", "Daily Activities", "Verbs"], label="Level", value="Greetings"),
]
output_widgets = [
    gr.Textbox(label="π Transcription & Correction"),
    gr.Audio(label="π Bot Response"),
    gr.HTML(label="π Previous Corrections"),
]
iface = gr.Interface(
    fn=chatbot_con_voz,
    inputs=input_widgets,
    outputs=output_widgets,
    title="π£οΈπ Language Teacher (Voice & Text)",
    description="Speak or write in English/Italian. The bot corrects, replies with voice, and tracks your learning.",
    theme="soft",
    allow_flagging="never",
    js="static/script.js",
)
iface.launch()
|