Spaces:
Sleeping
Sleeping
File size: 3,166 Bytes
04c8919 f257c81 caf815a 38d63ba caf815a f257c81 38d63ba caf815a f257c81 caf815a f257c81 5ef27b2 f257c81 5ef27b2 f257c81 caf815a 5ef27b2 caf815a 38d63ba caf815a 5ef27b2 38d63ba 314434e 38d63ba 5ef27b2 caf815a 5ef27b2 caf815a 5ef27b2 caf815a 5ef27b2 caf815a 5ef27b2 caf815a 38d63ba caf815a 5f04d7f caf815a 73e3f28 fbb362c caf815a fbb362c 5ef27b2 fbb362c caf815a 314434e 5ef27b2 04c8919 5ef27b2 fbb362c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | import gradio as gr
import torch
import tempfile
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
import whisper
from TTS.api import TTS
from langdetect import detect
# Models — all loaded once at import time so every Gradio request reuses them.
# Chat model: small instruction-following seq2seq model that generates replies.
chat_model_id = "google/flan-t5-small"
chat_tokenizer = AutoTokenizer.from_pretrained(chat_model_id)
chat_model = AutoModelForSeq2SeqLM.from_pretrained(chat_model_id)
chat_pipe = pipeline("text2text-generation", model=chat_model, tokenizer=chat_tokenizer)
# English text-to-speech (CPU-only) used to voice the bot's replies.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
# Speech-to-text model for transcribing microphone recordings.
whisper_model = whisper.load_model("base")
# English-only grammar-correction seq2seq model.
corr_model_id = "prithivida/grammar_error_correcter_v1"
corr_tokenizer = AutoTokenizer.from_pretrained(corr_model_id)
corr_model = AutoModelForSeq2SeqLM.from_pretrained(corr_model_id)
def detectar_idioma(texto):
    """Best-effort language detection.

    Parameters
    ----------
    texto : str
        Text whose language should be identified.

    Returns
    -------
    str
        ISO 639-1 language code reported by langdetect, or "en" as a
        fallback when detection fails (langdetect raises on empty or
        undetectable input).
    """
    try:
        return detect(texto)
    except Exception:
        # Was a bare `except:`, which would also swallow KeyboardInterrupt
        # and SystemExit; `Exception` keeps the fallback without hiding those.
        return "en"
def corregir_gramatica(texto, idioma):
    """Apply English grammar correction; pass other languages through.

    Parameters
    ----------
    texto : str
        Input text to (possibly) correct.
    idioma : str
        Language code, or "auto" to detect it from the text.

    Returns
    -------
    tuple
        (text, language) — the text is corrected only when the resolved
        language is English, otherwise it is returned unchanged.
    """
    lang = detectar_idioma(texto) if idioma == "auto" else idioma
    if lang != "en":
        # The correction model is English-only; leave other languages as-is.
        return texto, lang
    encoded = corr_tokenizer.encode(texto, return_tensors="pt")
    generated = corr_model.generate(encoded, max_length=128, num_beams=5, early_stopping=True)
    return corr_tokenizer.decode(generated[0], skip_special_tokens=True), lang
# Unified handler for the Gradio interface (voice or text in, voice out).
def chatbot_con_voz(audio_input, text_input, idioma_opcion, nivel):
    """Transcribe/accept input, correct grammar, generate and voice a reply.

    BUG FIX: the original signature had a fifth, unused `history` parameter,
    but the gr.Interface below wires only four input components (audio, text,
    language, level) — every call shifted the arguments and raised TypeError.

    Parameters
    ----------
    audio_input : str or None
        Path to the recorded audio file (gr.Audio with type="filepath").
    text_input : str
        Typed message; used only when no audio was recorded.
    idioma_opcion : str
        "auto", "en" or "it" from the language dropdown.
    nivel : str
        Lesson topic from the level dropdown, injected into the prompt.

    Returns
    -------
    tuple
        (markdown summary, path to TTS wav or None, history HTML shell).
    """
    # Prefer voice input; fall back to typed text.
    if audio_input:
        transcription = whisper_model.transcribe(audio_input)["text"]
    elif text_input:
        transcription = text_input
    else:
        return "β Debes ingresar texto o voz.", None, "<div id='history-output'></div>"
    texto_corregido, idioma_detectado = corregir_gramatica(transcription, idioma_opcion)
    prompt = f"Level: {nivel}\nInstruction: {texto_corregido}\nResponse:"
    resultado = chat_pipe(prompt, max_new_tokens=150)[0]["generated_text"]
    # flan-t5 sometimes echoes the prompt; strip it out of the reply.
    respuesta = resultado.replace(prompt, "").strip()
    # NamedTemporaryFile replaces the deprecated, race-prone tempfile.mktemp;
    # delete=False because the file must outlive this call for Gradio to serve it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    tts.tts_to_file(text=respuesta, file_path=output_path)
    resumen = f"""π€ **You said:** {transcription}
π **Language detected:** `{idioma_detectado}`
β
**Corrected:** {texto_corregido}
π§ **Response:** {respuesta}"""
    return resumen, output_path, "<div id='history-output'></div>"
# Gradio interface: widgets are named up front, then wired into the app.
input_widgets = [
    gr.Audio(sources="microphone", type="filepath", label="ποΈ Speak now (optional)"),
    gr.Textbox(label="βοΈ Or type your message"),
    gr.Dropdown(["auto", "en", "it"], label="Language", value="auto"),
    gr.Dropdown(["Greetings", "Colors", "Daily Activities", "Verbs"], label="Level", value="Greetings"),
]
output_widgets = [
    gr.Textbox(label="π Transcription & Correction"),
    gr.Audio(label="π Bot Response"),
    gr.HTML(label="π Previous Corrections"),
]
iface = gr.Interface(
    fn=chatbot_con_voz,
    inputs=input_widgets,
    outputs=output_widgets,
    title="π£οΈπ Language Teacher (Voice & Text)",
    description="Speak or write in English/Italian. The bot corrects, replies with voice, and tracks your learning.",
    theme="soft",
    allow_flagging="never",
    js="static/script.js",
)
iface.launch()
|