import gradio as gr
import joblib
import whisper
import re

# ------------------------------
# 1. Carregar modelo de sentimento + TF-IDF
# ------------------------------

# Load the trained sentiment classifier and the text vectorizer from disk.
# Both paths are relative, so they are resolved against the current working
# directory at import time.
# NOTE: joblib.load unpickles arbitrary objects; only load files that come
# from a trusted source.
clf = joblib.load("sentiment_model_logreg.pkl")
# `vect` turns cleaned text into the feature matrix consumed by `clf.predict`.
vect = joblib.load("tfidf_vect.pkl")

def preprocess(text):
    """Return *text* lower-cased with every disallowed character removed.

    Only ASCII letters ``a-z``, characters in the range ``à-ú``, digits and
    the space character are kept. Anything else (punctuation, newlines,
    tabs, ...) is deleted outright, not replaced by a space.
    """
    return re.sub(r"[^a-zà-ú0-9 ]", "", text.lower())

def predict_sentiment(text):
    """Classify *text* and return ``"POSITIVO"`` or ``"NEGATIVO"``.

    The text is normalized with ``preprocess``, vectorized with the
    module-level ``vect`` and scored by the module-level ``clf``.
    ``"POSITIVO"`` is returned when the predicted label equals ``1``;
    any other label maps to ``"NEGATIVO"``.
    """
    features = vect.transform([preprocess(text)])
    label = clf.predict(features)[0]
    if label == 1:
        return "POSITIVO"
    return "NEGATIVO"

# ------------------------------
# 2. Carregar modelo Whisper
# ------------------------------

# Load the Whisper "small" checkpoint once at import time; audio_to_text
# reuses this single instance for every transcription.
whisper_model = whisper.load_model("small")

def audio_to_text(audio_path):
    """Transcribe the audio file at *audio_path* and return the text.

    Runs the module-level ``whisper_model`` with ``fp16=False``
    (presumably to force FP32 inference, e.g. on CPU; confirm against the
    deployment target) and returns the ``"text"`` entry of its result.
    """
    transcription = whisper_model.transcribe(audio_path, fp16=False)
    return transcription["text"]

# ------------------------------
# 3. Pipeline para vários arquivos
# ------------------------------

def pipeline(lista_arquivos):
    """Transcribe each uploaded audio file and classify its sentiment.

    Args:
        lista_arquivos: Items delivered by the ``gr.File`` input. Each item
            may be a plain path string (what newer Gradio releases pass
            with the default ``type="filepath"``) or an object exposing the
            path as ``.name`` (the tempfile wrappers older releases pass).
            ``None`` or an empty list produces no rows.

    Returns:
        A list with one ``[path, transcribed text, sentiment]`` row per
        input file, in input order.
    """
    if not lista_arquivos:
        return []

    resultados = []
    for arquivo in lista_arquivos:
        # Accept both upload representations; calling `.name` on a str
        # would raise AttributeError.
        caminho = arquivo if isinstance(arquivo, str) else arquivo.name
        texto = audio_to_text(caminho)
        resultados.append([caminho, texto, predict_sentiment(texto)])

    return resultados

# ------------------------------
# 4. Interface Gradio
# ------------------------------

# Upload widget: accepts several files in a single submission.
audio_files_input = gr.File(
    file_count="multiple",
    label="Envie seus arquivos de áudio (WAV, MP3, OGG...)",
)

# Output table: one row per processed file, matching the rows built by
# `pipeline`.
results_table = gr.Dataframe(
    headers=["Arquivo", "Texto reconhecido", "Sentimento"],
    label="Resultados",
)

app = gr.Interface(
    fn=pipeline,
    inputs=audio_files_input,
    outputs=results_table,
    title="Análise de Sentimento por Áudio",
    description=(
        "Envie múltiplos áudios. O sistema usa Whisper para transcrição "
        "e um modelo treinado para detectar sentimento."
    ),
)

# Start the web server as soon as the module is executed.
app.launch()