# NOTE(review): the lines that were here ("Spaces: / Running / File size: ...")
# were Hugging Face Spaces file-viewer residue from a web scrape, not code;
# converted to this comment so the module parses.
import os
import tempfile

from transformers import pipeline
def load_models():
    """Build the three Hugging Face inference pipelines used by the app.

    Returns:
        tuple: ``(image_model, text_model, whisper_model)`` — an image
        emotion classifier, a text sentiment classifier (``top_k=None``
        so every class score is returned), and a Whisper ASR pipeline.
    """
    return (
        pipeline(
            "image-classification",
            model="dima806/facial_emotions_image_detection",
        ),
        pipeline(
            "text-classification",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            top_k=None,
        ),
        pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
        ),
    )
def get_top_prediction(predictions):
    """Return the prediction dict carrying the highest ``"score"`` value.

    On ties the first maximal entry wins (``max`` semantics).
    """
    def _confidence(entry):
        return entry["score"]

    return max(predictions, key=_confidence)
def normalize_text_label(label):
    """Collapse a raw model label onto ``"positive"``/``"negative"``/``"neutral"``.

    Matching is case-insensitive and substring based; "positive" is
    checked before "negative", and anything matching neither maps to
    "neutral".
    """
    lowered = label.lower()
    for sentiment in ("positive", "negative"):
        if sentiment in lowered:
            return sentiment
    return "neutral"
def analyse_image(model, image):
    """Classify the emotion in *image* with an image-classification pipeline.

    Args:
        model: Callable returning a list of ``{"label", "score"}`` dicts.
        image: The image input accepted by *model*.

    Returns:
        tuple: ``(predictions, emotion, score)`` — the full prediction
        list, plus the label and score of the highest-confidence entry.
    """
    predictions = model(image)
    # Highest-confidence entry; max() keeps the first one on ties.
    best = max(predictions, key=lambda entry: entry["score"])
    return predictions, best["label"], best["score"]
def analyse_text(model, text):
    """Classify the sentiment of *text* with a text-classification pipeline.

    Args:
        model: Callable whose result's first element is a list of
            ``{"label", "score"}`` dicts (``top_k=None`` pipeline shape).
        text: The string to classify.

    Returns:
        tuple: ``(predictions, sentiment, score)`` — the full prediction
        list, the top label normalized to positive/negative/neutral, and
        the top score.
    """
    predictions = model(text)[0]
    best = max(predictions, key=lambda entry: entry["score"])
    # Normalize the raw label: case-insensitive substring match,
    # "positive" checked first, anything else falls through to neutral.
    raw = best["label"].lower()
    if "positive" in raw:
        sentiment = "positive"
    elif "negative" in raw:
        sentiment = "negative"
    else:
        sentiment = "neutral"
    return predictions, sentiment, best["score"]
def transcribe_audio(model, uploaded_audio):
    """Transcribe an uploaded audio file via a speech-recognition pipeline.

    The upload is spilled to a named temporary file (many ASR pipelines
    want a real path, not a file object), transcribed, and the temp file
    is removed afterwards.

    Args:
        model: ASR pipeline; called with a file path, must return a dict
            containing a ``"text"`` key.
        uploaded_audio: File-like upload exposing ``.name`` and
            ``.read()`` (e.g. a Streamlit ``UploadedFile`` — TODO confirm
            against the caller).

    Returns:
        str: The transcript with surrounding whitespace stripped.
    """
    # Keep the original extension so the pipeline can sniff the format.
    # splitext already includes the dot and yields "" for extension-less
    # names (the old split(".")[-1] turned "audio" into suffix ".audio").
    suffix = os.path.splitext(uploaded_audio.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio:
        temp_audio.write(uploaded_audio.read())
        temp_audio_path = temp_audio.name
    try:
        result = model(temp_audio_path)
    finally:
        # Bug fix: the file was created with delete=False and never
        # removed, leaking one temp file per call.
        os.unlink(temp_audio_path)
    return result["text"].strip()