# NOTE(review): the lines that were here ("Spaces: / Running / File size: ...")
# were Hugging Face Spaces file-viewer residue from a web scrape, not code;
# converted to this comment so the module parses.
import os
import tempfile

from transformers import pipeline
def load_models():
    """Build the three Hugging Face inference pipelines used by the app.

    Returns:
        tuple: ``(image_model, text_model, whisper_model)`` — an image
        emotion classifier, a text sentiment classifier (``top_k=None``
        so every class score is returned), and a Whisper ASR pipeline.
    """
    return (
        pipeline(
            "image-classification",
            model="dima806/facial_emotions_image_detection",
        ),
        pipeline(
            "text-classification",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            top_k=None,
        ),
        pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
        ),
    )
def get_top_prediction(predictions):
    """Return the prediction dict carrying the highest ``"score"`` value.

    On ties the first maximal entry wins (``max`` semantics).
    """
    def _confidence(entry):
        return entry["score"]

    return max(predictions, key=_confidence)
def normalize_text_label(label):
    """Collapse a raw model label onto ``"positive"``/``"negative"``/``"neutral"``.

    Matching is case-insensitive and substring based; "positive" is
    checked before "negative", and anything matching neither maps to
    "neutral".
    """
    lowered = label.lower()
    for sentiment in ("positive", "negative"):
        if sentiment in lowered:
            return sentiment
    return "neutral"
def analyse_image(model, image):
    """Classify the emotion in *image* with an image-classification pipeline.

    Args:
        model: Callable returning a list of ``{"label", "score"}`` dicts.
        image: The image input accepted by *model*.

    Returns:
        tuple: ``(predictions, emotion, score)`` — the full prediction
        list, plus the label and score of the highest-confidence entry.
    """
    predictions = model(image)
    # Highest-confidence entry; max() keeps the first one on ties.
    best = max(predictions, key=lambda entry: entry["score"])
    return predictions, best["label"], best["score"]
def analyse_text(model, text):
    """Classify the sentiment of *text* with a text-classification pipeline.

    Args:
        model: Callable whose result's first element is a list of
            ``{"label", "score"}`` dicts (``top_k=None`` pipeline shape).
        text: The string to classify.

    Returns:
        tuple: ``(predictions, sentiment, score)`` — the full prediction
        list, the top label normalized to positive/negative/neutral, and
        the top score.
    """
    predictions = model(text)[0]
    best = max(predictions, key=lambda entry: entry["score"])
    # Normalize the raw label: case-insensitive substring match,
    # "positive" checked first, anything else falls through to neutral.
    raw = best["label"].lower()
    if "positive" in raw:
        sentiment = "positive"
    elif "negative" in raw:
        sentiment = "negative"
    else:
        sentiment = "neutral"
    return predictions, sentiment, best["score"]
def transcribe_audio(model, uploaded_audio):
    """Transcribe an uploaded audio file via a speech-recognition pipeline.

    The upload is spilled to a named temporary file (many ASR pipelines
    want a real path, not a file object), transcribed, and the temp file
    is removed afterwards.

    Args:
        model: ASR pipeline; called with a file path, must return a dict
            containing a ``"text"`` key.
        uploaded_audio: File-like upload exposing ``.name`` and
            ``.read()`` (e.g. a Streamlit ``UploadedFile`` — TODO confirm
            against the caller).

    Returns:
        str: The transcript with surrounding whitespace stripped.
    """
    # Keep the original extension so the pipeline can sniff the format.
    # splitext already includes the dot and yields "" for extension-less
    # names (the old split(".")[-1] turned "audio" into suffix ".audio").
    suffix = os.path.splitext(uploaded_audio.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio:
        temp_audio.write(uploaded_audio.read())
        temp_audio_path = temp_audio.name
    try:
        result = model(temp_audio_path)
    finally:
        # Bug fix: the file was created with delete=False and never
        # removed, leaking one temp file per call.
        os.unlink(temp_audio_path)
    return result["text"].strip()