Spaces:

sami606713
/

audio_emotion_classification

Sleeping

App Files Files Community

sami606713 committed on Aug 19, 2024

Commit

adf299f

verified ·

1 Parent(s): 628b599

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -10

app.py CHANGED Viewed

@@ -1,13 +1,16 @@
 import streamlit as st
 import soundfile as sf
-from transformers import pipeline, Wav2Vec2ForSequenceClassification, Wav2Vec2Tokenizer
-# Load the model and tokenizer
-model_name = "sami606713/emotion_classification"
-# Initialize the pipeline
 try:
-    classifier = pipeline("audio-classification", model=model_name, tokenizer=model_name)
 except Exception as e:
     st.write(f"Error loading model: {e}")
@@ -21,6 +24,7 @@ uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3",
 if uploaded_file is not None:
     # Load the audio file
     audio_input, sample_rate = sf.read(uploaded_file)
     # Display the audio player
     st.audio(uploaded_file)
@@ -28,10 +32,34 @@ if uploaded_file is not None:
     # Perform emotion classification
     st.write("Classifying...")
     try:
-        predictions = classifier(audio_input, sampling_rate=sample_rate)
-        # Display the results
-        for prediction in predictions:
-            st.write(f"Emotion: {prediction['label']}, Score: {prediction['score']:.2f}")
     except Exception as e:
         st.write(f"Error during classification: {e}")

 import streamlit as st
 import soundfile as sf
+import torch
+from transformers import AutoModel, AutoFeatureExtractor
+import os
+# Get the Hugging Face API token from environment variables
+token = os.getenv("HF_TOKEN")
+# Load the model and feature extractor using your token
 try:
+    model = AutoModel.from_pretrained("sami606713/emotion_classification", use_auth_token=token)
+    feature_extractor = AutoFeatureExtractor.from_pretrained("sami606713/emotion_classification", use_auth_token=token)
 except Exception as e:
     st.write(f"Error loading model: {e}")
 if uploaded_file is not None:
     # Load the audio file
     audio_input, sample_rate = sf.read(uploaded_file)
+    sample_rate = 16000  # Ensure the sample rate is 16000
     # Display the audio player
     st.audio(uploaded_file)
     # Perform emotion classification
     st.write("Classifying...")
     try:
+        inputs = feature_extractor(audio_input, sampling_rate=sample_rate, return_tensors="pt")
+        # Make prediction
+        with torch.no_grad():
+            outputs = model(**inputs)
+        embeddings = outputs.pooler_output
+        # Apply a classification head on top of the embeddings
+        id2label={
+            0:"angry",
+            1:'calm',
+            2:'disgust',
+            3:'fearful',
+            4:'happy',
+            5:'neutral',
+            6:'sad',
+            7:'surprised'
+        }
+        classifier = torch.nn.Linear(embeddings.shape[-1], len(id2label))
+        # Pass embeddings through the classifier
+        logits = classifier(embeddings)
+        # Get predicted class
+        predicted_class_idx = logits.argmax(-1).item()
+        predicted_class = id2label[predicted_class_idx]
+        st.write(f"Predicted Emotion: {predicted_class}")
     except Exception as e:
         st.write(f"Error during classification: {e}")