File size: 3,844 Bytes
ae4fb2a
b69af29
 
 
 
828e49c
b69af29
8941721
a347c6f
828e49c
b69af29
828e49c
 
 
 
 
 
 
 
 
 
 
b69af29
8941721
27b8a63
0ebbf50
398b465
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b69af29
67ba6b1
 
b69af29
7a65103
b69af29
8941721
 
 
e78469c
8941721
 
e78469c
b69af29
 
 
 
 
a347c6f
 
 
398b465
 
 
8941721
e78469c
 
7a65103
828e49c
 
 
a347c6f
398b465
 
 
828e49c
a347c6f
828e49c
8941721
398b465
a347c6f
398b465
 
 
a347c6f
398b465
 
 
 
 
a347c6f
398b465
8941721
 
23771b6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import streamlit as st
import librosa
import numpy as np
import matplotlib.pyplot as plt
from pydub import AudioSegment
from transformers import T5Tokenizer, T5ForConditionalGeneration
import os
import whisper
from collections import Counter
import torch

# Load T5 model and tokenizer
# t5-small, driven with the "sst2 sentence:" prompt in analyze_sentiment_t5,
# doubles as a lightweight sentiment classifier. Both artifacts are downloaded
# and cached by Hugging Face on first run (module import blocks until then).
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

def analyze_sentiment_t5(text):
    """Classify *text* as "POSITIVE" or "NEGATIVE" with the T5 SST-2 task.

    The input is prefixed with "sst2 sentence:", the prompt t5-small was
    trained on for binary sentiment. Any generated output that does not
    contain "positive" is reported as "NEGATIVE".

    Args:
        text: Transcript (or any string) to classify.

    Returns:
        "POSITIVE" or "NEGATIVE".
    """
    input_text = f"sst2 sentence: {text}"  # SST-2 task prefix expected by T5
    # Truncate to the model's context window so long transcripts don't error.
    input_ids = tokenizer.encode(
        input_text, return_tensors="pt", truncation=True, max_length=512
    )
    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        output = model.generate(input_ids)
    sentiment = tokenizer.decode(output[0], skip_special_tokens=True)
    return "POSITIVE" if "positive" in sentiment.lower() else "NEGATIVE"

# Load Whisper model
# "base" trades accuracy for speed/memory; weights download on first run.
whisper_model = whisper.load_model("base")

def highlight_words(text, sentiment="POSITIVE"):
    """Wrap sentiment-bearing words of *text* in colored HTML ``<span>`` tags.

    Words in the positive lexicon are wrapped in green spans, words in the
    negative lexicon in red; everything else passes through unchanged.
    Matching is case-insensitive and ignores leading/trailing punctuation,
    so "Good," and "awful!" are still highlighted (the original token,
    punctuation included, is what gets wrapped).

    Args:
        text: Transcript to annotate.
        sentiment: Accepted for interface compatibility; currently unused —
            highlighting depends only on the word lexicons.

    Returns:
        A single HTML string with words re-joined by single spaces.
    """
    # Minimal example lexicons — expand as needed.
    positive_words = {"good", "great", "awesome", "happy", "positive", "love"}
    negative_words = {"bad", "sad", "angry", "negative", "hate", "awful"}
    _PUNCT = ".,!?;:'\"()[]{}"

    highlighted_text = []
    for word in text.split():
        # Normalize for lookup only; emit the original token untouched.
        key = word.lower().strip(_PUNCT)
        if key in positive_words:
            highlighted_text.append(f"<span style='color:green'>{word}</span>")
        elif key in negative_words:
            highlighted_text.append(f"<span style='color:red'>{word}</span>")
        else:
            highlighted_text.append(word)

    return ' '.join(highlighted_text)

# ---------------------------------------------------------------------------
# Streamlit UI: upload an MP3, transcribe it with Whisper, classify sentiment
# with T5, and render the annotated transcript plus a simple sentiment chart.
# ---------------------------------------------------------------------------
st.title("🎤 Audio Sentiment & Feature Analysis")
st.write("Upload an MP3 file to analyze its sentiment and audio features.")

uploaded_file = st.file_uploader("Choose an MP3 file", type=["mp3"])

if uploaded_file:
    os.makedirs("temp", exist_ok=True)
    file_path = os.path.join("temp", uploaded_file.name)
    # splitext is safer than str.replace: it only swaps the real extension,
    # not an ".mp3" that happens to appear elsewhere in the filename.
    wav_path = os.path.splitext(file_path)[0] + ".wav"

    try:
        # Persist the upload so pydub/librosa/whisper can read it by path.
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Convert MP3 -> WAV for downstream processing.
        audio = AudioSegment.from_mp3(file_path)
        audio.export(wav_path, format="wav")

        # Load samples at the file's native sampling rate and get duration.
        y, sr = librosa.load(wav_path, sr=None)
        audio_length = librosa.get_duration(y=y, sr=sr)

        # Speech-to-text with Whisper.
        result = whisper_model.transcribe(wav_path)
        transcribed_text = result["text"]

        # Overall sentiment plus per-word highlighting of the transcript.
        sentiment = analyze_sentiment_t5(transcribed_text)
        sentiment_color = "green" if sentiment == "POSITIVE" else "red"
        highlighted_transcription = highlight_words(transcribed_text, sentiment)

        # --- Display results ---------------------------------------------
        st.subheader("📊 Sentiment Analysis Result")
        st.markdown(
            f"**Overall Sentiment:** <span style='color:{sentiment_color}; font-size:20px;'>{sentiment}</span>",
            unsafe_allow_html=True,
        )

        st.subheader("📝 Full Transcription")
        st.markdown(highlighted_transcription, unsafe_allow_html=True)

        # Simplified sentiment score: 1 for POSITIVE, 0 for NEGATIVE.
        fig, ax = plt.subplots(figsize=(10, 5))
        sentiment_score = 1 if sentiment == "POSITIVE" else 0
        ax.barh(["Sentiment"], [sentiment_score], color=sentiment_color)
        ax.set_xlim(0, 1)
        ax.set_xlabel("Sentiment Score")
        ax.set_title(f"Sentiment Score vs. Audio Length (Duration: {audio_length:.2f} seconds)")
        st.pyplot(fig)
    finally:
        # Clean up temp files even if any processing step above raised.
        for _path in (wav_path, file_path):
            if os.path.exists(_path):
                os.remove(_path)