Update app.py
Browse files
app.py
CHANGED
|
@@ -23,6 +23,47 @@ st.write("Upload an MP3 file to analyze its sentiment and audio features.")
|
|
| 23 |
# Upload audio file
|
| 24 |
uploaded_file = st.file_uploader("Choose an MP3 file", type=["mp3"])
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# Function to process audio and get sentiment
|
| 27 |
def analyze_audio(file_path):
|
| 28 |
# Convert MP3 to WAV
|
|
@@ -35,15 +76,20 @@ def analyze_audio(file_path):
|
|
| 35 |
|
| 36 |
# Extract MFCCs (Mel-frequency cepstral coefficients)
|
| 37 |
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
|
| 38 |
-
mfccs_mean = np.mean(mfccs, axis=1)
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
|
|
|
|
| 42 |
|
| 43 |
-
#
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
# Function to extract words from audio using Whisper
|
| 49 |
def extract_words_from_audio(file_path):
|
|
@@ -55,123 +101,57 @@ def extract_words_from_audio(file_path):
|
|
| 55 |
# Transcribe audio using Whisper
|
| 56 |
result = whisper_model.transcribe(wav_path, word_timestamps=True)
|
| 57 |
|
| 58 |
-
# Extract words and
|
| 59 |
words = []
|
| 60 |
for segment in result['segments']:
|
| 61 |
for word_info in segment['words']:
|
| 62 |
-
word
|
| 63 |
-
start_time = word_info['start']
|
| 64 |
-
end_time = word_info['end']
|
| 65 |
-
words.append({"word": word, "start_time": start_time, "end_time": end_time})
|
| 66 |
|
| 67 |
-
# Remove WAV file after processing
|
| 68 |
os.remove(wav_path)
|
| 69 |
-
|
| 70 |
return words, result['text']
|
| 71 |
|
| 72 |
# Process and plot if a file is uploaded
|
| 73 |
if uploaded_file:
|
| 74 |
file_path = f"temp/{uploaded_file.name}"
|
|
|
|
| 75 |
|
| 76 |
-
# Ensure temp directory exists
|
| 77 |
-
os.makedirs("temp", exist_ok=True)
|
| 78 |
-
|
| 79 |
-
# Save the uploaded file
|
| 80 |
with open(file_path, "wb") as f:
|
| 81 |
f.write(uploaded_file.getbuffer())
|
| 82 |
|
| 83 |
-
# Analyze sentiment
|
| 84 |
-
y, sr, sentiment, mfccs, mfccs_mean = analyze_audio(file_path)
|
| 85 |
|
| 86 |
-
# Extract words from audio
|
| 87 |
-
words_from_audio,
|
| 88 |
|
| 89 |
-
#
|
| 90 |
-
good_words = [
|
| 91 |
-
negative_words = [
|
| 92 |
|
| 93 |
-
# Determine color
|
| 94 |
sentiment_label = sentiment['label']
|
| 95 |
sentiment_color = "green" if sentiment_label == "POSITIVE" else "red"
|
| 96 |
|
| 97 |
-
# Display sentiment
|
| 98 |
st.subheader("📊 Sentiment Analysis Result")
|
| 99 |
st.markdown(f"**Sentiment:** <span style='color:{sentiment_color}; font-size:20px;'>{sentiment_label}</span>", unsafe_allow_html=True)
|
| 100 |
st.write(f"**Confidence:** {sentiment['score']:.2f}")
|
| 101 |
|
| 102 |
-
#
|
| 103 |
-
fig, axes = plt.subplots(2, 3, figsize=(30, 18), dpi=300) # Increased size and DPI for better quality and larger plots
|
| 104 |
-
axes = axes.flatten() # Flatten the 2x3 grid to make it easier to index
|
| 105 |
-
|
| 106 |
-
# **Plot 1: Line Plot of Average MFCCs**
|
| 107 |
-
axes[0].plot(range(1, len(mfccs_mean) + 1), mfccs_mean, marker='o', color='b', label="Average MFCCs")
|
| 108 |
-
axes[0].set_xlabel("MFCC Coefficients")
|
| 109 |
-
axes[0].set_ylabel("Mean Value")
|
| 110 |
-
axes[0].set_title("Average MFCCs Across Time")
|
| 111 |
-
axes[0].legend()
|
| 112 |
-
|
| 113 |
-
# **Plot 2: Audio Waveform with Sentiment Indicator**
|
| 114 |
-
librosa.display.waveshow(y, sr=sr, ax=axes[1], alpha=0.5, color=sentiment_color)
|
| 115 |
-
axes[1].set_xlabel("Time (s)")
|
| 116 |
-
axes[1].set_ylabel("Amplitude")
|
| 117 |
-
axes[1].set_title(f"Waveform of Audio ({'Good' if sentiment_label == 'POSITIVE' else 'Bad'})")
|
| 118 |
-
|
| 119 |
-
# **Plot 3: MFCC Heatmap**
|
| 120 |
-
sns.heatmap(mfccs, ax=axes[2], cmap="coolwarm", yticklabels=[f"MFCC {i}" for i in range(1, 14)])
|
| 121 |
-
axes[2].set_xlabel("Time Frames")
|
| 122 |
-
axes[2].set_ylabel("MFCC Coefficients")
|
| 123 |
-
axes[2].set_title(f"MFCC Feature Heatmap ({'Good' if sentiment_label == 'POSITIVE' else 'Bad'})")
|
| 124 |
-
|
| 125 |
-
# **Plot 4: Spectrogram**
|
| 126 |
-
S = librosa.feature.melspectrogram(y=y, sr=sr)
|
| 127 |
-
S_dB = librosa.power_to_db(S, ref=np.max)
|
| 128 |
-
img = librosa.display.specshow(S_dB, sr=sr, x_axis="time", y_axis="mel", ax=axes[3])
|
| 129 |
-
fig.colorbar(img, ax=axes[3], format="%+2.0f dB")
|
| 130 |
-
axes[3].set_title(f"Mel Spectrogram ({'Good' if sentiment_label == 'POSITIVE' else 'Bad'})")
|
| 131 |
-
|
| 132 |
-
# **Plot 5: Sentiment Trend Over Time (10-sec intervals)**
|
| 133 |
sentiment_scores = analyze_sentiment_over_time(y, sr)
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
# **Plot 6: Performance Scoring**
|
| 143 |
-
duration_min = librosa.get_duration(y=y, sr=sr) / 60
|
| 144 |
-
if duration_min > 8:
|
| 145 |
-
performance = "Critical (Long call, needs improvement)"
|
| 146 |
-
box_color = "#FFCCCB" # Light red for critical calls
|
| 147 |
-
elif sentiment_label == "NEGATIVE":
|
| 148 |
-
performance = "Needs Improvement (Negative sentiment)"
|
| 149 |
-
box_color = "#FFDD00" # Light yellow for improvement needed
|
| 150 |
-
else:
|
| 151 |
-
performance = "Good (Positive sentiment)"
|
| 152 |
-
box_color = "#98FB98" # Light green for good performance
|
| 153 |
-
|
| 154 |
-
# Hide the axis
|
| 155 |
-
axes[5].axis('off')
|
| 156 |
-
|
| 157 |
-
# Add a colored rectangle as the background box for performance text
|
| 158 |
-
box = patches.FancyBboxPatch((0.05, 0.4), 0.9, 0.2, boxstyle="round,pad=0.05", linewidth=2, edgecolor="black", facecolor=box_color)
|
| 159 |
-
axes[5].add_patch(box)
|
| 160 |
-
|
| 161 |
-
# Add the performance text inside the colored box
|
| 162 |
-
axes[5].text(0.5, 0.5, f"Agent Performance: {performance}", ha='center', va='center', fontsize=14, color="black")
|
| 163 |
-
|
| 164 |
-
# Adjust the spacing between the subplots to avoid overlap
|
| 165 |
-
plt.subplots_adjust(wspace=0.4, hspace=0.5)
|
| 166 |
-
|
| 167 |
-
# Display the plot in Streamlit
|
| 168 |
st.pyplot(fig)
|
| 169 |
|
| 170 |
-
# Display
|
| 171 |
st.subheader("🗣️ Positive and Negative Words in Audio")
|
| 172 |
-
|
| 173 |
-
# Create two horizontal boxes for good and negative words
|
| 174 |
-
col1, col2 = st.columns([1, 1])
|
| 175 |
with col1:
|
| 176 |
st.markdown("### Good Words")
|
| 177 |
st.write(", ".join(good_words) if good_words else "No good words detected.")
|
|
@@ -179,13 +159,9 @@ if uploaded_file:
|
|
| 179 |
st.markdown("### Negative Words")
|
| 180 |
st.write(", ".join(negative_words) if negative_words else "No negative words detected.")
|
| 181 |
|
| 182 |
-
# Display
|
| 183 |
-
st.subheader("
|
| 184 |
-
st.write(
|
| 185 |
|
| 186 |
-
# Display transcription result in Streamlit
|
| 187 |
-
st.subheader("📝 Full Transcription Result (Raw JSON)")
|
| 188 |
-
st.json(result)
|
| 189 |
-
|
| 190 |
# Clean up temp file
|
| 191 |
os.remove(file_path)
|
|
|
|
| 23 |
# Upload audio file
|
| 24 |
uploaded_file = st.file_uploader("Choose an MP3 file", type=["mp3"])
|
| 25 |
|
| 26 |
+
# Function to analyze sentiment over time
def analyze_sentiment_over_time(y, sr, chunk_duration=10):
    """
    Split the audio into fixed-length chunks, transcribe each chunk with
    Whisper, and run sentiment analysis on every transcription.

    Args:
        y: The audio waveform (1-D float array as loaded by librosa).
        sr: Sample rate in Hz.
        chunk_duration: Duration of each chunk in seconds (default: 10 sec).

    Returns:
        A list of sentiment labels ("POSITIVE" / "NEGATIVE" / "NEUTRAL"),
        one per chunk, in chronological order.
    """
    # Local import: scipy is a hard dependency of librosa, so it is
    # guaranteed to be available wherever this app already runs.
    from scipy.io import wavfile

    chunk_length = int(chunk_duration * sr)  # chunk size in samples

    sentiment_labels = []

    # Iterate over every chunk start. Unlike `len(y) // chunk_length`,
    # this also processes the trailing partial chunk instead of
    # silently dropping up to chunk_duration seconds of audio.
    for i, start_sample in enumerate(range(0, len(y), chunk_length)):
        chunk_audio = y[start_sample:start_sample + chunk_length]
        if len(chunk_audio) == 0:
            continue

        # Write the chunk to a temporary WAV file.
        # BUGFIX: librosa.output.write_wav() was removed in librosa 0.8,
        # so the original call crashed at runtime; scipy writes the
        # float32 WAV directly (readable by Whisper/ffmpeg).
        temp_wav_path = f"temp_chunk_{i}.wav"
        wavfile.write(temp_wav_path, int(sr), chunk_audio)

        # Transcribe with Whisper; always remove the temporary file,
        # even if transcription raises.
        try:
            result = whisper_model.transcribe(temp_wav_path)
        finally:
            os.remove(temp_wav_path)

        transcribed_text = result["text"]

        # Run sentiment analysis only when something was actually
        # transcribed; silent chunks are labeled NEUTRAL.
        if transcribed_text.strip():
            sentiment_result = sentiment_analyzer(transcribed_text)
            sentiment_labels.append(sentiment_result[0]["label"])
        else:
            sentiment_labels.append("NEUTRAL")

    return sentiment_labels
|
| 66 |
+
|
| 67 |
# Function to process audio and get sentiment
|
| 68 |
def analyze_audio(file_path):
|
| 69 |
# Convert MP3 to WAV
|
|
|
|
| 76 |
|
| 77 |
# Extract MFCCs (Mel-frequency cepstral coefficients)
|
| 78 |
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
|
| 79 |
+
mfccs_mean = np.mean(mfccs, axis=1)
|
| 80 |
|
| 81 |
+
# Transcribe audio
|
| 82 |
+
result = whisper_model.transcribe(wav_path)
|
| 83 |
+
transcribed_text = result["text"]
|
| 84 |
|
| 85 |
+
# Run sentiment analysis
|
| 86 |
+
if transcribed_text.strip():
|
| 87 |
+
sentiment_result = sentiment_analyzer(transcribed_text)
|
| 88 |
+
else:
|
| 89 |
+
sentiment_result = [{"label": "NEUTRAL", "score": 0.0}]
|
| 90 |
+
|
| 91 |
+
os.remove(wav_path) # Remove WAV file after processing
|
| 92 |
+
return y, sr, sentiment_result[0], mfccs, mfccs_mean, transcribed_text
|
| 93 |
|
| 94 |
# Function to extract words from audio using Whisper
|
| 95 |
def extract_words_from_audio(file_path):
|
|
|
|
| 101 |
# Transcribe audio using Whisper
|
| 102 |
result = whisper_model.transcribe(wav_path, word_timestamps=True)
|
| 103 |
|
| 104 |
+
# Extract words and timestamps
|
| 105 |
words = []
|
| 106 |
for segment in result['segments']:
|
| 107 |
for word_info in segment['words']:
|
| 108 |
+
words.append({"word": word_info['word'], "start_time": word_info['start'], "end_time": word_info['end']})
|
|
|
|
|
|
|
|
|
|
| 109 |
|
|
|
|
| 110 |
os.remove(wav_path)
|
|
|
|
| 111 |
return words, result['text']
|
| 112 |
|
| 113 |
# Process and plot if a file is uploaded
|
| 114 |
if uploaded_file:
|
| 115 |
file_path = f"temp/{uploaded_file.name}"
|
| 116 |
+
os.makedirs("temp", exist_ok=True) # Ensure temp directory exists
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
with open(file_path, "wb") as f:
|
| 119 |
f.write(uploaded_file.getbuffer())
|
| 120 |
|
| 121 |
+
# Analyze sentiment & extract features
|
| 122 |
+
y, sr, sentiment, mfccs, mfccs_mean, transcribed_text = analyze_audio(file_path)
|
| 123 |
|
| 124 |
+
# Extract words from audio
|
| 125 |
+
words_from_audio, _ = extract_words_from_audio(file_path)
|
| 126 |
|
| 127 |
+
# Categorize words
|
| 128 |
+
good_words = [w['word'] for w in words_from_audio if w['word'].lower() in ['good', 'excellent', 'positive', 'great', 'happy', 'success']]
|
| 129 |
+
negative_words = [w['word'] for w in words_from_audio if w['word'].lower() in ['bad', 'negative', 'poor', 'angry', 'sad', 'failure']]
|
| 130 |
|
| 131 |
+
# Determine sentiment color
|
| 132 |
sentiment_label = sentiment['label']
|
| 133 |
sentiment_color = "green" if sentiment_label == "POSITIVE" else "red"
|
| 134 |
|
| 135 |
+
# Display sentiment
|
| 136 |
st.subheader("📊 Sentiment Analysis Result")
|
| 137 |
st.markdown(f"**Sentiment:** <span style='color:{sentiment_color}; font-size:20px;'>{sentiment_label}</span>", unsafe_allow_html=True)
|
| 138 |
st.write(f"**Confidence:** {sentiment['score']:.2f}")
|
| 139 |
|
| 140 |
+
# Analyze sentiment over time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
sentiment_scores = analyze_sentiment_over_time(y, sr)
|
| 142 |
+
sentiment_numeric = [1 if s == "POSITIVE" else -1 for s in sentiment_scores]
|
| 143 |
+
|
| 144 |
+
# Plot sentiment trend
|
| 145 |
+
fig, ax = plt.subplots(figsize=(8, 4))
|
| 146 |
+
ax.scatter(range(len(sentiment_numeric)), sentiment_numeric, c=sentiment_numeric, cmap="coolwarm")
|
| 147 |
+
ax.set_title("Sentiment Trend (10-sec intervals)")
|
| 148 |
+
ax.set_xticks(range(0, len(sentiment_scores), max(1, len(sentiment_scores)//5)))
|
| 149 |
+
ax.set_yticks([-1, 1], labels=["Negative", "Positive"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
st.pyplot(fig)
|
| 151 |
|
| 152 |
+
# Display positive & negative words
|
| 153 |
st.subheader("🗣️ Positive and Negative Words in Audio")
|
| 154 |
+
col1, col2 = st.columns(2)
|
|
|
|
|
|
|
| 155 |
with col1:
|
| 156 |
st.markdown("### Good Words")
|
| 157 |
st.write(", ".join(good_words) if good_words else "No good words detected.")
|
|
|
|
| 159 |
st.markdown("### Negative Words")
|
| 160 |
st.write(", ".join(negative_words) if negative_words else "No negative words detected.")
|
| 161 |
|
| 162 |
+
# Display full transcription
|
| 163 |
+
st.subheader("📝 Full Transcription")
|
| 164 |
+
st.write(transcribed_text)
|
| 165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
# Clean up temp file
|
| 167 |
os.remove(file_path)
|