YashsharmaPhD committed on
Commit
e78469c
·
verified ·
1 Parent(s): c346b6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -103
app.py CHANGED
@@ -23,6 +23,47 @@ st.write("Upload an MP3 file to analyze its sentiment and audio features.")
23
  # Upload audio file
24
  uploaded_file = st.file_uploader("Choose an MP3 file", type=["mp3"])
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # Function to process audio and get sentiment
27
  def analyze_audio(file_path):
28
  # Convert MP3 to WAV
@@ -35,15 +76,20 @@ def analyze_audio(file_path):
35
 
36
  # Extract MFCCs (Mel-frequency cepstral coefficients)
37
  mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
38
- mfccs_mean = np.mean(mfccs, axis=1) # Take mean across time axis
39
 
40
- # Dummy text for sentiment analysis (replace with actual text from speech-to-text if needed)
41
- sentiment_result = sentiment_analyzer("This is a placeholder for sentiment analysis based on audio!")
 
42
 
43
- # Remove WAV file after processing
44
- os.remove(wav_path)
45
-
46
- return y, sr, sentiment_result[0], mfccs, mfccs_mean
 
 
 
 
47
 
48
  # Function to extract words from audio using Whisper
49
  def extract_words_from_audio(file_path):
@@ -55,123 +101,57 @@ def extract_words_from_audio(file_path):
55
  # Transcribe audio using Whisper
56
  result = whisper_model.transcribe(wav_path, word_timestamps=True)
57
 
58
- # Extract words and their timestamps
59
  words = []
60
  for segment in result['segments']:
61
  for word_info in segment['words']:
62
- word = word_info['word']
63
- start_time = word_info['start']
64
- end_time = word_info['end']
65
- words.append({"word": word, "start_time": start_time, "end_time": end_time})
66
 
67
- # Remove WAV file after processing
68
  os.remove(wav_path)
69
-
70
  return words, result['text']
71
 
72
  # Process and plot if a file is uploaded
73
  if uploaded_file:
74
  file_path = f"temp/{uploaded_file.name}"
 
75
 
76
- # Ensure temp directory exists
77
- os.makedirs("temp", exist_ok=True)
78
-
79
- # Save the uploaded file
80
  with open(file_path, "wb") as f:
81
  f.write(uploaded_file.getbuffer())
82
 
83
- # Analyze sentiment and extract features
84
- y, sr, sentiment, mfccs, mfccs_mean = analyze_audio(file_path)
85
 
86
- # Extract words from audio using Whisper
87
- words_from_audio, transcribed_text = extract_words_from_audio(file_path)
88
 
89
- # Display words from audio in two categories: good and negative words
90
- good_words = [word['word'] for word in words_from_audio if word['word'].lower() in ['good', 'excellent', 'positive', 'great', 'happy', 'success']]
91
- negative_words = [word['word'] for word in words_from_audio if word['word'].lower() in ['bad', 'negative', 'poor', 'angry', 'sad', 'failure']]
92
 
93
- # Determine color based on sentiment
94
  sentiment_label = sentiment['label']
95
  sentiment_color = "green" if sentiment_label == "POSITIVE" else "red"
96
 
97
- # Display sentiment result
98
  st.subheader("📊 Sentiment Analysis Result")
99
  st.markdown(f"**Sentiment:** <span style='color:{sentiment_color}; font-size:20px;'>{sentiment_label}</span>", unsafe_allow_html=True)
100
  st.write(f"**Confidence:** {sentiment['score']:.2f}")
101
 
102
- # **Create a 2x3 Grid Layout for Plots with Larger Size and Adjustments**
103
- fig, axes = plt.subplots(2, 3, figsize=(30, 18), dpi=300) # Increased size and DPI for better quality and larger plots
104
- axes = axes.flatten() # Flatten the 2x3 grid to make it easier to index
105
-
106
- # **Plot 1: Line Plot of Average MFCCs**
107
- axes[0].plot(range(1, len(mfccs_mean) + 1), mfccs_mean, marker='o', color='b', label="Average MFCCs")
108
- axes[0].set_xlabel("MFCC Coefficients")
109
- axes[0].set_ylabel("Mean Value")
110
- axes[0].set_title("Average MFCCs Across Time")
111
- axes[0].legend()
112
-
113
- # **Plot 2: Audio Waveform with Sentiment Indicator**
114
- librosa.display.waveshow(y, sr=sr, ax=axes[1], alpha=0.5, color=sentiment_color)
115
- axes[1].set_xlabel("Time (s)")
116
- axes[1].set_ylabel("Amplitude")
117
- axes[1].set_title(f"Waveform of Audio ({'Good' if sentiment_label == 'POSITIVE' else 'Bad'})")
118
-
119
- # **Plot 3: MFCC Heatmap**
120
- sns.heatmap(mfccs, ax=axes[2], cmap="coolwarm", yticklabels=[f"MFCC {i}" for i in range(1, 14)])
121
- axes[2].set_xlabel("Time Frames")
122
- axes[2].set_ylabel("MFCC Coefficients")
123
- axes[2].set_title(f"MFCC Feature Heatmap ({'Good' if sentiment_label == 'POSITIVE' else 'Bad'})")
124
-
125
- # **Plot 4: Spectrogram**
126
- S = librosa.feature.melspectrogram(y=y, sr=sr)
127
- S_dB = librosa.power_to_db(S, ref=np.max)
128
- img = librosa.display.specshow(S_dB, sr=sr, x_axis="time", y_axis="mel", ax=axes[3])
129
- fig.colorbar(img, ax=axes[3], format="%+2.0f dB")
130
- axes[3].set_title(f"Mel Spectrogram ({'Good' if sentiment_label == 'POSITIVE' else 'Bad'})")
131
-
132
- # **Plot 5: Sentiment Trend Over Time (10-sec intervals)**
133
  sentiment_scores = analyze_sentiment_over_time(y, sr)
134
- sentiment_labels = list(set(sentiment_scores)) # Unique sentiment labels
135
-
136
- sentiment_numeric = [1 if label == "POSITIVE" else -1 for label in sentiment_scores]
137
- axes[4].scatter(range(len(sentiment_numeric)), sentiment_numeric, c=sns.color_palette("coolwarm", as_cmap=True)(sentiment_numeric))
138
- axes[4].set_title("Sentiment Trend (10 sec intervals)")
139
- axes[4].set_xticks(range(0, len(sentiment_scores), len(sentiment_scores)//5))
140
- axes[4].set_yticks([-1, 1], labels=["Negative", "Positive"])
141
-
142
- # **Plot 6: Performance Scoring**
143
- duration_min = librosa.get_duration(y=y, sr=sr) / 60
144
- if duration_min > 8:
145
- performance = "Critical (Long call, needs improvement)"
146
- box_color = "#FFCCCB" # Light red for critical calls
147
- elif sentiment_label == "NEGATIVE":
148
- performance = "Needs Improvement (Negative sentiment)"
149
- box_color = "#FFDD00" # Light yellow for improvement needed
150
- else:
151
- performance = "Good (Positive sentiment)"
152
- box_color = "#98FB98" # Light green for good performance
153
-
154
- # Hide the axis
155
- axes[5].axis('off')
156
-
157
- # Add a colored rectangle as the background box for performance text
158
- box = patches.FancyBboxPatch((0.05, 0.4), 0.9, 0.2, boxstyle="round,pad=0.05", linewidth=2, edgecolor="black", facecolor=box_color)
159
- axes[5].add_patch(box)
160
-
161
- # Add the performance text inside the colored box
162
- axes[5].text(0.5, 0.5, f"Agent Performance: {performance}", ha='center', va='center', fontsize=14, color="black")
163
-
164
- # Adjust the spacing between the subplots to avoid overlap
165
- plt.subplots_adjust(wspace=0.4, hspace=0.5)
166
-
167
- # Display the plot in Streamlit
168
  st.pyplot(fig)
169
 
170
- # Display words from audio (positive and negative words in boxes)
171
  st.subheader("🗣️ Positive and Negative Words in Audio")
172
-
173
- # Create two horizontal boxes for good and negative words
174
- col1, col2 = st.columns([1, 1])
175
  with col1:
176
  st.markdown("### Good Words")
177
  st.write(", ".join(good_words) if good_words else "No good words detected.")
@@ -179,13 +159,9 @@ if uploaded_file:
179
  st.markdown("### Negative Words")
180
  st.write(", ".join(negative_words) if negative_words else "No negative words detected.")
181
 
182
- # Display only negative words separately
183
- st.subheader("🗣️ Negative Words in Audio")
184
- st.write(", ".join(negative_words) if negative_words else "No negative words detected.")
185
 
186
- # Display transcription result in Streamlit
187
- st.subheader("📝 Full Transcription Result (Raw JSON)")
188
- st.json(result)
189
-
190
  # Clean up temp file
191
  os.remove(file_path)
 
23
  # Upload audio file
24
  uploaded_file = st.file_uploader("Choose an MP3 file", type=["mp3"])
25
 
26
# Function to analyze sentiment over time
def analyze_sentiment_over_time(y, sr, chunk_duration=10):
    """
    Split the audio waveform into fixed-length chunks, transcribe each chunk
    with Whisper, and run sentiment analysis on each transcript.

    Args:
        y: The audio waveform (1-D float array, as returned by librosa.load).
        sr: Sample rate of ``y`` in Hz.
        chunk_duration: Duration of each chunk in seconds (default: 10 sec).

    Returns:
        A list of sentiment labels ("POSITIVE"/"NEGATIVE"/"NEUTRAL"), one per
        full chunk.  Audio shorter than one chunk yields an empty list; a
        trailing partial chunk is ignored (same as the original behavior).
    """
    # Local import: scipy is only needed here, and the module's top-level
    # imports are outside this block's view.
    from scipy.io import wavfile

    chunk_length = chunk_duration * sr    # chunk size in samples
    total_chunks = len(y) // chunk_length # floor division drops the remainder
    sentiment_labels = []

    for i in range(total_chunks):
        start_sample = i * chunk_length
        chunk_audio = y[start_sample:start_sample + chunk_length]

        # Write the chunk to a temporary WAV file.
        # FIX: librosa.output.write_wav was removed in librosa 0.8, so the
        # original call raises AttributeError on any modern install.
        # scipy.io.wavfile.write is a drop-in replacement: float32 samples
        # in [-1, 1] are written as an IEEE-float WAV, which ffmpeg (and
        # therefore Whisper) reads without issue.
        temp_wav_path = f"temp_chunk_{i}.wav"
        wavfile.write(temp_wav_path, sr, np.asarray(chunk_audio, dtype=np.float32))

        try:
            # Transcribe with Whisper (whisper_model is defined at module level).
            result = whisper_model.transcribe(temp_wav_path)
        finally:
            # Remove the temporary WAV even if transcription fails,
            # so chunks never leak onto disk.
            os.remove(temp_wav_path)

        transcribed_text = result["text"]

        # Run sentiment analysis only when Whisper produced any text;
        # silent chunks are labeled NEUTRAL.
        if transcribed_text.strip():
            sentiment_result = sentiment_analyzer(transcribed_text)
            sentiment_labels.append(sentiment_result[0]["label"])
        else:
            sentiment_labels.append("NEUTRAL")

    return sentiment_labels
66
+
67
  # Function to process audio and get sentiment
68
  def analyze_audio(file_path):
69
  # Convert MP3 to WAV
 
76
 
77
  # Extract MFCCs (Mel-frequency cepstral coefficients)
78
  mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
79
+ mfccs_mean = np.mean(mfccs, axis=1)
80
 
81
+ # Transcribe audio
82
+ result = whisper_model.transcribe(wav_path)
83
+ transcribed_text = result["text"]
84
 
85
+ # Run sentiment analysis
86
+ if transcribed_text.strip():
87
+ sentiment_result = sentiment_analyzer(transcribed_text)
88
+ else:
89
+ sentiment_result = [{"label": "NEUTRAL", "score": 0.0}]
90
+
91
+ os.remove(wav_path) # Remove WAV file after processing
92
+ return y, sr, sentiment_result[0], mfccs, mfccs_mean, transcribed_text
93
 
94
  # Function to extract words from audio using Whisper
95
  def extract_words_from_audio(file_path):
 
101
  # Transcribe audio using Whisper
102
  result = whisper_model.transcribe(wav_path, word_timestamps=True)
103
 
104
+ # Extract words and timestamps
105
  words = []
106
  for segment in result['segments']:
107
  for word_info in segment['words']:
108
+ words.append({"word": word_info['word'], "start_time": word_info['start'], "end_time": word_info['end']})
 
 
 
109
 
 
110
  os.remove(wav_path)
 
111
  return words, result['text']
112
 
113
  # Process and plot if a file is uploaded
114
  if uploaded_file:
115
  file_path = f"temp/{uploaded_file.name}"
116
+ os.makedirs("temp", exist_ok=True) # Ensure temp directory exists
117
 
 
 
 
 
118
  with open(file_path, "wb") as f:
119
  f.write(uploaded_file.getbuffer())
120
 
121
+ # Analyze sentiment & extract features
122
+ y, sr, sentiment, mfccs, mfccs_mean, transcribed_text = analyze_audio(file_path)
123
 
124
+ # Extract words from audio
125
+ words_from_audio, _ = extract_words_from_audio(file_path)
126
 
127
+ # Categorize words
128
+ good_words = [w['word'] for w in words_from_audio if w['word'].lower() in ['good', 'excellent', 'positive', 'great', 'happy', 'success']]
129
+ negative_words = [w['word'] for w in words_from_audio if w['word'].lower() in ['bad', 'negative', 'poor', 'angry', 'sad', 'failure']]
130
 
131
+ # Determine sentiment color
132
  sentiment_label = sentiment['label']
133
  sentiment_color = "green" if sentiment_label == "POSITIVE" else "red"
134
 
135
+ # Display sentiment
136
  st.subheader("📊 Sentiment Analysis Result")
137
  st.markdown(f"**Sentiment:** <span style='color:{sentiment_color}; font-size:20px;'>{sentiment_label}</span>", unsafe_allow_html=True)
138
  st.write(f"**Confidence:** {sentiment['score']:.2f}")
139
 
140
+ # Analyze sentiment over time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  sentiment_scores = analyze_sentiment_over_time(y, sr)
142
+ sentiment_numeric = [1 if s == "POSITIVE" else -1 for s in sentiment_scores]
143
+
144
+ # Plot sentiment trend
145
+ fig, ax = plt.subplots(figsize=(8, 4))
146
+ ax.scatter(range(len(sentiment_numeric)), sentiment_numeric, c=sentiment_numeric, cmap="coolwarm")
147
+ ax.set_title("Sentiment Trend (10-sec intervals)")
148
+ ax.set_xticks(range(0, len(sentiment_scores), max(1, len(sentiment_scores)//5)))
149
+ ax.set_yticks([-1, 1], labels=["Negative", "Positive"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  st.pyplot(fig)
151
 
152
+ # Display positive & negative words
153
  st.subheader("🗣️ Positive and Negative Words in Audio")
154
+ col1, col2 = st.columns(2)
 
 
155
  with col1:
156
  st.markdown("### Good Words")
157
  st.write(", ".join(good_words) if good_words else "No good words detected.")
 
159
  st.markdown("### Negative Words")
160
  st.write(", ".join(negative_words) if negative_words else "No negative words detected.")
161
 
162
+ # Display full transcription
163
+ st.subheader("📝 Full Transcription")
164
+ st.write(transcribed_text)
165
 
 
 
 
 
166
  # Clean up temp file
167
  os.remove(file_path)