Vlad Bastina committed on
Commit
d2e2a95
·
0 Parent(s):

first commit

Browse files
Files changed (7) hide show
  1. .gitattributes +2 -0
  2. .gitignore +9 -0
  3. app.py +317 -0
  4. default_audio.wav +3 -0
  5. packages.txt +1 -0
  6. requirements.txt +5 -0
  7. zega_logo.PNG +3 -0
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.wav filter=lfs diff=lfs merge=lfs -text
2
+ *.PNG filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ *.wav
2
+ !default_audio.wav
3
+ *.json
4
+ *.txt
5
+ !requirements.txt
6
+ !packages.txt
7
+ sentence_segments
8
+ *.py
9
+ !app.py
app.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_mic_recorder import mic_recorder
3
+ import google.generativeai as genai
4
+ import plotly.express as px
5
+ import pandas as pd
6
+ import os
7
+ import io
8
+ import tempfile
9
+ import json
10
+ from pydub import AudioSegment # Used to ensure WAV format if needed
11
+
12
# --- Configuration ---
st.set_page_config(layout="wide", page_title="Audio Sentiment Analysis")
st.title("🗣️ Audio Sentiment Analysis with Gemini")
st.markdown("""
Upload a WAV file, record new audio, or use the default example. The app will use Google's Gemini model
to analyze the sentiment, focusing on the customer if it detects a support call.
""")

# --- Default File Configuration ---
# Bundled example clip; resolved relative to the working directory.
DEFAULT_AUDIO_FILENAME = "default_audio.wav"  # MAKE SURE THIS FILE EXISTS!

# --- API Key Handling ---
# Resolution order: environment variable, then Streamlit secrets, then a
# password field in the page.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    try:
        api_key = st.secrets["GOOGLE_API_KEY"]
    except Exception:
        # A missing secrets file or missing key must not abort the script,
        # otherwise the manual-entry fallback below can never be reached.
        # The concrete exception type differs between Streamlit versions,
        # hence the broad catch at this boundary.
        api_key = None

if not api_key:
    api_key = st.text_input("Enter your Google Gemini API Key:", type="password")

if not api_key:
    st.warning("Please enter your Gemini API Key to proceed.")
    st.stop()

try:
    genai.configure(api_key=api_key)
    # The model must accept audio input.
    # NOTE(review): this is an experimental model id - confirm it is still
    # served before deploying.
    model = genai.GenerativeModel(model_name="gemini-2.5-pro-exp-03-25")
except Exception as e:
    st.error(f"Error configuring Gemini SDK: {e}")
    st.stop()


st.sidebar.image("zega_logo.PNG", use_container_width=True)
42
+ # --- Function Definitions ---
43
+
44
def detailed_sentiment_prompt(is_customer_support=None, customer_focus=False):
    """Build the Gemini prompt text for one of three analysis modes.

    Args:
        is_customer_support: ``None`` selects the initial prompt, in which the
            model itself decides whether the audio is a support call. A truthy
            or falsy value states the classification that is already known.
        customer_focus: When true together with a truthy
            ``is_customer_support``, the prompt restricts the analysis to the
            customer's speech.

    Returns:
        The prompt string. Every variant asks for a JSON object with the keys
        ``is_customer_support``, ``analysis_target``, ``detailed_report`` and
        ``sentiment_timeline``.
    """
    # NOTE(review): the last sentence of this preamble asks for a free-text
    # 'Customer Support: Yes/No' answer while every variant below demands a
    # strict JSON reply; callers should tolerate text around the JSON object.
    base_prompt = """
Analyze the sentiment of the provided audio conversation in detail. Consider the following aspects:
1. **Voice Tone:** (e.g., calm, agitated, happy, sad, sarcastic, urgent, monotone)
2. **Voice Intensity:** (e.g., loud, quiet, normal, shouting, whispering)
3. **Speaking Pace:** (e.g., fast, slow, normal, rushed, hesitant)
4. **Specific Emotions:** Identify primary emotions expressed (e.g., frustration, relief, anger, confusion, satisfaction, politeness, impatience).

First, determine if this sounds like a customer support interaction (e.g., someone calling a company for help). Respond 'Customer Support: Yes' or 'Customer Support: No'.

"""

    if is_customer_support is None:
        # Initial analysis phase: classification is left to the model.
        return base_prompt + """
Based on your determination above, proceed with the sentiment analysis.

**Sentiment Timeline:** Provide a timeline of the overall sentiment throughout the conversation. Divide the audio into logical segments (e.g., every 15-20 seconds or by speaker turn if discernible) and assign a sentiment score from -10 (very negative) to +10 (very positive) for each segment.

**Output Format:** Structure your entire response strictly as a JSON object with the following keys:
- "is_customer_support": (boolean, true if it's customer support, false otherwise)
- "analysis_target": (string, "customer only" or "full conversation")
- "detailed_report": (string, a comprehensive text report covering tone, intensity, pace, emotions, and overall sentiment trends based on the analysis target)
- "sentiment_timeline": (array of numbers, e.g., [2, 1, -5, -3, 0, 4, 6])
"""

    if is_customer_support and customer_focus:
        # Known support call, analysis narrowed to the customer.
        return base_prompt + """
**Focus:** Since this is identified as a customer support call, focus your analysis *exclusively* on the speech segments likely belonging to the **customer**. Ignore the agent's speech for sentiment scoring and detailed analysis unless it directly influences the customer's reaction.

**Sentiment Timeline:** Provide a timeline of the **customer's** sentiment throughout the conversation. Divide the customer's speaking parts into logical segments and assign a sentiment score from -10 (very negative) to +10 (very positive) for each segment.

**Output Format:** Structure your entire response strictly as a JSON object with the following keys:
- "is_customer_support": true
- "analysis_target": "customer only"
- "detailed_report": (string, a comprehensive text report covering the *customer's* tone, intensity, pace, emotions, and overall sentiment trends)
- "sentiment_timeline": (array of numbers, representing the *customer's* sentiment scores, e.g., [-5, -6, -2, 1, 5])
"""

    # Not customer support, or a support call explicitly analysed as a whole.
    # The flag echoes the caller's classification instead of always claiming
    # "false", which was wrong for a support call analysed in full.
    support_flag = "true" if is_customer_support else "false"
    return base_prompt + """
**Focus:** Analyze the sentiment of the **entire conversation**, considering all speakers.

**Sentiment Timeline:** Provide a timeline of the overall sentiment throughout the conversation. Divide the audio into logical segments (e.g., every 15-20 seconds or by speaker turn) and assign a sentiment score from -10 (very negative) to +10 (very positive) for each segment.

**Output Format:** Structure your entire response strictly as a JSON object with the following keys:
- "is_customer_support": """ + support_flag + """
- "analysis_target": "full conversation"
- "detailed_report": (string, a comprehensive text report covering tone, intensity, pace, emotions, and overall sentiment trends for the *whole conversation*)
- "sentiment_timeline": (array of numbers, representing the *overall* sentiment scores, e.g., [2, 1, -5, -3, 0, 4, 6])
"""
95
+
96
+
97
def _extract_json_payload(raw_text):
    """Return the JSON object text embedded in a model reply.

    The reply is cut from its first "{" to its last "}", which discards
    Markdown code fences and any prose around the object. When no such pair
    exists the stripped text is returned unchanged so that ``json.loads``
    raises the decoding error.
    """
    text = raw_text.strip()
    start = text.find("{")
    end = text.rfind("}")
    if start != -1 and end > start:
        return text[start:end + 1]
    return text


def analyze_audio(audio_bytes, filename="uploaded_audio.wav"):
    """Send audio to Gemini and return the parsed sentiment analysis.

    The bytes are written to a temporary ``.wav`` file, uploaded with
    ``genai.upload_file`` and analysed with the initial prompt. If that first
    reply flags the audio as a customer support call, a second request with
    the customer-focused prompt is made and its result is used instead.

    Args:
        audio_bytes: Raw audio content; it is uploaded with MIME type
            ``audio/wav``.
        filename: Accepted for caller compatibility; not used by this
            function.

    Returns:
        A tuple ``(report, timeline, target, is_customer_support)``. On any
        failure the error is shown through Streamlit and the tuple is
        ``(<error text>, [], "Error", False)``.
    """
    temp_file_path = None
    uploaded_file_info = None
    try:
        # The upload call takes a path, so persist the bytes to a temp file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
            tmpfile.write(audio_bytes)
            temp_file_path = tmpfile.name

        # Upload the file to Gemini
        uploaded_file_info = genai.upload_file(path=temp_file_path, mime_type="audio/wav")

        # --- Initial Analysis Phase (Determine if Customer Support) ---
        initial_response = model.generate_content(
            [detailed_sentiment_prompt(), uploaded_file_info],
            request_options={"timeout": 600},  # Increased timeout
        )

        # --- Process Initial Response ---
        final_response = None
        try:
            initial_data = json.loads(_extract_json_payload(initial_response.text))
            is_customer_support = initial_data.get("is_customer_support", False)

            # --- Second Analysis Phase (Refined based on support type) ---
            if is_customer_support:
                refined_prompt = detailed_sentiment_prompt(is_customer_support=True, customer_focus=True)
                final_response = model.generate_content(
                    [refined_prompt, uploaded_file_info],
                    request_options={"timeout": 600},
                )
                analysis_data = json.loads(_extract_json_payload(final_response.text))
            else:
                # Not a support call: the first pass already covers the
                # full conversation, so reuse it.
                analysis_data = initial_data

            # Missing keys fall back to placeholders instead of raising.
            report = analysis_data.get("detailed_report", "Report not found in response.")
            timeline = analysis_data.get("sentiment_timeline", [])
            target = analysis_data.get("analysis_target", "unknown")

            return report, timeline, target, is_customer_support

        except json.JSONDecodeError:
            st.error("Error: Could not parse Gemini's response as JSON. Raw response:")
            st.code(initial_response.text)
            if final_response is not None:
                st.code(final_response.text)
            return "Error parsing response.", [], "Error", False
        except Exception as e:
            st.error(f"An error occurred during analysis: {e}")
            return f"Error: {e}", [], "Error", False

    except Exception as e:
        st.error(f"An error occurred during file processing or API call: {e}")
        return f"Error: {e}", [], "Error", False
    finally:
        # Best-effort cleanup of the remote upload and the local temp file;
        # failures are reported as warnings so they never mask the result.
        if uploaded_file_info:
            try:
                genai.delete_file(uploaded_file_info.name)
            except Exception as del_err:
                st.warning(f"Could not delete uploaded file from Gemini: {del_err}")
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
            except OSError as rm_err:
                st.warning(f"Could not remove temporary file '{temp_file_path}': {rm_err}")
180
+
181
+
182
def plot_sentiment_timeline(timeline_data):
    """Build a Plotly line chart of sentiment score per conversation segment.

    Args:
        timeline_data: List of per-segment scores. Entries that ``float()``
            cannot convert are skipped with a Streamlit warning.

    Returns:
        The Plotly figure, or ``None`` (after a Streamlit warning) when the
        input is empty, is not a list, or contains no convertible entry.
    """
    if not timeline_data or not isinstance(timeline_data, list):
        st.warning("No valid sentiment timeline data to plot.")
        return None

    # Keep each score paired with its 1-based position in the input so that
    # a skipped entry leaves a gap instead of renumbering later segments.
    segments = []
    scores = []
    for position, item in enumerate(timeline_data, start=1):
        try:
            score = float(item)
        except (ValueError, TypeError):
            st.warning(f"Skipping non-numeric value in timeline: {item}")
            continue
        segments.append(position)
        scores.append(score)

    if not scores:
        st.warning("No numeric sentiment data available after filtering.")
        return None

    df = pd.DataFrame({
        'Segment': segments,
        'Sentiment Score': scores,
    })

    # Fixed y-range with a small margin around the -10..+10 scale.
    fig = px.line(df, x='Segment', y='Sentiment Score',
                  title="Sentiment Progression Over Conversation Segments",
                  markers=True, range_y=[-10.5, 10.5])
    fig.update_layout(xaxis_title="Conversation Segment / Time Progression",
                      yaxis_title="Sentiment Score (-10 to +10)")
    return fig
213
+
214
+
215
# --- Streamlit UI Elements ---
# Both stay None until one of the input methods below supplies audio.
audio_bytes = None
file_name = None

# Radio labels are defined once so every branch compares against exactly the
# string the widget returns. The default-example branch previously tested a
# shortened label and therefore never ran.
UPLOAD_OPTION = "Upload WAV file"
RECORD_OPTION = "Record Audio"
DEFAULT_OPTION = "Use Default Example (Customer support call)"

input_method = st.radio(
    "Choose audio input method:",
    (UPLOAD_OPTION, RECORD_OPTION, DEFAULT_OPTION),
    index=0,
    key="input_method"
)

if input_method == UPLOAD_OPTION:
    uploaded_file = st.file_uploader("Choose a WAV file", type=['wav'], key="uploader")
    if uploaded_file is not None:
        file_name = uploaded_file.name
        audio_bytes = uploaded_file.getvalue()
        st.audio(audio_bytes, format='audio/wav')

elif input_method == RECORD_OPTION:
    st.write("Click the microphone to start/stop recording (allow microphone access).")
    audio_info = mic_recorder(
        start_prompt="🔴 Start Recording",
        stop_prompt="⏹️ Stop Recording",
        just_once=False,  # Allow multiple recordings without refresh
        use_container_width=True,
        format="wav",  # Specify wav format
        key='recorder'
    )

    if audio_info and audio_info['bytes']:
        st.success("Recording finished!")
        audio_bytes = audio_info['bytes']
        file_name = "recorded_audio.wav"
        st.audio(audio_bytes, format='audio/wav')

elif input_method == DEFAULT_OPTION:
    default_file_path = DEFAULT_AUDIO_FILENAME
    if os.path.exists(default_file_path):
        st.info(f"Using default example file: '{default_file_path}'")
        try:
            with open(default_file_path, "rb") as f:
                audio_bytes = f.read()
            file_name = os.path.basename(default_file_path)
            # Display the audio player for the default file
            st.audio(audio_bytes, format='audio/wav')
        except Exception as e:
            st.error(f"Error reading default file '{default_file_path}': {e}")
            # Reset so the analysis button is not offered.
            audio_bytes = None
            file_name = None
    else:
        st.error(f"Default file not found: '{default_file_path}'.")
        st.markdown(f"Please make sure a file named `{DEFAULT_AUDIO_FILENAME}` exists in the same directory as the Streamlit script (`app.py`).")
        # Ensure analysis button doesn't appear if file is missing
        audio_bytes = None
        file_name = None
286
+
287
+
288
# --- Analysis Trigger ---
# Runs for any input method: it only checks that audio_bytes and file_name
# were set above.
if audio_bytes and file_name:
    if st.button(f"Analyze Sentiment for '{file_name}'", key="analyze_button"):
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("📊 Sentiment Analysis Report")
            with st.spinner("Analyzing audio... This may take a minute or two depending on length."):
                report, timeline, target, is_cs = analyze_audio(audio_bytes, file_name)

            st.text_area("Detailed Report", report, height=400)

        with col2:
            st.subheader("📈 Sentiment Timeline Plot")
            if timeline:
                fig = plot_sentiment_timeline(timeline)
                if fig:
                    st.plotly_chart(fig, use_container_width=True)
                else:
                    st.info("Could not generate plot.")
            else:
                st.info("No sentiment timeline data available to plot.")
# Skip the generic hint when the default example is selected but its file is
# missing. The radio label carries a suffix after "Use Default Example", so
# match on the prefix; an exact comparison against the short text never
# matched and the hint was always shown.
elif not input_method.startswith("Use Default Example") or os.path.exists(DEFAULT_AUDIO_FILENAME):
    st.info("Please provide audio via one of the methods above to begin analysis.")

# --- Footer/Info ---
st.markdown("---")
st.markdown("Powered by ZEGA AI")
default_audio.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:599821dbef6d16e1b42bd91d8fb410acfc2ec4846e295a66b09934935b29db7e
3
+ size 4054096
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ streamlit_mic_recorder
3
+ google-generativeai
4
+ plotly
5
+ pydub
zega_logo.PNG ADDED

Git LFS Details

  • SHA256: ab929904c4eadf8cc1aadc9a797a469f20d31a5636770f1db2789f2096033558
  • Pointer size: 131 Bytes
  • Size of remote file: 116 kB