ombhojane committed on
Commit
82d6a45
·
verified ·
1 Parent(s): e8b2410

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -129
app.py CHANGED
@@ -6,9 +6,6 @@ from google import genai
6
  from google.genai import types
7
  import asyncio
8
  import concurrent.futures
9
- from streamlit_audiorecorder import st_audiorecorder
10
- import pydub
11
-
12
 
13
  GEMINI_API_KEY = st.secrets["GEMINI_API_KEY"]
14
  client = genai.Client(api_key=GEMINI_API_KEY)
@@ -22,7 +19,6 @@ st.write("Upload your speech and get AI-powered feedback")
22
 
23
  def analyze_knowledge_relevancy(audio_data, title):
24
  prompt = f"""As an expert in content analysis, evaluate this speech titled '{title}' focusing ONLY on:
25
-
26
  1. Knowledge Depth:
27
  - Topic expertise level
28
  - Accuracy of information
@@ -32,7 +28,6 @@ def analyze_knowledge_relevancy(audio_data, title):
32
  - Alignment with topic
33
  - Appropriate examples
34
  - Target audience fit
35
-
36
  Provide a structured analysis with specific examples from the speech."""
37
 
38
  contents = [
@@ -46,10 +41,10 @@ def analyze_knowledge_relevancy(audio_data, title):
46
  ]
47
 
48
  response = client.models.generate_content(
49
- model="gemini-2.0-flash",
50
  contents=contents,
51
  config=types.GenerateContentConfig(
52
- temperature=0.7,
53
  top_p=0.95,
54
  top_k=40,
55
  max_output_tokens=8192,
@@ -60,7 +55,6 @@ def analyze_knowledge_relevancy(audio_data, title):
60
 
61
  def analyze_emotional_delivery(audio_data, title):
62
  prompt = f"""As an expert in public speaking delivery, analyze this speech titled '{title}' focusing ONLY on:
63
-
64
  1. Emotional Expression:
65
  - Voice modulation
66
  - Emotional engagement
@@ -71,7 +65,6 @@ def analyze_emotional_delivery(audio_data, title):
71
  - Use of pauses
72
  - Filler words
73
  - Voice clarity
74
-
75
  Provide specific examples and timestamps where possible."""
76
 
77
  contents = [
@@ -85,10 +78,10 @@ def analyze_emotional_delivery(audio_data, title):
85
  ]
86
 
87
  response = client.models.generate_content(
88
- model="gemini-2.0-flash",
89
  contents=contents,
90
  config=types.GenerateContentConfig(
91
- temperature=0.7,
92
  top_p=0.95,
93
  top_k=40,
94
  max_output_tokens=8192,
@@ -99,13 +92,10 @@ def analyze_emotional_delivery(audio_data, title):
99
 
100
  def generate_final_analysis(knowledge_analysis, emotional_analysis):
101
  prompt_final = f"""As a comprehensive public speaking coach, analyze these two detailed evaluations:
102
-
103
  Knowledge Analysis:
104
  {knowledge_analysis}
105
-
106
  Emotional Delivery Analysis:
107
  {emotional_analysis}
108
-
109
  Provide:
110
  1. Overall Score (0-100)
111
  2. Key Strengths (Top 3)
@@ -127,7 +117,7 @@ def generate_final_analysis(knowledge_analysis, emotional_analysis):
127
  ]
128
 
129
  response = client.models.generate_content(
130
- model="gemini-2.0-flash",
131
  contents=contents,
132
  config=types.GenerateContentConfig(
133
  temperature=0.7,
@@ -138,7 +128,7 @@ def generate_final_analysis(knowledge_analysis, emotional_analysis):
138
  )
139
 
140
  return response.text
141
-
142
  def parallel_analysis(audio_data, title):
143
  with concurrent.futures.ThreadPoolExecutor() as executor:
144
  # Submit both analysis tasks
@@ -170,118 +160,52 @@ def parallel_analysis(audio_data, title):
170
 
171
  # Main interface
172
  title = st.text_input("Speech Title/Topic:", placeholder="e.g., Introduction to Machine Learning")
173
- # Input method selection
174
- input_method = st.radio("Choose input method:", ["Upload Audio File", "Record Speech"], key="input_method_radio")
175
-
176
- audio_data = None
177
- audio_path = None
178
 
179
- if input_method == "Upload Audio File":
180
- uploaded_file = st.file_uploader("Upload your speech (WAV, MP3, M4A)", type=["wav", "mp3", "m4a"], key="speech_file_uploader")
181
- if uploaded_file:
182
- st.audio(uploaded_file, key="uploaded_audio_player")
183
- if title and uploaded_file:
184
- if st.button("Analyze Speech", key="analyze_uploaded_speech_button"):
185
- with st.spinner("Processing your speech..."):
186
- # Save and process audio
187
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
188
- audio_data = uploaded_file.read()
189
- tmp_file.write(audio_data)
190
- audio_path = tmp_file.name
191
-
192
- try:
193
- # Run parallel analysis
194
- knowledge, emotional, final = parallel_analysis(audio_data, title)
195
-
196
- # Download options
197
- col1, col2, col3 = st.columns(3)
198
- with col1:
199
- st.download_button(
200
- "Download Knowledge Analysis",
201
- knowledge,
202
- file_name=f"knowledge_analysis_{title}.txt",
203
- key="download_knowledge_button"
204
- )
205
- with col2:
206
- st.download_button(
207
- "Download Emotional Analysis",
208
- emotional,
209
- file_name=f"emotional_analysis_{title}.txt",
210
- key="download_emotional_button"
211
- )
212
- with col3:
213
- st.download_button(
214
- "Download Final Analysis",
215
- final,
216
- file_name=f"final_analysis_{title}.txt",
217
- key="download_final_button"
218
- )
219
-
220
- except Exception as e:
221
- st.error(f"Error during analysis: {str(e)}")
222
- if "API key" in str(e):
223
- st.warning("Please check your Google API key configuration.")
224
-
225
- finally:
226
- if os.path.exists(audio_path):
227
- os.unlink(audio_path)
228
- else:
229
- st.info("Please provide both a title and upload your speech recording to begin.")
230
-
231
- else: # Record Speech
232
- st.write("Record your speech directly:")
233
- audio_bytes = st_audiorecorder(pause_threshold=2.0, sample_rate=44100, key="speech_recorder")
234
-
235
- if audio_bytes and title:
236
- # Convert audio bytes to WAV format using pydub
237
- audio_segment = pydub.AudioSegment.from_wav(audio_bytes)
238
-
239
- st.audio(audio_bytes, key="recorded_audio_player")
240
-
241
- if st.button("Analyze Recorded Speech", key="analyze_recorded_speech_button"):
242
- with st.spinner("Processing your speech..."):
243
- # Save recorded audio to temporary file
244
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
245
- audio_segment.export(tmp_file.name, format="wav")
246
- audio_path = tmp_file.name
247
-
248
- try:
249
- # Run parallel analysis
250
- knowledge, emotional, final = parallel_analysis(audio_bytes, title)
251
-
252
- # Download options
253
- col1, col2, col3 = st.columns(3)
254
- with col1:
255
- st.download_button(
256
- "Download Knowledge Analysis",
257
- knowledge,
258
- file_name=f"knowledge_analysis_{title}.txt",
259
- key="download_recorded_knowledge_button"
260
- )
261
- with col2:
262
- st.download_button(
263
- "Download Emotional Analysis",
264
- emotional,
265
- file_name=f"emotional_analysis_{title}.txt",
266
- key="download_recorded_emotional_button"
267
- )
268
- with col3:
269
- st.download_button(
270
- "Download Final Analysis",
271
- final,
272
- file_name=f"final_analysis_{title}.txt",
273
- key="download_recorded_final_button"
274
- )
275
-
276
- except Exception as e:
277
- st.error(f"Error during analysis: {str(e)}")
278
- if "API key" in str(e):
279
- st.warning("Please check your Google API key configuration.")
280
 
281
- finally:
282
- if os.path.exists(audio_path):
283
- os.unlink(audio_path)
284
- elif not title and audio_bytes:
285
- st.info("Please provide a title for your speech to begin analysis.")
286
- elif not audio_bytes:
287
- st.info("Record your speech using the microphone button above.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from google.genai import types
7
  import asyncio
8
  import concurrent.futures
 
 
 
9
 
10
  GEMINI_API_KEY = st.secrets["GEMINI_API_KEY"]
11
  client = genai.Client(api_key=GEMINI_API_KEY)
 
19
 
20
  def analyze_knowledge_relevancy(audio_data, title):
21
  prompt = f"""As an expert in content analysis, evaluate this speech titled '{title}' focusing ONLY on:
 
22
  1. Knowledge Depth:
23
  - Topic expertise level
24
  - Accuracy of information
 
28
  - Alignment with topic
29
  - Appropriate examples
30
  - Target audience fit
 
31
  Provide a structured analysis with specific examples from the speech."""
32
 
33
  contents = [
 
41
  ]
42
 
43
  response = client.models.generate_content(
44
+ model="gemini-2.0-pro-exp-02-05",
45
  contents=contents,
46
  config=types.GenerateContentConfig(
47
+ temperature=0.4,
48
  top_p=0.95,
49
  top_k=40,
50
  max_output_tokens=8192,
 
55
 
56
  def analyze_emotional_delivery(audio_data, title):
57
  prompt = f"""As an expert in public speaking delivery, analyze this speech titled '{title}' focusing ONLY on:
 
58
  1. Emotional Expression:
59
  - Voice modulation
60
  - Emotional engagement
 
65
  - Use of pauses
66
  - Filler words
67
  - Voice clarity
 
68
  Provide specific examples and timestamps where possible."""
69
 
70
  contents = [
 
78
  ]
79
 
80
  response = client.models.generate_content(
81
+ model="gemini-2.0-pro-exp-02-05",
82
  contents=contents,
83
  config=types.GenerateContentConfig(
84
+ temperature=0.4,
85
  top_p=0.95,
86
  top_k=40,
87
  max_output_tokens=8192,
 
92
 
93
  def generate_final_analysis(knowledge_analysis, emotional_analysis):
94
  prompt_final = f"""As a comprehensive public speaking coach, analyze these two detailed evaluations:
 
95
  Knowledge Analysis:
96
  {knowledge_analysis}
 
97
  Emotional Delivery Analysis:
98
  {emotional_analysis}
 
99
  Provide:
100
  1. Overall Score (0-100)
101
  2. Key Strengths (Top 3)
 
117
  ]
118
 
119
  response = client.models.generate_content(
120
+ model="gemini-2.0-pro-exp-02-05",
121
  contents=contents,
122
  config=types.GenerateContentConfig(
123
  temperature=0.7,
 
128
  )
129
 
130
  return response.text
131
+
132
  def parallel_analysis(audio_data, title):
133
  with concurrent.futures.ThreadPoolExecutor() as executor:
134
  # Submit both analysis tasks
 
160
 
161
  # Main interface
162
  title = st.text_input("Speech Title/Topic:", placeholder="e.g., Introduction to Machine Learning")
 
 
 
 
 
163
 
164
+ uploaded_file = st.file_uploader("Upload your speech (WAV, MP3, M4A)", type=["wav", "mp3", "m4a"])
165
+ if uploaded_file:
166
+ st.audio(uploaded_file)
167
+
168
+ if title and uploaded_file:
169
+ if st.button("Analyze Speech"):
170
+ with st.spinner("Processing your speech..."):
171
+ # Save and process audio
172
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
173
+ audio_data = uploaded_file.read()
174
+ tmp_file.write(audio_data)
175
+ audio_path = tmp_file.name
176
+
177
+ try:
178
+ # Run parallel analysis
179
+ knowledge, emotional, final = parallel_analysis(audio_data, title)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
+ # Download options
182
+ col1, col2, col3 = st.columns(3)
183
+ with col1:
184
+ st.download_button(
185
+ "Download Knowledge Analysis",
186
+ knowledge,
187
+ file_name=f"knowledge_analysis_{title}.txt"
188
+ )
189
+ with col2:
190
+ st.download_button(
191
+ "Download Emotional Analysis",
192
+ emotional,
193
+ file_name=f"emotional_analysis_{title}.txt"
194
+ )
195
+ with col3:
196
+ st.download_button(
197
+ "Download Final Analysis",
198
+ final,
199
+ file_name=f"final_analysis_{title}.txt"
200
+ )
201
+
202
+ except Exception as e:
203
+ st.error(f"Error during analysis: {str(e)}")
204
+ if "API key" in str(e):
205
+ st.warning("Please check your Google API key configuration.")
206
+
207
+ finally:
208
+ if os.path.exists(audio_path):
209
+ os.unlink(audio_path)
210
+ else:
211
+ st.info("Please provide both a title and upload your speech recording to begin.")