Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,12 +17,16 @@ import pandas as pd
|
|
| 17 |
PARQUET_DATASET_DIR = Path("parquet_dataset")
|
| 18 |
PARQUET_DATASET_DIR.mkdir(parents=True,exist_ok=True)
|
| 19 |
|
|
|
|
|
|
|
| 20 |
|
| 21 |
scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")
|
| 22 |
|
| 23 |
-
# Store results for viewing
|
| 24 |
if 'results' not in st.session_state:
|
| 25 |
st.session_state.results = []
|
|
|
|
|
|
|
| 26 |
|
| 27 |
def save_data(
|
| 28 |
config_dict: Dict[str,str], audio_paths: List[str], userid: str,
|
|
@@ -76,6 +80,7 @@ if uploaded_file_paths is not None:
|
|
| 76 |
audio_tabs = st.tabs([f.name for f in valid_files])
|
| 77 |
while (len(st.session_state.results) < len(valid_files)):
|
| 78 |
st.session_state.results.append([])
|
|
|
|
| 79 |
st.info(f'{len(valid_files)} valid files: {[fi.name for fi in valid_files]}')
|
| 80 |
for j, tab in enumerate(audio_tabs):
|
| 81 |
if tab.button("Analyze Audio",key=f"button_{j}"):
|
|
@@ -85,11 +90,11 @@ for j, tab in enumerate(audio_tabs):
|
|
| 85 |
# Process
|
| 86 |
# Pretend to take time as an example
|
| 87 |
with st.spinner(text='NOT ACTUALLY ANALYZING, JUST A FILLER ANIMATION'):
|
| 88 |
-
time.sleep(
|
| 89 |
st.success('Done')
|
| 90 |
|
| 91 |
# RTTM load as filler
|
| 92 |
-
speakerList, annotations = su.loadAudioRTTM(
|
| 93 |
st.session_state.results[j] = (speakerList,annotations)
|
| 94 |
|
| 95 |
if len(st.session_state.results) > j and len(st.session_state.results[j]) > 0:
|
|
@@ -247,14 +252,40 @@ for j, tab in enumerate(audio_tabs):
|
|
| 247 |
tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
|
| 248 |
tab.table(df)
|
| 249 |
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
userid = st.text_input("user id:", "Guest")
|
| 259 |
colorPref = st.text_input("Favorite color?", "None")
|
| 260 |
radio = st.radio('Pick one:', ['Left','Right'])
|
|
@@ -263,5 +294,4 @@ if st.button("Upload Files to Dataset"):
|
|
| 263 |
save_data({"color":colorPref,"direction":radio,"number":selection},
|
| 264 |
file_paths,
|
| 265 |
userid)
|
| 266 |
-
st.success('I think it worked!')
|
| 267 |
-
|
|
|
|
| 17 |
# Local staging directory for the parquet dataset; created on startup if missing.
PARQUET_DATASET_DIR = Path("parquet_dataset")
PARQUET_DATASET_DIR.mkdir(parents=True, exist_ok=True)

# Ground-truth RTTM files (weeks 1 through 10) used as stand-in analysis output.
sample_data = [
    f"CHEM1402_gt/24F_CHEM1402_Night_Class_Week_{week}_gt.rttm"
    for week in range(1, 11)
]

scheduler = ps.ParquetScheduler(repo_id="Sonogram/SampleDataset")

# Store results for viewing and further processing.
# Both lists live in session state and are only created when absent.
for _state_key in ("results", "summaries"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = []
|
| 30 |
|
| 31 |
def save_data(
|
| 32 |
config_dict: Dict[str,str], audio_paths: List[str], userid: str,
|
|
|
|
| 80 |
audio_tabs = st.tabs([f.name for f in valid_files])
|
| 81 |
while (len(st.session_state.results) < len(valid_files)):
|
| 82 |
st.session_state.results.append([])
|
| 83 |
+
st.session_state.summaries.append([])
|
| 84 |
st.info(f'{len(valid_files)} valid files: {[fi.name for fi in valid_files]}')
|
| 85 |
for j, tab in enumerate(audio_tabs):
|
| 86 |
if tab.button("Analyze Audio",key=f"button_{j}"):
|
|
|
|
| 90 |
# Process
|
| 91 |
# Pretend to take time as an example
|
| 92 |
with st.spinner(text='NOT ACTUALLY ANALYZING, JUST A FILLER ANIMATION'):
|
| 93 |
+
time.sleep(1)
|
| 94 |
st.success('Done')
|
| 95 |
|
| 96 |
# RTTM load as filler
|
| 97 |
+
speakerList, annotations = su.loadAudioRTTM(sample_data[j])
|
| 98 |
st.session_state.results[j] = (speakerList,annotations)
|
| 99 |
|
| 100 |
if len(st.session_state.results) > j and len(st.session_state.results[j]) > 0:
|
|
|
|
| 252 |
tab.write("Total length of audio: {}h:{:02d}m:{:02d}s".format(int(totalSeconds/3600),int((totalSeconds%3600)/60),int(totalSeconds%60)))
|
| 253 |
tab.table(df)
|
| 254 |
|
| 255 |
+
st.session_state.summaries[j] = [totalSeconds,lecturer_speaker_times,all_speaker_times]
|
| 256 |
+
|
| 257 |
+
# Build cross-file trend charts from the per-file summaries.
# Each non-empty entry in st.session_state.summaries is the list
# [totalSeconds, lecturer_speaker_times, all_speaker_times] stored when a
# file's results are rendered; files not yet analyzed hold an empty list.
# NOTE(review): indentation is lost in this view, so the extent of the spinner
# block is assumed to cover only the data preparation -- confirm.
with st.spinner(text='Processing summary results...'):
    # Keep only analyzed files so every column has exactly summary_count rows.
    # Iterating the summaries directly (not enumerate) keeps the indices
    # pointing into the summary itself rather than into an (index, summary)
    # pair, and avoids rebinding the name `su`, which is a module elsewhere.
    completed = [entry for entry in st.session_state.summaries if len(entry) > 0]
    summary_count = len(completed)

    # presumably lecturer_speaker_times is [lecturer_seconds, audience_seconds]
    # -- verify against where it is computed.
    lecturer_data = pd.DataFrame({
        'ds': range(summary_count),
        # Guard against a zero-length recording to avoid ZeroDivisionError.
        'y': [100 * entry[1][0] / entry[0] if entry[0] else 0.0
              for entry in completed],
    })
    audience_data = pd.DataFrame({
        'ds': range(summary_count),
        'y': [100 * entry[1][1] / entry[0] if entry[0] else 0.0
              for entry in completed],
    })
    speaker_data = pd.DataFrame({
        'ds': range(summary_count),
        # Number of entries in all_speaker_times for each analyzed file.
        'y': [len(entry[2]) for entry in completed],
    })

# Lecturer vs. audience share of speaking time, one point per analyzed file.
fig_la = px.line(lecturer_data, x="ds", y="y", color_discrete_sequence=["#0514C0"], labels={'y': 'Lecturer Time'})
fig_la.add_scatter(x=audience_data['ds'], y=audience_data['y'], mode='lines', name='Audience Time', line=dict(color='#4CC005'))
fig_la.update_layout(title='Percentage of Time Speaking for Lecturer and Audience', xaxis_title='File', yaxis_title='Percent Time Spoken')
st.plotly_chart(fig_la, use_container_width=True)

# Distinct speaker count, one point per analyzed file.
fig_spc = px.line(speaker_data, x="ds", y="y", color_discrete_sequence=["#0514C0"], labels={'y': 'Speaker Count'})
fig_spc.update_layout(title='Number of Distinct Speakers over Time', xaxis_title='File', yaxis_title='# of Distinct Speakers')
st.plotly_chart(fig_spc, use_container_width=True)
|
| 287 |
+
|
| 288 |
+
|
| 289 |
userid = st.text_input("user id:", "Guest")
|
| 290 |
colorPref = st.text_input("Favorite color?", "None")
|
| 291 |
radio = st.radio('Pick one:', ['Left','Right'])
|
|
|
|
| 294 |
save_data({"color":colorPref,"direction":radio,"number":selection},
|
| 295 |
file_paths,
|
| 296 |
userid)
|
| 297 |
+
st.success('I think it worked!')
|
|
|