Spaces:
Sleeping
Sleeping
Commit ·
5000d19
1
Parent(s): 44c255c
Update t-SNE plots to use a selectable color column (`main_topic`, `first_emotion`, or `second_emotion`)
Browse files- src/app.py +40 -10
src/app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
from transformers import pipeline
|
| 4 |
from sentence_transformers import SentenceTransformer
|
|
@@ -134,6 +135,9 @@ def nmf_plots(df,
|
|
| 134 |
for i, col in enumerate(topic_cols):
|
| 135 |
df[col] = nmf_embeddings[i]
|
| 136 |
|
|
|
|
|
|
|
|
|
|
| 137 |
# Get word values for every topic
|
| 138 |
word_df = pd.DataFrame(
|
| 139 |
nmf.components_.T,
|
|
@@ -171,7 +175,7 @@ def nmf_plots(df,
|
|
| 171 |
return df, [topic_words_fig, contributions_fig]
|
| 172 |
|
| 173 |
|
| 174 |
-
def tsne_plots(df, encoder, emotion_cols,
|
| 175 |
"""
|
| 176 |
Encodes all `text_original` values of `df` DataFrame with `encoder`,
|
| 177 |
uses t-SNE algorithm for visualization on these embeddings and on
|
|
@@ -193,12 +197,21 @@ def tsne_plots(df, encoder, emotion_cols, color_emotion, tsne_perplexity):
|
|
| 193 |
# Also use predicted emotions
|
| 194 |
if emotion_cols:
|
| 195 |
tsne_cols = embedding_cols + emotion_cols
|
| 196 |
-
color =
|
| 197 |
hover_data = ['first_emotion', 'second_emotion', 'text_original']
|
| 198 |
else:
|
| 199 |
tsne_cols = embedding_cols
|
| 200 |
color = None
|
| 201 |
-
hover_data = 'text_original'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
|
| 203 |
tsne_results = tsne.fit_transform(df[tsne_cols])
|
| 204 |
tsne_results = pd.DataFrame(
|
|
@@ -230,7 +243,8 @@ def tsne_plots(df, encoder, emotion_cols, color_emotion, tsne_perplexity):
|
|
| 230 |
hover_data=hover_data
|
| 231 |
)
|
| 232 |
fig3d.update_layout(
|
| 233 |
-
title_text="t-SNE Visualization Over Time"
|
|
|
|
| 234 |
)
|
| 235 |
|
| 236 |
return df, [fig2d, fig3d]
|
|
@@ -285,7 +299,15 @@ yt_api = YouTubeAPI(
|
|
| 285 |
|
| 286 |
# Input form
|
| 287 |
with st.form(key='input'):
|
| 288 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
|
| 290 |
# Emotions
|
| 291 |
emotions_checkbox = st.checkbox(
|
|
@@ -302,7 +324,7 @@ with st.form(key='input'):
|
|
| 302 |
nmf_components = st.slider(
|
| 303 |
"Topics (NMF Components)",
|
| 304 |
min_value=2,
|
| 305 |
-
max_value=
|
| 306 |
value=8,
|
| 307 |
step=1,
|
| 308 |
)
|
|
@@ -335,9 +357,9 @@ with st.form(key='input'):
|
|
| 335 |
step=1,
|
| 336 |
)
|
| 337 |
|
| 338 |
-
|
| 339 |
-
"
|
| 340 |
-
options=['first_emotion', 'second_emotion']
|
| 341 |
)
|
| 342 |
|
| 343 |
# Language Map
|
|
@@ -356,6 +378,9 @@ if submit:
|
|
| 356 |
comments = yt_api.get_comments(video_id)
|
| 357 |
except KeyError:
|
| 358 |
st.write("Video not found.")
|
|
|
|
|
|
|
|
|
|
| 359 |
bad_id = True
|
| 360 |
|
| 361 |
if not bad_id:
|
|
@@ -387,10 +412,15 @@ if submit:
|
|
| 387 |
|
| 388 |
if tsne_checkbox:
|
| 389 |
# t-SNE visualization
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
df, tsne_figs = tsne_plots(df,
|
| 391 |
sentence_encoder,
|
| 392 |
emotion_cols,
|
| 393 |
-
|
| 394 |
tsne_perplexity)
|
| 395 |
plots.extend(tsne_figs)
|
| 396 |
|
|
|
|
| 1 |
import os
|
| 2 |
+
import urllib.parse as urlparse
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
from transformers import pipeline
|
| 5 |
from sentence_transformers import SentenceTransformer
|
|
|
|
| 135 |
for i, col in enumerate(topic_cols):
|
| 136 |
df[col] = nmf_embeddings[i]
|
| 137 |
|
| 138 |
+
# Create `main_topic` column with the highest value topic name
|
| 139 |
+
df['main_topic'] = df[topic_cols].apply(lambda row: row.idxmax(), axis=1)
|
| 140 |
+
|
| 141 |
# Get word values for every topic
|
| 142 |
word_df = pd.DataFrame(
|
| 143 |
nmf.components_.T,
|
|
|
|
| 175 |
return df, [topic_words_fig, contributions_fig]
|
| 176 |
|
| 177 |
|
| 178 |
+
def tsne_plots(df, encoder, emotion_cols, tsne_color, tsne_perplexity):
|
| 179 |
"""
|
| 180 |
Encodes all `text_original` values of `df` DataFrame with `encoder`,
|
| 181 |
uses t-SNE algorithm for visualization on these embeddings and on
|
|
|
|
| 197 |
# Also use predicted emotions
|
| 198 |
if emotion_cols:
|
| 199 |
tsne_cols = embedding_cols + emotion_cols
|
| 200 |
+
color = tsne_color
|
| 201 |
hover_data = ['first_emotion', 'second_emotion', 'text_original']
|
| 202 |
else:
|
| 203 |
tsne_cols = embedding_cols
|
| 204 |
color = None
|
| 205 |
+
hover_data = ['text_original']
|
| 206 |
+
|
| 207 |
+
if 'main_topic' in df.columns:
|
| 208 |
+
hover_data.append('main_topic')
|
| 209 |
+
|
| 210 |
+
# Color column
|
| 211 |
+
if 'main_topic' in df.columns or emotion_cols:
|
| 212 |
+
color = tsne_color
|
| 213 |
+
else:
|
| 214 |
+
color = None
|
| 215 |
|
| 216 |
tsne_results = tsne.fit_transform(df[tsne_cols])
|
| 217 |
tsne_results = pd.DataFrame(
|
|
|
|
| 243 |
hover_data=hover_data
|
| 244 |
)
|
| 245 |
fig3d.update_layout(
|
| 246 |
+
title_text="t-SNE Visualization Over Time",
|
| 247 |
+
height=800
|
| 248 |
)
|
| 249 |
|
| 250 |
return df, [fig2d, fig3d]
|
|
|
|
| 299 |
|
| 300 |
# Input form
|
| 301 |
with st.form(key='input'):
|
| 302 |
+
# Input
|
| 303 |
+
url_input = st.text_input("URL or ID")
|
| 304 |
+
# Get ID from URL
|
| 305 |
+
url_data = urlparse.urlparse(url_input)
|
| 306 |
+
query = urlparse.parse_qs(url_data.query)
|
| 307 |
+
if 'v' in query:
|
| 308 |
+
video_id = query['v'][0]
|
| 309 |
+
else:
|
| 310 |
+
video_id = url_input
|
| 311 |
|
| 312 |
# Emotions
|
| 313 |
emotions_checkbox = st.checkbox(
|
|
|
|
| 324 |
nmf_components = st.slider(
|
| 325 |
"Topics (NMF Components)",
|
| 326 |
min_value=2,
|
| 327 |
+
max_value=12,
|
| 328 |
value=8,
|
| 329 |
step=1,
|
| 330 |
)
|
|
|
|
| 357 |
step=1,
|
| 358 |
)
|
| 359 |
|
| 360 |
+
tsne_color = st.selectbox(
|
| 361 |
+
"Plot Color",
|
| 362 |
+
options=['main_topic', 'first_emotion', 'second_emotion']
|
| 363 |
)
|
| 364 |
|
| 365 |
# Language Map
|
|
|
|
| 378 |
comments = yt_api.get_comments(video_id)
|
| 379 |
except KeyError:
|
| 380 |
st.write("Video not found.")
|
| 381 |
+
st.write(query)
|
| 382 |
+
st.write('v' in query)
|
| 383 |
+
st.write(video_id)
|
| 384 |
bad_id = True
|
| 385 |
|
| 386 |
if not bad_id:
|
|
|
|
| 412 |
|
| 413 |
if tsne_checkbox:
|
| 414 |
# t-SNE visualization
|
| 415 |
+
if not nmf_checkbox:
|
| 416 |
+
tsne_color = 'first_emotion'
|
| 417 |
+
if not emotions_checkbox:
|
| 418 |
+
tsne_color = 'main_topic'
|
| 419 |
+
|
| 420 |
df, tsne_figs = tsne_plots(df,
|
| 421 |
sentence_encoder,
|
| 422 |
emotion_cols,
|
| 423 |
+
tsne_color,
|
| 424 |
tsne_perplexity)
|
| 425 |
plots.extend(tsne_figs)
|
| 426 |
|