Update app.py
Browse files
app.py
CHANGED
|
@@ -93,7 +93,7 @@ def process_model(target_word):
|
|
| 93 |
# Word2Vec 모델 로드
|
| 94 |
model = Word2Vec.load("word2vec.model")
|
| 95 |
unique_words = get_unique(model)
|
| 96 |
-
|
| 97 |
# 각 단어의 임베딩 벡터 추출
|
| 98 |
word_vectors = np.array([model.wv[word] for word in unique_words])
|
| 99 |
|
|
@@ -101,26 +101,26 @@ def process_model(target_word):
|
|
| 101 |
word_vectors_3d = apply_pca(word_vectors)
|
| 102 |
|
| 103 |
# 색상 설정 (투명도 추가)
|
| 104 |
-
colors = ['rgba(
|
| 105 |
|
| 106 |
# 가장 가까운 단어 10개 찾기
|
| 107 |
if target_word in model.wv:
|
| 108 |
similar_words = model.wv.most_similar(target_word, topn=10)
|
| 109 |
similar_word_indices = [unique_words.index(word) for word, _ in similar_words]
|
| 110 |
for idx in similar_word_indices:
|
| 111 |
-
colors[idx] = 'rgba(
|
| 112 |
|
| 113 |
# 가장 먼 단어 10개 찾기
|
| 114 |
if target_word in model.wv:
|
| 115 |
all_words = model.wv.index_to_key # 모델에 포함된 모든 단어 리스트
|
| 116 |
-
dissimilar_words = sorted(
|
| 117 |
-
|
| 118 |
-
key=lambda x: x[1]
|
|
|
|
| 119 |
|
| 120 |
dissimilar_word_indices = [unique_words.index(word) for word, _ in dissimilar_words]
|
| 121 |
for idx in dissimilar_word_indices:
|
| 122 |
-
colors[idx] = 'rgba(
|
| 123 |
-
|
| 124 |
|
| 125 |
# Plotly๋ฅผ ์ฌ์ฉํ 3D ์ฐ์ ๋ ์์ฑ
|
| 126 |
fig = go.Figure(data=[go.Scatter3d(
|
|
@@ -131,7 +131,7 @@ def process_model(target_word):
|
|
| 131 |
text=unique_words,
|
| 132 |
textposition="top center",
|
| 133 |
marker=dict(
|
| 134 |
-
size=
|
| 135 |
color=colors,
|
| 136 |
)
|
| 137 |
)])
|
|
@@ -139,12 +139,12 @@ def process_model(target_word):
|
|
| 139 |
fig.update_layout(
|
| 140 |
title="Word Embeddings 3D Visualization",
|
| 141 |
scene=dict(
|
| 142 |
-
xaxis_title="
|
| 143 |
-
yaxis_title="
|
| 144 |
-
zaxis_title="
|
| 145 |
),
|
| 146 |
-
width=
|
| 147 |
-
height=
|
| 148 |
)
|
| 149 |
|
| 150 |
# 가장 가까운 단어 10개 목록 생성
|
|
@@ -152,7 +152,11 @@ def process_model(target_word):
|
|
| 152 |
if target_word in model.wv:
|
| 153 |
similar_words_text = "가장 가까운 단어 10개:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in similar_words])
|
| 154 |
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
|
| 158 |
# Gradio 인터페이스 시작
|
|
@@ -176,7 +180,7 @@ with gr.Blocks(css=".plot-box {width: 70%; height: 500px;}") as iface:
|
|
| 176 |
dissimilar_words_output = gr.Textbox(label="유사하지 않은 단어", interactive=False, lines=5)
|
| 177 |
|
| 178 |
submit_btn.click(
|
| 179 |
-
fn=
|
| 180 |
inputs=[word_input],
|
| 181 |
outputs=[plot_output, similar_words_output, dissimilar_words_output]
|
| 182 |
)
|
|
|
|
| 93 |
# Word2Vec 모델 로드
|
| 94 |
model = Word2Vec.load("word2vec.model")
|
| 95 |
unique_words = get_unique(model)
|
| 96 |
+
|
| 97 |
# 각 단어의 임베딩 벡터 추출
|
| 98 |
word_vectors = np.array([model.wv[word] for word in unique_words])
|
| 99 |
|
|
|
|
| 101 |
word_vectors_3d = apply_pca(word_vectors)
|
| 102 |
|
| 103 |
# 색상 설정 (투명도 추가)
|
| 104 |
+
colors = ['rgba(255, 255, 255, 0.15)' if word != target_word else 'rgba(255, 20, 147, 0.9)' for word in unique_words]
|
| 105 |
|
| 106 |
# 가장 가까운 단어 10개 찾기
|
| 107 |
if target_word in model.wv:
|
| 108 |
similar_words = model.wv.most_similar(target_word, topn=10)
|
| 109 |
similar_word_indices = [unique_words.index(word) for word, _ in similar_words]
|
| 110 |
for idx in similar_word_indices:
|
| 111 |
+
colors[idx] = 'rgba(255, 165, 0, 1)' # 가까운 단어들을 주황색으로 표시
|
| 112 |
|
| 113 |
# 가장 먼 단어 10개 찾기
|
| 114 |
if target_word in model.wv:
|
| 115 |
all_words = model.wv.index_to_key # 모델에 포함된 모든 단어 리스트
|
| 116 |
+
dissimilar_words = sorted(
|
| 117 |
+
[(word, model.wv.similarity(target_word, word)) for word in all_words if word != target_word],
|
| 118 |
+
key=lambda x: x[1]
|
| 119 |
+
)[:10] # 유사도가 가장 낮은 10개 단어 선택
|
| 120 |
|
| 121 |
dissimilar_word_indices = [unique_words.index(word) for word, _ in dissimilar_words]
|
| 122 |
for idx in dissimilar_word_indices:
|
| 123 |
+
colors[idx] = 'rgba(138, 43, 226, 0.8)' # 가장 먼 단어들을 보라색으로 표시
|
|
|
|
| 124 |
|
| 125 |
# Plotly๋ฅผ ์ฌ์ฉํ 3D ์ฐ์ ๋ ์์ฑ
|
| 126 |
fig = go.Figure(data=[go.Scatter3d(
|
|
|
|
| 131 |
text=unique_words,
|
| 132 |
textposition="top center",
|
| 133 |
marker=dict(
|
| 134 |
+
size=4,
|
| 135 |
color=colors,
|
| 136 |
)
|
| 137 |
)])
|
|
|
|
| 139 |
fig.update_layout(
|
| 140 |
title="Word Embeddings 3D Visualization",
|
| 141 |
scene=dict(
|
| 142 |
+
xaxis_title="X",
|
| 143 |
+
yaxis_title="Y",
|
| 144 |
+
zaxis_title="Z"
|
| 145 |
),
|
| 146 |
+
width=800,
|
| 147 |
+
height=800
|
| 148 |
)
|
| 149 |
|
| 150 |
# 가장 가까운 단어 10개 목록 생성
|
|
|
|
| 152 |
if target_word in model.wv:
|
| 153 |
similar_words_text = "가장 가까운 단어 10개:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in similar_words])
|
| 154 |
|
| 155 |
+
dissimilar_words_text = ""
|
| 156 |
+
if target_word in model.wv:
|
| 157 |
+
dissimilar_words_text = "가장 먼 단어 10개:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in dissimilar_words])
|
| 158 |
+
|
| 159 |
+
return fig, similar_words_text, dissimilar_words_text
|
| 160 |
|
| 161 |
|
| 162 |
# Gradio 인터페이스 시작
|
|
|
|
| 180 |
dissimilar_words_output = gr.Textbox(label="유사하지 않은 단어", interactive=False, lines=5)
|
| 181 |
|
| 182 |
submit_btn.click(
|
| 183 |
+
fn=process_model,
|
| 184 |
inputs=[word_input],
|
| 185 |
outputs=[plot_output, similar_words_output, dissimilar_words_output]
|
| 186 |
)
|