word2vec

Sleeping

App Files Community

Ridealist committed on Nov 1, 2024

Commit

7112f5e

verified ·

1 Parent(s): 8c42cdb

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -16

app.py CHANGED Viewed

@@ -93,7 +93,7 @@ def process_model(target_word):
     # Word2Vec 모델 로드
     model = Word2Vec.load("word2vec.model")
     unique_words = get_unique(model)
     # 각 단어의 임베딩 벡터 추출
     word_vectors = np.array([model.wv[word] for word in unique_words])
@@ -101,26 +101,26 @@ def process_model(target_word):
     word_vectors_3d = apply_pca(word_vectors)
     # 색상 설정 (투명도 추가)
-    colors = ['rgba(128, 128, 128, 0.15)' if word != target_word else 'rgba(255, 0, 0, 1)' for word in unique_words]
     # 가장 가까운 단어 10개 찾기
     if target_word in model.wv:
         similar_words = model.wv.most_similar(target_word, topn=10)
         similar_word_indices = [unique_words.index(word) for word, _ in similar_words]
         for idx in similar_word_indices:
-            colors[idx] = 'rgba(0, 255, 0, 1)'  # 가까운 단어들을 초록색으로 표시
     # 가장 먼 단어 10개 찾기
     if target_word in model.wv:
         all_words = model.wv.index_to_key  # 모델에 포함된 모든 단어 리스트
-        dissimilar_words = sorted([(word, model.wv.similarity(target_word, word))
-        for word in all_words if word != target_word],
-            key=lambda x: x[1])[:10]  # 유사도가 가장 낮은 10개 단어 선택
         dissimilar_word_indices = [unique_words.index(word) for word, _ in dissimilar_words]
         for idx in dissimilar_word_indices:
-            colors[idx] = 'rgba(128, 0, 128, 1)'  # 가장 먼 단어들을 보라색으로 표시
     # Plotly를 사용한 3D 산점도 생성
     fig = go.Figure(data=[go.Scatter3d(
@@ -131,7 +131,7 @@ def process_model(target_word):
         text=unique_words,
         textposition="top center",
         marker=dict(
-            size=6,
             color=colors,
         )
     )])
@@ -139,12 +139,12 @@ def process_model(target_word):
     fig.update_layout(
         title="Word Embeddings 3D Visualization",
         scene=dict(
-            xaxis_title="PCA 1",
-            yaxis_title="PCA 2",
-            zaxis_title="PCA 3"
         ),
-        width=1000,
-        height=1000
     )
     # 가장 가까운 단어 10개 목록 생성
@@ -152,7 +152,11 @@ def process_model(target_word):
     if target_word in model.wv:
         similar_words_text = "가장 가까운 단어 10개:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in similar_words])
-    return fig, similar_words_text
 # Gradio 인터페이스 수정
@@ -176,7 +180,7 @@ with gr.Blocks(css=".plot-box {width: 70%; height: 500px;}") as iface:
             dissimilar_words_output = gr.Textbox(label="유사하지 않은 단어", interactive=False, lines=5)
     submit_btn.click(
-        fn=process_text,
         inputs=[word_input],
         outputs=[plot_output, similar_words_output, dissimilar_words_output]
     )

     # Word2Vec 모델 로드
     model = Word2Vec.load("word2vec.model")
     unique_words = get_unique(model)
     # 각 단어의 임베딩 벡터 추출
     word_vectors = np.array([model.wv[word] for word in unique_words])
     word_vectors_3d = apply_pca(word_vectors)
     # 색상 설정 (투명도 추가)
+    colors = ['rgba(255, 255, 255, 0.15)' if word != target_word else 'rgba(255, 20, 147, 0.9)' for word in unique_words]
     # 가장 가까운 단어 10개 찾기
     if target_word in model.wv:
         similar_words = model.wv.most_similar(target_word, topn=10)
         similar_word_indices = [unique_words.index(word) for word, _ in similar_words]
         for idx in similar_word_indices:
+            colors[idx] = 'rgba(255, 165, 0, 1)'  # 가까운 단어들을 주황색으로 표시
     # 가장 먼 단어 10개 찾기
     if target_word in model.wv:
         all_words = model.wv.index_to_key  # 모델에 포함된 모든 단어 리스트
+        dissimilar_words = sorted(
+            [(word, model.wv.similarity(target_word, word)) for word in all_words if word != target_word],
+            key=lambda x: x[1]
+        )[:10]  # 유사도가 가장 낮은 10개 단어 선택
         dissimilar_word_indices = [unique_words.index(word) for word, _ in dissimilar_words]
         for idx in dissimilar_word_indices:
+            colors[idx] = 'rgba(138, 43, 226, 0.8)'  # 가장 먼 단어들을 보라색으로 표시
     # Plotly를 사용한 3D 산점도 생성
     fig = go.Figure(data=[go.Scatter3d(
         text=unique_words,
         textposition="top center",
         marker=dict(
+            size=4,
             color=colors,
         )
     )])
     fig.update_layout(
         title="Word Embeddings 3D Visualization",
         scene=dict(
+            xaxis_title="X",
+            yaxis_title="Y",
+            zaxis_title="Z"
         ),
+        width=800,
+        height=800
     )
     # 가장 가까운 단어 10개 목록 생성
     if target_word in model.wv:
         similar_words_text = "가장 가까운 단어 10개:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in similar_words])
+    dissimilar_words_text = ""
+    if target_word in model.wv:
+        dissimilar_words_text = "가장 먼 단어 10개:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in dissimilar_words])
+    return fig, similar_words_text, dissimilar_words_text
 # Gradio 인터페이스 수정
             dissimilar_words_output = gr.Textbox(label="유사하지 않은 단어", interactive=False, lines=5)
     submit_btn.click(
+        fn=process_model,
         inputs=[word_input],
         outputs=[plot_output, similar_words_output, dissimilar_words_output]
     )