Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import json | |
| import os | |
| import uuid | |
| import glob | |
| from datetime import datetime | |
| import numpy as np | |
| import platform | |
| import networkx as nx | |
| import plotly.graph_objects as go | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import plotly | |
| import matplotlib.pyplot as plt | |
| import matplotlib.font_manager as fm | |
| from sklearn.manifold import TSNE | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
| # --- (μ΄μ μ½λλ λμΌ) --- | |
| # νμ΄μ§ μ€μ | |
# Page configuration.
st.set_page_config(
    page_title="νκ΅μ΄ λ¨μ΄ μλ―Έ λ€νΈμν¬ μκ°ν",
    page_icon="π€",
    layout="wide",
)

# Folder locations: bundled word lists and user uploads.
DATA_FOLDER = 'data'
UPLOAD_FOLDER = 'uploads'

# exist_ok=True replaces the former exists()-then-makedirs() pair, which
# could raise FileExistsError when two sessions start at the same moment.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Session-state defaults; a key is only written when it is not present yet,
# so values set by earlier reruns survive.
_SESSION_DEFAULTS = {
    'model': None,
    'embeddings_cache': {},
    'graph_cache': {},
    'data_files': {},
    'selected_files': [],
    'threshold': 0.7,
    'generate_clicked': False,
    'fig': None,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# --- (function definitions unchanged: set_korean_font, load_words_from_json, ...) ---
# --- Korean font setup function ---
def set_korean_font():
    """Configure a Korean-capable Matplotlib font and return the Plotly font name.

    Windows and macOS use a fixed font name. On Linux the NanumGothic TTF at
    its usual path is registered when present; otherwise the Matplotlib font
    list is searched for any 'Nanum' font, then for the Windows/macOS names.
    When nothing is found Matplotlib is reset to its defaults.

    Returns:
        The font family name to hand to Plotly ('sans-serif' when no Korean
        font was found).
    """
    fixed_by_os = {"Windows": "Malgun Gothic", "Darwin": "AppleGothic"}
    os_name = platform.system()
    mpl_font = None
    plotly_font_name = None

    if os_name in fixed_by_os:
        mpl_font = plotly_font_name = fixed_by_os[os_name]
    elif os_name == "Linux":
        nanum_path = "/usr/share/fonts/truetype/nanum/NanumGothic.ttf"
        if os.path.exists(nanum_path):
            props = fm.FontProperties(fname=nanum_path)
            fm.fontManager.addfont(nanum_path)
            mpl_font = props.get_name()
            plotly_font_name = "NanumGothic"
        else:
            try:
                installed = [entry.name for entry in fm.fontManager.ttflist]
                nanum_match = next((n for n in installed if 'Nanum' in n), None)
                if nanum_match is not None:
                    mpl_font = plotly_font_name = nanum_match
                else:
                    # Fonts from other platforms, rarely installed on Linux.
                    for candidate in ("Malgun Gothic", "AppleGothic"):
                        if candidate in installed:
                            mpl_font = plotly_font_name = candidate
                            break
            except Exception as e:
                print(f"Linux font search error: {e}")
                mpl_font = None
        if not mpl_font:
            mpl_font = None
            plotly_font_name = None

    # Apply the Matplotlib font, falling back to defaults on any failure.
    if mpl_font:
        try:
            plt.rc('font', family=mpl_font)
            plt.rc('axes', unicode_minus=False)
            print(f"Matplotlib font set to: {mpl_font}")
        except Exception as e:
            print(f"Failed to set Matplotlib font '{mpl_font}': {e}")
            plt.rcdefaults()
            plt.rc('axes', unicode_minus=False)
    else:
        print("No suitable Korean font found for Matplotlib. Using default.")
        plt.rcdefaults()
        plt.rc('axes', unicode_minus=False)

    plotly_font_name = plotly_font_name or 'sans-serif'
    print(f"Plotly font name to use: {plotly_font_name}")
    return plotly_font_name
# --- Data loading functions ---
def load_words_from_json(filepath):
    """Load the 'word' values from a JSON file holding a list of objects.

    Only entries that are dicts whose 'word' value is a non-blank string are
    kept. Non-string values (numbers, lists, ...) are skipped because later
    steps sort the words and iterate over their characters.

    Args:
        filepath: Path to a UTF-8 encoded JSON file.

    Returns:
        A list of word strings in file order, or None when the file cannot be
        read or parsed, is not a top-level list, or contains no usable word.
        Problems are reported through st.warning / st.error.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if not isinstance(data, list):
            st.error(f"μ€λ₯: νμΌ '{os.path.basename(filepath)}'μ μ΅μμ νμμ΄ λ¦¬μ€νΈκ° μλλλ€.")
            return None
        words = []
        for item in data:
            if not isinstance(item, dict):
                continue
            word = item.get('word')
            # Skip missing, non-string and whitespace-only values.
            if isinstance(word, str) and word.strip():
                words.append(word)
        if not words:
            st.warning(f"κ²½κ³ : νμΌ '{os.path.basename(filepath)}'μμ 'word' ν€λ₯Ό κ°μ§ μ ν¨ν λ°μ΄ν°λ₯Ό μ°Ύμ μ μμ΅λλ€.")
            return None
        return words
    except FileNotFoundError:
        st.error(f"μ€λ₯: νμΌ '{filepath}'λ₯Ό μ°Ύμ μ μμ΅λλ€.")
        return None
    except json.JSONDecodeError as e:
        st.error(f"μ€λ₯: νμΌ '{os.path.basename(filepath)}'μ JSON νμμ΄ μλͺ»λμμ΅λλ€. μ€λ₯: {e}")
        return None
    except Exception as e:
        # Catch-all kept on purpose: this is a UI boundary that reports any
        # other read problem (permissions, encoding) instead of crashing.
        st.error(f"'{os.path.basename(filepath)}' λ°μ΄ν° λ‘λ© μ€ μ€λ₯ λ°μ: {e}")
        return None
def scan_data_files():
    """Scan the data and upload folders for usable JSON word files.

    Returns:
        A dict keyed by '<type>_<file name>' where type is 'default' or
        'uploaded'. Each value holds the file path, name, word count, type
        and the first five words. Files that yield no words are left out.
    """
    catalog = {}
    for folder, kind in ((DATA_FOLDER, 'default'), (UPLOAD_FOLDER, 'uploaded')):
        try:
            for json_path in glob.glob(os.path.join(folder, '*.json')):
                base_name = os.path.basename(json_path)
                loaded = load_words_from_json(json_path)
                if not loaded:
                    continue
                catalog[f"{kind}_{base_name}"] = {
                    'path': json_path,
                    'name': base_name,
                    'word_count': len(loaded),
                    'type': kind,
                    'sample_words': loaded[:5],
                }
        except Exception as e:
            # A failure in one folder must not prevent scanning the other.
            if kind == 'default':
                st.error(f"κΈ°λ³Έ λ°μ΄ν° ν΄λ μ€μΊ μ€ μ€λ₯: {e}")
            else:
                st.error(f"μ λ‘λ ν΄λ μ€μΊ μ€ μ€λ₯: {e}")
    return catalog
def merge_word_lists(file_ids):
    """Load the words of every selected file and merge them.

    An id missing from the session's file catalog triggers one rescan of the
    folders before it is reported as not found.

    Args:
        file_ids: Iterable of file ids as produced by scan_data_files.

    Returns:
        Sorted list of unique words; an empty list when nothing is selected.
    """
    collected = []
    if not file_ids:
        return []

    known_files = st.session_state.get('data_files', {})
    for fid in file_ids:
        if fid in known_files:
            path = known_files[fid]['path']
        else:
            st.warning(f"μ νλ νμΌ ID '{fid}'λ₯Ό νμ¬ νμΌ λͺ©λ‘μμ μ°Ύμ μ μμ΅λλ€. λͺ©λ‘μ μλ‘κ³ μΉ¨ν©λλ€.")
            # Refresh the catalog and retry once.
            st.session_state.data_files = scan_data_files()
            if fid not in st.session_state.data_files:
                st.error(f"νμΌ '{fid}'λ₯Ό μ¬μ ν μ°Ύμ μ μμ΅λλ€.")
                continue
            path = st.session_state.data_files[fid]['path']
        batch = load_words_from_json(path)
        if batch:
            collected.extend(batch)

    return sorted(set(collected))
def encode_words(words, normalize=True):
    """Turn words into character-frequency vectors.

    The vocabulary is the sorted set of characters found in ``words``. Each
    word becomes a vector of per-character counts divided by the word length
    (a term-frequency vector; no inverse-document-frequency weighting is
    applied).

    Args:
        words: Sequence of strings.
        normalize: When True, scale every non-zero vector to unit L2 norm.

    Returns:
        Array of shape (len(words), n_characters), or an empty 1-D array when
        there are no words or no characters at all.
    """
    if not words:
        return np.array([])

    alphabet = sorted({ch for w in words for ch in w})
    if not alphabet:
        return np.array([])
    column_of = {ch: col for col, ch in enumerate(alphabet)}

    vectors = []
    for w in words:
        vec = np.zeros(len(alphabet))
        if w:
            counts = {}
            for ch in w:
                counts[ch] = counts.get(ch, 0) + 1
            for ch, count in counts.items():
                vec[column_of[ch]] = count / len(w)
            if normalize:
                length = np.linalg.norm(vec)
                if length > 0:
                    vec = vec / length
        vectors.append(vec)
    return np.array(vectors)
def _pca_coordinates(embeddings, n_dims=3):
    """Project embeddings onto at most ``n_dims`` principal components.

    The component count is capped by the number of samples and of features
    (PCA rejects anything larger). Axes that cannot be filled stay at zero,
    so the result always has shape (n_samples, n_dims).
    """
    from sklearn.decomposition import PCA

    n_samples, n_features = embeddings.shape
    n_components = min(n_dims, n_samples, n_features)
    projected = PCA(n_components=n_components, random_state=42).fit_transform(embeddings)
    coords = np.zeros((n_samples, n_dims))
    coords[:, :projected.shape[1]] = projected
    return coords


def _tsne_coordinates(embeddings):
    """Run a 3-component t-SNE with parameters scaled to the sample count."""
    import inspect

    n_samples = embeddings.shape[0]
    # scikit-learn requires perplexity < n_samples; the previous lower bound
    # of 5 made t-SNE fail for 4 or 5 words.
    perplexity = min(30, n_samples - 1)
    max_iter = max(250, min(1000, n_samples * 5))
    learning_rate = max(10, min(200, n_samples / 12)) if n_samples > 12 else 'auto'
    # The iteration keyword was renamed from n_iter to max_iter in newer
    # scikit-learn releases; use whichever this installation accepts.
    accepted = inspect.signature(TSNE.__init__).parameters
    iter_keyword = 'max_iter' if 'max_iter' in accepted else 'n_iter'
    tsne = TSNE(
        n_components=3,
        random_state=42,
        perplexity=perplexity,
        init='pca',
        learning_rate=learning_rate,
        n_jobs=-1,
        **{iter_keyword: max_iter},
    )
    return tsne.fit_transform(embeddings)


def generate_graph(file_ids, similarity_threshold=0.7):
    """Build the 3D word-similarity network figure for the selected files.

    Steps: merge the words of ``file_ids``, embed them with encode_words,
    reduce to 3D (t-SNE, or PCA for three words or fewer and as a fallback
    when t-SNE raises), connect word pairs whose cosine similarity is at
    least ``similarity_threshold``, and draw the result with Plotly.

    Figures are cached in st.session_state.graph_cache per file selection and
    threshold; embeddings are cached per file selection.

    Args:
        file_ids: Ids of the data files to use.
        similarity_threshold: Minimum cosine similarity for an edge.

    Returns:
        A plotly.graph_objects.Figure (edge trace first, node trace second),
        or None when no graph can be built; the reason is shown via st.error
        or st.warning.
    """
    if not file_ids:
        st.error("κ·Έλνλ₯Ό μμ±ν νμΌμ΄ μ νλμ§ μμμ΅λλ€.")
        return None

    # Sorted ids make the key independent of selection order.
    cache_key = f"{'-'.join(sorted(file_ids))}_{similarity_threshold}"
    if cache_key in st.session_state.graph_cache:
        return st.session_state.graph_cache[cache_key]

    plotly_font = set_korean_font()

    word_list = merge_word_lists(file_ids)
    if not word_list:
        st.error("μ νλ νμΌμμ μ ν¨ν λ¨μ΄λ₯Ό λ‘λν μ μμ΅λλ€.")
        return None
    if len(word_list) < 2:
        st.warning("κ·Έλνλ₯Ό μμ±νλ €λ©΄ μ΅μ 2κ° μ΄μμ κ³ μ λ¨μ΄κ° νμν©λλ€.")
        return None

    # --- Embeddings (cached per file selection) ---
    with st.spinner('λ¨μ΄ μλ² λ© μμ± μ€...'):
        embedding_cache_key = '-'.join(sorted(file_ids))
        cached = st.session_state.embeddings_cache.get(embedding_cache_key)
        # Reuse the cache only when it was built from the same words.
        if cached is not None and sorted(cached[0]) == sorted(word_list):
            embeddings = cached[1]
        else:
            embeddings = encode_words(word_list, normalize=True)
            st.session_state.embeddings_cache[embedding_cache_key] = (word_list, embeddings)

    if embeddings is None or embeddings.ndim != 2 or embeddings.size == 0:
        st.error("λ¨μ΄ μλ² λ© μμ±μ μ€ν¨νμ΅λλ€.")
        return None

    # --- 3D coordinates ---
    n_samples = embeddings.shape[0]
    with st.spinner('λ¨μ΄ μ’ν κ³μ° μ€ (t-SNE)...'):
        if n_samples <= 3:
            st.warning(f"t-SNEλ μ΅μ 4κ°μ λ¨μ΄κ° νμν©λλ€ (νμ¬ {n_samples}κ°). PCAλ₯Ό μ¬μ©ν©λλ€.")
            embeddings_3d = _pca_coordinates(embeddings)
        else:
            try:
                embeddings_3d = _tsne_coordinates(embeddings)
            except Exception as e:
                # Best effort: any t-SNE failure degrades to PCA.
                st.error(f"t-SNE μ€ν μ€ μ€λ₯ λ°μ: {e}. PCAλ‘ λ체ν©λλ€.")
                embeddings_3d = _pca_coordinates(embeddings)

    # --- Graph: one node per word, edges for similar pairs ---
    G = nx.Graph()
    for i, word in enumerate(word_list):
        G.add_node(word, pos=(embeddings_3d[i, 0], embeddings_3d[i, 1], embeddings_3d[i, 2]))

    with st.spinner('λ¨μ΄ κ° μ μ¬λ κ³μ° λ° μ°κ²°(μ£μ§) μμ± μ€...'):
        similarity_matrix = cosine_similarity(embeddings)
        # Upper triangle only (k=1): no self-loops, each pair once.
        rows, cols = np.triu_indices(n_samples, k=1)
        linked = similarity_matrix[rows, cols] >= similarity_threshold
        for i, j in zip(rows[linked], cols[linked]):
            G.add_edge(word_list[i], word_list[j], weight=similarity_matrix[i, j])

    # --- Edge trace: a None entry after each pair breaks the line ---
    edge_x, edge_y, edge_z = [], [], []
    for source, target in G.edges():
        x0, y0, z0 = G.nodes[source]['pos']
        x1, y1, z1 = G.nodes[target]['pos']
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
        edge_z.extend([z0, z1, None])

    edge_trace = go.Scatter3d(
        x=edge_x, y=edge_y, z=edge_z,
        mode='lines',
        line=dict(width=1, color='#888'),
        hoverinfo='none',
    )

    # --- Node trace: marker size grows with log(1 + degree), clipped to 5..20 ---
    degrees = [G.degree(word) for word in word_list]
    node_sizes = np.clip(np.log1p(np.array(degrees)) * 3 + 6, 5, 20)
    node_z = embeddings_3d[:, 2].tolist()

    node_trace = go.Scatter3d(
        x=embeddings_3d[:, 0].tolist(),
        y=embeddings_3d[:, 1].tolist(),
        z=node_z,
        mode='markers+text',
        text=list(word_list),
        hovertext=[f'{word}<br>μ°κ²° μ: {degree}' for word, degree in zip(word_list, degrees)],
        hoverinfo='text',
        textposition='top center',
        textfont=dict(size=10, color='black', family=plotly_font),
        marker=dict(
            size=node_sizes,
            color=node_z,
            colorscale='Viridis',
            opacity=0.9,
            # title.side replaces the deprecated colorbar.titleside attribute.
            colorbar=dict(
                thickness=15,
                title=dict(text='Node Depth (Z)', side='right'),
                xanchor='left',
            ),
        ),
    )

    # Names of the files used, for the figure title.
    file_names_used = []
    if 'data_files' in st.session_state:
        file_names_used = [
            st.session_state.data_files[fid]['name']
            for fid in file_ids
            if fid in st.session_state.data_files
        ]
    file_info_str = ", ".join(file_names_used) if file_names_used else "μ μ μμ"

    axis_style = dict(
        showticklabels=False,
        backgroundcolor="rgb(240, 240, 240)",
        gridcolor="white",
        zerolinecolor="white",
    )
    layout = go.Layout(
        title=dict(
            text=f'<b>μ΄ν μλ―Έ μ μ¬μ± κΈ°λ° 3D κ·Έλν</b><br>Threshold: {similarity_threshold:.2f} | λ°μ΄ν°: {file_info_str}',
            font=dict(size=16, family=plotly_font),
            x=0.5,
            xanchor='center',
        ),
        showlegend=False,
        margin=dict(l=10, r=10, b=10, t=80),
        scene=dict(
            xaxis=dict(title='TSNE-1', **axis_style),
            yaxis=dict(title='TSNE-2', **axis_style),
            zaxis=dict(title='TSNE-3', **axis_style),
            aspectratio=dict(x=1, y=1, z=0.8),
            camera=dict(eye=dict(x=1.2, y=1.2, z=0.8)),
        ),
        hovermode='closest',
    )

    fig = go.Figure(data=[edge_trace, node_trace], layout=layout)
    st.session_state.graph_cache[cache_key] = fig
    return fig
def handle_uploaded_file(uploaded_file):
    """Save an uploaded JSON file, validate it and register it.

    The file is written to UPLOAD_FOLDER under '<uuid>_<original name>'. Only
    the final path component of the client-supplied name is used, so a name
    containing directory separators cannot escape the upload folder. A file
    without usable words is deleted again.

    Args:
        uploaded_file: Object returned by st.file_uploader, or None.

    Returns:
        The file id of the new entry in st.session_state.data_files, or None
        when nothing was uploaded or processing failed.
    """
    if uploaded_file is None:
        return None

    unique_id = str(uuid.uuid4())
    # The name comes from the client: strip any directory part.
    safe_name = os.path.basename(uploaded_file.name.replace('\\', '/')) or 'upload.json'
    file_name = f"{unique_id}_{safe_name}"
    file_path = os.path.join(UPLOAD_FOLDER, file_name)

    try:
        with open(file_path, 'wb') as f:
            f.write(uploaded_file.getbuffer())
        st.info(f"νμΌ '{uploaded_file.name}' ({file_name}) μ μ₯ μλ£. λ΄μ© κ²μ¦ μ€...")

        # Validate by loading the words back from disk.
        words = load_words_from_json(file_path)
        if not words:
            try:
                os.remove(file_path)
                st.error(f"μ λ‘λλ νμΌ '{uploaded_file.name}'μμ μ ν¨ν 'word' λ°μ΄ν°λ₯Ό μ°Ύμ μ μμ΅λλ€. νμΌ νμ(UTF-8 μΈμ½λ© JSON λ°°μ΄, κ° κ°μ²΄μ 'word' ν€)μ νμΈν΄μ£ΌμΈμ. νμΌμ΄ μμ λμμ΅λλ€.")
            except OSError as e:
                st.error(f"μ ν¨νμ§ μμ νμΌμ μμ νλ μ€ μ€λ₯ λ°μ: {e}")
            return None

        st.success(f"νμΌ '{uploaded_file.name}' κ²μ¦ μλ£. {len(words)}κ°μ λ¨μ΄λ₯Ό μ°Ύμμ΅λλ€.")

        # Rescan so the session catalog contains the new file.
        st.session_state.data_files = scan_data_files()

        # Same id scheme as scan_data_files uses for uploads.
        new_file_id = f"uploaded_{file_name}"
        if new_file_id in st.session_state.data_files:
            return new_file_id
        st.error("νμΌ λͺ©λ‘ μ λ°μ΄νΈ νμλ μ νμΌ IDλ₯Ό μ°Ύμ§ λͺ»νμ΅λλ€.")
        return None
    except Exception as e:
        st.error(f"νμΌ μ λ‘λ λ° μ²λ¦¬ μ€ μ€λ₯ λ°μ: {e}")
        # Do not leave a partially written file behind.
        try:
            if os.path.exists(file_path):
                os.remove(file_path)
        except OSError as del_e:
            st.warning(f"μ€λ₯ λ°μ ν νμΌ μμ μ€ν¨: {del_e}")
        return None
def delete_file(file_id):
    """Delete an uploaded data file and everything cached for it.

    Only entries of type 'uploaded' may be deleted. On success the file is
    removed from disk (if still present), from st.session_state.data_files,
    from the graph and embedding caches (every key containing the id) and
    from the current selection.

    Args:
        file_id: Id of the file in st.session_state.data_files.

    Returns:
        True when the entry was removed, False otherwise.
    """
    if file_id not in st.session_state.get('data_files', {}):
        st.error('μμ ν νμΌμ μ°Ύμ μ μμ΅λλ€.')
        return False

    entry = st.session_state.data_files[file_id]
    if entry.get('type') != 'uploaded':
        st.error('κΈ°λ³Έ λ°μ΄ν° νμΌμ μμ ν μ μμ΅λλ€.')
        return False

    target_path = entry.get('path')
    display_name = entry.get('name', 'μ μ μμ')
    if not target_path:
        st.error(f"νμΌ '{display_name}'μ κ²½λ‘ μ λ³΄κ° μμ΅λλ€.")
        return False

    try:
        if os.path.exists(target_path):
            os.remove(target_path)
            st.info(f"νμΌ μμ€ν μμ '{display_name}' μμ μλ£.")
        else:
            st.warning(f"νμΌ μμ€ν μ '{display_name}'({target_path})μ΄(κ°) μ΄λ―Έ μ‘΄μ¬νμ§ μμ΅λλ€.")

        del st.session_state.data_files[file_id]

        # Drop every cached graph / embedding whose key mentions this file.
        for cache_name in ('graph_cache', 'embeddings_cache'):
            cache = getattr(st.session_state, cache_name)
            for stale_key in [k for k in cache if file_id in k]:
                del cache[stale_key]

        if file_id in st.session_state.selected_files:
            st.session_state.selected_files.remove(file_id)

        st.success(f"νμΌ '{display_name}' κ΄λ ¨ μ 보 λ° μΊμκ° μμ λμμ΅λλ€.")
        return True
    except Exception as e:
        st.error(f"νμΌ μμ μ€ μ€λ₯ λ°μ: {e}")
        return False
def clear_cache():
    """Reset the graph and embedding caches and drop the displayed figure."""
    for cache_name in ('graph_cache', 'embeddings_cache'):
        st.session_state[cache_name] = {}
    st.session_state['fig'] = None
    st.success('κ·Έλν λ° μλ² λ© μΊμκ° μ΄κΈ°νλμμ΅λλ€.')
# --- App execution starts here ---
# Scan the data folders when the catalog is missing or empty.
if not st.session_state.get('data_files'):
    st.session_state.data_files = scan_data_files()

# Title and introduction.
st.title('νκ΅μ΄ λ¨μ΄ μλ―Έ λ€νΈμν¬ μκ°ν')
st.markdown("""
μ΄ λꡬλ μ 곡λ JSON νμΌμμ νκ΅μ΄ λ¨μ΄ λͺ©λ‘μ μ½μ΄λ€μ¬, λ¨μ΄ κ°μ μλ―Έμ μ μ¬μ±(μ¬κΈ°μλ λ¬Έμ κ΅¬μ± κΈ°λ° μ μ¬μ±)μ κ³μ°νκ³ ,
κ·Έ κ΄κ³λ₯Ό μΈν°λν°λΈν 3D λ€νΈμν¬ κ·Έλνλ‘ μκ°νν©λλ€.
""")

# --- Sidebar ---
st.sidebar.title('βοΈ μ€μ λ° μ μ΄')

# Similarity threshold slider.
threshold = st.sidebar.slider(
    'μ μ¬λ μκ³κ° (Similarity Threshold)',
    min_value=0.1,
    max_value=0.95,
    value=st.session_state.threshold,
    step=0.05,
    help='μ΄ κ°λ³΄λ€ μ μ¬λκ° λμ λ¨μ΄λ€λ§ μ°κ²°μ (μ£μ§)μΌλ‘ μ΄μ΄μ§λλ€. κ°μ΄ λμμλ‘ μ°κ²°μ΄ λ μ격ν΄μ§λλ€.',
)

# A changed threshold invalidates the displayed figure and the click flag.
threshold_changed = threshold != st.session_state.threshold
if threshold_changed:
    st.session_state.threshold = threshold
    st.session_state.fig = None
    st.session_state.generate_clicked = False

st.sidebar.divider()
# File upload.
st.sidebar.header('π νμΌ μ λ‘λ')
uploaded_file = st.sidebar.file_uploader(
    "JSON νμΌ μ λ‘λ",
    type=['json'],
    help="λ¨μ΄ λͺ©λ‘μ΄ ν¬ν¨λ JSON νμΌμ μ λ‘λνμΈμ. νμ: [{'word': 'λ¨μ΄1'}, {'word': 'λ¨μ΄2'}, ...]"
)

# The uploader keeps returning the same file on every rerun. Remember which
# upload was already handled so it is not saved again under a new UUID each
# time any widget changes.
if 'last_upload_signature' not in st.session_state:
    st.session_state.last_upload_signature = None

if uploaded_file is None:
    # Uploader cleared: allow the same file to be uploaded again later.
    st.session_state.last_upload_signature = None
else:
    upload_signature = (uploaded_file.name, uploaded_file.size)
    if upload_signature != st.session_state.last_upload_signature:
        st.session_state.last_upload_signature = upload_signature
        # Process immediately; no separate "upload" button.
        with st.spinner("μ λ‘λλ νμΌ μ²λ¦¬ μ€..."):
            new_file_id = handle_uploaded_file(uploaded_file)
        if new_file_id:
            st.sidebar.success(f"νμΌ '{uploaded_file.name}' μ λ‘λ λ° μ²λ¦¬ μλ£!")
            # Select the new file automatically.
            if new_file_id not in st.session_state.selected_files:
                st.session_state.selected_files.append(new_file_id)
        # On failure handle_uploaded_file has already shown the error.

st.sidebar.divider()
# File selection area: one checkbox and one info expander per known file,
# followed by the "generate graph" and "clear cache" buttons.
st.sidebar.header('ποΈ λ°μ΄ν° νμΌ μ ν')
if st.session_state.data_files:
    st.sidebar.markdown("**μ¬μ©ν νμΌμ μ ννμΈμ (λ€μ€ μ ν κ°λ₯):**")
    # Ids whose checkbox is ticked in this run.
    selected_files_temp = []
    # Show files ordered by name.
    sorted_file_ids = sorted(st.session_state.data_files.keys(), key=lambda fid: st.session_state.data_files[fid]['name'])
    for file_id in sorted_file_ids:
        if file_id not in st.session_state.data_files: continue  # deleted earlier in this loop
        file_info = st.session_state.data_files[file_id]
        file_label = f"{file_info['name']} ({file_info['word_count']} λ¨μ΄)"
        file_type_tag = "[κΈ°λ³Έ]" if file_info['type'] == 'default' else "[μ λ‘λ]"
        label_full = f"{file_label} {file_type_tag}"
        # Initial checkbox value comes from the session's selection.
        is_selected = file_id in st.session_state.selected_files
        checkbox_key = f"cb_{file_id}"  # unique widget key per file
        # No callback: the ticked ids are collected here and compared with the
        # session state after the loop.
        if st.sidebar.checkbox(label_full, value=is_selected, key=checkbox_key):
            selected_files_temp.append(file_id)
        # Sample words, plus a delete button for uploaded files only.
        with st.sidebar.expander("νμΌ μ 보 보기", expanded=False):
            st.markdown(f"**μν λ¨μ΄:** `{'`, `'.join(file_info['sample_words'])}`")
            if file_info['type'] == 'uploaded':
                delete_button_key = f"del_{file_id}"
                if st.button('ποΈ μ΄ νμΌ μμ ', key=delete_button_key, help=f"'{file_info['name']}' νμΌμ μꡬμ μΌλ‘ μμ ν©λλ€."):
                    with st.spinner(f"'{file_info['name']}' μμ μ€..."):
                        if delete_file(file_id):
                            # The checkbox above may already have added the id.
                            if file_id in selected_files_temp:
                                selected_files_temp.remove(file_id)
                        else:
                            st.error("νμΌ μμ μ μ€ν¨νμ΅λλ€.")
    # Sync the selection: update the session state only when the ticked set
    # differs (order-insensitive), and invalidate the current figure.
    if sorted(selected_files_temp) != sorted(st.session_state.selected_files):
        st.session_state.selected_files = selected_files_temp
        st.session_state.fig = None
        st.session_state.generate_clicked = False
    st.sidebar.divider()
    # The generate button is only offered while at least one file is selected.
    if st.session_state.selected_files:
        if st.sidebar.button('π κ·Έλν μμ±/μ λ°μ΄νΈ', key='generate_button', type="primary"):
            # Re-check the selection before raising the flag.
            if st.session_state.selected_files:
                st.session_state.generate_clicked = True
            else:
                st.sidebar.warning('κ·Έλνλ₯Ό μμ±ν νμΌμ λ¨Όμ μ νν΄μ£ΌμΈμ.')
                st.session_state.generate_clicked = False
    else:
        st.sidebar.warning('κ·Έλνλ₯Ό μμ±νλ €λ©΄ μ΅μ 1κ° μ΄μμ νμΌμ μ νν΄μ£ΌμΈμ.')
else:
    st.sidebar.warning('μ¬μ© κ°λ₯ν λ°μ΄ν° νμΌμ΄ μμ΅λλ€. νμΌμ μ λ‘λνκ±°λ `data` ν΄λμ JSON νμΌμ μΆκ°νμΈμ.')
# Cache reset button (always shown).
if st.sidebar.button('π μΊμ μ΄κΈ°ν', key='clear_cache_button'):
    clear_cache()
# --- Main content area ---
st.header("π 3D λ¨μ΄ λ€νΈμν¬ μκ°ν")
# A graph is (re)generated when files are selected and either the generate
# button was clicked or no figure is stored in the session.
if st.session_state.selected_files:
    # Truthy when the click flag is set, or when there is no figure yet
    # (e.g. after a threshold or selection change reset it to None).
    should_generate_graph = st.session_state.generate_clicked or \
        (st.session_state.fig is None and st.session_state.selected_files)
    if should_generate_graph:
        with st.spinner('κ·Έλν μμ± μ€... μ μλ§ κΈ°λ€λ €μ£ΌμΈμ.'):
            try:
                fig = generate_graph(st.session_state.selected_files, st.session_state.threshold)
                # Store the result (may be None when generation failed).
                st.session_state.fig = fig
                st.session_state.generate_clicked = False
            except Exception as e:
                st.error(f"κ·Έλν μμ± μ€ μ€λ₯ λ°μ: {e}")
                st.session_state.fig = None
                st.session_state.generate_clicked = False
    # Show the stored figure, if any.
    if st.session_state.get('fig') is not None:
        st.plotly_chart(st.session_state.fig, use_container_width=True)
        # Summary of the displayed graph.
        try:
            selected_file_names = [st.session_state.data_files[fid]['name'] for fid in st.session_state.selected_files if fid in st.session_state.data_files]
            total_word_count = sum(st.session_state.data_files[fid]['word_count'] for fid in st.session_state.selected_files if fid in st.session_state.data_files)
            # Node count = points of the second trace; edge count = first
            # trace length // 3 (each edge is stored as start, end, None).
            num_nodes = len(st.session_state.fig.data[1].x) if len(st.session_state.fig.data) > 1 and hasattr(st.session_state.fig.data[1], 'x') else 0
            num_edges = len(st.session_state.fig.data[0].x) // 3 if len(st.session_state.fig.data) > 0 and hasattr(st.session_state.fig.data[0], 'x') and st.session_state.fig.data[0].x else 0
            st.info(f"""
**νμ¬ κ·Έλν μ 보**
- **λ°μ΄ν° νμΌ:** {', '.join(selected_file_names)}
- **κ³ μ λ¨μ΄ μ (λ Έλ):** {num_nodes} κ°
- **μ°κ²°μ μ (μ£μ§):** {num_edges} κ° (μ μ¬λ β₯ {st.session_state.threshold:.2f})
""")
        except Exception as info_e:
            st.warning(f"κ·Έλν μ 보 νμ μ€ μ€λ₯: {info_e}")
        # Usage help for the 3D view.
        with st.expander("π‘ κ·Έλν μ‘°μ λ°©λ²"):
            st.markdown("""
- **νλ/μΆμ:** λ§μ°μ€ ν μ€ν¬λ‘€ λλ ν°μΉμ€ν¬λ¦°μμ λ μκ°λ½ μ¬μ©
- **νμ :** λ§μ°μ€ μΌμͺ½ λ²νΌ λλ₯Έ μνλ‘ λλκ·Έ
- **μ΄λ (Pan):** λ§μ°μ€ μ€λ₯Έμͺ½ λ²νΌ λλ₯Έ μνλ‘ λλκ·Έ λλ Shift + μΌμͺ½ λ²νΌ λλκ·Έ
- **λ¨μ΄ μ 보 νμΈ:** λ§μ°μ€ 컀μλ₯Ό λ¨μ΄(λ§μ»€) μμ μ¬λ¦¬λ©΄ λ¨μ΄ μ΄λ¦κ³Ό μ°κ²°λ λ€λ₯Έ λ¨μ΄μ μλ₯Ό λ³Ό μ μμ΅λλ€.
- **ν΄λ° μ¬μ©:** κ·Έλν μ°μΈ‘ μλ¨μ ν΄λ° μμ΄μ½μ μ¬μ©νμ¬ λ€μν 보기 μ΅μ (λ€μ΄λ‘λ, νλ/μΆμ μμ μ§μ λ±)μ νμ©ν μ μμ΅λλ€.
""")
    # NOTE(review): indentation was reconstructed; assuming these two elif
    # branches belong to the "fig is not None" check above. In that reading
    # they look unreachable: selected_files is truthy here, and a None fig
    # makes should_generate_graph truthy. Confirm before relying on them.
    elif not should_generate_graph and not st.session_state.selected_files:
        st.info("π μ¬μ΄λλ°μμ λΆμν λ°μ΄ν° νμΌμ μ νν΄μ£ΌμΈμ.")
    elif not should_generate_graph and st.session_state.selected_files and st.session_state.fig is None:
        st.info("π μ¬μ΄λλ°μμ 'π κ·Έλν μμ±/μ λ°μ΄νΈ' λ²νΌμ ν΄λ¦νμ¬ μκ°νλ₯Ό μμνμΈμ.")
elif not st.session_state.data_files:
    st.warning("νμν λ°μ΄ν° νμΌμ΄ μμ΅λλ€. νμΌμ μ λ‘λνκ±°λ `data` ν΄λμ μ ν¨ν JSON νμΌμ μΆκ°νμΈμ.")
else:
    # Data files exist but none is selected.
    st.info("π μ¬μ΄λλ°μμ λΆμν λ°μ΄ν° νμΌμ μ νν΄μ£ΌμΈμ.")
# --- Footer information ---
st.divider()

# Text describing the processing pipeline.
about_tool_markdown = """
μ΄ λꡬλ λ€μκ³Ό κ°μ κ³Όμ μ ν΅ν΄ νκ΅μ΄ λ¨μ΄ λ€νΈμν¬λ₯Ό μκ°νν©λλ€:
1. **λ°μ΄ν° λ‘λ©:** μ¬μ©μκ° μ 곡ν JSON νμΌμμ 'word' νλλ₯Ό κ°μ§ λ¨μ΄ λͺ©λ‘μ μΆμΆν©λλ€.
2. **λ¨μ΄ μλ² λ©:** κ° λ¨μ΄λ₯Ό κ³ μ°¨μ λ²‘ν° κ³΅κ°μ ννν©λλ€. νμ¬λ **λ¬Έμ κ΅¬μ± κΈ°λ° TF-IDF μ€νμΌ μλ² λ©**μ μ¬μ©νμ¬, λ¨μ΄λ₯Ό μ΄λ£¨λ λ¬Έμλ€μ λΉλλ₯Ό κΈ°λ°μΌλ‘ 벑ν°λ₯Ό μμ±ν©λλ€. (μΆν Word2Vec, FastText λ± μ¬μ νλ ¨λ λͺ¨λΈ μ¬μ© κ°λ₯)
3. **μ°¨μ μΆμ:** κ³ μ°¨μ μλ² λ© λ²‘ν°λ₯Ό μκ°ν κ°λ₯ν 3μ°¨μ 곡κ°μΌλ‘ μΆμν©λλ€. **t-SNE(t-Distributed Stochastic Neighbor Embedding)** μκ³ λ¦¬μ¦μ μ¬μ©νμ¬ λ³΅μ‘ν λ°μ΄ν° ꡬ쑰λ₯Ό μ μ§νλ©΄μ μ°¨μμ μ€μ λλ€. (λ¨μ΄ μκ° μ μ κ²½μ° PCA μ¬μ©)
4. **μ μ¬λ κ³μ°:** 3D 곡κ°μΌλ‘ μΆμνκΈ° μ μ μλ³Έ μλ² λ© λ²‘ν° κ°μ **μ½μ¬μΈ μ μ¬λ(Cosine Similarity)**λ₯Ό κ³μ°νμ¬ λ¨μ΄ μμ μλ―Έμ (μ¬κΈ°μλ ꡬμ±μ ) μ μ¬μ±μ μΈ‘μ ν©λλ€.
5. **κ·Έλν μμ±:** μ€μ λ **μ μ¬λ μκ³κ°(Threshold)** μ΄μμΈ λ¨μ΄ μλ€μ μ°κ²°μ (μ£μ§)μΌλ‘ μ΄μ΄ λ€νΈμν¬ κ·Έλνλ₯Ό ꡬμ±ν©λλ€. κ° λ¨μ΄λ λ Έλ(μ )λ‘ νμλ©λλ€.
6. **3D μκ°ν:** **Plotly λΌμ΄λΈλ¬λ¦¬**λ₯Ό μ¬μ©νμ¬ μμ±λ λ€νΈμν¬ κ·Έλνλ₯Ό μΈν°λν°λΈν 3D 곡κ°μ μκ°νν©λλ€. λ Έλμ μμΉλ t-SNE κ²°κ³Ό μ’νλ₯Ό λ°λ₯΄λ©°, μμμ΄λ ν¬κΈ°λ ZμΆ κ°μ΄λ μ°κ²° μ(degree) λ±μ λ°μν μ μμ΅λλ€.
μ΄λ₯Ό ν΅ν΄ λ¨μ΄λ€μ΄ μλ‘ μΌλ§λ μ μ¬νμ§μ λ°λΌ κ΅°μ§μ μ΄λ£¨κ±°λ μ°κ²°λλ ν¨ν΄μ μκ°μ μΌλ‘ νμν μ μμ΅λλ€.
"""

# Text describing the expected JSON input format.
json_format_markdown = """
μ λ‘λνκ±°λ `data` ν΄λμ λ£λ JSON νμΌμ **UTF-8 μΈμ½λ©**μ΄μ΄μΌ νλ©°, λ€μκ³Ό κ°μ νμμ λ°λΌμΌ ν©λλ€:
```json
[
{
"word": "νκ΅"
},
{
"word": "μ μλ"
},
{
"word": "νμ"
},
{
"word": "κ΅μ€"
},
{
"word": "μ»΄ν¨ν°",
"description": "μ΄ νλλ 무μλ©λλ€"
}
]
```
- νμΌμ μ΅μμ ꡬ쑰λ **λ°°μ΄(List)**μ΄μ΄μΌ ν©λλ€ (`[...]`).
- λ°°μ΄μ κ° μμλ **κ°μ²΄(Dictionary)**μ¬μΌ ν©λλ€ (`{...}`).
- κ° κ°μ²΄λ λ°λμ `"word"`λΌλ ν€λ₯Ό ν¬ν¨ν΄μΌ νλ©°, κ·Έ κ°μ λΆμν **νκ΅μ΄ λ¨μ΄ λ¬Έμμ΄**μ΄μ΄μΌ ν©λλ€.
- `"word"` μΈμ λ€λ₯Έ ν€κ° μμ΄λ 무방νλ, νμ¬ λ²μ μμλ μ¬μ©λμ§ μκ³ λ¬΄μλ©λλ€.
- νμΌ μΈμ½λ©μ΄ UTF-8μ΄ μλ κ²½μ° νκΈμ΄ κΉ¨μ§κ±°λ μ€λ₯κ° λ°μν μ μμ΅λλ€.
"""

with st.expander("βΉοΈ μ΄ μκ°ν λꡬμ λνμ¬"):
    st.markdown(about_tool_markdown)

with st.expander("π JSON νμΌ νμ μλ΄"):
    st.markdown(json_format_markdown)