Spaces:
Running
Running
| import numpy as np | |
| import json | |
def load_beta_matrix(beta_path: str, vocab_path: str):
    """Load the beta matrix and the vocabulary list from disk.

    Args:
        beta_path: Path to an array file readable by ``np.load``. The array
            is expected to have shape (T, K, V); the shape is not validated
            here.
        vocab_path: Path to a UTF-8 text file holding one word per line.

    Returns:
        A ``(beta, vocab)`` tuple, where ``beta`` is the object returned by
        ``np.load`` and ``vocab`` is the list of file lines with surrounding
        whitespace stripped.
    """
    beta = np.load(beta_path)  # shape: T x K x V
    # Pin the encoding: the platform default is not UTF-8 everywhere, and a
    # mis-decoded vocabulary would silently produce wrong words.
    with open(vocab_path, "r", encoding="utf-8") as f:
        # Blank lines are kept (as "") so that line i of the file stays at
        # index i of the returned list.
        vocab = [line.strip() for line in f]
    return beta, vocab
def get_top_words_at_time(beta, vocab, topic_id, time, top_n):
    """Return the highest-weighted words of one topic at one time step.

    Args:
        beta: Array indexed as ``beta[time, topic_id, word_index]``.
        vocab: Sequence mapping a word index to its word.
        topic_id: Index of the topic along the second axis of ``beta``.
        time: Index of the time step along the first axis of ``beta``.
        top_n: Maximum number of words to return.

    Returns:
        Up to ``top_n`` words ordered from highest to lowest weight. An empty
        list is returned when ``top_n`` is zero or negative.
    """
    # Guard: ``arr[-0:]`` is the WHOLE array, so without this check a request
    # for zero words would return the full vocabulary.
    if top_n <= 0:
        return []
    topic_beta = beta[time, topic_id, :]
    # argsort is ascending: take the last ``top_n`` indices, then reverse so
    # the largest weight comes first.
    top_indices = topic_beta.argsort()[-top_n:][::-1]
    return [vocab[i] for i in top_indices]
def get_top_words_over_time(beta, vocab, topic_id, top_n):
    """Return the words of a topic with the highest weight averaged over time.

    Args:
        beta: Array indexed as ``beta[time, topic_id, word_index]``.
        vocab: Sequence mapping a word index to its word.
        topic_id: Index of the topic along the second axis of ``beta``.
        top_n: Maximum number of words to return.

    Returns:
        Up to ``top_n`` words ordered from highest to lowest mean weight. An
        empty list is returned when ``top_n`` is zero or negative.
    """
    # Guard: ``arr[-0:]`` is the WHOLE array, so without this check a request
    # for zero words would return the full vocabulary.
    if top_n <= 0:
        return []
    topic_beta = beta[:, topic_id, :]
    # Collapse the time axis so each word gets a single score.
    mean_beta = topic_beta.mean(axis=0)
    # argsort is ascending: take the last ``top_n`` indices, then reverse so
    # the largest mean comes first.
    top_indices = mean_beta.argsort()[-top_n:][::-1]
    return [vocab[i] for i in top_indices]
def load_time_labels(time2id_path):
    """Load time labels from a JSON label-to-id mapping, ordered by id.

    Args:
        time2id_path: Path to a UTF-8 JSON file containing an object that
            maps each time label to its id.

    Returns:
        The list of time labels sorted by ascending id. If several labels
        share one id, only the last one encountered is kept.
    """
    # Pin the encoding: JSON is UTF-8 by specification, while the platform
    # default text encoding may differ.
    with open(time2id_path, "r", encoding="utf-8") as f:
        time2id = json.load(f)
    # Invert and sort by id
    id2time = {v: k for k, v in time2id.items()}
    return [id2time[i] for i in sorted(id2time)]