File size: 1,079 Bytes
11c72a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import numpy as np
import json

def load_beta_matrix(beta_path: str, vocab_path: str):
    """
    Load the beta matrix (T x K x V) and the vocabulary list.

    Args:
        beta_path: path to a file readable by ``np.load`` holding the beta
            array.
        vocab_path: path to a text file with one vocabulary word per line.

    Returns:
        beta: np.ndarray of shape (T, K, V)
        vocab: list of words, each stripped of surrounding whitespace
    """
    beta = np.load(beta_path)  # shape: T x K x V
    # Decode explicitly as UTF-8 so the vocabulary reads the same on every
    # platform instead of depending on the locale's default encoding.
    with open(vocab_path, 'r', encoding='utf-8') as f:
        # Iterate the file object directly; no intermediate readlines() list.
        vocab = [line.strip() for line in f]
    return beta, vocab

def get_top_words_at_time(beta, vocab, topic_id, time, top_n):
    """
    Return the highest-weighted words of one topic at one time step.

    Args:
        beta: array indexed as beta[time, topic_id, word_index].
        vocab: sequence mapping word index -> word.
        topic_id: index of the topic along the second axis of ``beta``.
        time: index of the time step along the first axis of ``beta``.
        top_n: maximum number of words to return.

    Returns:
        List of at most ``top_n`` words ordered from highest to lowest
        weight. Empty when ``top_n`` is not positive.
    """
    # A slice of the form [-0:] selects the whole array, so without this
    # guard top_n == 0 would return every word instead of none.
    if top_n <= 0:
        return []
    topic_beta = beta[time, topic_id, :]
    # Sort ascending, reverse to descending, then keep the leading top_n.
    top_indices = topic_beta.argsort()[::-1][:top_n]
    return [vocab[i] for i in top_indices]

def get_top_words_over_time(beta, vocab, topic_id, top_n):
    """
    Return the highest-weighted words of one topic averaged over all time steps.

    Args:
        beta: array indexed as beta[time, topic_id, word_index].
        vocab: sequence mapping word index -> word.
        topic_id: index of the topic along the second axis of ``beta``.
        top_n: maximum number of words to return.

    Returns:
        List of at most ``top_n`` words ordered from highest to lowest mean
        weight across the time axis. Empty when ``top_n`` is not positive.
    """
    # A slice of the form [-0:] selects the whole array, so without this
    # guard top_n == 0 would return every word instead of none.
    if top_n <= 0:
        return []
    topic_beta = beta[:, topic_id, :]
    # Average each word's weight over the time axis (axis 0 of the slice).
    mean_beta = topic_beta.mean(axis=0)
    # Sort ascending, reverse to descending, then keep the leading top_n.
    top_indices = mean_beta.argsort()[::-1][:top_n]
    return [vocab[i] for i in top_indices]

def load_time_labels(time2id_path):
    """
    Load time labels from a JSON mapping of label -> id, ordered by id.

    Args:
        time2id_path: path to a JSON file containing an object whose keys
            are time labels and whose values are their ids.

    Returns:
        List of time labels sorted by ascending id. If several labels share
        an id, only the last one encountered is kept.
    """
    # Decode explicitly as UTF-8 rather than relying on the locale's
    # default encoding.
    with open(time2id_path, 'r', encoding='utf-8') as f:
        time2id = json.load(f)
    # Invert and sort by id
    id2time = {v: k for k, v in time2id.items()}
    return [id2time[i] for i in sorted(id2time)]