DTECT / backend /inference /process_beta.py
AdhyaSuman's picture
Initial commit with Git LFS for large files
11c72a2
import numpy as np
import json
def load_beta_matrix(beta_path: str, vocab_path: str):
    """
    Load the beta matrix and the vocabulary list from disk.

    Args:
        beta_path: Path to a NumPy file readable by ``np.load``; expected to
            hold the topic-word array of shape (T, K, V).
        vocab_path: Path to a text file with one vocabulary word per line.

    Returns:
        beta: np.ndarray of shape (T, K, V)
        vocab: list of words, in file order, with surrounding whitespace
            stripped from each line
    """
    beta = np.load(beta_path)  # shape: T x K x V
    # Decode explicitly as UTF-8 so non-ASCII words do not depend on the
    # platform's default locale encoding.
    with open(vocab_path, 'r', encoding='utf-8') as f:
        # Every line is kept (even blank ones) so list positions stay aligned
        # with the last axis of beta.
        vocab = [line.strip() for line in f]
    return beta, vocab
def get_top_words_at_time(beta, vocab, topic_id, time, top_n):
    """
    Return the highest-weighted words of one topic at one time step.

    Args:
        beta: Array indexed as beta[time, topic_id, word_index].
        vocab: Sequence mapping a word index to its word.
        topic_id: Index of the topic along the second axis of beta.
        time: Index of the time step along the first axis of beta.
        top_n: Maximum number of words to return.

    Returns:
        List of up to ``top_n`` words ordered from highest to lowest weight.
        Empty when ``top_n`` is zero or negative.
    """
    # Guard first: a slice of [-0:] would select the WHOLE array rather than
    # nothing, and a negative top_n would produce an arbitrary tail slice.
    if top_n <= 0:
        return []
    topic_beta = beta[time, topic_id, :]
    # argsort is ascending: take the last top_n indices, then reverse them so
    # the largest weight comes first.
    top_indices = topic_beta.argsort()[-top_n:][::-1]
    return [vocab[i] for i in top_indices]
def get_top_words_over_time(beta, vocab, topic_id, top_n):
    """
    Return the highest-weighted words of one topic averaged over all time steps.

    Args:
        beta: Array indexed as beta[time, topic_id, word_index].
        vocab: Sequence mapping a word index to its word.
        topic_id: Index of the topic along the second axis of beta.
        top_n: Maximum number of words to return.

    Returns:
        List of up to ``top_n`` words ordered from highest to lowest mean
        weight. Empty when ``top_n`` is zero or negative.
    """
    # Guard first: a slice of [-0:] would select the WHOLE array rather than
    # nothing, and a negative top_n would produce an arbitrary tail slice.
    if top_n <= 0:
        return []
    topic_beta = beta[:, topic_id, :]
    # Collapse the time axis so each word gets a single average weight.
    mean_beta = topic_beta.mean(axis=0)
    # argsort is ascending: take the last top_n indices, then reverse them so
    # the largest mean weight comes first.
    top_indices = mean_beta.argsort()[-top_n:][::-1]
    return [vocab[i] for i in top_indices]
def load_time_labels(time2id_path):
    """
    Load time labels from a JSON mapping and return them ordered by id.

    Args:
        time2id_path: Path to a JSON file containing an object that maps each
            time label to its id.

    Returns:
        List of time labels sorted by ascending id.
    """
    # Decode explicitly as UTF-8 so non-ASCII labels do not depend on the
    # platform's default locale encoding.
    with open(time2id_path, 'r', encoding='utf-8') as f:
        time2id = json.load(f)
    # Invert and sort by id
    id2time = {v: k for k, v in time2id.items()}
    return [id2time[i] for i in sorted(id2time)]