Spaces:

AdhyaSuman
/

DTECT

Running

Initial commit with Git LFS for large files

11c72a2 9 months ago

1.08 kB

	import numpy as np
	import json

	def load_beta_matrix(beta_path: str, vocab_path: str):
	"""
	Loads the beta matrix (T x K x V) and vocab list.

	Returns:
	beta: np.ndarray of shape (T, K, V)
	vocab: list of words
	"""
	beta = np.load(beta_path) # shape: T x K x V
	with open(vocab_path, 'r') as f:
	vocab = [line.strip() for line in f.readlines()]
	return beta, vocab

	def get_top_words_at_time(beta, vocab, topic_id, time, top_n):
	topic_beta = beta[time, topic_id, :]
	top_indices = topic_beta.argsort()[-top_n:][::-1]
	return [vocab[i] for i in top_indices]

	def get_top_words_over_time(beta, vocab, topic_id, top_n):
	topic_beta = beta[:, topic_id, :]
	mean_beta = topic_beta.mean(axis=0)
	top_indices = mean_beta.argsort()[-top_n:][::-1]
	return [vocab[i] for i in top_indices]

	def load_time_labels(time2id_path):
	with open(time2id_path, 'r') as f:
	time2id = json.load(f)
	# Invert and sort by id
	id2time = {v: k for k, v in time2id.items()}
	return [id2time[i] for i in sorted(id2time)]