MahaTTSv2 / T2S /plot_embed.py
rasenganai
init
41bc8a8
# ---------------------------------------------------------------------------
# NOTE(review): This entire script is commented out, so importing or running
# this file currently has no effect. The disabled code below is kept for
# reference only. Its indentation has been lost inside the comments, so the
# nesting of some statements (especially in `load_chunk`) cannot be determined
# from the text alone -- re-indent and re-test before re-enabling any of it.
#
# What the disabled script does, as far as the visible lines show:
#   * loads a NumPy array from a hard-coded absolute `.npy` path,
#   * prints its shape,
#   * scatter-plots column 0 against column 1,
#   * saves the figure as 'gst_embed_pocketfm.png'.
# ---------------------------------------------------------------------------
# import matplotlib.pyplot as plt
# import numpy as np
# import subprocess
# import json
# NOTE(review): `UMAP` is imported but never referenced in the lines below.
# from umap import UMAP
# from tqdm import tqdm
# NOTE(review): count_lines_shell runs `wc -l <file_path>` and parses the first
# whitespace-separated field of its stdout as an int (requires a `wc` binary).
# def count_lines_shell(file_path):
# result = subprocess.run(["wc", "-l", file_path], capture_output=True, text=True)
# return int(result.stdout.split()[0])
# NOTE(review): load_chunk reads `file_path` line by line, parses each line as
# JSON, and collects [first_key, value[0]] pairs into `dataset`.
# The `return dataset` immediately followed by `dataset=[]` looks like a
# leftover from a generator version (`yield dataset`) -- presumably only the
# first chunk would ever be returned as written; confirm before re-enabling.
# def load_chunk(file_path,chunk_size):
# lines = count_lines_shell(file_path)
# with open(file_path,'r') as file:
# dataset = []
# # embed = []
# for i in tqdm(file,total=lines):
# data = json.loads(i)
# key = list(data.keys())[0]
# dataset.append([key,data[key][0]])
# # embed.append(data[key][1])
# if len(dataset)==chunk_size:
# return dataset
# dataset=[]
# # embed=[]
# if len(dataset)!=0:
# return dataset
# NOTE(review): in the entry point below, `file_name` and `bs` are assigned but
# not used by any of the remaining statements; the `load_chunk` call and the
# `plt.imsave` call are commented out a second time. The embeddings path is a
# hard-coded absolute path.
# if __name__ == '__main__':
# file_name = "pocketfm_pure_textlossless_data_stats.json"
# bs = -1
# # data = load_chunk(file_name,-1)
# embed = np.load("/nlsasfs/home/dubverse/varshulg/work/NeuralSpeak/T2S/pocketfm_embeddings.npy")
# print(embed.shape)
# plt.scatter(embed[:,0],embed[:,1])
# # plt.imsave("gst_embed.png")
# plt.savefig('gst_embed_pocketfm.png')#, dpi=300, bbox_inches='tight')