Buckets:

craffel
/

moto_checkpoints

Files

xet

craffel/moto_checkpoints / script_1 /code /apps /plots /analysis.py

craffel

18 days ago

download

raw

1.86 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.

	import glob
	import json
	from functools import partial
	from multiprocessing import Pool
	from pathlib import Path

	import pandas as pd
	import plotly.express as px
	from omegaconf import OmegaConf


	def parallel(func, files, num_workers=16):
	    """Apply ``func`` to every entry of ``files`` using a process pool.

	    Args:
	        func: Callable invoked once per entry; it is dispatched through
	            ``multiprocessing.Pool.map``.
	        files: Iterable of inputs handed to ``func`` one at a time.
	        num_workers: Number of worker processes in the pool.

	    Returns:
	        list: One result per input, in input order. When the first result
	        is a ``list``, all results are concatenated into one flat list.
	    """
	    with Pool(num_workers) as pool:
	        outputs = list(pool.map(func, files))

	    # Only the first result decides whether flattening happens.
	    if not outputs or not isinstance(outputs[0], list):
	        return outputs

	    flattened = []
	    for chunk in outputs:
	        flattened.extend(chunk)
	    return flattened


	def parallel_from_glob(func, glob_pattern, num_workers=16):
	    """Run ``func`` in parallel over every path matching ``glob_pattern``.

	    Args:
	        func: Callable applied to each matching path.
	        glob_pattern: Pattern expanded with ``glob.glob(..., recursive=True)``.
	        num_workers: Number of worker processes forwarded to ``parallel``.

	    Returns:
	        Whatever ``parallel`` returns for the matched paths.
	    """
	    matched_paths = glob.glob(glob_pattern, recursive=True)
	    worker = partial(func)
	    return parallel(worker, matched_paths, num_workers=num_workers)


	def load_raw_json(path):
	    """Read the file at ``path`` and return its parsed JSON content.

	    Args:
	        path: Location of the JSON file to open for reading.

	    Returns:
	        The Python value decoded from the file's JSON text.
	    """
	    with open(path, "r") as handle:
	        text = handle.read()
	    return json.loads(text)


	def load_raw_jsonl(jsonl_file):
	    """Parse a JSON Lines file into a list of decoded objects.

	    Blank lines are skipped silently. A line that is not valid JSON is
	    reported on stdout and skipped; it contributes nothing to the result.

	    Args:
	        jsonl_file: Path of the JSON Lines file to read.

	    Returns:
	        list: The decoded value of every valid line, in file order.
	    """
	    metrics = []

	    with open(jsonl_file, "r") as f:
	        for i, line in enumerate(f):
	            # Whitespace-only lines (e.g. a trailing newline) carry no record.
	            if not line.strip():
	                continue
	            try:
	                json_obj = json.loads(line)
	            except json.JSONDecodeError:
	                print(f"Error decoding line {i+1} in file {jsonl_file}")
	                # Skip the bad line: appending here would either raise
	                # NameError (first line) or duplicate the previous record.
	                continue
	            metrics.append(json_obj)

	    return metrics


	def get_metrics(path):
	    """Build a flat DataFrame from one metrics file and its sibling config.

	    Args:
	        path: Path of a JSON Lines metrics file. A ``config.yaml`` is read
	            from the same directory.

	    Returns:
	        pandas.DataFrame: One row per metrics record. Nested keys are
	        flattened with ``/`` under the ``params`` and ``metrics`` prefixes.
	    """
	    metrics_path = Path(path)

	    records = load_raw_jsonl(metrics_path)
	    config = OmegaConf.load(metrics_path.parent / "config.yaml")
	    config = OmegaConf.to_container(config, resolve=True)

	    # Every row carries the same resolved config next to its own metrics.
	    rows = []
	    for record in records:
	        rows.append({"params": config, "metrics": record})

	    return pd.json_normalize(rows, sep="/")


	def get_merged_df(path):
	    """Load every metrics file matching ``path`` and concatenate the results.

	    Args:
	        path: Glob pattern forwarded to ``parallel_from_glob``; each match
	            is processed by ``get_metrics``.

	    Returns:
	        pandas.DataFrame: The per-file frames joined with ``pd.concat``.
	    """
	    frames = parallel_from_glob(get_metrics, path, num_workers=80)
	    merged = pd.concat(frames)
	    return merged


	# %% Example usage
	# Gather the metrics found at the given location into one DataFrame.
	df = get_merged_df("/path/to/metrics.jsonl")

	# Plot the loss against the global step, with a logarithmic y-axis.
	fig = px.line(df, x="metrics/global_step", y="metrics/loss/out")
	fig.update_yaxes(type="log")
	fig.show()

	# %%

Xet Storage Details

Size:: 1.86 kB
Xet hash:: 8b9cd09d6264ccb78e7524b5857a7a2e9b3fd24a6347a7fdaba5daed43c3f8e4

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.