| | import os |
| | import sys |
| | import shutil |
| | import os.path as osp |
| | import json |
| | import time |
| | import datetime |
| | import tempfile |
| | from collections import defaultdict |
| | from contextlib import contextmanager |
| |
|
# Severity thresholds compared against a logger's level; a message is
# emitted only when its level is at least the logger's threshold.
DEBUG = 10
INFO = 20
WARN = 30
ERROR = 40

# Higher than every level used by debug()/info()/warn()/error(), so a logger
# set to this threshold drops all messages sent through those helpers.
DISABLED = 50
| |
|
class KVWriter:
    """Interface for outputs that can record a dict of key/value diagnostics."""

    def writekvs(self, kvs):
        """Write the mapping ``kvs``; concrete writers must override this."""
        raise NotImplementedError()
| |
|
class SeqWriter:
    """Interface for outputs that can record a sequence of text fragments."""

    def writeseq(self, seq):
        """Write the iterable ``seq``; concrete writers must override this."""
        raise NotImplementedError()
| |
|
class HumanOutputFormat(KVWriter, SeqWriter):
    """Human-readable writer: key/value dumps become an ASCII table, and
    sequences become single lines of space-separated text.

    Args:
        filename_or_file: Either a path (``str``), which is opened for text
            writing and closed by ``close()``, or an already-open stream
            exposing ``write`` (and ``flush``), which is never closed here.
    """

    def __init__(self, filename_or_file):
        if isinstance(filename_or_file, str):
            self.file = open(filename_or_file, 'wt')
            self.own_file = True
        else:
            # Only write()/flush() are ever called on the stream, so the
            # requirement is writability; write-only streams are valid.
            assert hasattr(filename_or_file, 'write'), \
                'expected file or str, got %s' % (filename_or_file,)
            self.file = filename_or_file
            self.own_file = False

    def writekvs(self, kvs):
        """Write ``kvs`` as a boxed two-column table, rows sorted by key.

        Values exposing ``__float__`` are formatted with ``%-8.3g``; anything
        else goes through ``str``.  Keys and values are truncated to 30
        characters.  An empty mapping prints a warning and writes nothing.
        """
        # Render every entry to (possibly truncated) text.
        # NOTE: keys that truncate to the same text overwrite one another.
        key2str = {}
        for key, val in sorted(kvs.items()):
            valstr = '%-8.3g' % val if hasattr(val, '__float__') else str(val)
            key2str[self._truncate(key)] = self._truncate(valstr)

        if not key2str:
            print('WARNING: tried to write empty key-value dict')
            return

        # Column widths are driven by the longest rendered key and value.
        keywidth = max(map(len, key2str))
        valwidth = max(map(len, key2str.values()))

        # 7 = width of the '| ', ' | ' and ' |' decorations around the cells.
        dashes = '-' * (keywidth + valwidth + 7)
        lines = [dashes]
        for key, val in sorted(key2str.items(), key=lambda kv: kv[0].lower()):
            lines.append('| %s | %s |' % (key.ljust(keywidth), val.ljust(valwidth)))
        lines.append(dashes)
        self.file.write('\n'.join(lines) + '\n')

        # Push the table out immediately so it is visible while running.
        self.file.flush()

    def _truncate(self, s):
        """Return ``s`` cut to 30 characters, ending in '...' when shortened."""
        maxlen = 30
        return s[:maxlen-3] + '...' if len(s) > maxlen else s

    def writeseq(self, seq):
        """Write the strings in ``seq`` on one line, separated by single spaces."""
        self.file.write(' '.join(seq) + '\n')
        self.file.flush()

    def close(self):
        """Close the underlying file only if this writer opened it."""
        if self.own_file:
            self.file.close()
| |
|
class JSONOutputFormat(KVWriter):
    """Writer that appends each key/value dump as one JSON object per line.

    Args:
        filename: Path of the file to create (opened for text writing).
    """

    def __init__(self, filename):
        self.file = open(filename, 'wt')

    def writekvs(self, kvs):
        """Serialize ``kvs`` as a single JSON line and flush it.

        Values exposing a ``dtype`` attribute are converted with ``float``
        before serialization.  The conversion is done on a separate record so
        the caller's mapping is left untouched; other writers that receive
        the same mapping therefore see the original values regardless of the
        order in which writers run.
        """
        record = {
            key: float(val) if hasattr(val, 'dtype') else val
            for key, val in kvs.items()
        }
        self.file.write(json.dumps(record) + '\n')
        self.file.flush()

    def close(self):
        """Close the output file."""
        self.file.close()
| |
|
class CSVOutputFormat(KVWriter):
    """Writer that keeps a CSV file whose header grows as new keys appear.

    Args:
        filename: Path of the file to create; it is opened read/write
            (``'w+t'``) because earlier rows are re-read when the header
            has to be widened.
    """

    def __init__(self, filename):
        self.file = open(filename, 'w+t')
        self.keys = []
        self.sep = ','

    def writekvs(self, kvs):
        """Append one row for ``kvs``, widening the header for unseen keys.

        When ``kvs`` has keys not written before, the file is rewritten from
        the start: a new header line, then every earlier data row padded with
        one empty cell per new key.  Missing or ``None`` values are written
        as empty cells.  ``self.sep`` is used for every separator.
        """
        extra_keys = sorted(kvs.keys() - self.keys)
        if extra_keys:
            self.keys.extend(extra_keys)
            self.file.seek(0)
            lines = self.file.readlines()
            self.file.seek(0)
            self.file.write(self.sep.join(self.keys) + '\n')
            padding = self.sep * len(extra_keys)
            # lines[0] is the old header, replaced by the line written above.
            for line in lines[1:]:
                # line[:-1] drops the trailing newline before padding.
                self.file.write(line[:-1] + padding + '\n')
        cells = []
        for key in self.keys:
            val = kvs.get(key)
            cells.append('' if val is None else str(val))
        self.file.write(self.sep.join(cells) + '\n')
        self.file.flush()

    def close(self):
        """Close the output file."""
        self.file.close()
| |
|
| |
|
class TensorBoardOutputFormat(KVWriter):
    """
    Dumps key/value pairs into TensorBoard's numeric format.
    """

    def __init__(self, dir):
        """Create ``dir`` if needed and open an events writer inside it.

        TensorFlow is imported here rather than at module level, so it is
        only required when this writer is actually constructed.
        """
        os.makedirs(dir, exist_ok=True)
        self.dir = dir
        self.step = 1
        events_path = osp.join(osp.abspath(dir), 'events')
        import tensorflow as tf
        from tensorflow.python import pywrap_tensorflow
        from tensorflow.core.util import event_pb2
        from tensorflow.python.util import compat
        self.tf = tf
        self.event_pb2 = event_pb2
        self.pywrap_tensorflow = pywrap_tensorflow
        self.writer = pywrap_tensorflow.EventsWriter(compat.as_bytes(events_path))

    def writekvs(self, kvs):
        """Write every entry of ``kvs`` as a scalar at the current step.

        Each value is converted with ``float``.  The step counter starts at 1
        and is incremented after every call.
        """
        values = [
            self.tf.Summary.Value(tag=tag, simple_value=float(val))
            for tag, val in kvs.items()
        ]
        event = self.event_pb2.Event(
            wall_time=time.time(),
            summary=self.tf.Summary(value=values),
        )
        event.step = self.step
        self.writer.WriteEvent(event)
        self.writer.Flush()
        self.step += 1

    def close(self):
        """Close the events writer; later calls do nothing."""
        if self.writer:
            self.writer.Close()
            self.writer = None
| |
|
def make_output_format(format, ev_dir, log_suffix=''):
    """Build the writer named by ``format``, creating ``ev_dir`` if needed.

    Args:
        format: One of 'stdout', 'log', 'json', 'csv' or 'tensorboard'.
        ev_dir: Directory that receives any files the writer creates.
        log_suffix: Text inserted into the file (or sub-directory) name.

    Returns:
        The newly constructed writer.

    Raises:
        ValueError: If ``format`` is not one of the names above.
    """
    os.makedirs(ev_dir, exist_ok=True)
    if format == 'stdout':
        return HumanOutputFormat(sys.stdout)
    if format == 'log':
        return HumanOutputFormat(osp.join(ev_dir, 'log%s.txt' % log_suffix))
    if format == 'json':
        return JSONOutputFormat(osp.join(ev_dir, 'progress%s.json' % log_suffix))
    if format == 'csv':
        return CSVOutputFormat(osp.join(ev_dir, 'progress%s.csv' % log_suffix))
    if format == 'tensorboard':
        return TensorBoardOutputFormat(osp.join(ev_dir, 'tb%s' % log_suffix))
    raise ValueError('Unknown format specified: %s' % (format,))
| |
|
| | |
| | |
| | |
| |
|
def logkv(key, val):
    """
    Record ``val`` under ``key`` on the current logger.
    Intended to be called once per diagnostic per iteration; a later call
    with the same key replaces the earlier value.
    """
    current = get_current()
    current.logkv(key, val)
| |
|
def logkv_mean(key, val):
    """
    Like logkv(), except that repeated calls with the same key keep a
    running average of the values instead of only the last one.
    """
    current = get_current()
    current.logkv_mean(key, val)
| |
|
def logkvs(d):
    """
    Record every key/value pair of the dictionary ``d`` via logkv().
    """
    for key in d:
        logkv(key, d[key])
| |
|
def dumpkvs():
    """
    Flush the diagnostics recorded so far to the current logger's outputs
    and return whatever the logger's dumpkvs() returns.
    """
    current = get_current()
    return current.dumpkvs()
| |
|
def getkvs():
    """
    Return the current logger's pending key/value mapping (the live
    object, not a copy).
    """
    current = get_current()
    return current.name2val
| |
|
| |
|
def log(*args, level=INFO):
    """
    Send ``args`` as one message to the current logger, which forwards it
    to its outputs when ``level`` is at or above the logger's threshold.
    """
    current = get_current()
    current.log(*args, level=level)
| |
|
def debug(*args):
    """Log ``args`` at DEBUG level."""
    log(*args, level=DEBUG)
| |
|
def info(*args):
    """Log ``args`` at INFO level."""
    log(*args, level=INFO)
| |
|
def warn(*args):
    """Log ``args`` at WARN level."""
    log(*args, level=WARN)
| |
|
def error(*args):
    """Log ``args`` at ERROR level."""
    log(*args, level=ERROR)
| |
|
| |
|
def set_level(level):
    """
    Change the message threshold of the current logger to ``level``.
    """
    current = get_current()
    current.set_level(level)
| |
|
def set_comm(comm):
    """
    Attach the communicator ``comm`` to the current logger.
    """
    current = get_current()
    current.set_comm(comm)
| |
|
def get_dir():
    """
    Return the directory the current logger was created with, i.e. where
    its log files are written.
    """
    current = get_current()
    return current.get_dir()
| |
|
# Alternative names bound to the same functions as logkv / dumpkvs.
record_tabular = logkv
dump_tabular = dumpkvs
| |
|
@contextmanager
def profile_kv(scopename):
    """
    Context manager that adds the wall-clock time spent inside the block to
    the current logger's 'wait_<scopename>' entry, even if the block raises.
    """
    started = time.time()
    try:
        yield
    finally:
        pending = get_current().name2val
        pending['wait_' + scopename] += time.time() - started
| |
|
def profile(n):
    """
    Decorator factory that times every call of the decorated function with
    profile_kv(n).

    Usage:
    @profile("my_func")
    def my_func(): code

    The returned wrapper keeps the wrapped function's name, docstring and
    other metadata (via functools.wraps) and passes arguments and the
    return value through unchanged.
    """
    import functools

    def decorator_with_name(func):
        @functools.wraps(func)
        def func_wrapper(*args, **kwargs):
            with profile_kv(n):
                return func(*args, **kwargs)
        return func_wrapper
    return decorator_with_name
| |
|
| |
|
| | |
| | |
| | |
| |
|
def get_current():
    """
    Return the active logger, configuring the default one first if no
    logger has been set up yet.
    """
    needs_default = Logger.CURRENT is None
    if needs_default:
        _configure_default_logger()
    return Logger.CURRENT
| |
|
| |
|
class Logger:
    """Collects key/value diagnostics and text messages and forwards them to
    a list of output writers.

    Args:
        dir: Directory reported by get_dir().
        output_formats: Writers to forward to; key/value dumps go to the
            KVWriter instances and text messages to the SeqWriter instances.
        comm: Optional communicator; when set, dumpkvs() combines values
            across it through ``mpi_util.mpi_weighted_mean``.
    """

    # Logger installed by _configure_default_logger().
    DEFAULT = None
    # Logger returned by get_current() and used by the module-level helpers.
    CURRENT = None

    def __init__(self, dir, output_formats, comm=None):
        self.dir = dir
        self.output_formats = output_formats
        self.comm = comm
        self.level = INFO
        # Pending values, and how many samples went into each running mean.
        self.name2val = defaultdict(float)
        self.name2cnt = defaultdict(int)

    # ---- recording and dumping diagnostics ----

    def logkv(self, key, val):
        """Store ``val`` under ``key``, replacing any pending value."""
        self.name2val[key] = val

    def logkv_mean(self, key, val):
        """Fold ``val`` into the running mean kept under ``key``."""
        prev_mean = self.name2val[key]
        seen = self.name2cnt[key]
        self.name2val[key] = prev_mean*seen/(seen+1) + val/(seen+1)
        self.name2cnt[key] = seen + 1

    def dumpkvs(self):
        """Send the pending values to every KVWriter, clear them, and return
        a copy of what was sent."""
        if self.comm is not None:
            from baselines.common import mpi_util
            weighted = {
                name: (val, self.name2cnt.get(name, 1))
                for name, val in self.name2val.items()
            }
            d = mpi_util.mpi_weighted_mean(self.comm, weighted)
            if self.comm.rank != 0:
                # Presumably keeps writers on non-root ranks from receiving
                # an empty dict; confirm against mpi_weighted_mean's return.
                d['dummy'] = 1
        else:
            d = self.name2val
        # Snapshot taken before the pending values are cleared below.
        out = d.copy()
        for writer in self.output_formats:
            if isinstance(writer, KVWriter):
                writer.writekvs(d)
        self.name2val.clear()
        self.name2cnt.clear()
        return out

    def log(self, *args, level=INFO):
        """Emit ``args`` as a message if ``level`` meets the threshold."""
        if self.level <= level:
            self._do_log(args)

    # ---- configuration ----

    def set_level(self, level):
        """Set the minimum level a message needs to be emitted."""
        self.level = level

    def set_comm(self, comm):
        """Set the communicator used by dumpkvs()."""
        self.comm = comm

    def get_dir(self):
        """Return the directory this logger was created with."""
        return self.dir

    def close(self):
        """Close every output writer."""
        for writer in self.output_formats:
            writer.close()

    # ---- internals ----

    def _do_log(self, args):
        """Pass the stringified ``args`` to every SeqWriter output."""
        for writer in self.output_formats:
            if isinstance(writer, SeqWriter):
                writer.writeseq(map(str, args))
| |
|
def get_rank_without_mpi_import():
    """Return the process rank read from the environment, or 0 if unset.

    'PMI_RANK' is consulted first, then 'OMPI_COMM_WORLD_RANK'; the first
    one present is converted with ``int``.  As the name says, the rank is
    taken from environment variables rather than from an MPI library.
    """
    for env_name in ('PMI_RANK', 'OMPI_COMM_WORLD_RANK'):
        raw = os.environ.get(env_name)
        if raw is not None:
            return int(raw)
    return 0
| |
|
| |
|
def configure(dir=None, format_strs=None, comm=None, log_suffix=''):
    """
    Create a new logger and install it as the current one.

    dir: log directory; falls back to $OPENAI_LOGDIR, then to a time-stamped
        folder under the system temp directory.  '~' is expanded and the
        directory is created if missing.
    format_strs: names of the outputs to create; when None they are read
        from $OPENAI_LOG_FORMAT (rank 0, default 'stdout,log,csv') or
        $OPENAI_LOG_FORMAT_MPI (other ranks, default 'log').
    comm: if provided, average all numerical stats across that comm.
    log_suffix: appended to output file names; non-zero ranks additionally
        get a '-rankNNN' suffix.
    """
    log_dir = dir
    if log_dir is None:
        log_dir = os.getenv('OPENAI_LOGDIR')
    if log_dir is None:
        stamp = datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f")
        log_dir = osp.join(tempfile.gettempdir(), stamp)
    assert isinstance(log_dir, str)
    log_dir = os.path.expanduser(log_dir)
    os.makedirs(os.path.expanduser(log_dir), exist_ok=True)

    rank = get_rank_without_mpi_import()
    if rank > 0:
        log_suffix = log_suffix + "-rank%03i" % rank

    if format_strs is None:
        if rank == 0:
            env_name, fallback = 'OPENAI_LOG_FORMAT', 'stdout,log,csv'
        else:
            env_name, fallback = 'OPENAI_LOG_FORMAT_MPI', 'log'
        format_strs = os.getenv(env_name, fallback).split(',')
    # Empty names (e.g. from an empty environment variable) are skipped.
    output_formats = [
        make_output_format(name, log_dir, log_suffix)
        for name in format_strs
        if name
    ]

    Logger.CURRENT = Logger(dir=log_dir, output_formats=output_formats, comm=comm)
    if output_formats:
        log('Logging to %s'%log_dir)
| |
|
def _configure_default_logger():
    """Run configure() with its defaults and remember the resulting logger
    as Logger.DEFAULT."""
    configure()
    default_logger = Logger.CURRENT
    Logger.DEFAULT = default_logger
| |
|
def reset():
    """Close the current logger and switch back to Logger.DEFAULT.

    Does nothing when the current logger already is the default one.
    """
    if Logger.CURRENT is Logger.DEFAULT:
        return
    Logger.CURRENT.close()
    Logger.CURRENT = Logger.DEFAULT
    log('Reset logger')
| |
|
@contextmanager
def scoped_configure(dir=None, format_strs=None, comm=None):
    """Context manager that installs a freshly configured logger for the
    duration of the block.

    On exit (including on error) whichever logger is current is closed and
    the logger that was current before entry is put back.
    """
    previous = Logger.CURRENT
    configure(dir=dir, format_strs=format_strs, comm=comm)
    try:
        yield
    finally:
        active = Logger.CURRENT
        active.close()
        Logger.CURRENT = previous
| |
|
| | |
| |
|
def _demo():
    """Exercise the module-level helpers, logging into /tmp/testlogging."""
    # Threshold check: DEBUG messages only show after lowering the level.
    info("hi")
    debug("shouldn't appear")
    set_level(DEBUG)
    debug("should appear")

    # Start from a clean directory and switch to a logger that writes there.
    demo_dir = "/tmp/testlogging"
    if osp.exists(demo_dir):
        shutil.rmtree(demo_dir)
    configure(dir=demo_dir)

    logkv("a", 3)
    logkv("b", 2.5)
    dumpkvs()

    logkv("b", -2.5)
    logkv("a", 5.5)
    dumpkvs()
    info("^^^ should see a = 5.5")

    # Two samples for "b" are averaged by logkv_mean.
    logkv_mean("b", -22.5)
    logkv_mean("b", -44.4)
    logkv("a", 5.5)
    dumpkvs()
    info("^^^ should see b = -33.3")

    # A dump holding only one of the keys.
    logkv("b", -2.5)
    dumpkvs()

    # A value long enough to be shortened in the human-readable table.
    logkv("a", "longasslongasslongasslongasslongasslongassvalue")
    dumpkvs()
| |
|
| |
|
| | |
| | |
| | |
| |
|
def read_json(fname):
    """Load a file holding one JSON object per line into a pandas DataFrame
    (one row per line)."""
    import pandas
    with open(fname, 'rt') as fh:
        records = [json.loads(line) for line in fh]
    return pandas.DataFrame(records)
| |
|
def read_csv(fname):
    """Load a CSV file into a pandas DataFrame, treating '#' as the start of
    a comment and using no column as the index."""
    import pandas as pd
    frame = pd.read_csv(fname, index_col=None, comment='#')
    return frame
| |
|
def read_tb(path):
    """
    path : a tensorboard file OR a directory, where we will find all TB files
           of the form events.*

    Returns a pandas DataFrame with one column per tag (sorted by name) and
    one row per step, where row ``i`` holds the values logged at step
    ``i + 1``; cells with no logged value are NaN.  Events whose step is not
    positive are ignored.
    """
    import pandas
    import numpy as np
    from glob import glob
    import tensorflow as tf
    if osp.isdir(path):
        fnames = glob(osp.join(path, "events.*"))
    elif osp.basename(path).startswith("events."):
        fnames = [path]
    else:
        raise NotImplementedError("Expected tensorboard file or directory containing them. Got %s"%path)

    # Gather (step, value) samples per tag and track the largest step seen.
    tag2pairs = defaultdict(list)
    maxstep = 0
    for fname in fnames:
        for event in tf.compat.v1.train.summary_iterator(fname):
            if event.step <= 0:
                continue
            for entry in event.summary.value:
                tag2pairs[entry.tag].append((event.step, entry.simple_value))
            maxstep = max(event.step, maxstep)

    # Rows are steps (1-based, hence the -1 below); columns are sorted tags.
    tags = sorted(tag2pairs.keys())
    data = np.full((maxstep, len(tag2pairs)), np.nan)
    for colidx, tag in enumerate(tags):
        for step, value in tag2pairs[tag]:
            data[step-1, colidx] = value
    return pandas.DataFrame(data, columns=tags)
| |
|
# Run the demonstration only when this file is executed as a script.
if __name__ == "__main__":
    _demo()
| |
|