| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| |
|
| | |
| |
|
| | |
| |
|
| | from sys import exit, stdout |
| |
|
| | from os.path import splitext |
| |
|
| | from itertools import imap |
| |
|
| | from math import sqrt, log10, floor |
| |
|
| | from collections import deque |
| |
|
| | from argparse import ArgumentParser as argument_parser |
| | from argparse import Action as argument_action |
| |
|
| | from csv import DictReader as csv_dict_reader |
| | from csv import DictWriter as csv_dict_writer |
| |
|
| | from re import compile as regex_compile |
| |
|
| | |
| |
|
def unpack_tuple(f):
  """Wrap `f` in a unary function that splats its single iterable argument
  into `f`'s positional parameters."""
  def unpacked(args):
    return f(*iter(args))
  return unpacked
| |
|
def strip_dict(d):
  """Strip leading and trailing whitespace from all keys and values in `d`,
  mutating `d` in place.

  Returns:
    The modified dict `d`.
  """
  for key in d:
    d[key] = d[key].strip()
  return d
| |
|
def merge_dicts(d0, d1):
  """Create a new `dict` that is the union of `dict`s `d0` and `d1`.

  Entries in `d1` take precedence over entries in `d0`; neither input is
  modified.
  """
  merged = d0.copy()
  merged.update(d1)
  return merged
| |
|
def change_key_in_dict(d, old_key, new_key):
  """Change the key of the entry in `d` with key `old_key` to `new_key`. If
  there is an existing entry at `new_key`, it is overwritten.

  Returns:
    The modified dict `d`.

  Raises:
    KeyError : If `old_key` is not in `d`.
  """
  value = d.pop(old_key)
  d[new_key] = value
  return d
| |
|
def key_from_dict(d):
  """Create a hashable key from a `dict` by converting the `dict` to a tuple
  of its items, sorted for deterministic ordering."""
  items = sorted(d.items())
  return tuple(items)
| |
|
def strip_list(l):
  """Strip leading and trailing whitespace from all values in `l`, mutating
  `l` in place, and return it."""
  l[:] = [value.strip() for value in l]
  return l
| |
|
def remove_from_list(l, item):
  """Remove the first occurence of `item` from list `l` and return a tuple of
  the index that was removed and the element that was removed.

  Raises:
    ValueError : If `item` is not in `l`.
  """
  idx = l.index(item)
  return (idx, l.pop(idx))
| |
|
| | |
| |
|
def int_or_float(x):
  """Convert `x` to either `int` or `float`, preferring `int`.

  Raises:
    ValueError : If `x` is not convertible to either `int` or `float`
  """
  try:
    return int(x)
  except ValueError:
    pass
  # Fall back to float; a failure here propagates the ValueError.
  return float(x)
| |
|
def try_int_or_float(x):
  """Try to convert `x` to either `int` or `float`, preferring `int`. `x` is
  returned unmodified if conversion fails.
  """
  try:
    result = int_or_float(x)
  except ValueError:
    result = x
  return result
| |
|
| | |
| |
|
def ranges_overlap(x1, x2, y1, y2):
  """Returns true if the ranges `[x1, x2]` and `[y1, y2]` overlap,
  where `x1 <= x2` and `y1 <= y2`.

  Raises:
    AssertionError : If `x1 > x2` or `y1 > y2`.
  """
  assert x1 <= x2
  assert y1 <= y2
  # The ranges are disjoint exactly when one ends before the other begins.
  return not (x2 < y1 or y2 < x1)
| |
|
def ranges_overlap_uncertainty(x, x_unc, y, y_unc):
  """Returns true if the ranges `[x - x_unc, x + x_unc]` and
  `[y - y_unc, y + y_unc]` overlap, where `x_unc >= 0` and `y_unc >= 0`.

  Raises:
    AssertionError : If `x_unc < 0` or `y_unc < 0`.
  """
  assert x_unc >= 0
  assert y_unc >= 0
  # Expand each center value into an interval and defer to `ranges_overlap`.
  x_lo, x_hi = x - x_unc, x + x_unc
  y_lo, y_hi = y - y_unc, y + y_unc
  return ranges_overlap(x_lo, x_hi, y_lo, y_hi)
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
def uncertainty_multiplicative(f, A, A_abs_unc, B, B_abs_unc):
  r"""Compute the propagated uncertainty from the multiplication of two
  uncertain values, `A +/- A_abs_unc` and `B +/- B_abs_unc`. Given `f = AB` or
  `f = A/B`, where `A != 0` and `B != 0`, the uncertainty in `f` is
  approximately:

  .. math::

    \sigma_f = |f| \sqrt{\frac{\sigma_A}{A} ^ 2 + \frac{\sigma_B}{B} ^ 2}

  Raises:
    ZeroDivisionError : If `A == 0` or `B == 0`.
  """
  # Fixed: raw docstring so the LaTeX backslashes (`\f`, `\s`) are not
  # interpreted as escape sequences, and removed a stray trailing semicolon.
  return abs(f) * sqrt((A_abs_unc / A) ** 2 + (B_abs_unc / B) ** 2)
| |
|
def uncertainty_additive(c, A_abs_unc, d, B_abs_unc):
  r"""Compute the propagated uncertainty from addition of two uncertain values,
  `A +/- A_abs_unc` and `B +/- B_abs_unc`. Given `f = cA + dB`, where `c` and
  `d` are certain constants, the uncertainty in `f` is approximately:

  .. math::

    f_{\sigma} = \sqrt{c ^ 2 * A_{\sigma} ^ 2 + d ^ 2 * B_{\sigma} ^ 2}
  """
  # Fixed: raw docstring so `\s` in the LaTeX is not treated as a (bogus)
  # escape sequence.
  return sqrt(((c ** 2) * (A_abs_unc ** 2)) + ((d ** 2) * (B_abs_unc ** 2)))
| |
|
| | |
| |
|
| | |
| |
|
def absolute_change(old, new):
  """Computes the absolute change from old to new:

  .. math::

    absolute_change = new - old
  """
  delta = new - old
  return delta
| |
|
def absolute_change_uncertainty(old, old_unc, new, new_unc):
  """Computes the uncertainty in the absolute change from old to new and returns
  a tuple of the absolute change and the absolute change uncertainty.
  """
  # The change is f = (+1)*new + (-1)*old, so propagate additively with
  # coefficients +1 and -1.
  delta = new - old
  delta_unc = uncertainty_additive(1.0, new_unc, -1.0, old_unc)
  return (delta, delta_unc)
| |
|
def percent_change(old, new):
  r"""Computes the percent change from old to new:

  .. math::

    percent_change = 100 \frac{new - old}{abs(old)}

  Raises:
    ZeroDivisionError : If `old == 0`.
  """
  # Fixed: the previous implementation returned the raw fraction
  # `(new - old) / abs(old)`, contradicting both the function name and the
  # documented formula; scale by 100 to actually yield a percentage.
  # (Raw docstring also keeps `\f` in the LaTeX from being a formfeed escape.)
  return 100.0 * float(new - old) / abs(old)
| |
|
def percent_change_uncertainty(old, old_unc, new, new_unc):
  """Computes the uncertainty in the percent change from old to new and returns
  a tuple of the absolute change, the absolute change uncertainty, the percent
  change and the percent change uncertainty.
  """
  # A zero baseline makes the relative change undefined.
  if old == 0:
    nan = float("nan")
    return (nan, nan, nan, nan)

  (abs_change, abs_change_unc) = absolute_change_uncertainty(
    old, old_unc, new, new_unc
  )

  # A zero absolute change would divide by zero in the multiplicative
  # propagation below, so report NaN for the percent columns.
  if abs_change == 0:
    return (abs_change, abs_change_unc, float("nan"), float("nan"))

  rel_change = float(abs_change) / abs(old)
  rel_change_unc = uncertainty_multiplicative(
    rel_change, abs_change, abs_change_unc, old, old_unc
  )

  # Scaling by the exact constant 100 contributes no extra uncertainty.
  pct = 100.0 * rel_change
  pct_unc = uncertainty_multiplicative(
    pct, 100.0, 0.0, rel_change, rel_change_unc
  )

  return (abs_change, abs_change_unc, pct, pct_unc)
| |
|
| | |
| |
|
def find_significant_digit(x):
  """Return the significant digit of the number x. The result is the number of
  digits after the decimal place to round to (negative numbers indicate rounding
  before the decimal place)."""
  if x == 0:
    return 0
  magnitude = floor(log10(abs(x)))
  return -int(magnitude)
| |
|
def round_with_int_conversion(x, ndigits = None):
  """Rounds `x` to `ndigits` after the the decimal place. If `ndigits` is less
  than 1, convert the result to `int`. If `ndigits` is `None`, the significant
  digit of `x` is used."""
  if ndigits is None:
    ndigits = find_significant_digit(x)
  rounded = round(x, ndigits)
  if ndigits < 1:
    return int(rounded)
  return rounded
| |
|
| | |
| |
|
class measured_variable(object):
  """A meta-variable representing measured data. It is composed of three raw
  variables plus units meta-data.

  Attributes:
    quantity (`str`) :
      Name of the quantity variable of this object.
    uncertainty (`str`) :
      Name of the uncertainty variable of this object.
    sample_size (`str`) :
      Name of the sample size variable of this object.
    units (units class or `None`) :
      The units the value is measured in.
  """

  def __init__(self, quantity, uncertainty, sample_size, units = None):
    self.quantity    = quantity
    self.uncertainty = uncertainty
    self.sample_size = sample_size
    self.units       = units

  def as_tuple(self):
    """Return the four components as a `(quantity, uncertainty, sample_size,
    units)` tuple."""
    return (self.quantity, self.uncertainty, self.sample_size, self.units)

  def __iter__(self):
    # Iterating a measured_variable yields its tuple components in order.
    return iter(self.as_tuple())

  def __str__(self):
    return str(self.as_tuple())

  def __repr__(self):
    return self.__str__()
| |
|
class measured_value(object):
  """An object that represents a value determined by multiple measurements.

  Attributes:
    quantity (scalar) :
      The quantity of the value, e.g. the arithmetic mean.
    uncertainty (scalar) :
      The measurement uncertainty, e.g. the sample standard deviation.
    sample_size (`int`) :
      The number of observations contributing to the value.
    units (units class or `None`) :
      The units the value is measured in.
  """

  def __init__(self, quantity, uncertainty, sample_size = 1, units = None):
    self.quantity    = quantity
    self.uncertainty = uncertainty
    self.sample_size = sample_size
    self.units       = units

  def as_tuple(self):
    """Return the four components as a `(quantity, uncertainty, sample_size,
    units)` tuple."""
    return (self.quantity, self.uncertainty, self.sample_size, self.units)

  def __iter__(self):
    # Iterating a measured_value yields its tuple components in order.
    return iter(self.as_tuple())

  def __str__(self):
    return str(self.as_tuple())

  def __repr__(self):
    return self.__str__()
| |
|
| | |
| |
|
def arithmetic_mean(X):
  r"""Computes the arithmetic mean of the sequence `X`.

  Let:

  * `n = len(X)`.
  * `u` denote the arithmetic mean of `X`.

  .. math::

    u = \frac{\sum_{i = 0}^{n - 1} X_i}{n}
  """
  n = len(X)
  return sum(X) / n
| |
|
def sample_variance(X, u = None):
  r"""Computes the sample variance of the sequence `X`.

  Let:

  * `n = len(X)`.
  * `u` denote the arithmetic mean of `X`.
  * `s` denote the sample standard deviation of `X`.

  .. math::

    v = \frac{\sum_{i = 0}^{n - 1} (X_i - u)^2}{n - 1}

  Args:
    X (`Iterable`) : The sequence of values.
    u (number) : The arithmetic mean of `X`.
  """
  if u is None: u = arithmetic_mean(X)
  # Generator expression instead of Python-2-only `itertools.imap`: works on
  # both Python 2 and 3 and avoids the lambda indirection.
  return sum((X_i - u) ** 2 for X_i in X) / (len(X) - 1)
| | |
def sample_standard_deviation(X, u = None, v = None):
  r"""Computes the sample standard deviation of the sequence `X`.

  Let:

  * `n = len(X)`.
  * `u` denote the arithmetic mean of `X`.
  * `v` denote the sample variance of `X`.
  * `s` denote the sample standard deviation of `X`.

  .. math::

    s &= \sqrt{v}
      &= \sqrt{\frac{\sum_{i = 0}^{n - 1} (X_i - u)^2}{n - 1}}

  Args:
    X (`Iterable`) : The sequence of values.
    u (number) : The arithmetic mean of `X`.
    v (number) : The sample variance of `X`.
  """
  mean = arithmetic_mean(X) if u is None else u
  var  = sample_variance(X, mean) if v is None else v
  return sqrt(var)
| |
|
def combine_sample_size(As):
  r"""Computes the combined sample size of a group of `measured_value`s.

  Let:

  * `g = len(As)`.
  * `n_i = As[i].samples`.
  * `n` denote the combined sample size of `As`.

  .. math::

    n = \sum{i = 0}^{g - 1} n_i
  """
  # Each element of `As` iterates as (quantity, uncertainty, sample_size,
  # units); a generator expression replaces the Python-2-only
  # `itertools.imap` + `unpack_tuple` lambda.
  return sum(n_i for (u_i, s_i, n_i, t_i) in As)
| |
|
def combine_arithmetic_mean(As, n = None):
  r"""Computes the combined arithmetic mean of a group of `measured_value`s.

  Let:

  * `g = len(As)`.
  * `u_i = As[i].quantity`.
  * `n_i = As[i].samples`.
  * `n` denote the combined sample size of `As`.
  * `u` denote the arithmetic mean of the quantities of `As`.

  .. math::

    u = \frac{\sum{i = 0}^{g - 1} n_i u_i}{n}
  """
  if n is None: n = combine_sample_size(As)
  # Sample-size-weighted mean; the generator expression replaces the
  # Python-2-only `itertools.imap` + `unpack_tuple` lambda.
  return sum(n_i * u_i for (u_i, s_i, n_i, t_i) in As) / n
| | |
def combine_sample_variance(As, n = None, u = None):
  r"""Computes the combined sample variance of a group of `measured_value`s.

  Let:

  * `g = len(As)`.
  * `u_i = As[i].quantity`.
  * `s_i = As[i].uncertainty`.
  * `n_i = As[i].samples`.
  * `n` denote the combined sample size of `As`.
  * `u` denote the arithmetic mean of the quantities of `As`.
  * `v` denote the sample variance of `X`.

  .. math::

    v = \frac{(\sum_{i = 0}^{g - 1} n_i (u_i - u)^2 + s_i^2 (n_i - 1))}{n - 1}

  Args:
    As (`Iterable` of `measured_value`s) : The sequence of values.
    n (number) : The combined sample sizes of `As`.
    u (number) : The combined arithmetic mean of `As`.
  """
  # Fixed: resolve the `n is None` default BEFORE testing `n <= 1`. The old
  # order compared `None <= 1`, which on Python 2 is always True (so the
  # default-argument call always returned 0) and raises TypeError on Python 3.
  if n is None: n = combine_sample_size(As)
  if n <= 1: return 0
  if u is None: u = combine_arithmetic_mean(As, n)
  # Generator expression replaces the Python-2-only `itertools.imap`.
  return sum(
    n_i * (u_i - u) ** 2 + (s_i ** 2) * (n_i - 1)
    for (u_i, s_i, n_i, t_i) in As
  ) / (n - 1)
| |
|
def combine_sample_standard_deviation(As, n = None, u = None, v = None):
  r"""Computes the combined sample standard deviation of a group of
  `measured_value`s.

  Let:

  * `g = len(As)`.
  * `u_i = As[i].quantity`.
  * `s_i = As[i].uncertainty`.
  * `n_i = As[i].samples`.
  * `n` denote the combined sample size of `As`.
  * `u` denote the arithmetic mean of the quantities of `As`.
  * `v` denote the sample variance of `X`.
  * `s` denote the sample standard deviation of `X`.

  .. math::
    v &= \frac{(\sum_{i = 0}^{g - 1} n_i (u_i - u)^2 + s_i^2 (n_i - 1))}{n - 1}

    s &= \sqrt{v}

  Args:
    As (`Iterable` of `measured_value`s) : The sequence of values.
    n (number) : The combined sample sizes of `As`.
    u (number) : The combined arithmetic mean of `As`.
    v (number) : The combined sample variance of `As`.
  """
  # Fixed: resolve the `n is None` default BEFORE testing `n <= 1`. The old
  # order compared `None <= 1`, which on Python 2 is always True (so the
  # default-argument call always returned 0) and raises TypeError on Python 3.
  if n is None: n = combine_sample_size(As)
  if n <= 1: return 0
  if u is None: u = combine_arithmetic_mean(As, n)
  if v is None: v = combine_sample_variance(As, n, u)
  return sqrt(v)
| |
|
| | |
| |
|
def store_const_multiple(const, *destinations):
  """Returns an `argument_action` class that sets multiple argument
  destinations (`destinations`) to `const`."""
  class multi_store_const_action(argument_action):
    def __init__(self, *args, **kwargs):
      # Consume no command line values; the constant is implied by the flag.
      super(multi_store_const_action, self).__init__(
        metavar = None, nargs = 0, const = const, *args, **kwargs
      )

    def __call__(self, parser, namespace, values, option_string = None):
      # Write the constant into every requested destination attribute.
      for dest in destinations:
        setattr(namespace, dest, const)

  return multi_store_const_action
| |
|
def store_true_multiple(*destinations):
  """Returns an `argument_action` class that sets every named destination in
  `destinations` to `True` when the flag is present."""
  return store_const_multiple(True, *destinations)
| |
|
def store_false_multiple(*destinations):
  """Returns an `argument_action` class that sets every named destination in
  `destinations` to `False` when the flag is present."""
  return store_const_multiple(False, *destinations)
| |
|
| | |
| |
|
def process_program_arguments():
  """Parse and return this program's command line arguments.

  Returns:
    An `argparse.Namespace` with the attributes: `baseline_input_file`,
    `observed_input_file`, `output_file`, `control_variables`,
    `dependent_variables`, `change_threshold`, `preserve_whitespace`,
    `output_all_variables` and `output_all_datapoints`.
  """
  ap = argument_parser(
    description = (
      "Compares two sets of combined performance results and identifies "
      "statistically significant changes."
    )
  )

  # Positional: the baseline results CSV.
  ap.add_argument(
    "baseline_input_file",
    help = ("CSV file containing the baseline performance results. The first "
            "two rows should be a header. The 1st header row specifies the "
            "name of each variable, and the 2nd header row specifies the units "
            "for that variable. The baseline results may be a superset of the "
            "observed performance results, but the reverse is not true. The "
            "baseline results must contain data for every datapoint in the "
            "observed performance results."),
    type = str
  )

  # Positional: the observed results CSV.
  ap.add_argument(
    "observed_input_file",
    help = ("CSV file containing the observed performance results. The first "
            "two rows should be a header. The 1st header row specifies the name "
            "of header row specifies the units for that variable."),
    type = str
  )

  # Where to write the comparison; `-` means stdout.
  ap.add_argument(
    "-o", "--output-file",
    help = ("The file that results are written to. If `-`, results are "
            "written to stdout."),
    action = "store", type = str, default = "-",
    metavar = "OUTPUT"
  )

  # Repeatable: each occurrence appends to `control_variables`.
  ap.add_argument(
    "-c", "--control-variable",
    help = ("Treat the specified variable as a control variable. This means "
            "it will be filtered out when forming dataset keys. For example, "
            "this could be used to ignore a timestamp variable that is "
            "different in the baseline and observed results. May be specified "
            "multiple times."),
    action = "append", type = str, dest = "control_variables", default = [],
    metavar = "QUANTITY"
  )

  # Repeatable: each occurrence appends a raw "AVG,STDEV,TRIALS" string which
  # is parsed later by `dependent_variable_parser`.
  ap.add_argument(
    "-d", "--dependent-variable",
    help = ("Treat the specified three variables as a dependent variable. The "
            "1st variable is the measured quantity, the 2nd is the uncertainty "
            "of the measurement and the 3rd is the sample size. The defaults "
            "are the dependent variables of Thrust's benchmark suite. May be "
            "specified multiple times."),
    action = "append", type = str, dest = "dependent_variables", default = [],
    metavar = "QUANTITY,UNCERTAINTY,SAMPLES"
  )

  # `%%` is escaped because argparse interprets `%` in help strings.
  ap.add_argument(
    "-t", "--change-threshold",
    help = ("Treat relative changes less than this amount (a percentage) as "
            "statistically insignificant. The default is 5%%."),
    action = "store", type = float, default = 5,
    metavar = "PERCENTAGE"
  )

  ap.add_argument(
    "-p", "--preserve-whitespace",
    help = ("Don't trim leading and trailing whitespace from each CSV cell."),
    action = "store_true", default = False
  )

  ap.add_argument(
    "--output-all-variables",
    help = ("Don't omit original absolute values in output."),
    action = "store_true", default = False
  )

  ap.add_argument(
    "--output-all-datapoints",
    help = ("Don't omit datapoints that are statistically indistinguishable "
            "in output."),
    action = "store_true", default = False
  )

  # Convenience flag: sets both `output_all_variables` and
  # `output_all_datapoints` via the custom multi-destination action.
  ap.add_argument(
    "-a", "--output-all",
    help = ("Equivalent to `--output-all-variables --output-all-datapoints`."),
    action = store_true_multiple("output_all_variables", "output_all_datapoints")
  )

  return ap.parse_args()
| |
|
| | |
| |
|
def filter_comments(f, s = "#"):
  """Return an iterator to the file `f` which filters out all lines beginning
  with `s`."""
  def is_not_comment(line):
    return not line.startswith(s)
  return filter(is_not_comment, f)
| |
|
| | |
| |
|
class io_manager(object):
  """Manages I/O operations and represents the input data as an `Iterable`
  sequence of `dict`s.

  It is `Iterable` and an `Iterator`. It can be used with `with`.

  Attributes:
    preserve_whitespace (`bool`) :
      If `False`, leading and trailing whitespace is stripped from each CSV cell.
    writer (`csv_dict_writer`) :
      CSV writer object that the output is written to.
    output_file (`file` or `stdout`) :
      The output `file` object.
    baseline_reader (`csv_dict_reader`) :
      CSV reader object for the baseline results.
    observed_reader (`csv_dict_reader`) :
      CSV reader object for the observed results.
    baseline_input_file (`file`) :
      `file` object for the baseline results.
    observed_input_file (`file`) :
      `file` object for the observed results..
    variable_names (`list` of `str`s) :
      Names of the variables, in order.
    variable_units (`list` of `str`s) :
      Units of the variables, in order.
  """

  def __init__(self,
               baseline_input_file, observed_input_file,
               output_file,
               preserve_whitespace = False):
    """Read input files and open the output file and construct a new `io_manager`
    object.

    If `preserve_whitespace` is `False`, leading and trailing whitespace is
    stripped from each CSV cell.

    Raises
      AssertionError :
        If `type(preserve_whitespace) != bool`.
    """
    assert type(preserve_whitespace) == bool

    self.preserve_whitespace = preserve_whitespace

    # Open the baseline results file and wrap it in a CSV reader that skips
    # comment lines.
    self.baseline_input_file = open(baseline_input_file)
    self.baseline_reader = csv_dict_reader(
      filter_comments(self.baseline_input_file)
    )

    if not self.preserve_whitespace:
      # Strips the fieldnames list in place.
      strip_list(self.baseline_reader.fieldnames)

    # Copy the fieldnames so later schema edits don't mutate the reader's list.
    self.variable_names = list(self.baseline_reader.fieldnames)
    # The 2nd header row maps each variable name to its units.
    # NOTE: `.next()` is Python 2 only; Python 3 would need `next(...)`.
    self.variable_units = self.baseline_reader.next()

    if not self.preserve_whitespace:
      strip_dict(self.variable_units)

    # Open the observed results file and wrap it in a comment-skipping CSV
    # reader.
    self.observed_input_file = open(observed_input_file)
    self.observed_reader = csv_dict_reader(
      filter_comments(self.observed_input_file)
    )

    if not self.preserve_whitespace:
      strip_list(self.observed_reader.fieldnames)

    # Both inputs must use the same variable-name schema (1st header row).
    assert self.variable_names == self.observed_reader.fieldnames, \
      "Observed results input file (`" + observed_input_file + "`) " + \
      "variable schema `" + str(self.observed_reader.fieldnames) + "` does " + \
      "not match the baseline results input file (`" + baseline_input_file + \
      "`) variable schema `" + str(self.variable_names) + "`."

    # Consume the observed file's units row (2nd header row).
    observed_variable_units = self.observed_reader.next()

    if not self.preserve_whitespace:
      strip_dict(observed_variable_units)

    # Both inputs must use the same units schema (2nd header row).
    assert self.variable_units == observed_variable_units, \
      "Observed results input file (`" + observed_input_file + "`) " + \
      "units schema `" + str(observed_variable_units) + "` does not " + \
      "match the baseline results input file (`" + baseline_input_file + \
      "`) units schema `" + str(self.variable_units) + "`."

    # `-` means write to stdout instead of a file.
    if output_file == "-":
      self.output_file = stdout
    else:
      self.output_file = open(output_file, "w")

    self.writer = csv_dict_writer(
      self.output_file, fieldnames = self.variable_names
    )

  def __enter__(self):
    """Called upon entering a `with` statement."""
    return self

  def __exit__(self, *args):
    """Called upon exiting a `with` statement."""
    if self.output_file is stdout:
      # Never close stdout; just drop the reference.
      self.output_file = None
    elif self.output_file is not None:
      self.output_file.__exit__(*args)

    self.baseline_input_file.__exit__(*args)
    self.observed_input_file.__exit__(*args)

  def append_variable(self, name, units):
    """Add a new variable to the output schema."""
    self.variable_names.append(name)
    self.variable_units.update({name : units})

    # Propagate the schema change to the CSV writer.
    self.writer.fieldnames = self.variable_names

  def insert_variable(self, idx, name, units):
    """Insert a new variable into the output schema at index `idx`."""
    self.variable_names.insert(idx, name)
    self.variable_units.update({name : units})

    # Propagate the schema change to the CSV writer.
    self.writer.fieldnames = self.variable_names

  def remove_variable(self, name):
    """Remove variable from the output schema and return a tuple of the variable
    index and the variable units.

    Raises:
      ValueError : If `name` is not in the output schema.
    """
    # Remove the variable from the names list, recording its position.
    (idx, item) = remove_from_list(self.variable_names, name)

    # Remove the variable's units entry as well.
    units = self.variable_units.pop(item)

    # Propagate the schema change to the CSV writer.
    self.writer.fieldnames = self.variable_names

    return (idx, units)

  # Input methods.

  def baseline(self):
    """Return an iterator to the baseline results input sequence."""
    return imap(lambda row: strip_dict(row), self.baseline_reader)

  def observed(self):
    """Return an iterator to the observed results input sequence."""
    return imap(lambda row: strip_dict(row), self.observed_reader)

  # Output methods.

  def write_header(self):
    """Write the header for the output CSV file."""
    # Write the 1st header row: variable names.
    self.writer.writeheader()

    # Write the 2nd header row: variable units.
    self.writer.writerow(self.variable_units)

  def write(self, d):
    """Write a record (a `dict`) to the output CSV file."""
    self.writer.writerow(d)
| |
|
| | |
| |
|
class dependent_variable_parser(object):
  """Parses a `--dependent-variable=AVG,STDEV,TRIALS` command line argument."""

  # A variable name is any non-empty run of characters containing no comma.
  variable_name_rule = r'[^,]+'

  # Three comma-separated variable names, each captured in its own group.
  dependent_variable_rule = r'(' + variable_name_rule + r')' \
                          + r',' \
                          + r'(' + variable_name_rule + r')' \
                          + r',' \
                          + r'(' + variable_name_rule + r')'

  # Compiled once at class-definition time and shared by all instances.
  engine = regex_compile(dependent_variable_rule)

  def __call__(self, s):
    """Parses the string `s` with the form "AVG,STDEV,TRIALS".

    Returns:
      A `measured_variable`.

    Raises:
      AssertionError : If parsing fails.
    """
    match = self.engine.match(s)

    assert match is not None, \
      "Dependent variable (-d) `" +s+ "` is invalid, the format is " + \
      "`AVG,STDEV,TRIALS`."

    quantity, uncertainty, sample_size = match.group(1, 2, 3)
    return measured_variable(quantity, uncertainty, sample_size)
| |
|
| | |
| |
|
class record_aggregator(object):
  """Consumes and combines records and represents the result as an `Iterable`
  sequence of `dict`s.

  It is `Iterable` and an `Iterator`.

  Attributes:
    dependent_variables (`list` of `measured_variable`s) :
      A list of dependent variables provided on the command line.
    control_variables (`list` of `str`s) :
      A list of control variables provided on the command line.
    dataset (`dict`) :
      A mapping of distinguishing (e.g. control + independent) values (`tuple`s
      of variable-quantity pairs) to `list`s of dependent values (`dict`s from
      variables to lists of cells).
    in_order_dataset_keys :
      A list of unique dataset keys (e.g. distinguishing variables) in order of
      appearance.
  """

  def __init__(self, dependent_variables, control_variables):
    """Construct a new `record_aggregator` object.

    Raises:
      AssertionError : If parsing of dependent variables fails.
    """
    self.dependent_variables = dependent_variables
    self.control_variables = control_variables

    self.dataset = {}

    # `deque` gives O(1) `popleft` when records are emitted in `next()`.
    self.in_order_dataset_keys = deque()

  def key_from_dict(self, d):
    """Create a hashable key from a `dict` by filtering out control variables
    and then converting the `dict` to a tuple.

    Raises:
      AssertionError : If any control variable was not found in `d`.
    """
    # NOTE(review): the docstring claims an AssertionError for missing control
    # variables, but `pop(var, None)` below silently ignores them — confirm
    # which behavior is intended.
    distinguishing_values = d.copy()

    # Filter out the control variables so they don't participate in the key.
    for var in self.control_variables:
      distinguishing_values.pop(var, None)

    # Delegates to the module-level `key_from_dict` helper.
    return key_from_dict(distinguishing_values)

  def append(self, record):
    """Add `record` to the dataset.

    Raises:
      ValueError : If any `str`-to-numeric conversions fail.
    """
    # The record is mutated: dependent-variable cells are popped out, leaving
    # only the distinguishing (control + independent) variables behind.

    # Maps each dependent-variable column name to a single-element list of
    # its (converted) cell value; lists so later records can be concatenated.
    dependent_values = {}

    # Sample size variables may be shared between dependent variables, so
    # they are not popped from the record until the end.
    sample_size_variables = []

    # Separate the dependent values from the distinguishing variables and
    # convert the strings to numeric types.
    for var in self.dependent_variables:
      quantity, uncertainty, sample_size, units = var.as_tuple()

      dependent_values[quantity] = [int_or_float(record.pop(quantity))]
      dependent_values[uncertainty] = [int_or_float(record.pop(uncertainty))]
      dependent_values[sample_size] = [int(record[sample_size])]

      sample_size_variables.append(sample_size)

    # Now remove the (possibly shared) sample size variables from the record.
    for var in sample_size_variables:
      # Default of `None` tolerates a variable already removed.
      record.pop(var, None)

    distinguishing_values = self.key_from_dict(record)

    if distinguishing_values in self.dataset:
      # These distinguishing values are already in the dataset: extend each
      # existing cell list with the new record's cells.
      # NOTE: `.iteritems()` is Python 2 only (`.items()` on Python 3).
      for var, columns in dependent_values.iteritems():
        self.dataset[distinguishing_values][var] += columns
    else:
      # First occurrence of these distinguishing values: store the cells and
      # remember the key's order of appearance.
      self.dataset[distinguishing_values] = dependent_values
      self.in_order_dataset_keys.append(distinguishing_values)

  def combine_dependent_values(self, dependent_values):
    """Takes a mapping of dependent variables to lists of cells and returns
    a new mapping with the cells combined.

    Raises:
      AssertionError : If class invariants were violated.
    """
    combined_dependent_values = dependent_values.copy()

    for var in self.dependent_variables:
      quantity, uncertainty, sample_size, units = var.as_tuple()

      quantities = dependent_values[quantity]
      uncertainties = dependent_values[uncertainty]
      sample_sizes = dependent_values[sample_size]

      # NOTE(review): `sample_size` here is a `str` (the variable NAME), so
      # `type(sample_size) is list` is always False and only the `else`
      # branch ever runs. This was probably meant to test `sample_sizes`
      # (the cell list) — confirm before changing.
      if type(sample_size) is list:
        # Sample size hasn't been combined yet.
        assert len(quantities) == len(uncertainties) \
          and len(uncertainties) == len(sample_sizes), \
          "Length of quantities list `(" + str(len(quantities)) + ")`, " + \
          "length of uncertainties list `(" + str(len(uncertainties)) + \
          "),` and length of sample sizes list `(" + str(len(sample_sizes)) + \
          ")` are not the same."
      else:
        # Another dependent variable that shares our sample size variable
        # already combined the sample sizes.
        assert len(quantities) == len(uncertainties), \
          "Length of quantities list `(" + str(len(quantities)) + ")` and " + \
          "length of uncertainties list `(" + str(len(uncertainties)) + \
          ")` are not the same."

      # Pack the parallel cell lists into `measured_value`s for combination.
      measured_values = []

      for i in range(len(quantities)):
        mv = measured_value(
          quantities[i], uncertainties[i], sample_sizes[i], units
        )

        measured_values.append(mv)

      # Combine the individual measurements into one aggregate measurement.
      combined_sample_size = combine_sample_size(
        measured_values
      )

      combined_arithmetic_mean = combine_arithmetic_mean(
        measured_values, combined_sample_size
      )

      combined_sample_standard_deviation = combine_sample_standard_deviation(
        measured_values, combined_sample_size, combined_arithmetic_mean
      )

      # NOTE(review): `sigdig` is computed but never used — the rounding of
      # the combined quantity/uncertainty appears to have been removed.
      sigdig = find_significant_digit(combined_sample_standard_deviation)

      combined_dependent_values[quantity] = combined_arithmetic_mean
      combined_dependent_values[uncertainty] = combined_sample_standard_deviation
      combined_dependent_values[sample_size] = combined_sample_size

    return combined_dependent_values

  def __iter__(self):
    """Return an iterator to the output sequence of separated distinguishing
    variables and dependent variables (a tuple of two `dict`s).

    This is a requirement for the `Iterable` protocol.
    """
    return self

  def records(self):
    """Return an iterator to the output sequence of CSV rows (`dict`s of
    variables to values).
    """
    return imap(unpack_tuple(lambda dist, dep: merge_dicts(dist, dep)), self)

  def next(self):
    """Produce the components of the next output record - a tuple of two
    `dict`s. The first `dict` is a mapping of distinguishing variables to
    distinguishing values, the second `dict` is a mapping of dependent
    variables to combined dependent values. Combining the two dicts forms a
    CSV row suitable for output.

    This is a requirement for the `Iterator` protocol.

    Raises:
      StopIteration : If there is no more output.
      AssertionError : If class invariants were violated.
    """
    # NOTE: `next` is the Python 2 iterator hook; Python 3 uses `__next__`.
    assert len(self.dataset.keys()) == len(self.in_order_dataset_keys), \
      "Number of dataset keys (`" + str(len(self.dataset.keys())) + \
      "`) is not equal to the number of keys in the ordering list (`" + \
      str(len(self.in_order_dataset_keys)) + "`)."

    if len(self.in_order_dataset_keys) == 0:
      raise StopIteration()

    # Emit records in their order of first appearance; both the ordering
    # list and the dataset are consumed as we go.
    raw_distinguishing_values = self.in_order_dataset_keys.popleft()
    distinguishing_values = dict(raw_distinguishing_values)

    dependent_values = self.dataset.pop(raw_distinguishing_values)

    combined_dependent_values = self.combine_dependent_values(dependent_values)

    return (distinguishing_values, combined_dependent_values)

  def __getitem__(self, distinguishing_values):
    """Produce the dependent component, a `dict` mapping dependent variables to
    combined dependent values, associated with `distinguishing_values`.

    Args:
      distinguishing_values (`dict`) :
        A `dict` mapping distinguishing variables to distinguishing values.

    Raises:
      KeyError : If `distinguishing_values` is not in the dataset.
    """
    # Unlike `next`, lookup does NOT consume the entry from the dataset.
    raw_distinguishing_values = self.key_from_dict(distinguishing_values)

    dependent_values = self.dataset[raw_distinguishing_values]

    combined_dependent_values = self.combine_dependent_values(dependent_values)

    return combined_dependent_values
| |
|
| | |
| |
|
args = process_program_arguments()

# When no dependent variables were specified, fall back to the default
# STL/Thrust walltime and throughput columns.
if not args.dependent_variables:
  args.dependent_variables = [
    "STL Average Walltime,STL Walltime Uncertainty,STL Trials",
    "STL Average Throughput,STL Throughput Uncertainty,STL Trials",
    "Thrust Average Walltime,Thrust Walltime Uncertainty,Thrust Trials",
    "Thrust Average Throughput,Thrust Throughput Uncertainty,Thrust Trials"
  ]

# Parse each dependent-variable specification into its components.
parse_dependent_variable = dependent_variable_parser()

dependent_variables = [
  parse_dependent_variable(spec) for spec in args.dependent_variables
]
| |
|
| | |
with io_manager(args.baseline_input_file,
                args.observed_input_file,
                args.output_file,
                args.preserve_whitespace) as iom:
  # One aggregator per input file; each groups rows by the distinguishing
  # variables and combines repeated measurements of the dependent variables.
  baseline_ra = record_aggregator(dependent_variables, args.control_variables)
  observed_ra = record_aggregator(dependent_variables, args.control_variables)

  # Suffixes used to label per-file copies of each variable in the output.
  baseline_suffix = " - `{0}`".format(
    args.baseline_input_file
  )
  observed_suffix = " - `{0}`".format(
    args.observed_input_file
  )

  for var in dependent_variables:
    # Remove the quantity variable from the output header; when all
    # variables are requested, re-insert per-file labelled copies at the
    # same position (observed first so baseline ends up before it).
    (quantity_idx, quantity_units) = iom.remove_variable(var.quantity)

    if args.output_all_variables:
      iom.insert_variable(
        quantity_idx, var.quantity + observed_suffix, quantity_units
      )
      iom.insert_variable(
        quantity_idx, var.quantity + baseline_suffix, quantity_units
      )

    # Same treatment for the uncertainty variable.
    (uncertainty_idx, uncertainty_units) = iom.remove_variable(var.uncertainty)

    if args.output_all_variables:
      iom.insert_variable(
        uncertainty_idx, var.uncertainty + observed_suffix, uncertainty_units
      )
      iom.insert_variable(
        uncertainty_idx, var.uncertainty + baseline_suffix, uncertainty_units
      )

    try:
      # The sample-size variable may be absent, in which case
      # `remove_variable` raises ValueError.
      (sample_size_idx, sample_size_units) = iom.remove_variable(var.sample_size)

      if args.output_all_variables:
        iom.insert_variable(
          sample_size_idx, var.sample_size + observed_suffix, sample_size_units
        )
        iom.insert_variable(
          sample_size_idx, var.sample_size + baseline_suffix, sample_size_units
        )
    except ValueError:
      # Not an error: this dependent variable has no sample-size column.
      pass

  # Control variables never appear in the output.
  for var in args.control_variables:
    iom.remove_variable(var)

  # Suffixes for the change columns appended to the output header.
  absolute_change_suffix = " - Change (`{0}` - `{1}`)".format(
    args.observed_input_file, args.baseline_input_file
  )

  percent_change_suffix = " - % Change (`{0}` to `{1}`)".format(
    args.observed_input_file, args.baseline_input_file
  )

  for var in dependent_variables:
    iom.append_variable(var.quantity + absolute_change_suffix, var.units)
    iom.append_variable(var.uncertainty + absolute_change_suffix, var.units)
    iom.append_variable(var.quantity + percent_change_suffix, "")
    iom.append_variable(var.uncertainty + percent_change_suffix, "")

  # Load both input files into their aggregators.
  for record in iom.baseline():
    baseline_ra.append(record)

  for record in iom.observed():
    observed_ra.append(record)

  iom.write_header()

  # Compare each combined observed record against its baseline counterpart.
  for distinguishing_values, observed_dependent_values in observed_ra:
    try:
      baseline_dependent_values = baseline_ra[distinguishing_values]
    except KeyError:
      assert False, \
        "Distinguishing value `" + \
        str(baseline_ra.key_from_dict(distinguishing_values)) + \
        "` was not found in the baseline results."

    statistically_significant_change = False

    record = distinguishing_values.copy()

    for var in dependent_variables:
      baseline_quantity = baseline_dependent_values[var.quantity]
      baseline_uncertainty = baseline_dependent_values[var.uncertainty]
      baseline_sample_size = baseline_dependent_values[var.sample_size]

      observed_quantity = observed_dependent_values[var.quantity]
      observed_uncertainty = observed_dependent_values[var.uncertainty]
      observed_sample_size = observed_dependent_values[var.sample_size]

      # Absolute and percent change (with propagated uncertainties) of the
      # observed value relative to the baseline value.
      (abs_change, abs_change_unc, per_change, per_change_unc) = \
        percent_change_uncertainty(
          baseline_quantity, baseline_uncertainty,
          observed_quantity, observed_uncertainty
        )

      # NOTE(review): the significant digits computed in the two blocks
      # below are currently unused - presumably a rounding step was removed;
      # confirm intent before deleting them.
      try:
        abs_change_sigdig = max(
          find_significant_digit(abs_change),
          find_significant_digit(abs_change_unc),
        )
      # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
      # not swallowed; the significant digit is best-effort only (e.g.
      # `find_significant_digit` may fail for zero values).
      except Exception:
        pass

      try:
        per_change_sigdig = max(
          find_significant_digit(per_change),
          find_significant_digit(per_change_unc)
        )
      # Narrowed from a bare `except:` for the same reason as above.
      except Exception:
        pass

      if args.output_all_variables:
        record[var.quantity + baseline_suffix] = baseline_quantity
        record[var.uncertainty + baseline_suffix] = baseline_uncertainty
        record[var.sample_size + baseline_suffix] = baseline_sample_size
        record[var.quantity + observed_suffix] = observed_quantity
        record[var.uncertainty + observed_suffix] = observed_uncertainty
        record[var.sample_size + observed_suffix] = observed_sample_size

      record[var.quantity + absolute_change_suffix] = abs_change
      record[var.uncertainty + absolute_change_suffix] = abs_change_unc
      record[var.quantity + percent_change_suffix] = per_change
      record[var.uncertainty + percent_change_suffix] = per_change_unc

      # A change counts as statistically significant when the uncertainty
      # ranges do not overlap AND the percent change meets the threshold.
      overlap = ranges_overlap_uncertainty(
        baseline_quantity, baseline_uncertainty,
        observed_quantity, observed_uncertainty
      )
      if not overlap and per_change >= args.change_threshold:
        statistically_significant_change = True

    # Emit the row unless it is insignificant and filtering is enabled.
    if args.output_all_datapoints or statistically_significant_change:
      iom.write(record)
| |
|
| |
|