# coding: utf-8
"""Rewrite a profiler summary file as an aligned table with total rows.

The input is expected to be a fixed-width text summary whose first line
contains ``command: <cmd>`` and whose data rows start on the fifth line.
The section labels handled here (``GPU activities:`` / ``API calls:``)
suggest nvprof-style output -- NOTE(review): confirm against the real
producer of these files.

Running the script overwrites the input file in place with the header
command followed by the reformatted table.
"""
import argparse
from io import StringIO

import numpy as np
import pandas as pd

parser = argparse.ArgumentParser(description='Prof parser')
parser.add_argument('file', type=str)

# Column names of the parsed table, in file order.
_COLUMNS = ['Type', 'Time (%)', 'Time (ms)', 'Calls', 'Avg', 'Min', 'Max',
            'Name']

# Character ranges of the fixed-width fields in a data row.  The single
# character between consecutive ranges is the column separator and is dropped.
_FIELD_SLICES = (
    slice(None, 16),
    slice(17, 25),
    slice(26, 35),
    slice(36, 45),
    slice(46, 55),
    slice(56, 65),
    slice(66, 75),
    slice(76, None),
)

# Unit suffix -> factor converting to milliseconds.  Order matters: the
# two-character suffixes must be tested before the bare 's'.
_UNIT_TO_MS = (('ms', 1.0), ('us', 1e-3), ('ns', 1e-6), ('s', 1e3))


def strip(text):
    """Return ``text`` without surrounding whitespace.

    Values that have no ``strip`` method (e.g. ``None`` or numbers) are
    returned unchanged.
    """
    try:
        return text.strip()
    except AttributeError:
        return text


def convert(x):
    """Convert a duration string such as ``'1.5us'`` to milliseconds.

    Recognised suffixes are ``ms``, ``us``, ``ns`` and ``s``.  Anything else
    (including an empty string or a non-string value) is returned unchanged.

    Raises:
        ValueError: if a recognised suffix is preceded by a non-numeric
            prefix.
    """
    if not isinstance(x, str):
        return x
    for suffix, factor in _UNIT_TO_MS:
        if x.endswith(suffix):
            return factor * float(x[:-len(suffix)])
    return x


def convert_timep(x):
    """Convert a percentage string such as ``'12.5%'`` to a float.

    The last character is dropped unconditionally before conversion.
    """
    return float(x[:-1])


def _total_row(label, frame):
    """Build a summary row for ``frame`` labelled ``label``.

    The numeric columns are summed; the remaining columns are left blank.
    """
    return pd.Series({
        'Type': label,
        'Time (%)': frame['Time (%)'].sum(),
        'Time (ms)': frame['Time (ms)'].sum(),
        'Calls': frame['Calls'].sum(),
        'Avg': '',
        'Min': '',
        'Max': '',
        'Name': '',
    })


def pandify(data):
    """Parse ``;``-separated profiler rows and append total rows.

    Args:
        data: text with one row per line and eight ``;``-separated fields
            (see ``prep_file``).

    Returns:
        A DataFrame holding the ``GPU activities:`` rows, a ``Total:`` row,
        a ``Total (no mem):`` row (excluding rows whose name contains
        ``CUDA memcpy``), the ``API calls:`` rows and their ``Total:`` row.
    """
    converters = {i: strip for i in range(len(_COLUMNS))}
    df = pd.read_csv(StringIO(data), names=_COLUMNS, skiprows=0, sep=';',
                     converters=converters)

    # Only the first row of each section carries its label; blank out the
    # empty ones so the label can be forward-filled onto the following rows.
    # (np.nan / DataFrame.ffill replace the removed np.NaN alias and the
    # deprecated fillna(method='ffill').)
    df.loc[df['Type'] == '', 'Type'] = np.nan
    df = df.ffill()

    df['Time (ms)'] = df['Time (ms)'].apply(convert)
    df['Time (%)'] = df['Time (%)'].apply(convert_timep)
    df['Calls'] = df['Calls'].apply(int)

    gpu = df[df['Type'] == 'GPU activities:']
    api = df[df['Type'] == 'API calls:']
    is_memcpy = gpu.Name.str.contains('CUDA memcpy')

    gpu_total = _total_row('Total:', gpu)
    gpu_total_no_mem = _total_row('Total (no mem):', gpu.loc[~is_memcpy])
    api_total = _total_row('Total:', api)

    # The total rows are assembled via concat(axis=1).T so that they stay
    # object-typed, which keeps the rendered number formatting unchanged.
    return pd.concat(
        [
            gpu,
            pd.concat([gpu_total, gpu_total_no_mem], axis=1).T,
            api,
            pd.concat([api_total], axis=1).T,
        ],
        ignore_index=True,
        axis=0,
    )


def prep_file(file):
    """Read a profiler summary and turn its table into ``;``-separated text.

    Args:
        file: path of the summary file.

    Returns:
        A ``(data, header)`` tuple: ``data`` is the table body with one
        ``;``-separated row per line, ``header`` is whatever follows
        ``'command: '`` on the first line (including its line ending).

    Raises:
        IndexError: if the file is empty or its first line does not contain
            ``'command: '``.
    """
    with open(file, 'r') as f:
        lines = f.readlines()
    header = lines[0].split('command: ')[1]

    rows = []
    # The first four lines (three preamble lines and the column header) are
    # not part of the table body.
    for line in lines[4:]:
        # Drop the line ending before slicing so that short lines cannot
        # leak a newline into an early field and merge with the next row.
        line = line.rstrip('\r\n')
        if not line.strip():
            continue
        rows.append(';'.join(line[s] for s in _FIELD_SLICES) + '\n')
    return ''.join(rows), header


def main():
    """Reformat the file named on the command line, overwriting it."""
    args = parser.parse_args()
    data, header = prep_file(args.file)
    df = pandify(data)
    # The input is only reopened for writing once parsing has succeeded.
    with open(args.file, 'w') as f:
        f.write(header)
        f.write(df.to_string(index=False))


if __name__ == '__main__':
    main()