Spaces:
Running
Running
| # AUTOGENERATED! DO NOT EDIT! File to edit: 03a_image_archs.ipynb. | |
| # %% auto 0 | |
| __all__ = ['result_options', 'activity_options', 'col_options', 'subs', 'is_fullmatch', 'drop_tf', 'result_option', | |
| 'activity_option', 'col_option', 'size_col_option', 'title_dict', 'df', 'fig', 'col1', 'col2', 'get_results', | |
| 'get_integrated_data', 'get_filtered_data', 'get_data', 'plot_selection'] | |
| # %% 03a_image_archs.ipynb 11 | |
| import pandas as pd | |
| import plotly.express as px | |
| from fastcore.all import * | |
| import streamlit as st | |
| # %% 03a_image_archs.ipynb 12 | |
# Configure the browser-tab title and use the full page width for the app.
st.set_page_config(
    layout="wide",
    page_title="Which Image Model is best?",
)
| # %% 03a_image_archs.ipynb 14 | |
def get_results(result_option='original'):
    """Download the timm ImageNet results table as a DataFrame.

    Args:
        result_option: ``'original'`` loads ``results-imagenet.csv``; any
            other value loads ``results-imagenet-real.csv``.

    Returns:
        The CSV contents with two changes: ``model_org`` holds the unmodified
        ``model`` value, and ``model`` is truncated at its first ``'.'``.
    """
    suffix = "" if result_option == 'original' else "-real"
    url_results = f"https://github.com/huggingface/pytorch-image-models/raw/main/results/results-imagenet{suffix}.csv"
    df_results = pd.read_csv(url_results)
    # Preserve the full name before trimming it (plot_selection shows it on hover).
    df_results['model_org'] = df_results['model']
    # Keep only the part before the first '.'; presumably this makes the name
    # match the `model` column of the benchmark CSV it is later merged with.
    df_results['model'] = df_results['model'].str.split('.').str[0]
    return df_results
| # %% 03a_image_archs.ipynb 16 | |
def get_integrated_data(activity_option, result_option):
    """Merge accuracy results with benchmark timings and derive model families.

    Args:
        activity_option: Interpolated into the benchmark CSV file name
            (the script in this file uses ``'train'`` and ``'infer'``).
        result_option: Forwarded to ``get_results``.

    Returns:
        The merge of both tables on ``model`` with two added columns,
        ``is_tensorflow_model`` and ``family``, excluding every row whose
        model name ends in ``'gn'``.
    """
    df_results = get_results(result_option)
    url_benchmark = f"https://github.com/huggingface/pytorch-image-models/raw/main/results/benchmark-{activity_option}-amp-nhwc-pt112-cu113-rtx3090.csv"
    df_benchmark = pd.read_csv(url_benchmark)
    df_integrated = df_results.merge(df_benchmark, on='model')
    model = df_integrated['model']

    # A model counts as a TensorFlow port when its first '_'-separated token is 'tf'.
    df_integrated['is_tensorflow_model'] = model.str.split('_').str[0] == 'tf'

    # Family = leading run of lowercase letters (optionally ending in v2/v3)
    # up to the first digit, underscore, or end of name, after stripping the
    # known prefixes in this order. Names that do not match yield NaN.
    base_name = model
    for prefix in ("tf_", "legacy_", "nf_"):
        base_name = base_name.str.removeprefix(prefix)
    df_integrated['family'] = base_name.str.extract(r'^([a-z]+?(?:v2|v3)?)(?:\d|_|$)')[0].values

    # Models whose name contains 'in22' get their own family.
    is_in22 = model.str.contains('in22')
    df_integrated.loc[is_in22, 'family'] = df_integrated.loc[is_in22, 'family'] + "_in22"

    # Names matching 'resnet.*d' get a 'd' suffix on the family.
    is_resnet_d = model.str.contains('resnet.*d')
    df_integrated.loc[is_resnet_d, 'family'] = df_integrated.loc[is_resnet_d, 'family'] + "d"

    # Drop names ending in 'gn' (group-norm models). Open question carried over
    # from the original author: why were these excluded from the analysis?
    return df_integrated[~model.str.endswith('gn')]
| # %% 03a_image_archs.ipynb 18 | |
def get_filtered_data(df_integrated, subs, is_fullmatch=False, drop_tf=True):
    """Select rows of the integrated table by a regular expression.

    Args:
        df_integrated: DataFrame with ``model``, ``family`` and
            ``is_tensorflow_model`` columns.
        subs: Regular expression; when empty/falsy no regex filter is applied.
        is_fullmatch: If true, ``subs`` must match the whole ``family`` value;
            otherwise it is searched for anywhere in ``model``.
        drop_tf: If true, rows flagged ``is_tensorflow_model`` are removed first.

    Returns:
        The filtered DataFrame (a selection of the input rows).
    """
    if drop_tf:
        df_integrated = df_integrated[~df_integrated.is_tensorflow_model]
    if not subs:
        return df_integrated
    # na=False: a missing family/model value counts as "no match" instead of
    # producing a NaN mask, which boolean indexing rejects with ValueError.
    if is_fullmatch:
        keep = df_integrated.family.str.fullmatch(subs, na=False)
    else:
        keep = df_integrated.model.str.contains(subs, na=False)
    return df_integrated[keep]
| # %% 03a_image_archs.ipynb 19 | |
def get_data(col_option, activity_option, result_option, subs, is_fullmatch=False, drop_tf=True):
    """Load, filter and augment the data used for plotting.

    Args:
        col_option: Column suffix; the column read is
            ``f"{activity_option}_{col_option}"``.
        activity_option: Forwarded to ``get_integrated_data`` and used as the
            column prefix.
        result_option: Forwarded to ``get_integrated_data``.
        subs: Regular expression forwarded to ``get_filtered_data``.
        is_fullmatch: Forwarded to ``get_filtered_data``.
        drop_tf: Forwarded to ``get_filtered_data``.

    Returns:
        The filtered DataFrame with an extra ``secs`` column holding the
        reciprocal of the selected column.

    Raises:
        KeyError: If the selected column is not present in the data.
    """
    col = "_".join([activity_option, col_option])
    df_integrated = get_integrated_data(activity_option, result_option)
    df_integrated = get_filtered_data(df_integrated, subs, is_fullmatch=is_fullmatch, drop_tf=drop_tf)
    # assign() returns a new frame, so nothing is written onto the filtered
    # slice (avoids pandas' SettingWithCopyWarning).
    return df_integrated.assign(secs=1. / df_integrated[col])
| # %% 03a_image_archs.ipynb 20 | |
def plot_selection(df, title, col_option, activity_option, w=1000, h=800):
    """Build the log-log scatter figure of ``top1`` against ``secs``.

    Args:
        df: DataFrame providing ``secs``, ``top1``, ``family``, ``model_org``
            and the sizing column.
        title: Figure title (passed through to Plotly unchanged).
        col_option: Suffix of the sizing column; the column used is
            ``activity_option + "_" + col_option``.
        activity_option: Prefix of the sizing column.
        w: Figure width passed to Plotly.
        h: Figure height passed to Plotly.

    Returns:
        The figure returned by ``px.scatter``.
    """
    size_col = "_".join((activity_option, col_option))
    scatter_kwargs = dict(
        # Axes: both logarithmic.
        x="secs",
        y='top1',
        log_x=True,
        log_y=True,
        # Marker appearance: coloured by family, sized by the squared sizing column.
        color="family",
        size=df[size_col] ** 2,
        # OLS trendline with the x values log-transformed.
        trendline="ols",
        trendline_options={'log_x': True},
        # Hover labels.
        hover_name='model_org',
        hover_data=[size_col],
        # Layout.
        title=title,
        width=w,
        height=h,
    )
    return px.scatter(df, **scatter_kwargs)
| # %% 03a_image_archs.ipynb 21 | |
# Choices offered for each setting.
result_options = ['original', 'real']
activity_options = ['train', 'infer']
col_options = ['samples_per_sec', 'step_time', 'batch_size', 'img_size', 'gmacs', 'macts']

# Default filter: `subs` is matched against the whole `family` value because
# is_fullmatch is True. (With is_fullmatch=False the pattern is searched in the
# model name instead, e.g. '^re[sg]netd?|beit|convnext|levit|efficient|vit|vgg|swin'.)
subs = 'levit|resnetd?|regnetx|vgg|convnext.*|efficientnetv2|beit|swin'
is_fullmatch = True
drop_tf = False

# Default selections, taken by position from the option lists above.
result_option = result_options[0]
activity_option = activity_options[1]
col_option = col_options[0]
size_col_option = col_options[3]

# Display title for each activity option.
title_dict = dict(zip(activity_options, ['Training', "Inference"]))

# Build the data and figure for the default selections.
df = get_data(col_option, activity_option, result_option, subs, is_fullmatch=is_fullmatch, drop_tf=drop_tf)
fig = plot_selection(df, title_dict[activity_option], size_col_option, activity_option)
| # %% 03a_image_archs.ipynb 25 | |
st.title("Which Image Model is best?")
col1, col2 = st.columns([1,3])

# Narrow left column: the settings widgets, in display order.
col1.header("Settings")
result_option = col1.selectbox("Please choose dataset", result_options)
activity_option = col1.selectbox("Please choose activity", activity_options)
subs = col1.text_input("Subs", value='levit|resnetd?|regnetx|vgg|convnext.*|efficientnetv2|beit|swin')
is_fullmatch = col1.checkbox("Is fullmatch", value=True)
drop_tf = col1.checkbox("Drop Tensorflow Models", value=False)
col_option = col1.selectbox("Please choose col_option", col_options)
size_col_option = col1.selectbox("Please choose sizing col_option", col_options, index=3)

# Rebuild the data and figure from the current widget values.
title_dict = {
    activity: label
    for activity, label in zip(activity_options, ['Training', "Inference"])
}
df = get_data(col_option, activity_option, result_option, subs, is_fullmatch=is_fullmatch, drop_tf=drop_tf)
fig = plot_selection(df, None, size_col_option, activity_option, h=500, w=1000)

# Wide right column: heading for the chosen activity, then the plot.
col2.header(title_dict[activity_option])
col2.plotly_chart(fig, use_container_width=True, height=500)