# app.py - SR100 Model Compiler Gradio Space
# (uploaded by dgarrett-synaptics; revision 89fa6b1, 4.47 kB)
import glob
import gradio as gr
import tempfile
import os
import sr100_model_compiler
#from huggingface_hub import HfApi
#from huggingface_hub import whoami
# Get top-level authorizations
#oauth_info = {'username' : None, 'token' : None}
#api = HfApi()
#
#def get_oauth_info(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
# global oauth_info
# global api
# print(f'profile = {profile}')
# if profile is None:
# oauth_info['username'] = None
# oauth_info['token'] = None
# return "Please login to the Huggingface with login button"
# else:
# #print(f'Testing {profile} for username')
# oauth_info['username'] = profile.username
# oauth_info['token'] = oauth_token.token
# org_names = [org["name"] for org in whoami(oauth_token.token)["orgs"]]
#
# return print(f'{profile.username}: {org_names}')
def compile_model(model_name, vmem_value, lpmem_value):
    """Compile a TFLITE model for the SR100 NPU and report whether it fits.

    Args:
        model_name: Path to the ``.tflite`` model file to compile.
        vmem_value: Available VMEM SRAM in kB (from the UI slider).
        lpmem_value: Available LPMEM SRAM in kB (from the UI slider).

    Returns:
        A markdown/HTML string (``<br>``-separated) summarizing success or
        failure plus the compiler's size and timing results.
    """
    # Create a temporary directory for the compiler run.
    # NOTE(review): out_dir is never passed to the optimizer — presumably the
    # compiler writes to the CWD or this is vestigial; confirm before removing.
    with tempfile.TemporaryDirectory() as out_dir:
        print(f"Created temporary directory: {out_dir}")
        # Sliders are in decimal kB; the optimizer expects bytes.
        vmem_size_limit = int(vmem_value * 1000)
        lpmem_size_limit = int(lpmem_value * 1000)
        # Run the model fitter
        success, results = sr100_model_compiler.sr100_model_optimizer(
            model_file=model_name,
            vmem_size_limit=vmem_size_limit,
            lpmem_size_limit=lpmem_size_limit
        )
        print(results)
        # Accumulate report fragments and join once at the end instead of
        # repeated string concatenation.
        parts = []
        if results['cycles_npu'] == 0:
            # Zero NPU cycles means the compiler could not map the model at all;
            # surface the raw compiler log so the user can see why.
            parts.append('FAILURE cannot optimize TFLITE model for SR100')
            parts.append('<br><br>Compiler log<br>---------------------------------------')
            parts.append(f'<br>{results["vela_log"]}')
        else:
            if success:
                parts.append('SUCCESS, model fits on SR100')
            else:
                parts.append('FAILURE, model does not fit on SR100')
            # Convert raw units for display: bytes -> kB, Hz -> MHz, s -> ms.
            weights_size = results['weights_size'] / 1000.0
            arena_size = results['arena_cache_size'] / 1000.0
            clock = results['core_clock'] / 1.0e6
            infer_time = results['inference_time'] * 1000.0
            infer_fps = results['inference_per_sec']
            parts.append(f'<br>clock = {clock:0.1f} MHz')
            parts.append(f'<br>inference time = {infer_time:0.1f}ms - {infer_fps:0.1f} fps')
            parts.append(f'<br>Arena cache size = {arena_size:0.3f} kB')
            parts.append(f'<br>Model size = {weights_size:0.3f} kB')
            parts.append(f'<br>model loc = {results["model_loc"]}')
            parts.append(f'<br>System config = {results["system_config"]}')
            vmem_size = results['vmem_size'] / 1000.0
            lpmem_size = results['lpmem_size'] / 1000.0
            vmem_size_limit = results['vmem_size_limit'] / 1000.0
            lpmem_size_limit = results['lpmem_size_limit'] / 1000.0
            # Guard against ZeroDivisionError: the UI sliders allow a 0 kB limit.
            vmem_perc = (results['vmem_size'] * 100.0 / results['vmem_size_limit']
                         if results['vmem_size_limit'] else 0.0)
            lpmem_perc = (results['lpmem_size'] * 100.0 / results['lpmem_size_limit']
                          if results['lpmem_size_limit'] else 0.0)
            parts.append(f'<br>vmem_size = {vmem_size:0.3f} kB ({vmem_perc:0.1f}% of {vmem_size_limit:0.3f} kB)')
            parts.append(f'<br>lpmem_size = {lpmem_size:0.3f} kB ({lpmem_perc:0.1f}% of {lpmem_size_limit:0.3f} kB)')
        return ''.join(parts)
# Get all available models
# Discover every bundled .tflite model under models/ to populate the dropdown.
# NOTE(review): glob order is unsorted/OS-dependent — sort if stable UI order matters.
model_choices = glob.glob('models/*.tflite')
# Build the Gradio UI: two memory-limit sliders, a model dropdown, and a
# compile button whose output is rendered into a Markdown component.
with gr.Blocks() as demo:
    #gr.LoginButton()
    text1 = gr.Markdown("SR100 Model Compiler - Compile a tflite model to SR100")
    #user_text = gr.Markdown("")
    # Setup model inputs
    with gr.Row():
        # Step of 1.024 kB — presumably so slider stops land on 1024-byte
        # (binary KiB) boundaries while displaying decimal kB; confirm intent.
        vmem_slider = gr.Slider(minimum=0, maximum=1536, step=1.024, label="Set total VMEM SRAM size available in kB", value=1536.0)
        lpmem_slider = gr.Slider(minimum=0, maximum=1536, step=1.024, label="Set total LPMEM SRAM size in kB", value=1536.0)
    # Setup model compile
    # NOTE(review): default assumes models/hello_world.tflite exists in the repo.
    model_dropdown = gr.Dropdown(
        label="Select an model",
        value='models/hello_world.tflite',
        choices=model_choices
    )
    # Run the compile
    compile_btn = gr.Button("Compile Model")
    compile_text = gr.Markdown("Waiting for model results")
    # Compute options
    # Clicking the button runs compile_model and writes its string into compile_text.
    compile_btn.click(compile_model, inputs=[model_dropdown, vmem_slider, lpmem_slider], outputs=[compile_text])
    #demo.load(get_oauth_info, inputs=None, outputs=user_text)
# Launch the Gradio server when executed as a script (the standard
# HuggingFace Spaces entry point).
if __name__ == "__main__":
    demo.launch()