Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| import glob | |
| import gradio as gr | |
| import tempfile | |
| import os | |
| import sr100_model_compiler | |
| #from huggingface_hub import HfApi | |
| #from huggingface_hub import whoami | |
| # Get top-level authorizations | |
| #oauth_info = {'username' : None, 'token' : None} | |
| #api = HfApi() | |
| # | |
| #def get_oauth_info(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str: | |
| # global oauth_info | |
| # global api | |
| # print(f'profile = {profile}') | |
| # if profile is None: | |
| # oauth_info['username'] = None | |
| # oauth_info['token'] = None | |
| # return "Please login to the Huggingface with login button" | |
| # else: | |
| # #print(f'Testing {profile} for username') | |
| # oauth_info['username'] = profile.username | |
| # oauth_info['token'] = oauth_token.token | |
| # org_names = [org["name"] for org in whoami(oauth_token.token)["orgs"]] | |
| # | |
| # return print(f'{profile.username}: {org_names}') | |
def compile_model(model_name, vmem_value, lpmem_value):
    """Compile a TFLite model for the SR100 NPU and report fit/performance.

    Args:
        model_name: Path to the ``.tflite`` model file to compile.
        vmem_value: Available VMEM SRAM in kB (from the UI slider).
        lpmem_value: Available LPMEM SRAM in kB (from the UI slider).

    Returns:
        A status string using ``<br>`` line breaks (rendered by gr.Markdown):
        either a failure message with the compiler log, or a summary of
        clock, inference time, memory footprint, and SRAM utilization.
    """
    # Scratch directory for the compiler run.
    # NOTE(review): out_dir is never passed to the optimizer — confirm the
    # compiler writes its artifacts elsewhere, or wire it through.
    with tempfile.TemporaryDirectory() as out_dir:
        print(f"Created temporary directory: {out_dir}")
        # Sliders are in kB; the optimizer expects bytes.
        vmem_size_limit = int(vmem_value * 1000)
        lpmem_size_limit = int(lpmem_value * 1000)
        # Run the model fitter
        success, results = sr100_model_compiler.sr100_model_optimizer(
            model_file=model_name,
            vmem_size_limit=vmem_size_limit,
            lpmem_size_limit=lpmem_size_limit
        )
        print(results)
        if results['cycles_npu'] == 0:
            # The optimizer could not map the model at all; surface the log.
            output_text = 'FAILURE cannot optimize TFLITE model for SR100'
            output_text += '<br><br>Compiler log<br>---------------------------------------'
            output_text += f'<br>{results["vela_log"]}'
            return output_text
        if success:
            output_text = 'SUCCESS, model fits on SR100'
        else:
            output_text = 'FAILURE, model does not fit on SR100'
        # Convert raw byte/Hz/second figures into display units.
        weights_size = results['weights_size'] / 1000.0
        arena_size = results['arena_cache_size'] / 1000.0
        clock = results['core_clock'] / 1.0e6
        infer_time = results['inference_time'] * 1000.0
        infer_fps = results['inference_per_sec']
        output_text += f'<br>clock = {clock:0.1f} MHz'
        output_text += f'<br>inference time = {infer_time:0.1f}ms - {infer_fps:0.1f} fps'
        output_text += f'<br>Arena cache size = {arena_size:0.3f} kB'
        output_text += f'<br>Model size = {weights_size:0.3f} kB'
        output_text += f'<br>model loc = {results["model_loc"]}'
        output_text += f'<br>System config = {results["system_config"]}'
        vmem_size = results['vmem_size'] / 1000.0
        lpmem_size = results['lpmem_size'] / 1000.0
        vmem_size_limit = results['vmem_size_limit'] / 1000.0
        lpmem_size_limit = results['lpmem_size_limit'] / 1000.0
        # Guard against division by zero: the UI sliders allow a 0 kB limit.
        vmem_limit_bytes = results['vmem_size_limit']
        lpmem_limit_bytes = results['lpmem_size_limit']
        vmem_perc = (results['vmem_size'] * 100.0 / vmem_limit_bytes
                     if vmem_limit_bytes else 0.0)
        lpmem_perc = (results['lpmem_size'] * 100.0 / lpmem_limit_bytes
                      if lpmem_limit_bytes else 0.0)
        output_text += f'<br>vmem_size = {vmem_size:0.3f} kB ({vmem_perc:0.1f}% of {vmem_size_limit:0.3f} kB)'
        output_text += f'<br>lpmem_size = {lpmem_size:0.3f} kB ({lpmem_perc:0.1f}% of {lpmem_size_limit:0.3f} kB)'
        return output_text
# Discover all bundled TFLite models; sort so the dropdown order is stable
# (glob.glob returns entries in arbitrary filesystem order).
model_choices = sorted(glob.glob('models/*.tflite'))
# Build the Gradio UI: memory-limit sliders, model picker, and a compile
# button that renders the compiler report into a Markdown panel.
with gr.Blocks() as demo:
    #gr.LoginButton()
    text1 = gr.Markdown("SR100 Model Compiler - Compile a tflite model to SR100")
    #user_text = gr.Markdown("")
    # Setup model inputs
    with gr.Row():
        # step=1.024 kB presumably matches a 1 KiB hardware granularity — TODO confirm
        vmem_slider = gr.Slider(minimum=0, maximum=1536, step=1.024, label="Set total VMEM SRAM size available in kB", value=1536.0)
        lpmem_slider = gr.Slider(minimum=0, maximum=1536, step=1.024, label="Set total LPMEM SRAM size in kB", value=1536.0)
    # Setup model compile
    # NOTE(review): the default value assumes models/hello_world.tflite exists;
    # if the glob found nothing this default is not among the choices — verify.
    model_dropdown = gr.Dropdown(
        label="Select a model",
        value='models/hello_world.tflite',
        choices=model_choices
    )
    # Run the compile
    compile_btn = gr.Button("Compile Model")
    compile_text = gr.Markdown("Waiting for model results")
    # Compute options
    compile_btn.click(compile_model, inputs=[model_dropdown, vmem_slider, lpmem_slider], outputs=[compile_text])
    #demo.load(get_oauth_info, inputs=None, outputs=user_text)
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()