# NOTE(review): the original file began with "Spaces:" / "Build error" lines —
# page-scrape residue from a Hugging Face Space listing, not source code.
"""GemGPT — a Gradio UI for chatting with Gemma models.

Builds a `gr.Blocks` app (`app`) with model/device/quantization selectors,
generation-parameter sliders, and a Generate button wired to
`modules.pipeline.chatbot`.
"""
import torch
import gradio as gr

from modules.log import logly
from modules.pipeline import chatbot

# Auto-detect the default inference device; the user can still override it
# in the Device dropdown below.
if torch.cuda.is_available():
    default_device = "cuda"
    logly.info("CUDA is available and using GPU instead.")
else:
    default_device = "cpu"
    logly.info("CUDA is not available and using CPU instead.")

with gr.Blocks(theme=gr.themes.Soft(), title="GemGPT") as app:
    # Favicon plus page headers.
    gr.HTML("<link rel='icon' href='https://gemmamodels.com/wp-content/uploads/2021/10/cropped-gem-32x32.png' sizes='32x32' />")
    gr.HTML("<h1 style='text-align: center;'>GemGPT</h1>")
    gr.HTML("<h3 style='text-align: center;'>Talk to GemGPT, Powered by Gemma Models</h3>")

    with gr.Row():
        gr.Markdown("Select a model to run. Gemma-2b-it is a smaller model that is faster and uses less memory. Gemma-7b-it is a larger model that is slower and uses more memory.")
        model_options = gr.Dropdown(
            label="Select a Model",
            choices=["muhammadfiaz/gemma-2b-it", "google/gemma-7b-it"],
            value="muhammadfiaz/gemma-2b-it",
        )
    with gr.Row():
        gr.Markdown("Select the device to run the model on. If you are running this on a CPU, select CPU. If you are running this on a GPU, select CUDA.")
        # Named `device_options` (the original rebound the name `device`,
        # shadowing the auto-detected string above with the component object).
        device_options = gr.Dropdown(label="Device", choices=["cuda", "cpu"], value=default_device)
    with gr.Row():
        gr.Markdown("Output Generated by Selected Model:")
    with gr.Row():
        outputs = gr.Textbox(lines=15, label="Output", value="")
    with gr.Row():
        gr.Markdown("Input your Prompt and click Generate to get a response.")
    with gr.Row():
        inputs = gr.Textbox(lines=2, label="Prompt", placeholder="Type here")
    with gr.Row():
        generate = gr.Button("Generate")
    with gr.Row():
        advanced_checkbox = gr.Checkbox(label="Show Advanced Options", container=False, elem_classes='min_check', value=False)

    # Advanced generation parameters, hidden until the checkbox is ticked
    # (see the `advanced_checkbox.change` wiring at the bottom).
    with gr.Column(scale=1, visible=False) as advanced_column:
        with gr.Row():
            gr.Markdown("<h4>Adjust the parameters to control the model's output.</h4>")
        with gr.Row():
            gr.Markdown("Max New Tokens is the maximum number of tokens that the model will generate.")
        with gr.Row():
            tokens = gr.Slider(minimum=50, maximum=2000, label="Max New Tokens", value=1250)
        with gr.Row():
            gr.Markdown("Temperature is a parameter that controls the randomness of the model's output. A higher temperature will produce more random output.")
        with gr.Row():
            temp = gr.Slider(minimum=0.0, maximum=1.0, label="Temperature", value=0.7)
        with gr.Row():
            gr.Markdown("Top K is a parameter that controls the diversity of the model's output. A higher value will produce more diverse output.")
        with gr.Row():
            top_k = gr.Slider(minimum=1, maximum=100, label="Top K", value=50)
        with gr.Row():
            gr.Markdown("Top P is an alternative to Top K that selects the smallest set of tokens whose cumulative probability exceeds the threshold P.")
        with gr.Row():
            top_p = gr.Slider(minimum=0.0, maximum=1.0, label="Top P", value=0.95)
        with gr.Row():
            gr.Markdown("Quantization is a technique to reduce the size of the model and speed up inference. 4-bit quantization is faster but less accurate than 8-bit.")
        with gr.Row():
            quantization = gr.Dropdown(label="Quantization", choices=["8-bit", "4-bit"], value="4-bit")

    # Wire the Generate button to the inference pipeline. Argument order must
    # match the `chatbot` signature in modules.pipeline — TODO confirm.
    generate.click(
        fn=chatbot,
        inputs=[inputs, tokens, temp, top_k, top_p, model_options, quantization, device_options],
        outputs=outputs,
    )
    # Show/hide the advanced column as the checkbox is toggled.
    advanced_checkbox.change(
        lambda x: gr.update(visible=x),
        advanced_checkbox,
        advanced_column,
        queue=False,
        show_progress=False,
    )
    # NOTE(review): no `app.launch()` is visible in this chunk — presumably the
    # app is launched elsewhere (or auto-served by the hosting Space); verify.