| import os |
| import time |
| from transformers import AutoTokenizer, AutoModelForCausalLM |
| import torch |
| import gradio as gr |
|
|
| |
# Load the tokenizer and causal-LM weights from the Hugging Face Hub.
# NOTE(review): trust_remote_code=True executes arbitrary Python shipped with
# the model repo at load time — acceptable only because this is a known repo.
tokenizer = AutoTokenizer.from_pretrained("dinesh-bk/NepGPT2")
model = AutoModelForCausalLM.from_pretrained("dinesh-bk/NepGPT2", trust_remote_code=True)
|
|
| |
# Pick the best available accelerator: CUDA > Apple MPS > CPU.
# Bug fix: the original used two independent `if` statements, so on a
# machine with CUDA but without MPS the second chain's `else` branch
# overwrote `device` with CPU and the GPU was silently never used.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
|
|
| model.to(device) |
|
|
def model_inference(input_text, max_output_tokens, temperature, top_k, top_p):
    """Generate a continuation of ``input_text`` and stream it to the UI.

    Parameters arrive straight from Gradio widgets, so the numeric ones may
    be floats even where the model requires integers; they are coerced here.

    Yields:
        Progressively longer decoded strings (prompt + generated text) so
        Gradio renders a typing effect.
    """
    # Guard clause: nothing to generate from an empty / whitespace prompt.
    if not input_text or not input_text.strip():
        yield "Please provide input text"
        return

    input_ids = tokenizer.encode(
        input_text, return_tensors="pt", add_special_tokens=False
    ).to(device)

    # Gradio sliders deliver floats; generate() expects ints for these.
    max_new_tokens = int(max_output_tokens)
    top_k = int(top_k)

    # temperature == 0.0 is reachable via the slider and would make sampling
    # divide by zero inside transformers; fall back to greedy decoding then.
    do_sample = temperature > 0

    with torch.no_grad():
        generate_kwargs = {
            "max_new_tokens": max_new_tokens,
            "do_sample": do_sample,
            # Silences the "no pad token" warning on GPT-2-style models.
            "pad_token_id": tokenizer.eos_token_id,
        }
        if do_sample:
            generate_kwargs.update(temperature=temperature, top_k=top_k, top_p=top_p)
        output = model.generate(input_ids, **generate_kwargs)

    # NOTE: this is simulated streaming — generation is already complete;
    # we re-decode a growing prefix of the result (prompt tokens included)
    # and sleep between yields so the text appears gradually.
    token_ids = []
    for token in output[0]:
        token_ids.append(token.item())
        yield tokenizer.decode(token_ids, skip_special_tokens=True)
        time.sleep(0.1)
|
|
# ---------------------------------------------------------------------------
# UI layout: prompt + action buttons on the left, usage notes and sampling
# controls on the right, the generated text in a row underneath.
# ---------------------------------------------------------------------------
with gr.Blocks(theme="ocean") as demo:
    gr.Markdown("## Model Inference")

    with gr.Row():
        with gr.Column():
            prompt_box = gr.Textbox(label="Input", placeholder="यहाँ टाइप गर्नुहोस्...", lines=5)
            run_button = gr.Button("Submit")
            reset_button = gr.Button("Clear")

        with gr.Column():
            # Help text explaining what each sampling control does.
            gr.Markdown("""
            ### Slider Settings
            Adjust the sliders to control the model's output:
            - **Context Length (1-1024)**: Sets the maximum number of tokens generated.
            - **Temperature (0.0-1.0)**: Controls randomness. Lower values make output more predictable.
            - **Top-K (1-100)**: Limits sampling to the top K most likely tokens.
            - **Top-P (0.0-1.0)**: Filters tokens to a cumulative probability.
            """)

            # Sampling controls, passed positionally into model_inference.
            length_slider = gr.Slider(label="Context Length", minimum=1, maximum=1024, step=1, value=50)
            temp_slider = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.9)
            topk_slider = gr.Slider(label="Top-K", minimum=1, maximum=100, step=1, value=90)
            topp_slider = gr.Slider(label="Top-P", minimum=0.0, maximum=1.0, step=0.1, value=0.9)

    with gr.Row():
        result_box = gr.Textbox(label="Output", placeholder="मोडेलको आउटपुट...", lines=5)

    # model_inference is a generator, so Gradio streams each yielded string
    # into the output textbox.
    run_button.click(
        fn=model_inference,
        inputs=[prompt_box, length_slider, temp_slider, topk_slider, topp_slider],
        outputs=[result_box],
    )

    # Reset both textboxes to empty strings.
    reset_button.click(fn=lambda: ("", ""), inputs=[], outputs=[prompt_box, result_box])


demo.launch(debug=True)