# NepGPT2 — app.py (Hugging Face Space: dinesh-bk/NepGPT2)
# History: renamed from app2.py to app.py (commit 93622d4, verified; author: dinesh-bk).
import os
import time
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr
# Load the tokenizer and model from the Hugging Face Hub.
# trust_remote_code=True executes custom modeling code shipped in the model
# repo — acceptable here only because "dinesh-bk/NepGPT2" is presumably the
# Space author's own repo; verify before reusing elsewhere.
tokenizer = AutoTokenizer.from_pretrained("dinesh-bk/NepGPT2")
model = AutoModelForCausalLM.from_pretrained("dinesh-bk/NepGPT2", trust_remote_code=True)
# Pick the best available accelerator and move the model onto it.
# BUG FIX: the original used two independent `if` statements, so on a CUDA
# machine where MPS is unavailable, the second statement's `else` branch
# clobbered `device` back to CPU. `elif` restores the intended precedence:
# CUDA > MPS > CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
model.to(device)
def model_inference(input_text, max_output_tokens, temperature, top_k, top_p):
    """Generate Nepali text from *input_text*, streaming partial output.

    Args:
        input_text: Prompt string typed by the user; empty/whitespace-only
            input short-circuits with a help message.
        max_output_tokens: Maximum number of NEW tokens to generate
            (passed to ``max_new_tokens``).
        temperature: Sampling temperature. 0.0 (selectable on the UI slider)
            falls back to greedy decoding instead of crashing ``generate``.
        top_k: Keep only the k most likely tokens at each sampling step.
        top_p: Nucleus-sampling cumulative-probability cutoff.

    Yields:
        Progressively longer decoded strings, giving Gradio a streaming
        (typewriter) effect in the output textbox.
    """
    # Guard against empty or whitespace-only prompts.
    if not input_text or not input_text.strip():
        yield "Please provide input text"
        return

    # Tokenize the prompt and move it to the model's device.
    input_ids = tokenizer.encode(
        input_text, return_tensors="pt", add_special_tokens=False
    ).to(device)

    # temperature == 0 makes `generate` raise when do_sample=True, and the
    # UI slider's minimum is 0.0 — use greedy decoding in that case and only
    # pass sampling parameters when actually sampling.
    do_sample = temperature > 0
    gen_kwargs = {
        # Gradio sliders can hand back floats; generate expects ints here.
        "max_new_tokens": int(max_output_tokens),
        "top_k": int(top_k),
        "top_p": top_p,
        "do_sample": do_sample,
    }
    if do_sample:
        gen_kwargs["temperature"] = float(temperature)

    # Inference only — no gradients needed.
    with torch.no_grad():
        output = model.generate(input_ids, **gen_kwargs)

    # Re-decode the growing prefix from scratch each step so multi-token
    # (Devanagari) character sequences render correctly once complete.
    generated_tokens = output[0]
    partial_tokens = []
    for token in generated_tokens:
        partial_tokens.append(token.item())
        partial_sentence = tokenizer.decode(partial_tokens, skip_special_tokens=True)
        yield partial_sentence
        time.sleep(0.1)  # pacing for the streaming effect
# --- Gradio UI -------------------------------------------------------------
# Layout: left column = prompt + Submit/Clear buttons; right column = help
# text + sampling sliders; bottom row = streamed model output. Component
# order and nesting inside the context managers IS the rendered layout.
with gr.Blocks(theme="ocean") as demo:
    gr.Markdown("## Model Inference")
    with gr.Row():
        with gr.Column():
            # Prompt box (placeholder is Nepali for "type here...").
            input_textbox = gr.Textbox(
                label="Input",
                placeholder="यहाँ टाइप गर्नुहोस्...",
                lines=5)
            submit_btn = gr.Button("Submit")
            clear_btn = gr.Button("Clear")
        with gr.Column():
            gr.Markdown("""
            ### Slider Settings
            Adjust the sliders to control the model's output:
            - **Context Length (1-1024)**: Sets the maximum number of tokens generated.
            - **Temperature (0.0-1.0)**: Controls randomness. Lower values make output more predictable.
            - **Top-K (1-100)**: Limits sampling to the top K most likely tokens.
            - **Top-P (0.0-1.0)**: Filters tokens to a cumulative probability.
            """)
            # NOTE(review): labelled "Context Length" but wired to
            # max_new_tokens in model_inference, i.e. it caps the number of
            # NEW tokens generated, not the context window size.
            max_output_tokens = gr.Slider(
                label="Context Length",
                minimum=1,
                maximum=1024,
                step=1,
                value=50)
            # NOTE(review): minimum 0.0 is selectable; model_inference must
            # tolerate temperature == 0, otherwise generate() with
            # do_sample=True fails — confirm the handler guards this.
            temperature = gr.Slider(
                label="Temperature",
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=0.9)
            top_k = gr.Slider(
                label="Top-K",
                minimum=1,
                maximum=100,
                step=1,
                value=90)
            top_p = gr.Slider(
                label="Top-P",
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=0.9)
    with gr.Row():
        # Output box (placeholder is Nepali for "model's output...").
        output_textbox = gr.Textbox(
            label="Output",
            placeholder="मोडेलको आउटपुट...",
            lines=5)
    # Wire up the button. model_inference is a generator, so Gradio streams
    # each yielded partial string into the output textbox.
    submit_btn.click(
        fn=model_inference,
        inputs=[input_textbox,
                max_output_tokens,
                temperature,
                top_k,
                top_p],
        outputs=[output_textbox]
    )
    # Clear both input and output (one empty string per output component).
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=[],
        outputs=[input_textbox, output_textbox]
    )

# debug=True surfaces handler tracebacks in the console/UI.
demo.launch(debug=True)