| import os |
| import time |
| from transformers import AutoTokenizer, AutoModelForCausalLM |
| import torch |
| import gradio as gr |
|
|
| |
# Load the tokenizer and causal-LM weights from the Hugging Face Hub.
# NOTE(review): trust_remote_code=True executes arbitrary Python shipped with
# the model repo at load time — acceptable only because this is a known repo.
tokenizer = AutoTokenizer.from_pretrained("dinesh-bk/NepGPT2")
model = AutoModelForCausalLM.from_pretrained("dinesh-bk/NepGPT2", trust_remote_code=True)
|
|
| |
# Pick the best available accelerator: CUDA > Apple MPS > CPU.
# Bug fix: the original used two independent `if` statements, so on a
# machine with CUDA but without MPS the second chain's `else` branch
# overwrote `device` with CPU and the GPU was silently never used.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
|
|
| model.to(device) |
|
|
def model_inference(input_text, max_output_tokens, temperature, top_k, top_p):
    """Generate a continuation of ``input_text`` and stream it to the UI.

    Parameters arrive straight from Gradio widgets, so the numeric ones may
    be floats even where the model requires integers; they are coerced here.

    Yields:
        Progressively longer decoded strings (prompt + generated text) so
        Gradio renders a typing effect.
    """
    # Guard clause: nothing to generate from an empty / whitespace prompt.
    if not input_text or not input_text.strip():
        yield "Please provide input text"
        return

    input_ids = tokenizer.encode(
        input_text, return_tensors="pt", add_special_tokens=False
    ).to(device)

    # Gradio sliders deliver floats; generate() expects ints for these.
    max_new_tokens = int(max_output_tokens)
    top_k = int(top_k)

    # temperature == 0.0 is reachable via the slider and would make sampling
    # divide by zero inside transformers; fall back to greedy decoding then.
    do_sample = temperature > 0

    with torch.no_grad():
        generate_kwargs = {
            "max_new_tokens": max_new_tokens,
            "do_sample": do_sample,
            # Silences the "no pad token" warning on GPT-2-style models.
            "pad_token_id": tokenizer.eos_token_id,
        }
        if do_sample:
            generate_kwargs.update(temperature=temperature, top_k=top_k, top_p=top_p)
        output = model.generate(input_ids, **generate_kwargs)

    # NOTE: this is simulated streaming — generation is already complete;
    # we re-decode a growing prefix of the result (prompt tokens included)
    # and sleep between yields so the text appears gradually.
    token_ids = []
    for token in output[0]:
        token_ids.append(token.item())
        yield tokenizer.decode(token_ids, skip_special_tokens=True)
        time.sleep(0.1)
|
|
# ---------------------------------------------------------------------------
# UI layout: prompt + action buttons on the left, usage notes and sampling
# controls on the right, the generated text in a row underneath.
# ---------------------------------------------------------------------------
with gr.Blocks(theme="ocean") as demo:
    gr.Markdown("## Model Inference")

    with gr.Row():
        with gr.Column():
            prompt_box = gr.Textbox(label="Input", placeholder="यहाँ टाइप गर्नुहोस्...", lines=5)
            run_button = gr.Button("Submit")
            reset_button = gr.Button("Clear")

        with gr.Column():
            # Help text explaining what each sampling control does.
            gr.Markdown("""
            ### Slider Settings
            Adjust the sliders to control the model's output:
            - **Context Length (1-1024)**: Sets the maximum number of tokens generated.
            - **Temperature (0.0-1.0)**: Controls randomness. Lower values make output more predictable.
            - **Top-K (1-100)**: Limits sampling to the top K most likely tokens.
            - **Top-P (0.0-1.0)**: Filters tokens to a cumulative probability.
            """)

            # Sampling controls, passed positionally into model_inference.
            length_slider = gr.Slider(label="Context Length", minimum=1, maximum=1024, step=1, value=50)
            temp_slider = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.9)
            topk_slider = gr.Slider(label="Top-K", minimum=1, maximum=100, step=1, value=90)
            topp_slider = gr.Slider(label="Top-P", minimum=0.0, maximum=1.0, step=0.1, value=0.9)

    with gr.Row():
        result_box = gr.Textbox(label="Output", placeholder="मोडेलको आउटपुट...", lines=5)

    # model_inference is a generator, so Gradio streams each yielded string
    # into the output textbox.
    run_button.click(
        fn=model_inference,
        inputs=[prompt_box, length_slider, temp_slider, topk_slider, topp_slider],
        outputs=[result_box],
    )

    # Reset both textboxes to empty strings.
    reset_button.click(fn=lambda: ("", ""), inputs=[], outputs=[prompt_box, result_box])


demo.launch(debug=True)