Spaces:

amirkhanbloch
/

Qwen_chat

Build error

Qwen_chat / app.py

Create app.py

c81d4f0 verified over 1 year ago

1.69 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch

	# Load the model and tokenizer once at import time so every request reuses them.
	MODEL_ID = "Qwen/Qwen1.5-0.5B-Chat"
	device = "cuda" if torch.cuda.is_available() else "cpu"
	# Placement is done with one explicit .to(device). device_map="auto" is
	# deliberately not passed: it delegates placement to Accelerate (making the
	# optional `accelerate` package a hard requirement at load time), and moving
	# an Accelerate-dispatched model again with .to() is discouraged.
	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_ID,
	    torch_dtype="auto",
	).to(device)
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

	def generate_response(prompt):
	    """Generate the model's reply to a single user prompt.

	    The prompt is wrapped in a fixed system + user conversation, rendered
	    with the tokenizer's chat template, and passed to ``model.generate``
	    with a budget of 512 new tokens.

	    Args:
	        prompt: Text of the user message.

	    Returns:
	        The decoded reply, with the prompt tokens and special tokens removed.
	    """
	    messages = [
	        {"role": "system", "content": "You are a helpful assistant."},
	        {"role": "user", "content": prompt},
	    ]

	    # Render the conversation to plain text; tokenization happens below.
	    text = tokenizer.apply_chat_template(
	        messages,
	        tokenize=False,
	        add_generation_prompt=True,
	    )

	    # Put the inputs on whatever device actually holds the model weights.
	    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

	    # Forward the whole encoding (input_ids and attention_mask) so that
	    # generate() does not have to infer the mask on its own.
	    generated_ids = model.generate(
	        **model_inputs,
	        max_new_tokens=512,
	    )

	    # generate() returns prompt + continuation; keep only the continuation.
	    prompt_length = model_inputs.input_ids.shape[1]
	    reply_ids = generated_ids[:, prompt_length:]

	    return tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]

	# Gradio UI: a single text box in, a single text box out, with
	# generate_response producing the reply for each submitted prompt.
	interface = gr.Interface(
	    title="Qwen Chatbot",
	    description="Enter a prompt and the AI assistant will provide a response.",
	    fn=generate_response,
	    inputs="text",
	    outputs="text",
	)

	# Start serving the app.
	interface.launch()