import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import torch
MODEL_CONFIG = {
    "phi-3": {
        "model_name": "microsoft/phi-3-mini-4k-instruct",
        "template": "<|user|>\n{message}<|end|>\n<|assistant|>"
    },
| "llama3-8b": { | |
| "model_name": "NousResearch/Meta-Llama-3-8B-Instruct", | |
| "template": """<|begin_of_text|><|start_header_id|>user<|end_header_id|> | |
| {message}<|eot_id|><|start_header_id|>assistant<|end_header_id|> | |
| """ | |
| } | |
| } | |
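
# The raw templates above mirror each model's documented chat format.
# tokenizer.apply_chat_template is a more robust alternative, since it reads
# the format from the tokenizer's own config instead of hard-coding it.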

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)
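
# NF4 4-bit loading with double quantization shrinks the 8B model's weights to
# roughly 5 GB, so both models can fit on a single consumer GPU (the exact
# footprint varies by hardware and transformers version).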

class ChatModel:
    def __init__(self):
        self.models = {}
        self.tokenizers = {}

    def load_model(self, model_name):
        # Lazy-load: each checkpoint is downloaded and quantized once, then cached
        if model_name not in self.models:
            config = MODEL_CONFIG[model_name]
            tokenizer = AutoTokenizer.from_pretrained(config["model_name"])
            # Fall back to EOS for padding (Llama-3's tokenizer defines no pad token)
            tokenizer.pad_token = tokenizer.eos_token
            model = AutoModelForCausalLM.from_pretrained(
                config["model_name"],
                quantization_config=bnb_config,
                device_map="auto",
                torch_dtype=torch.float16,
                trust_remote_code=True
            )
            self.models[model_name] = model
            self.tokenizers[model_name] = tokenizer
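    # The first selection of each model triggers the checkpoint download and
    # 4-bit load, so the first reply per model can be slow on a cold start.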

    def generate(self, message, model_name, history):
        self.load_model(model_name)
        config = MODEL_CONFIG[model_name]
        # Format the prompt with the model-specific chat template
        # (history is currently unused, so each exchange is single-turn)
        prompt = config["template"].format(message=message)
        # Build the generation pipeline (recreated on every call; see note below)
        pipe = pipeline(
            "text-generation",
            model=self.models[model_name],
            tokenizer=self.tokenizers[model_name],
            max_new_tokens=384,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            return_full_text=False  # return only the newly generated tokens
        )
        response = pipe(prompt)[0]['generated_text']
        return response.strip()
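
# Note: generate() rebuilds the text-generation pipeline on every request.
# Caching one pipeline per model (e.g. in a hypothetical self.pipes dict filled
# in load_model) would avoid the repeated setup cost.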

model_handler = ChatModel()

def chat(message, history, model_choice):
    try:
        response = model_handler.generate(message, model_choice, history)
        # Append to the existing history rather than replacing it
        return history + [(message, response)]
    except Exception as e:
        return history + [(message, f"Error: {str(e)}")]
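
# To also clear the textbox after each send, chat() could return a
# ("", new_history) pair and the event wiring below would list
# outputs=[msg, chatbot] instead of just chatbot.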

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Phi-3 vs Llama-3 Chatbot")
    with gr.Row():
        model_choice = gr.Dropdown(
            choices=["phi-3", "llama3-8b"],
            label="Select Model",
            value="phi-3"
        )
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Message", placeholder="Type here...")
    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.ClearButton([msg, chatbot])
    msg.submit(chat, [msg, chatbot, model_choice], chatbot)
    submit_btn.click(chat, [msg, chatbot, model_choice], chatbot)

demo.launch()
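
# Optional tweak for shared GPUs: serialize requests through Gradio's queue, e.g.
# demo.queue(max_size=8).launch()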