# phi_2_QLoRA / app.py
# Author: Dhairyashil Ghatage
# Commit: add app and model data (f04dcd7)
import gradio as gr
import mlx.core as mx
import utils
# Load the model and tokenizer
def load_model(model_path, adapter_path):
    """Load the base model and tokenizer, optionally applying LoRA adapter weights.

    Args:
        model_path: Path to the base model directory (forwarded to utils.load).
        adapter_path: Path to an .npz file of adapter weights, or a falsy
            value to skip adapter loading entirely.

    Returns:
        A (model, tokenizer) tuple. Adapter loading is best-effort: on any
        failure the error is printed and the base model is returned unchanged.
    """
    model, tokenizer, _ = utils.load(model_path)
    if adapter_path:
        try:
            adapter_weights = mx.load(adapter_path)
            # strict=False lets MLX skip any keys that don't match the model,
            # so no manual pre-filtering is needed. (The previous filter
            # tested flat adapter keys such as "layers.0.attn..." against
            # model.parameters(), which is a *nested* dict — only top-level
            # keys would ever match, silently dropping every adapter weight.)
            model.load_weights(list(adapter_weights.items()), strict=False)
            print(f"Loaded adapter weights from {adapter_path}")
        except Exception as e:
            # Deliberate best-effort: fall back to the base model rather than crash.
            print(f"Error loading adapter weights: {str(e)}")
    return model, tokenizer
# Generate response
def generate_response(model, tokenizer, prompt, max_tokens, temperature):
    """Generate up to max_tokens tokens for prompt and decode them to text.

    Args:
        model: The loaded language model.
        tokenizer: Tokenizer providing encode/decode and eos_token_id.
        prompt: The full text prompt to continue.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature forwarded to utils.generate.

    Returns:
        The decoded generated text. Generation stops at the tokenizer's EOS
        token, which is excluded from the output (previously the EOS token
        was appended before the stop check, leaking the end-of-text marker
        into the returned string).
    """
    prompt_tokens = mx.array(tokenizer.encode(prompt))
    eos_id = tokenizer.eos_token_id  # hoist invariant attribute lookup out of the loop
    generated_tokens = []
    for token in utils.generate(prompt_tokens, model, temperature):
        token_id = token.item()  # materialize once; original called .item() twice per step
        if token_id == eos_id:
            break
        generated_tokens.append(token_id)
        if len(generated_tokens) >= max_tokens:
            break
    return tokenizer.decode(generated_tokens)
# Inference function
def infer(question, max_tokens, temperature):
    """Gradio handler: wrap the user's question in a Q/A prompt and answer it.

    Relies on the module-level `model` and `tokenizer` loaded at startup.
    """
    qa_prompt = f"Q: {question}\nA:"
    return generate_response(model, tokenizer, qa_prompt, max_tokens, temperature)
# Load the model and tokenizer (do this outside the infer function to load only once)
# NOTE(review): both paths are relative to the working directory at launch —
# confirm they resolve correctly in the deployment environment.
model_path = "./phi-2" # Update this with the actual path to your model
adapter_path = "./adapters.npz" # Update this with the actual path to your adapters
# Loaded once at import time; infer() reads these module-level globals.
model, tokenizer = load_model(model_path, adapter_path)
# Create the Gradio interface: one textbox for the question plus two sliders
# controlling generation length and sampling temperature.
question_input = gr.Textbox(lines=2, placeholder="Enter your question here...")
max_tokens_input = gr.Slider(minimum=1, maximum=500, value=100, step=1, label="Max Tokens")
temperature_input = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")

iface = gr.Interface(
    fn=infer,
    inputs=[question_input, max_tokens_input, temperature_input],
    outputs="text",
    title="Fine-tuned Phi-2 Q&A Demo",
    description="Ask a question and get an answer from the fine-tuned Phi-2 model. Finetuned on OASST1 dataset."
)
# Launch the interface
iface.launch()