Spaces:

sudhakar272
/

phi2-lora-sft

Sleeping

App Files Files Community

phi2-lora-sft / app.py

sudhakar272

Upload 3 files

d99986a verified 10 months ago

raw

history blame contribute delete

2.88 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from peft import PeftModel
	import torch

	# Load the base model and tokenizer
	def load_model():
	    """Load the Phi-2 base model, attach the fine-tuned adapter, and load the tokenizer.

	    Both the base weights and the adapter are requested in float16 with
	    ``device_map="auto"``.

	    Returns:
	        A ``(model, tokenizer)`` tuple: the PEFT-wrapped causal LM and the
	        tokenizer of the base checkpoint.
	    """
	    base_checkpoint = "microsoft/phi-2"
	    adapter_checkpoint = "satyanayak/PHI2-SFT-OASST1"
	    half_precision = torch.float16

	    backbone = AutoModelForCausalLM.from_pretrained(
	        base_checkpoint,
	        trust_remote_code=True,
	        device_map="auto",
	        torch_dtype=half_precision,
	    )

	    # Wrap the base weights with the fine-tuned adapter
	    model = PeftModel.from_pretrained(
	        backbone,
	        adapter_checkpoint,
	        device_map="auto",
	        torch_dtype=half_precision,
	    )

	    # The tokenizer comes from the base checkpoint, not the adapter repo
	    tokenizer = AutoTokenizer.from_pretrained(base_checkpoint, trust_remote_code=True)
	    return model, tokenizer

	# Generate response
	def generate_response(prompt, max_length=512, temperature=0.7, top_p=0.9):
	    """Generate the assistant's reply to a single user prompt.

	    The prompt is wrapped in the "Human: ... / Assistant:" template, sampled
	    with the module-level ``model``, and decoded with the module-level
	    ``tokenizer``.

	    Args:
	        prompt: User message text.
	        max_length: Value forwarded to ``generate`` as ``max_length``
	            (coerced to ``int``).
	        temperature: Sampling temperature forwarded to ``generate``.
	        top_p: Nucleus-sampling threshold forwarded to ``generate``.

	    Returns:
	        The generated reply with surrounding whitespace stripped.
	    """
	    inputs = tokenizer(f"Human: {prompt}\nAssistant:", return_tensors="pt").to(model.device)
	    # Number of prompt tokens, used below to separate prompt from reply.
	    prompt_len = inputs["input_ids"].shape[-1]

	    outputs = model.generate(
	        **inputs,
	        # Coerce to int in case the caller passes a float (e.g. a slider value).
	        max_length=int(max_length),
	        temperature=temperature,
	        top_p=top_p,
	        do_sample=True,
	        pad_token_id=tokenizer.eos_token_id,
	    )

	    # Decode only the tokens produced after the prompt. Splitting the full
	    # decoded text on "Assistant:" and keeping the last piece returns the
	    # wrong span whenever the generated text itself contains that marker.
	    reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
	    # Keep only the first answer: drop any follow-on "Human:" turn the model
	    # invents after it.
	    reply = reply.split("\nHuman:")[0]
	    return reply.strip()

	# Example rows for the UI. Each row carries one value per input:
	# prompt text, maximum length, temperature, top-p.
	EXAMPLE_PROMPTS = [
	    [question, 512, 0.7, 0.9]
	    for question in (
	        "What is the capital of France?",
	        "Write a short poem about autumn.",
	        "Explain quantum computing in simple terms.",
	        "Give me a recipe for chocolate chip cookies.",
	        "What are the benefits of regular exercise?",
	    )
	]

	# Load the model and tokenizer once at module import. generate_response
	# reads these module-level names (model, tokenizer) on every call, so this
	# must run before the interface handles any request.
	print("Loading model...")
	model, tokenizer = load_model()
	print("Model loaded!")

	# Create Gradio interface
	demo = gr.Interface(
	    fn=generate_response,
	    # Input order mirrors generate_response's parameters:
	    # prompt, max_length, temperature, top_p.
	    inputs=[
	        gr.Textbox(
	            lines=4,
	            placeholder="Type your message here...",
	            label="Enter your prompt",
	        ),
	        # The three sliders differ only in their numeric settings and label.
	        *(
	            gr.Slider(
	                label=caption,
	                minimum=low,
	                maximum=high,
	                value=initial,
	                step=increment,
	            )
	            for caption, low, high, initial, increment in (
	                ("Maximum Length", 64, 1024, 512, 64),
	                ("Temperature", 0.1, 1.0, 0.7, 0.1),
	                ("Top P", 0.1, 1.0, 0.9, 0.1),
	            )
	        ),
	    ],
	    outputs=gr.Textbox(lines=10, label="Response"),
	    examples=EXAMPLE_PROMPTS,
	    title="Phi-2 Assistant",
	    description="This is a fine-tuned version of Phi-2 on the OpenAssistant dataset. Enter your prompt and adjust generation parameters as needed.",
	)

	# Start the Gradio app only when this file is executed as a script.
	# share=True requests a public share link (per Gradio's launch() docs).
	if __name__ == "__main__":
	    demo.launch(share=True)