# v1-chat-3 / app.py — Hugging Face Space (commit 4de085d by AB498)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load Phi-2 model and tokenizer once at module import so both Gradio
# handlers (generate_code, chat) share a single in-memory model.
# NOTE(review): trust_remote_code=True executes code shipped with the model
# repo — acceptable for microsoft/phi-2, but be aware of the implication.
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# torch_dtype=float32 forces full precision — presumably chosen for CPU
# inference on the Space; confirm before changing to float16.
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float32)
def generate_code(prompt, max_new_tokens=100, temperature=0.7, num_outputs=1):
    """
    Generate code completions for a prompt using Phi-2.

    Args:
        prompt: Code prompt/prefix to continue.
        max_new_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature (higher = more creative).
        num_outputs: Number of different completions to generate.

    Returns:
        dict with the prompt and a list of completion dicts
        (rank / generated_code / continuation); on failure, a dict
        with an "error" message and an empty completions list.
    """
    try:
        # Gradio sliders deliver floats; generate() expects ints for
        # max_new_tokens and num_return_sequences.
        max_new_tokens = int(max_new_tokens)
        num_outputs = int(num_outputs)

        # Tokenize input
        inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True)
        prompt_len = inputs["input_ids"].shape[1]

        # Generate code
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                num_return_sequences=num_outputs,
                do_sample=True,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode generated sequences
        completions = []
        for idx, output in enumerate(outputs):
            generated_text = tokenizer.decode(output, skip_special_tokens=True)
            # Strip the prompt at the *token* level: decoding can normalize
            # whitespace/special tokens, so slicing the decoded string with
            # len(prompt) may cut at the wrong character.
            continuation = tokenizer.decode(output[prompt_len:], skip_special_tokens=True)
            completions.append({
                "rank": idx + 1,
                "generated_code": generated_text,
                "continuation": continuation,
            })
        return {
            "prompt": prompt,
            "completions": completions,
        }
    except Exception as e:
        # Surface the failure in the JSON output instead of crashing the app.
        return {
            "error": str(e),
            "completions": [],
        }
def chat(message, history, temperature=0.7, max_new_tokens=200):
    """
    Generate a single chat reply using Phi-2.

    Args:
        message: User's latest message.
        history: Chat history as a list of (user, assistant) pairs.
        temperature: Sampling temperature.
        max_new_tokens: Maximum number of new tokens to generate.

    Returns:
        The assistant's reply text, or an "Error: ..." string on failure.
    """
    try:
        # Gradio sliders deliver floats; generate() expects an int.
        max_new_tokens = int(max_new_tokens)

        # Build conversation context in the plain User/Assistant transcript
        # format that the base model can continue.
        conversation = ""
        for user_msg, bot_msg in history:
            conversation += f"User: {user_msg}\nAssistant: {bot_msg}\n"
        conversation += f"User: {message}\nAssistant:"

        # Tokenize input
        inputs = tokenizer(conversation, return_tensors="pt", return_attention_mask=True)
        prompt_len = inputs["input_ids"].shape[1]

        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                do_sample=True,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens: slicing the decoded string
        # with len(conversation) misaligns whenever decode() normalizes the
        # prompt text.
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

        # The base model may keep role-playing past its own turn; keep only
        # the assistant's reply up to the next simulated "User:" line.
        if "User:" in response:
            response = response.split("User:")[0].strip()

        # If response is empty, provide a fallback
        if not response:
            response = "I understand. How can I help you further?"
        return response
    except Exception as e:
        return f"Error: {str(e)}"
# --- Gradio interface ------------------------------------------------------
# Two tabs share the module-level model: a code-completion tool backed by
# generate_code() and a simple chat backed by chat().
with gr.Blocks(title="Phi-2 Code Generator") as demo:
    gr.Markdown(
        """
        # Phi-2 (2.7B) - Code Generator & Chat
        Microsoft's Phi-2 language model with two modes: Code Generation and Simple Chat.
        """
    )

    with gr.Tabs():
        # ---- Code generation tab ----
        with gr.Tab("Code Generator"):
            gr.Markdown(
                """
                ### Code Generator
                Enter a code prompt and the model will continue writing the code.
                **Examples:**
                - `def add(x, y):`
                - `import numpy as np\n# Calculate`
                - `class Calculator:\n def __init__(self):`
                """
            )
            with gr.Row():
                with gr.Column():
                    # Prompt plus the three sampling knobs fed to generate_code().
                    code_input = gr.Textbox(
                        label="Code Prompt",
                        placeholder="Enter your code prompt...",
                        lines=5,
                        value="def fibonacci(n):",
                    )
                    max_new_tokens_slider = gr.Slider(
                        minimum=1,
                        maximum=500,
                        value=100,
                        step=10,
                        label="Max New Tokens",
                    )
                    temperature_slider = gr.Slider(
                        minimum=0.1,
                        maximum=1.5,
                        value=0.7,
                        step=0.1,
                        label="Temperature (creativity)",
                    )
                    num_outputs_slider = gr.Slider(
                        minimum=1,
                        maximum=3,
                        value=1,
                        step=1,
                        label="Number of outputs",
                    )
                    generate_btn = gr.Button("Generate", variant="primary")
                with gr.Column():
                    # generate_code() returns a dict, rendered as JSON.
                    output = gr.JSON(label="Generated Code")

            # Clickable example prompts (prompt, max tokens, temperature, outputs).
            gr.Examples(
                examples=[
                    ["def fibonacci(n):", 100, 0.7, 1],
                    ["import pandas as pd\n# Load and analyze data\n", 150, 0.7, 1],
                    ["class BinaryTree:\n def __init__(self):", 120, 0.7, 1],
                    ["# Function to reverse a string\ndef reverse_string(s):", 100, 0.7, 1],
                    ["for i in range(10):", 80, 0.7, 1],
                ],
                inputs=[code_input, max_new_tokens_slider, temperature_slider, num_outputs_slider],
            )
            generate_btn.click(
                fn=generate_code,
                inputs=[code_input, max_new_tokens_slider, temperature_slider, num_outputs_slider],
                outputs=output,
            )

        # ---- Chat tab ----
        with gr.Tab("Simple Chat"):
            gr.Markdown(
                """
                ### Chat with Phi-2
                Have a conversation with the Phi-2 model. Ask questions, discuss topics, or just chat!
                """
            )
            with gr.Row():
                with gr.Column(scale=4):
                    chatbot = gr.Chatbot(label="Chat", height=400)
                    with gr.Row():
                        msg = gr.Textbox(
                            label="Message",
                            placeholder="Type your message here...",
                            lines=2,
                            scale=4,
                        )
                        send_btn = gr.Button("Send", variant="primary", scale=1)
                    clear_btn = gr.Button("Clear Chat")
                with gr.Column(scale=1):
                    chat_temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.5,
                        value=0.7,
                        step=0.1,
                        label="Temperature",
                    )
                    chat_max_new_tokens = gr.Slider(
                        minimum=50,
                        maximum=300,
                        value=200,
                        step=10,
                        label="Max New Tokens",
                    )

            def respond(message, history, temperature, max_new_tokens):
                # Run one chat turn, append the (user, bot) pair to the
                # transcript, and clear the input textbox.
                bot_message = chat(message, history, temperature, max_new_tokens)
                history.append((message, bot_message))
                return "", history

            def clear_chat():
                # Returning None resets the Chatbot component.
                return None

            # Both pressing Enter and clicking Send trigger the same handler.
            msg.submit(
                fn=respond,
                inputs=[msg, chatbot, chat_temperature, chat_max_new_tokens],
                outputs=[msg, chatbot],
            )
            send_btn.click(
                fn=respond,
                inputs=[msg, chatbot, chat_temperature, chat_max_new_tokens],
                outputs=[msg, chatbot],
            )
            clear_btn.click(fn=clear_chat, outputs=[chatbot])


if __name__ == "__main__":
    demo.launch()