# v1-chat-3 / app.py — Hugging Face Space (commit 4de085d by AB498)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load Phi-2 model and tokenizer once at module import so both Gradio
# handlers (generate_code, chat) share a single in-memory model.
# NOTE(review): trust_remote_code=True executes code shipped with the model
# repo — acceptable for microsoft/phi-2, but be aware of the implication.
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# torch_dtype=float32 forces full precision — presumably chosen for CPU
# inference on the Space; confirm before changing to float16.
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float32)
def generate_code(prompt, max_new_tokens=100, temperature=0.7, num_outputs=1):
    """
    Generate code completions for a prompt using Phi-2.

    Args:
        prompt: Code prompt/prefix to continue.
        max_new_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature (higher = more creative).
        num_outputs: Number of different completions to generate.

    Returns:
        dict with the prompt and a list of completion dicts
        (rank / generated_code / continuation); on failure, a dict
        with an "error" message and an empty completions list.
    """
    try:
        # Gradio sliders deliver floats; generate() expects ints for
        # max_new_tokens and num_return_sequences.
        max_new_tokens = int(max_new_tokens)
        num_outputs = int(num_outputs)

        # Tokenize input
        inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True)
        prompt_len = inputs["input_ids"].shape[1]

        # Generate code
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                num_return_sequences=num_outputs,
                do_sample=True,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode generated sequences
        completions = []
        for idx, output in enumerate(outputs):
            generated_text = tokenizer.decode(output, skip_special_tokens=True)
            # Strip the prompt at the *token* level: decoding can normalize
            # whitespace/special tokens, so slicing the decoded string with
            # len(prompt) may cut at the wrong character.
            continuation = tokenizer.decode(output[prompt_len:], skip_special_tokens=True)
            completions.append({
                "rank": idx + 1,
                "generated_code": generated_text,
                "continuation": continuation,
            })
        return {
            "prompt": prompt,
            "completions": completions,
        }
    except Exception as e:
        # Surface the failure in the JSON output instead of crashing the app.
        return {
            "error": str(e),
            "completions": [],
        }
def chat(message, history, temperature=0.7, max_new_tokens=200):
    """
    Generate a single chat reply using Phi-2.

    Args:
        message: User's latest message.
        history: Chat history as a list of (user, assistant) pairs.
        temperature: Sampling temperature.
        max_new_tokens: Maximum number of new tokens to generate.

    Returns:
        The assistant's reply text, or an "Error: ..." string on failure.
    """
    try:
        # Gradio sliders deliver floats; generate() expects an int.
        max_new_tokens = int(max_new_tokens)

        # Build conversation context in the plain User/Assistant transcript
        # format that the base model can continue.
        conversation = ""
        for user_msg, bot_msg in history:
            conversation += f"User: {user_msg}\nAssistant: {bot_msg}\n"
        conversation += f"User: {message}\nAssistant:"

        # Tokenize input
        inputs = tokenizer(conversation, return_tensors="pt", return_attention_mask=True)
        prompt_len = inputs["input_ids"].shape[1]

        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                do_sample=True,
                top_p=0.95,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens: slicing the decoded string
        # with len(conversation) misaligns whenever decode() normalizes the
        # prompt text.
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

        # The base model may keep role-playing past its own turn; keep only
        # the assistant's reply up to the next simulated "User:" line.
        if "User:" in response:
            response = response.split("User:")[0].strip()

        # If response is empty, provide a fallback
        if not response:
            response = "I understand. How can I help you further?"
        return response
    except Exception as e:
        return f"Error: {str(e)}"
# --- Gradio interface ------------------------------------------------------
# Two tabs share the module-level model: a code-completion tool backed by
# generate_code() and a simple chat backed by chat().
with gr.Blocks(title="Phi-2 Code Generator") as demo:
    gr.Markdown(
        """
        # Phi-2 (2.7B) - Code Generator & Chat
        Microsoft's Phi-2 language model with two modes: Code Generation and Simple Chat.
        """
    )

    with gr.Tabs():
        # ---- Code generation tab ----
        with gr.Tab("Code Generator"):
            gr.Markdown(
                """
                ### Code Generator
                Enter a code prompt and the model will continue writing the code.
                **Examples:**
                - `def add(x, y):`
                - `import numpy as np\n# Calculate`
                - `class Calculator:\n def __init__(self):`
                """
            )
            with gr.Row():
                with gr.Column():
                    # Prompt plus the three sampling knobs fed to generate_code().
                    code_input = gr.Textbox(
                        label="Code Prompt",
                        placeholder="Enter your code prompt...",
                        lines=5,
                        value="def fibonacci(n):",
                    )
                    max_new_tokens_slider = gr.Slider(
                        minimum=1,
                        maximum=500,
                        value=100,
                        step=10,
                        label="Max New Tokens",
                    )
                    temperature_slider = gr.Slider(
                        minimum=0.1,
                        maximum=1.5,
                        value=0.7,
                        step=0.1,
                        label="Temperature (creativity)",
                    )
                    num_outputs_slider = gr.Slider(
                        minimum=1,
                        maximum=3,
                        value=1,
                        step=1,
                        label="Number of outputs",
                    )
                    generate_btn = gr.Button("Generate", variant="primary")
                with gr.Column():
                    # generate_code() returns a dict, rendered as JSON.
                    output = gr.JSON(label="Generated Code")

            # Clickable example prompts (prompt, max tokens, temperature, outputs).
            gr.Examples(
                examples=[
                    ["def fibonacci(n):", 100, 0.7, 1],
                    ["import pandas as pd\n# Load and analyze data\n", 150, 0.7, 1],
                    ["class BinaryTree:\n def __init__(self):", 120, 0.7, 1],
                    ["# Function to reverse a string\ndef reverse_string(s):", 100, 0.7, 1],
                    ["for i in range(10):", 80, 0.7, 1],
                ],
                inputs=[code_input, max_new_tokens_slider, temperature_slider, num_outputs_slider],
            )
            generate_btn.click(
                fn=generate_code,
                inputs=[code_input, max_new_tokens_slider, temperature_slider, num_outputs_slider],
                outputs=output,
            )

        # ---- Chat tab ----
        with gr.Tab("Simple Chat"):
            gr.Markdown(
                """
                ### Chat with Phi-2
                Have a conversation with the Phi-2 model. Ask questions, discuss topics, or just chat!
                """
            )
            with gr.Row():
                with gr.Column(scale=4):
                    chatbot = gr.Chatbot(label="Chat", height=400)
                    with gr.Row():
                        msg = gr.Textbox(
                            label="Message",
                            placeholder="Type your message here...",
                            lines=2,
                            scale=4,
                        )
                        send_btn = gr.Button("Send", variant="primary", scale=1)
                    clear_btn = gr.Button("Clear Chat")
                with gr.Column(scale=1):
                    chat_temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.5,
                        value=0.7,
                        step=0.1,
                        label="Temperature",
                    )
                    chat_max_new_tokens = gr.Slider(
                        minimum=50,
                        maximum=300,
                        value=200,
                        step=10,
                        label="Max New Tokens",
                    )

            def respond(message, history, temperature, max_new_tokens):
                # Run one chat turn, append the (user, bot) pair to the
                # transcript, and clear the input textbox.
                bot_message = chat(message, history, temperature, max_new_tokens)
                history.append((message, bot_message))
                return "", history

            def clear_chat():
                # Returning None resets the Chatbot component.
                return None

            # Both pressing Enter and clicking Send trigger the same handler.
            msg.submit(
                fn=respond,
                inputs=[msg, chatbot, chat_temperature, chat_max_new_tokens],
                outputs=[msg, chatbot],
            )
            send_btn.click(
                fn=respond,
                inputs=[msg, chatbot, chat_temperature, chat_max_new_tokens],
                outputs=[msg, chatbot],
            )
            clear_btn.click(fn=clear_chat, outputs=[chatbot])


if __name__ == "__main__":
    demo.launch()