Spaces:
Sleeping
Sleeping
Commit ·
16ebb52
1
Parent(s): 7b0bd94
update commit with phi-3 mini 113
Browse files
app.py
CHANGED
|
@@ -2,27 +2,25 @@ import gradio as gr
|
|
| 2 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 3 |
import torch
|
| 4 |
|
| 5 |
-
#
|
| 6 |
model_id = "microsoft/phi-2"
|
| 7 |
|
| 8 |
-
#
|
| 9 |
-
|
|
|
|
| 10 |
|
| 11 |
-
#
|
|
|
|
| 12 |
model = AutoModelForCausalLM.from_pretrained(
|
| 13 |
model_id,
|
| 14 |
-
|
| 15 |
-
|
| 16 |
)
|
| 17 |
|
| 18 |
-
#
|
| 19 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 20 |
|
| 21 |
-
#
|
| 22 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 23 |
-
print(f"🚀 Using device: {device}")
|
| 24 |
-
|
| 25 |
-
# 💬 Chat logic
|
| 26 |
def chat_fn(message, history):
|
| 27 |
history_text = ""
|
| 28 |
for item in history:
|
|
@@ -32,28 +30,29 @@ def chat_fn(message, history):
|
|
| 32 |
history_text += f"<|assistant|>\n{item['content']}\n"
|
| 33 |
prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
|
| 34 |
|
| 35 |
-
|
| 36 |
-
reply =
|
| 37 |
|
| 38 |
-
|
|
|
|
| 39 |
reply = f"```\n{reply}\n```"
|
| 40 |
|
| 41 |
return reply
|
| 42 |
|
| 43 |
-
#
|
| 44 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 45 |
gr.Markdown("## 🤖 Chat with Phi-2")
|
| 46 |
-
gr.Markdown("ZeroGPU
|
| 47 |
|
| 48 |
gr.ChatInterface(
|
| 49 |
fn=chat_fn,
|
| 50 |
chatbot=gr.Chatbot(type="messages"),
|
| 51 |
examples=[
|
| 52 |
-
"What is
|
| 53 |
-
"Write a
|
| 54 |
-
"Explain
|
| 55 |
]
|
| 56 |
)
|
| 57 |
|
| 58 |
-
#
|
| 59 |
-
demo.launch(
|
|
|
|
| 2 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 3 |
import torch
|
| 4 |
|
| 5 |
+
# Model ID
|
| 6 |
model_id = "microsoft/phi-2"
|
| 7 |
|
| 8 |
+
# Log device availability
|
| 9 |
+
cuda_available = torch.cuda.is_available()
|
| 10 |
+
print("🧠 CUDA Available:", cuda_available)
|
| 11 |
|
| 12 |
+
# Load the tokenizer, then the model with an automatic device map (ZeroGPU-compatible)
|
| 13 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 14 |
model = AutoModelForCausalLM.from_pretrained(
|
| 15 |
model_id,
|
| 16 |
+
device_map="auto", # Automatically use GPU if available
|
| 17 |
+
torch_dtype=torch.float16 if cuda_available else torch.float32
|
| 18 |
)
|
| 19 |
|
| 20 |
+
# Initialize pipeline WITHOUT `device=` (to avoid conflict with Accelerate)
|
| 21 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 22 |
|
| 23 |
+
# Chat function
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def chat_fn(message, history):
|
| 25 |
history_text = ""
|
| 26 |
for item in history:
|
|
|
|
| 30 |
history_text += f"<|assistant|>\n{item['content']}\n"
|
| 31 |
prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
|
| 32 |
|
| 33 |
+
result = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]["generated_text"]
|
| 34 |
+
reply = result.split("<|assistant|>")[-1].strip()
|
| 35 |
|
| 36 |
+
# Wrap the reply in a Markdown code fence if it is not already fenced and looks like code (contains "def ", "class ", or "import ")
|
| 37 |
+
if "```" not in reply and any(word in reply for word in ["def ", "class ", "import "]):
|
| 38 |
reply = f"```\n{reply}\n```"
|
| 39 |
|
| 40 |
return reply
|
| 41 |
|
| 42 |
+
# Gradio UI
|
| 43 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 44 |
gr.Markdown("## 🤖 Chat with Phi-2")
|
| 45 |
+
gr.Markdown("ZeroGPU-compatible AI Assistant (GPU if available, fallback to CPU)")
|
| 46 |
|
| 47 |
gr.ChatInterface(
|
| 48 |
fn=chat_fn,
|
| 49 |
chatbot=gr.Chatbot(type="messages"),
|
| 50 |
examples=[
|
| 51 |
+
"What is Python?",
|
| 52 |
+
"Write a Java function to sort a list.",
|
| 53 |
+
"Explain how neural networks work."
|
| 54 |
]
|
| 55 |
)
|
| 56 |
|
| 57 |
+
# Launch (ssr_mode=False avoids rendering issues in HF Spaces)
|
| 58 |
+
demo.launch(ssr_mode=False)
|