Spaces:
Sleeping
Sleeping
dev
Browse files- app.py +16 -15
- requirements.txt +0 -1
app.py
CHANGED
|
@@ -2,40 +2,41 @@ import gradio as gr
|
|
| 2 |
import torch
|
| 3 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 4 |
|
| 5 |
-
#
|
| 6 |
model_id = "microsoft/phi-2"
|
| 7 |
|
| 8 |
-
# Load
|
| 9 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 10 |
model = AutoModelForCausalLM.from_pretrained(
|
| 11 |
-
model_id,
|
| 12 |
-
device_map="auto",
|
| 13 |
-
torch_dtype=torch.float32, # ✅ use float32 on CPU
|
| 14 |
)
|
| 15 |
|
| 16 |
-
#
|
| 17 |
generator = pipeline(
|
| 18 |
"text-generation",
|
| 19 |
model=model,
|
| 20 |
tokenizer=tokenizer,
|
| 21 |
-
do_sample=True,
|
| 22 |
temperature=0.7,
|
| 23 |
)
|
| 24 |
|
| 25 |
|
| 26 |
-
# Chat handler
|
| 27 |
-
def chat(message, history
|
| 28 |
-
prompt =
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
-
#
|
| 35 |
iface = gr.ChatInterface(
|
| 36 |
fn=chat,
|
|
|
|
| 37 |
title="Phi-2 Chatbot",
|
| 38 |
-
chatbot=gr.Chatbot(type="messages"), # ✅ future-proof Gradio
|
| 39 |
)
|
| 40 |
|
| 41 |
if __name__ == "__main__":
|
|
|
|
| 2 |
import torch
|
| 3 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 4 |
|
| 5 |
+
# Phi-2 is small enough to serve from a 2 vCPU / 16 GB RAM CPU instance.
model_id = "microsoft/phi-2"

# Pull tokenizer and weights down from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,  # float32: CPUs lack efficient half-precision
    device_map="auto",
)
|
| 13 |
|
| 14 |
+
# Wrap the loaded model and tokenizer in a ready-made generation pipeline.
_pipeline_kwargs = dict(
    model=model,
    tokenizer=tokenizer,
    do_sample=True,   # sample instead of greedy decoding
    temperature=0.7,  # moderate randomness
)
generator = pipeline("text-generation", **_pipeline_kwargs)
|
| 22 |
|
| 23 |
|
| 24 |
+
# Chat handler for Gradio's "messages" history format.
def chat(message, history):
    """Generate a reply to *message* given the running conversation.

    Args:
        message: The user's latest input string.
        history: With a ``type="messages"`` chatbot, Gradio passes a flat
            list of ``{"role": ..., "content": ...}`` dicts — one dict per
            message, NOT one per user/assistant pair. The previous code
            read ``turn['response']``, a key that never exists in this
            format, so any second turn raised ``KeyError``.

    Returns:
        The assistant's reply as a plain string. ``ChatInterface`` appends
        it to the history itself; returning ``{"response": reply}`` (as
        before) is not a shape Gradio accepts in messages mode.
    """
    prompt = ""
    # Rebuild the prompt from prior turns, tagging each message by role.
    for turn in history:
        tag = "<|user|>" if turn.get("role") == "user" else "<|assistant|>"
        prompt += f"{tag}\n{turn['content']}\n"
    prompt += f"<|user|>\n{message}\n<|assistant|>\n"
    output = generator(prompt, max_new_tokens=256)[0]["generated_text"]
    # The pipeline echoes the prompt; strip it so only the new text remains.
    reply = output.replace(prompt, "").strip()
    return reply
|
| 33 |
|
| 34 |
|
| 35 |
+
# Assemble the chat UI; a messages-type chatbot tracks Gradio's current API.
iface = gr.ChatInterface(
    fn=chat,
    title="Phi-2 Chatbot",
    chatbot=gr.Chatbot(type="messages"),  # OpenAI-style message dicts
)
|
| 41 |
|
| 42 |
if __name__ == "__main__":
|
requirements.txt
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
huggingface_hub==0.25.2
|
| 2 |
transformers
|
| 3 |
torch
|
| 4 |
gradio
|
|
|
|
|
|
|
| 1 |
transformers
|
| 2 |
torch
|
| 3 |
gradio
|