File size: 2,107 Bytes
4df5e6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# app.py
from transformers import pipeline
import gradio as gr

# ======================
# Configuration
# ======================
MODEL_ID = "microsoft/Phi-3-mini-128k-instruct"

# ======================
# Load Model with pipeline
# ======================
print(f"🚀 Loading model: {MODEL_ID}")
# Build the text-generation pipeline once at import time; `respond` reuses it
# for every chat turn.
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    trust_remote_code=False,     # Do not execute custom code shipped in the model repo
    torch_dtype="auto",          # Use the dtype recorded in the checkpoint config
    device_map="auto",           # Place weights automatically (GPU if available)
    return_full_text=False,      # Only return the newly generated reply
    # NOTE: no pad_token_id override. The previously hard-coded value (198) is
    # not Phi-3's padding token id; leaving it unset lets generation use the
    # special-token ids that ship with the model's tokenizer/generation config.
)

print("✅ Pipeline loaded!")


# ======================
# Response Function
# ======================
def _history_to_messages(history):
    """Convert Gradio chat history into chat-template messages.

    Supports both history layouts Gradio can hand to a chat function:
    "messages" style (dicts with ``role`` and ``content``) and legacy
    "tuples" style (``[user_text, assistant_text]`` pairs).
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Preserve the speaker: relabelling assistant turns as "user"
            # makes the chat template present the model's own replies as
            # user input.
            messages.append(
                {"role": turn.get("role", "user"), "content": turn["content"]}
            )
            continue

        # Legacy pair format; either side may be empty/None.
        user_text, assistant_text = turn
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages


def respond(message, history):
    """Generate the assistant's reply to ``message`` given the prior chat.

    Args:
        message: The latest user input from the textbox.
        history: Earlier conversation turns as supplied by Gradio.

    Returns:
        The generated reply text, or an empty string when ``message`` is
        missing/blank or the pipeline returns no output.
    """
    if not message or not message.strip():
        return ""

    # Build the conversation; the pipeline applies the model's chat template.
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    outputs = pipe(
        messages,
        max_new_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        stop_strings=["<|end|>", "<|endoftext|>"],  # Auto-stopping
        # generate() needs a tokenizer to match stop strings against the
        # decoded output; pass the pipeline's own one explicitly.
        tokenizer=pipe.tokenizer,
        truncation=True,
        max_length=128000,  # Intended to cap the prompt near the 128k context window
    )

    # The pipeline was built with return_full_text=False, so this is only
    # the newly generated text.
    return outputs[0]["generated_text"] if outputs else ""


# ======================
# Gradio Interface
# ======================
# Chat UI wired to `respond`. The interface and the chatbot both declare the
# "messages" format so the history passed to `respond` is a list of
# {"role": ..., "content": ...} dicts, which is what it indexes into.
demo = gr.ChatInterface(
    fn=respond,
    type="messages",
    chatbot=gr.Chatbot(height=600, type="messages"),
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
    title="🧠 Phi-3 Mini (128K) Chat - Simple Pipeline Version",
    description="A lightweight demo using `transformers.pipeline` for clean, readable code.",
    examples=[
        "Who are you?",
        "Explain quantum computing in simple terms",
        "Write a Python function to reverse a string",
    ],
)

# ======================
# Launch
# ======================
# Launch the interface only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch()