thava committed on
Commit
4df5e6a
·
1 Parent(s): 17cbe2a

Use pipeline API

Browse files
Files changed (1) hide show
  1. app.v2.py +78 -0
app.v2.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py
from transformers import pipeline
import gradio as gr

# ======================
# Configuration
# ======================
# Hugging Face model id for the 128K-context Phi-3 Mini instruct checkpoint.
MODEL_ID = "microsoft/Phi-3-mini-128k-instruct"

# ======================
# Load Model with pipeline
# ======================
print(f"🚀 Loading model: {MODEL_ID}")

# Pipeline configuration gathered in one place so the constructor call
# below stays short and each knob can carry its own comment.
_PIPELINE_KWARGS = dict(
    model=MODEL_ID,
    trust_remote_code=False,  # only run code shipped with transformers itself
    torch_dtype="auto",       # auto-select float16 on GPU
    device_map="auto",        # use GPU if available
    return_full_text=False,   # only return the assistant's reply
    pad_token_id=198,         # NOTE(review): claimed pad id for <|endoftext|> — confirm against the Phi-3 tokenizer
)
pipe = pipeline("text-generation", **_PIPELINE_KWARGS)

print("✅ Pipeline loaded!")
25
+
26
+
27
# ======================
# Response Function
# ======================
def respond(message, history):
    """Generate one chat reply for the Gradio ChatInterface.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list[dict]
        Prior turns in Gradio "messages" format — dicts carrying "role"
        and "content" keys for both user and assistant turns.

    Returns
    -------
    str
        The model's reply, or "" when the message is blank/whitespace.
    """
    # Guard clause: nothing to generate for an empty prompt.
    if not message.strip():
        return ""

    # Rebuild the conversation for the chat template.
    # BUG FIX: the original forced every history entry to role "user",
    # which discarded assistant turns and corrupted the template's view
    # of the dialogue. Preserve each turn's own role instead.
    messages = [
        {"role": msg["role"], "content": msg["content"]}
        for msg in history
    ]
    messages.append({"role": "user", "content": message})

    # Generate response
    outputs = pipe(
        messages,
        max_new_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        stop_strings=["<|end|>", "<|endoftext|>"],  # Auto-stopping
        truncation=True,
        max_length=128000,
    )

    # The pipeline returns a list of dicts; with return_full_text=False the
    # "generated_text" field holds only the newly generated reply.
    return outputs[0]["generated_text"] if outputs else ""
56
+
57
+
58
# ======================
# Gradio Interface
# ======================
# Canned prompts offered as one-click examples under the textbox.
_EXAMPLE_PROMPTS = [
    "Who are you?",
    "Explain quantum computing in simple terms",
    "Write a Python function to reverse a string",
]

# Chat UI wired to respond(); the "messages" chatbot type matches the
# dict-based history format that respond() consumes.
demo = gr.ChatInterface(
    fn=respond,
    chatbot=gr.Chatbot(height=600, type="messages"),
    textbox=gr.Textbox(
        placeholder="Ask me anything...",
        container=False,
        scale=7,
    ),
    title="🧠 Phi-3 Mini (128K) Chat - Simple Pipeline Version",
    description="A lightweight demo using `transformers.pipeline` for clean, readable code.",
    examples=_EXAMPLE_PROMPTS,
)

# ======================
# Launch
# ======================
if __name__ == "__main__":
    demo.launch()