AlexKitipov committed on
Commit
15664cf
·
verified ·
1 Parent(s): ec0c58a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+
5
# Hub repository id passed to both the tokenizer and the model loader.
MODEL_ID = "AlexKitipov/Phi-3-mini-128k-instruct"

# System message placed at the start of every conversation.
SYSTEM_PROMPT = "You are a helpful AI assistant."

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Weights are loaded as float32 when CUDA is unavailable and as bfloat16
# otherwise; device placement is delegated to the loader via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.bfloat16,
)
15
+
16
def _content_to_text(content):
    """Flatten the ``content`` of a role/content history entry to plain text.

    Strings are returned unchanged. For a list, string items and the ``"text"``
    value of dict items are joined with newlines. Anything else yields ``""``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # NOTE(review): presumably multi-part content carries its text under a
        # "text" key -- confirm against the installed Gradio version.
        parts = []
        for item in content:
            if isinstance(item, str):
                parts.append(item)
            elif isinstance(item, dict) and isinstance(item.get("text"), str):
                parts.append(item["text"])
        return "\n".join(parts)
    return ""


def build_prompt(history, user_message):
    """Render the conversation as a single prompt string.

    Args:
        history: Previous turns. Each entry is either a ``(user, assistant)``
            pair or a dict with ``"role"`` and ``"content"`` keys. ``None`` is
            treated as an empty history.
        user_message: The new user message, appended as the final turn.

    Returns:
        The prompt text. When the tokenizer has a chat template, it is produced
        by ``tokenizer.apply_chat_template`` with a generation prompt appended;
        otherwise a plain ``ROLE: content`` transcript ending in
        ``ASSISTANT:`` is returned.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    for turn in history or []:
        if isinstance(turn, dict):
            # Role/content entry: unpacking it as a pair would silently yield
            # the dict's keys instead of the conversation text.
            role = turn.get("role")
            text = _content_to_text(turn.get("content"))
            if role and text:
                messages.append({"role": role, "content": text})
            continue

        user, assistant = turn
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})

    messages.append({"role": "user", "content": user_message})

    # getattr with a default: a tokenizer without a ``chat_template``
    # attribute should fall through to the plain transcript, not raise.
    if hasattr(tokenizer, "apply_chat_template") and getattr(
        tokenizer, "chat_template", None
    ):
        return tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

    # Fallback formatting. The system prompt is already the first message, so
    # it is emitted exactly once (as the "SYSTEM:" line).
    lines = [f"{m['role'].upper()}: {m['content']}" for m in messages]
    lines.append("ASSISTANT:")
    return "\n".join(lines)
39
+
40
+
41
def chat_fn(message, history):
    """Generate the model's reply to ``message`` given the prior ``history``.

    The prompt is built with ``build_prompt``, tokenized, and moved to the
    model's device. Generation samples up to 512 new tokens with
    temperature 0.7 and top-p 0.9. Only the tokens produced after the prompt
    are decoded, with special tokens skipped, and returned as a string.
    """
    prompt_text = build_prompt(history, message)

    encoded = tokenizer(prompt_text, return_tensors="pt").to(model.device)
    prompt_len = encoded["input_ids"].shape[-1]

    sampling = {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True,
        # The EOS token id doubles as the padding id for generation.
        "pad_token_id": tokenizer.eos_token_id,
    }

    with torch.no_grad():
        sequences = model.generate(**encoded, **sampling)

    # The output sequence starts with the prompt tokens; drop them so only the
    # newly generated continuation is decoded.
    reply_ids = sequences[0][prompt_len:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)
62
+
63
+
64
# Chat UI wired to chat_fn, which receives the new message and the history.
demo = gr.ChatInterface(
    description="Chat with the Phi-3-mini-128k-instruct model.",
    title="Phi-3-mini-128k Chat",
    fn=chat_fn,
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()