arudradey committed on
Commit
b9ed3f3
·
verified ·
1 Parent(s): d13590e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -43
app.py CHANGED
@@ -12,8 +12,9 @@ print(f"Loading {MODEL_ID} to CPU...")
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
13
  model = AutoModelForCausalLM.from_pretrained(
14
  MODEL_ID,
15
- dtype=torch.float32,
16
- device_map="cpu"
 
17
  )
18
 
19
  def get_system_stats():
@@ -22,11 +23,22 @@ def get_system_stats():
22
  return f"Available RAM: {available_gb:.2f} GB"
23
 
24
  def chat(history, system_prompt, temp, top_p, max_tokens, rep_penalty):
25
- # Construct messages starting with the system prompt
26
- messages = [{"role": "system", "content": system_prompt}]
 
 
 
 
 
 
27
  for msg in history:
28
- messages.append(msg)
 
 
 
29
 
 
 
30
  model_inputs = tokenizer.apply_chat_template(
31
  messages,
32
  tokenize=True,
@@ -40,11 +52,11 @@ def chat(history, system_prompt, temp, top_p, max_tokens, rep_penalty):
40
  generation_kwargs = dict(
41
  **model_inputs,
42
  streamer=streamer,
43
- max_new_tokens=max_tokens,
44
  do_sample=True if temp > 0 else False,
45
- temperature=temp,
46
- top_p=top_p,
47
- repetition_penalty=rep_penalty,
48
  )
49
 
50
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
@@ -62,40 +74,27 @@ def chat(history, system_prompt, temp, top_p, max_tokens, rep_penalty):
62
  stats = f"**Stats:** {tps:.2f} tokens/sec | {get_system_stats()}"
63
  yield generated_text, stats
64
 
65
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
66
- # --- SIDEBAR MENU ---
67
- with gr.Sidebar(label="Engine Settings", open=False) as sidebar:
68
- gr.Markdown("### 🛠️ ML Engineer Controls")
69
-
70
  system_input = gr.Textbox(
71
- value="You are a helpful and concise AI assistant.",
72
  label="System Prompt",
73
- lines=3
74
  )
75
-
76
- with gr.Accordion("Sampling Parameters", open=True):
77
- temp_slider = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature")
78
- top_p_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.05, label="Top-P")
79
- rep_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.1, label="Repetition Penalty")
80
-
81
- with gr.Accordion("Response Limits", open=False):
82
- max_tokens_slider = gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Max New Tokens")
83
-
84
  gr.Markdown("---")
85
  stats_output = gr.Markdown("Stats: System Ready")
86
 
87
- # --- MAIN CHAT INTERFACE ---
88
- gr.Markdown(f"# Qwen 3.5 Pro Interface")
89
-
90
- chatbot = gr.Chatbot(label="Qwen 0.8B (CPU)")
91
 
92
  with gr.Row():
93
- msg = gr.Textbox(
94
- placeholder="Type your message and press Enter...",
95
- label="Input",
96
- scale=4
97
- )
98
- clear = gr.Button("🗑️", scale=1)
99
 
100
  def user_action(user_message, history):
101
  if history is None: history = []
@@ -104,22 +103,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
104
 
105
  def bot_action(history, sys_prompt, temp, top_p, max_t, rep_p):
106
  history.append({"role": "assistant", "content": ""})
107
-
108
- # Pulling all settings into the chat function
109
  for partial_text, stats in chat(history[:-1], sys_prompt, temp, top_p, max_t, rep_p):
110
  history[-1]["content"] = partial_text
111
  yield history, stats
112
 
113
- # Event Handlers
114
- msg.submit(
115
- user_action, [msg, chatbot], [msg, chatbot], queue=False
116
- ).then(
117
  bot_action,
118
  [chatbot, system_input, temp_slider, top_p_slider, max_tokens_slider, rep_penalty_slider],
119
  [chatbot, stats_output]
120
  )
121
-
122
  clear.click(lambda: [], None, chatbot, queue=False)
123
 
124
  if __name__ == "__main__":
125
- demo.launch()
 
 
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
13
  model = AutoModelForCausalLM.from_pretrained(
14
  MODEL_ID,
15
+ dtype="auto", # Recommended by Phi-4 README
16
+ device_map="cpu",
17
+ trust_remote_code=True
18
  )
19
 
20
  def get_system_stats():
 
23
  return f"Available RAM: {available_gb:.2f} GB"
24
 
25
  def chat(history, system_prompt, temp, top_p, max_tokens, rep_penalty):
26
+ # Phi-4 requires a very specific list format
27
+ messages = []
28
+
29
+ # 1. Add System Prompt
30
+ if system_prompt:
31
+ messages.append({"role": "system", "content": str(system_prompt)})
32
+
33
+ # 2. Add History (ensuring all content is strictly string type)
34
  for msg in history:
35
+ messages.append({
36
+ "role": msg["role"],
37
+ "content": str(msg["content"])
38
+ })
39
 
40
+ # Phi-4 templates in transformers 4.49.0+ are strict about 'return_full_text'
41
+ # and the jinja rendering. We use the tokenizer's built-in template logic:
42
  model_inputs = tokenizer.apply_chat_template(
43
  messages,
44
  tokenize=True,
 
52
  generation_kwargs = dict(
53
  **model_inputs,
54
  streamer=streamer,
55
+ max_new_tokens=int(max_tokens),
56
  do_sample=True if temp > 0 else False,
57
+ temperature=float(temp) if temp > 0 else 1.0, # Avoid 0.0 temp error in some torch versions
58
+ top_p=float(top_p),
59
+ repetition_penalty=float(rep_penalty),
60
  )
61
 
62
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
 
74
  stats = f"**Stats:** {tps:.2f} tokens/sec | {get_system_stats()}"
75
  yield generated_text, stats
76
 
77
+ with gr.Blocks() as demo:
78
+ with gr.Sidebar(label="ML Settings", open=False):
79
+ gr.Markdown("### 🛠️ Persona & Engine")
 
 
80
  system_input = gr.Textbox(
81
+ value="You are an individual named Arudra. You follow instructions strictly.",
82
  label="System Prompt",
83
+ lines=4
84
  )
85
+ temp_slider = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
86
+ top_p_slider = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-P")
87
+ rep_penalty_slider = gr.Slider(1.0, 2.0, 1.1, step=0.1, label="Repetition Penalty")
88
+ max_tokens_slider = gr.Slider(64, 2048, 512, step=64, label="Max Tokens")
 
 
 
 
 
89
  gr.Markdown("---")
90
  stats_output = gr.Markdown("Stats: System Ready")
91
 
92
+ gr.Markdown("# Phi-4 Mini Engineering Console")
93
+ chatbot = gr.Chatbot(label="Phi-4 Mini")
 
 
94
 
95
  with gr.Row():
96
+ msg = gr.Textbox(placeholder="Enter message...", scale=4, label="Input")
97
+ clear = gr.Button("Clear", scale=1)
 
 
 
 
98
 
99
  def user_action(user_message, history):
100
  if history is None: history = []
 
103
 
104
  def bot_action(history, sys_prompt, temp, top_p, max_t, rep_p):
105
  history.append({"role": "assistant", "content": ""})
106
+ # History minus the empty slot we just added
 
107
  for partial_text, stats in chat(history[:-1], sys_prompt, temp, top_p, max_t, rep_p):
108
  history[-1]["content"] = partial_text
109
  yield history, stats
110
 
111
+ msg.submit(user_action, [msg, chatbot], [msg, chatbot], queue=False).then(
 
 
 
112
  bot_action,
113
  [chatbot, system_input, temp_slider, top_p_slider, max_tokens_slider, rep_penalty_slider],
114
  [chatbot, stats_output]
115
  )
 
116
  clear.click(lambda: [], None, chatbot, queue=False)
117
 
118
  if __name__ == "__main__":
119
+ # Theme is passed here for Gradio 6 compatibility
120
+ demo.launch(theme=gr.themes.Soft())