arudradey committed on
Commit
191ba31
·
verified ·
1 Parent(s): b9ed3f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -68
app.py CHANGED
@@ -2,119 +2,153 @@ import gradio as gr
2
  import torch
3
  import time
4
  import psutil
 
5
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
  from threading import Thread
7
 
8
- # Configuration
 
 
 
 
 
 
 
 
 
 
 
 
9
  MODEL_ID = "microsoft/Phi-4-mini-instruct"
10
 
11
- print(f"Loading {MODEL_ID} to CPU...")
 
 
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 
13
  model = AutoModelForCausalLM.from_pretrained(
14
  MODEL_ID,
15
- dtype="auto", # Recommended by Phi-4 README
16
  device_map="cpu",
 
17
  trust_remote_code=True
18
  )
19
 
20
- def get_system_stats():
21
  vm = psutil.virtual_memory()
22
- available_gb = vm.available / (1024**3)
23
- return f"Available RAM: {available_gb:.2f} GB"
24
 
25
- def chat(history, system_prompt, temp, top_p, max_tokens, rep_penalty):
26
- # Phi-4 requires a very specific list format
27
  messages = []
28
-
29
- # 1. Add System Prompt
30
  if system_prompt:
31
  messages.append({"role": "system", "content": str(system_prompt)})
32
 
33
- # 2. Add History (ensuring all content is strictly string type)
34
  for msg in history:
35
- messages.append({
36
- "role": msg["role"],
37
- "content": str(msg["content"])
38
- })
39
-
40
- # Phi-4 templates in transformers 4.49.0+ are strict about 'return_full_text'
41
- # and the jinja rendering. We use the tokenizer's built-in template logic:
42
- model_inputs = tokenizer.apply_chat_template(
43
- messages,
44
- tokenize=True,
45
- add_generation_prompt=True,
46
- return_tensors="pt",
47
- return_dict=True
48
  ).to("cpu")
49
-
50
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
51
 
52
- generation_kwargs = dict(
53
- **model_inputs,
54
  streamer=streamer,
55
- max_new_tokens=int(max_tokens),
56
  do_sample=True if temp > 0 else False,
57
- temperature=float(temp) if temp > 0 else 1.0, # Avoid 0.0 temp error in some torch versions
58
  top_p=float(top_p),
59
- repetition_penalty=float(rep_penalty),
60
  )
61
 
62
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
63
  thread.start()
64
 
65
- generated_text = ""
66
  start_time = time.time()
67
- tokens_count = 0
68
 
69
  for new_text in streamer:
70
- generated_text += new_text
71
- tokens_count += 1
72
- elapsed_time = time.time() - start_time
73
- tps = tokens_count / elapsed_time if elapsed_time > 0 else 0
74
- stats = f"**Stats:** {tps:.2f} tokens/sec | {get_system_stats()}"
75
- yield generated_text, stats
76
-
77
- with gr.Blocks() as demo:
78
- with gr.Sidebar(label="ML Settings", open=False):
79
- gr.Markdown("### 🛠Persona & Engine")
80
- system_input = gr.Textbox(
81
- value="You are an individual named Arudra. You follow instructions strictly.",
82
- label="System Prompt",
 
83
  lines=4
84
  )
85
- temp_slider = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
86
- top_p_slider = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-P")
87
- rep_penalty_slider = gr.Slider(1.0, 2.0, 1.1, step=0.1, label="Repetition Penalty")
88
- max_tokens_slider = gr.Slider(64, 2048, 512, step=64, label="Max Tokens")
 
 
 
89
  gr.Markdown("---")
90
- stats_output = gr.Markdown("Stats: System Ready")
91
 
92
- gr.Markdown("# Phi-4 Mini Engineering Console")
93
- chatbot = gr.Chatbot(label="Phi-4 Mini")
 
 
94
 
95
  with gr.Row():
96
- msg = gr.Textbox(placeholder="Enter message...", scale=4, label="Input")
97
- clear = gr.Button("Clear", scale=1)
 
 
 
 
 
98
 
99
- def user_action(user_message, history):
100
- if history is None: history = []
101
- history.append({"role": "user", "content": user_message})
102
  return "", history
103
 
104
- def bot_action(history, sys_prompt, temp, top_p, max_t, rep_p):
 
105
  history.append({"role": "assistant", "content": ""})
106
- # History minus the empty slot we just added
107
- for partial_text, stats in chat(history[:-1], sys_prompt, temp, top_p, max_t, rep_p):
108
- history[-1]["content"] = partial_text
 
109
  yield history, stats
110
 
111
- msg.submit(user_action, [msg, chatbot], [msg, chatbot], queue=False).then(
112
- bot_action,
113
- [chatbot, system_input, temp_slider, top_p_slider, max_tokens_slider, rep_penalty_slider],
114
- [chatbot, stats_output]
 
 
 
115
  )
116
- clear.click(lambda: [], None, chatbot, queue=False)
 
 
 
 
 
 
 
 
 
117
 
118
  if __name__ == "__main__":
119
- # Theme is passed here for Gradio 6 compatibility
120
  demo.launch(theme=gr.themes.Soft())
 
2
  import torch
3
  import time
4
  import psutil
5
+ import transformers
6
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
7
  from threading import Thread
8
 
9
# --- MONKEY PATCH FOR PHI-4 COMPATIBILITY ---
# This resolves: ImportError: cannot import name 'LossKwargs' from 'transformers.utils'
# The shim is installed only when the attribute is missing, so transformers
# releases that still ship LossKwargs are left untouched.
if not hasattr(transformers.utils, "LossKwargs"):
    try:
        # Try to find where it moved
        from transformers.loss.loss_utils import LossKwargs
    except ImportError:
        from typing import Optional, TypedDict

        # Last resort: provide a stand-in so the import doesn't crash.
        # It must be a TypedDict rather than a plain class: Python refuses to
        # build a class that mixes TypedDict and non-TypedDict bases, so a
        # plain dummy would only trade the ImportError for a TypeError if the
        # importing code subclasses it next to other TypedDicts.
        # NOTE(review): the single field mirrors the upstream definition as
        # remembered from older transformers releases - confirm if it matters.
        class LossKwargs(TypedDict, total=False):
            """Fallback replacement for the removed ``transformers.utils.LossKwargs``."""

            num_items_in_batch: Optional[int]

    transformers.utils.LossKwargs = LossKwargs
# --------------------------------------------
21
+
22
  MODEL_ID = "microsoft/Phi-4-mini-instruct"
23
 
24
print(f"Starting engine with {MODEL_ID}...")

# Tokenizer for the configured checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Model weights: placed on the CPU, dtype left to the loader ("auto"), and
# the repository's custom code is allowed to run (trust_remote_code).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cpu",
)
36
 
37
def get_ram_info():
    """Return the currently available system memory as a ``"<x.xx> GB"`` string.

    The value is ``psutil.virtual_memory().available`` divided by 1024**3 and
    formatted with two decimal places.
    """
    available_bytes = psutil.virtual_memory().available
    available_gib = available_bytes / (1024**3)
    return f"{available_gib:.2f} GB"
 
40
 
41
def _message_text(content):
    """Flatten one chat message's content into plain text for the chat template.

    Strings pass through unchanged. For a list/tuple of parts, the textual
    parts (plain strings or dicts carrying a string ``"text"`` entry) are
    joined with newlines. Anything else falls back to ``str(content)``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        texts = []
        for part in content:
            if isinstance(part, str):
                texts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                texts.append(part["text"])
        if texts:
            return "\n".join(texts)
    return str(content)


def chat_engine(history, system_prompt, temp, top_p, max_t, rep_p):
    """Stream a model reply for the given conversation.

    Args:
        history: Iterable of prior messages, each either a dict with
            ``"role"``/``"content"`` keys or an object exposing ``.role`` and
            ``.content`` attributes.
        system_prompt: Optional system message; skipped when falsy.
        temp: Sampling temperature. Values <= 0 switch to greedy decoding.
        top_p: Nucleus sampling threshold.
        max_t: Maximum number of new tokens to generate.
        rep_p: Repetition penalty.

    Yields:
        ``(response, stats)`` tuples, where ``response`` is the text
        accumulated so far and ``stats`` is a Markdown status line with the
        streaming rate and available RAM.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": str(system_prompt)})

    for msg in history:
        if isinstance(msg, dict):
            role, content = msg["role"], msg["content"]
        else:
            role, content = msg.role, msg.content
        messages.append({"role": role, "content": _message_text(content)})

    # Ask for a dict so generate() receives the attention mask together with
    # the input ids, and so the call does not depend on the default return
    # type of apply_chat_template.
    model_inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to("cpu")

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    sampling = temp > 0
    gen_kwargs = dict(
        **model_inputs,
        streamer=streamer,
        max_new_tokens=int(max_t),
        do_sample=sampling,
        # A temperature of 0 is not passed through; greedy decoding is used instead.
        temperature=float(temp) if sampling else 1.0,
        top_p=float(top_p),
        repetition_penalty=float(rep_p),
    )

    # generate() blocks until completion, so it runs on a worker thread while
    # this generator drains the streamer.
    worker = Thread(target=model.generate, kwargs=gen_kwargs)
    worker.start()

    response = ""
    start_time = time.time()
    chunks = 0  # counts streamed text chunks, used as the "t/s" numerator

    for new_text in streamer:
        response += new_text
        chunks += 1
        elapsed = time.time() - start_time
        # The first chunk can arrive within the clock's resolution; avoid
        # dividing by zero in that case.
        tps = chunks / elapsed if elapsed > 0 else 0.0
        stats = f"**Stats:** {tps:.2f} t/s | Available RAM: {get_ram_info()}"
        yield response, stats

    # The stream is exhausted, so generation is finishing; reap the worker.
    worker.join()
85
+
86
+ # --- GRADIO UI ---
87
with gr.Blocks(title="Phi-4 Mini Pro") as demo:

    # Collapsed sidebar holding the persona prompt, generation knobs and status line.
    with gr.Sidebar(label="🎛ML Engineer Console", open=False):
        gr.Markdown("### Persona Configuration")
        sys_msg = gr.Textbox(
            value="You are Arudra, a highly intelligent and unique individual AI.",
            label="System Prompt",
            lines=4
        )

        gr.Markdown("### Generation Parameters")
        temp = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-P")
        rep_p = gr.Slider(1.0, 2.0, 1.15, step=0.05, label="Repetition Penalty")
        max_t = gr.Slider(64, 2048, 512, step=64, label="Max New Tokens")

        gr.Markdown("---")
        status_box = gr.Markdown("Status: Engine Ready")

    gr.Markdown("# Phi-4 Mini (3.8B) - CPU Edition")

    # History is exchanged as a list of {"role", "content"} dicts.
    # NOTE(review): confirm the installed Gradio release still accepts the
    # `type` argument on gr.Chatbot.
    chatbot = gr.Chatbot(label="Conversation", type="messages", height=550)

    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Type a message to Arudra...",
            show_label=False,
            scale=4
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)
        clear_btn = gr.Button("🗑️", scale=0)

    def handle_user(message, history):
        """Append the user's message to the history and clear the input box.

        Returns ``("", history)``: the empty string resets the textbox.
        """
        # The chatbot value may be None before the first message.
        if history is None:
            history = []
        history.append({"role": "user", "content": message})
        return "", history

    def handle_bot(history, system, t, p, mt, rp):
        """Stream the assistant reply into the last history slot.

        Yields ``(history, stats)`` after every streamed chunk.
        """
        # Placeholder that is filled in as text arrives.
        history.append({"role": "assistant", "content": ""})

        # The model sees everything except the empty placeholder.
        for text, stats in chat_engine(history[:-1], system, t, p, mt, rp):
            history[-1]["content"] = text
            yield history, stats

    # Event wiring: pressing Enter and clicking "Send" run the same two-step
    # chain (record the user message, then stream the reply).
    bot_inputs = [chatbot, sys_msg, temp, top_p, max_t, rep_p]
    for trigger in (user_input.submit, submit_btn.click):
        trigger(
            handle_user, [user_input, chatbot], [user_input, chatbot]
        ).then(
            handle_bot,
            bot_inputs,
            [chatbot, status_box]
        )

    clear_btn.click(lambda: [], None, chatbot)
152
 
153
if __name__ == "__main__":
    # Start the app only when this file is executed directly; the Soft theme
    # is supplied at launch time.
    soft_theme = gr.themes.Soft()
    demo.launch(theme=soft_theme)