Spaces:

arudradey
/

chatphi

Sleeping

App Files Files Community

arudradey committed on Apr 29

Commit

b9ed3f3

verified ·

1 Parent(s): d13590e

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -43

app.py CHANGED Viewed

@@ -12,8 +12,9 @@ print(f"Loading {MODEL_ID} to CPU...")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
-    dtype=torch.float32,
-    device_map="cpu"
 )
 def get_system_stats():
@@ -22,11 +23,22 @@ def get_system_stats():
     return f"Available RAM: {available_gb:.2f} GB"
 def chat(history, system_prompt, temp, top_p, max_tokens, rep_penalty):
-    # Construct messages starting with the system prompt
-    messages = [{"role": "system", "content": system_prompt}]
     for msg in history:
-        messages.append(msg)
     model_inputs = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
@@ -40,11 +52,11 @@ def chat(history, system_prompt, temp, top_p, max_tokens, rep_penalty):
     generation_kwargs = dict(
         **model_inputs,
         streamer=streamer,
-        max_new_tokens=max_tokens,
         do_sample=True if temp > 0 else False,
-        temperature=temp,
-        top_p=top_p,
-        repetition_penalty=rep_penalty,
     )
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
@@ -62,40 +74,27 @@ def chat(history, system_prompt, temp, top_p, max_tokens, rep_penalty):
         stats = f"**Stats:** {tps:.2f} tokens/sec | {get_system_stats()}"
         yield generated_text, stats
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    # --- SIDEBAR MENU ---
-    with gr.Sidebar(label="Engine Settings", open=False) as sidebar:
-        gr.Markdown("### 🛠️ ML Engineer Controls")
         system_input = gr.Textbox(
-            value="You are a helpful and concise AI assistant.",
             label="System Prompt",
-            lines=3
         )
-        with gr.Accordion("Sampling Parameters", open=True):
-            temp_slider = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature")
-            top_p_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.05, label="Top-P")
-            rep_penalty_slider = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.1, label="Repetition Penalty")
-        with gr.Accordion("Response Limits", open=False):
-            max_tokens_slider = gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Max New Tokens")
         gr.Markdown("---")
         stats_output = gr.Markdown("Stats: System Ready")
-    # --- MAIN CHAT INTERFACE ---
-    gr.Markdown(f"# Qwen 3.5 Pro Interface")
-    chatbot = gr.Chatbot(label="Qwen 0.8B (CPU)")
     with gr.Row():
-        msg = gr.Textbox(
-            placeholder="Type your message and press Enter...",
-            label="Input",
-            scale=4
-        )
-        clear = gr.Button("🗑️", scale=1)
     def user_action(user_message, history):
         if history is None: history = []
@@ -104,22 +103,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     def bot_action(history, sys_prompt, temp, top_p, max_t, rep_p):
         history.append({"role": "assistant", "content": ""})
-        # Pulling all settings into the chat function
         for partial_text, stats in chat(history[:-1], sys_prompt, temp, top_p, max_t, rep_p):
             history[-1]["content"] = partial_text
             yield history, stats
-    # Event Handlers
-    msg.submit(
-        user_action, [msg, chatbot], [msg, chatbot], queue=False
-    ).then(
         bot_action,
         [chatbot, system_input, temp_slider, top_p_slider, max_tokens_slider, rep_penalty_slider],
         [chatbot, stats_output]
     )
     clear.click(lambda: [], None, chatbot, queue=False)
 if __name__ == "__main__":
-    demo.launch()

 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
+    dtype="auto", # Recommended by Phi-4 README
+    device_map="cpu",
+    trust_remote_code=True
 )
 def get_system_stats():
     return f"Available RAM: {available_gb:.2f} GB"
 def chat(history, system_prompt, temp, top_p, max_tokens, rep_penalty):
+    # Phi-4 requires a very specific list format
+    messages = []
+    # 1. Add System Prompt
+    if system_prompt:
+        messages.append({"role": "system", "content": str(system_prompt)})
+    # 2. Add History (ensuring all content is strictly string type)
     for msg in history:
+        messages.append({
+            "role": msg["role"],
+            "content": str(msg["content"])
+        })
+    # Phi-4 templates in transformers 4.49.0+ are strict about 'return_full_text'
+    # and the jinja rendering. We use the tokenizer's built-in template logic:
     model_inputs = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
     generation_kwargs = dict(
         **model_inputs,
         streamer=streamer,
+        max_new_tokens=int(max_tokens),
         do_sample=True if temp > 0 else False,
+        temperature=float(temp) if temp > 0 else 1.0, # Avoid 0.0 temp error in some torch versions
+        top_p=float(top_p),
+        repetition_penalty=float(rep_penalty),
     )
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
         stats = f"**Stats:** {tps:.2f} tokens/sec | {get_system_stats()}"
         yield generated_text, stats
+with gr.Blocks() as demo:
+    with gr.Sidebar(label="ML Settings", open=False):
+        gr.Markdown("### 🛠️ Persona & Engine")
         system_input = gr.Textbox(
+            value="You are an individual named Arudra. You follow instructions strictly.",
             label="System Prompt",
+            lines=4
         )
+        temp_slider = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
+        top_p_slider = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-P")
+        rep_penalty_slider = gr.Slider(1.0, 2.0, 1.1, step=0.1, label="Repetition Penalty")
+        max_tokens_slider = gr.Slider(64, 2048, 512, step=64, label="Max Tokens")
         gr.Markdown("---")
         stats_output = gr.Markdown("Stats: System Ready")
+    gr.Markdown("# Phi-4 Mini Engineering Console")
+    chatbot = gr.Chatbot(label="Phi-4 Mini")
     with gr.Row():
+        msg = gr.Textbox(placeholder="Enter message...", scale=4, label="Input")
+        clear = gr.Button("Clear", scale=1)
     def user_action(user_message, history):
         if history is None: history = []
     def bot_action(history, sys_prompt, temp, top_p, max_t, rep_p):
         history.append({"role": "assistant", "content": ""})
+        # History minus the empty slot we just added
         for partial_text, stats in chat(history[:-1], sys_prompt, temp, top_p, max_t, rep_p):
             history[-1]["content"] = partial_text
             yield history, stats
+    msg.submit(user_action, [msg, chatbot], [msg, chatbot], queue=False).then(
         bot_action,
         [chatbot, system_input, temp_slider, top_p_slider, max_tokens_slider, rep_penalty_slider],
         [chatbot, stats_output]
     )
     clear.click(lambda: [], None, chatbot, queue=False)
 if __name__ == "__main__":
+    # Theme is passed here for Gradio 6 compatibility
+    demo.launch(theme=gr.themes.Soft())