Spaces:

FrederickSundeep
/

ChatMate

Sleeping

App Files Files Community

FrederickSundeep committed on Jun 26, 2025

Commit

1549b17

1 Parent(s): 6addef4

update commit with phi-3 mini 1113

Browse files

Files changed (1) hide show

app.py +20 -8

app.py CHANGED Viewed

@@ -1,16 +1,26 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-# Load Phi-3 Mini model
 model_id = "microsoft/phi-3-mini-4k-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
-    model_id, torch_dtype="auto", device_map="auto"
 )
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-# OpenAI-style messages (new format)
 def chat_fn(message, history):
     history_text = ""
     for item in history:
@@ -23,16 +33,17 @@ def chat_fn(message, history):
     result = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]['generated_text']
     reply = result.split("<|assistant|>")[-1].strip()
-    # Format code blocks
-    if "```" not in reply and any(word in reply for word in ["def ", "class ", "import "]):
         reply = f"```\n{reply}\n```"
     return reply
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("## 💬 Chat with Phi-3 Mini")
     gr.Markdown(
-        "Welcome to your AI Assistant powered by Phi-3 Mini. Ask me anything or request code examples!"
     )
     gr.ChatInterface(
@@ -43,7 +54,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             "Write a JavaScript function to reverse a string.",
             "Explain how transformers work.",
         ],
-        chatbot=gr.Chatbot(type="messages")  # fixes the deprecated tuples warning
     )
-demo.launch()

 import gradio as gr
+import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+# ✅ Force ZeroGPU to allocate GPU early
+try:
+    _ = torch.tensor([1.0], device="cuda")
+    print("✅ ZeroGPU triggered successfully.")
+except Exception as e:
+    print(f"⚠️ GPU allocation failed: {e}")
+# 🚀 Load Phi-3 Mini model
 model_id = "microsoft/phi-3-mini-4k-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    device_map="auto"
 )
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+# 💬 Chat function
 def chat_fn(message, history):
     history_text = ""
     for item in history:
     result = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]['generated_text']
     reply = result.split("<|assistant|>")[-1].strip()
+    # Format code blocks if applicable
+    if "```" not in reply and any(w in reply for w in ["def ", "class ", "import "]):
         reply = f"```\n{reply}\n```"
     return reply
+# 🧩 Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("## 💬 Chat with Phi-3 Mini")
     gr.Markdown(
+        "Welcome to your AI Assistant powered by Phi-3 Mini and ZeroGPU (uses GPU if available)."
     )
     gr.ChatInterface(
             "Write a JavaScript function to reverse a string.",
             "Explain how transformers work.",
         ],
+        chatbot=gr.Chatbot(type="messages")
     )
+# 🚀 Launch
+demo.launch(ssr_mode=False)