Spaces:

FrederickSundeep
/

ChatMate

Sleeping

App Files Community

FrederickSundeep committed on Jun 26, 2025

Commit

16ebb52

1 Parent(s): 7b0bd94

update commit with phi-3 mini 113

Browse files

Files changed (1) hide show

app.py +21 -22

app.py CHANGED Viewed

@@ -2,27 +2,25 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
-# ✅ Set model
 model_id = "microsoft/phi-2"
-# ✅ Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-# ✅ Load model — this triggers GPU allocation in ZeroGPU
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.float16,
-    device_map="auto"
 )
-# ✅ Create pipeline — device=0 will use CUDA if available
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-# ✅ Detect actual device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(f"🚀 Using device: {device}")
-# 💬 Chat logic
 def chat_fn(message, history):
     history_text = ""
     for item in history:
@@ -32,28 +30,29 @@ def chat_fn(message, history):
             history_text += f"<|assistant|>\n{item['content']}\n"
     prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
-    output = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]["generated_text"]
-    reply = output.split("<|assistant|>")[-1].strip()
-    if "```" not in reply and any(x in reply for x in ["def ", "class ", "import "]):
         reply = f"```\n{reply}\n```"
     return reply
-# 🎨 Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("## 🤖 Chat with Phi-2")
-    gr.Markdown("ZeroGPU Space powered by Phi-2")
     gr.ChatInterface(
         fn=chat_fn,
         chatbot=gr.Chatbot(type="messages"),
         examples=[
-            "What is a transformer model?",
-            "Write a C++ program to reverse a string.",
-            "Explain binary search."
         ]
     )
-# 🚀 Run in HF Space with SSR off
-demo.launch(debug=True, ssr_mode=False)

 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
+# Model ID
 model_id = "microsoft/phi-2"
+# Log device availability
+cuda_available = torch.cuda.is_available()
+print("🧠 CUDA Available:", cuda_available)
+# Load tokenizer and model with auto device map (ZeroGPU-compatible)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
+    device_map="auto",  # Automatically use GPU if available
+    torch_dtype=torch.float16 if cuda_available else torch.float32
 )
+# Initialize pipeline WITHOUT `device=` (to avoid conflict with Accelerate)
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+# Chat function
 def chat_fn(message, history):
     history_text = ""
     for item in history:
             history_text += f"<|assistant|>\n{item['content']}\n"
     prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
+    result = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]["generated_text"]
+    reply = result.split("<|assistant|>")[-1].strip()
+    # Wrap code in markdown if needed
+    if "```" not in reply and any(word in reply for word in ["def ", "class ", "import "]):
         reply = f"```\n{reply}\n```"
     return reply
+# Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("## 🤖 Chat with Phi-2")
+    gr.Markdown("ZeroGPU-compatible AI Assistant (GPU if available, fallback to CPU)")
     gr.ChatInterface(
         fn=chat_fn,
         chatbot=gr.Chatbot(type="messages"),
         examples=[
+            "What is Python?",
+            "Write a Java function to sort a list.",
+            "Explain how neural networks work."
         ]
     )
+# Launch (ssr_mode=False avoids rendering issues in HF Spaces)
+demo.launch(ssr_mode=False)