Spaces:

FrederickSundeep
/

ChatMate

Sleeping

App Files Files Community

FrederickSundeep committed on Jun 26, 2025

Commit

d73c299

1 Parent(s): 4c61144

update commit with phi-3 mini 11

Browse files

Files changed (2) hide show

app.py +41 -20
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -1,21 +1,43 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
-# Force device based on availability
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(f"Using device: {device}")
-# Load Phi-3 Mini model
-model_id = "microsoft/phi-3-mini-4k-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
-    model_id, torch_dtype=torch.float16 if device.type == "cuda" else torch.float32
 ).to(device)
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if device.type == "cuda" else -1)
-# OpenAI-style messages (new format)
 def chat_fn(message, history):
     history_text = ""
     for item in history:
@@ -25,30 +47,29 @@ def chat_fn(message, history):
             history_text += f"<|assistant|>\n{item['content']}\n"
     prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
-    result = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]['generated_text']
-    reply = result.split("<|assistant|>")[-1].strip()
-    # Format code blocks
-    if "```" not in reply and any(word in reply for word in ["def ", "class ", "import "]):
         reply = f"```\n{reply}\n```"
     return reply
-# Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## 💬 Chat with Phi-3 Mini")
-    gr.Markdown("Lightweight AI Assistant powered by Microsoft's Phi-3 Mini. Works best with short prompts. Ask away!")
     gr.ChatInterface(
         fn=chat_fn,
-        title="",
         examples=[
-            "What is Python?",
-            "Write a JavaScript function to reverse a string.",
-            "Explain how transformers work.",
-        ],
-        chatbot=gr.Chatbot(type="messages")
     )
-# Launch without SSR and share (for Spaces)
 demo.launch(debug=True, ssr_mode=False)

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
+from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, nvmlDeviceGetUtilizationRates
+from huggingface_hub import spaces
+# 🔐 Required for ZeroGPU to allocate a GPU
+@spaces.GPU
+def trigger_gpu():
+    print("✅ GPU requested")
+    return torch.cuda.is_available()
+trigger_gpu()
+# ✅ GPU Monitoring
+def log_gpu_usage():
+    try:
+        nvmlInit()
+        handle = nvmlDeviceGetHandleByIndex(0)
+        mem = nvmlDeviceGetMemoryInfo(handle)
+        util = nvmlDeviceGetUtilizationRates(handle)
+        print(f"[GPU] Memory Used: {mem.used / 1024 ** 2:.1f} MB / {mem.total / 1024 ** 2:.1f} MB")
+        print(f"[GPU] Utilization: {util.gpu}%")
+    except Exception as e:
+        print(f"[GPU Monitor] Error: {e}")
+# 📦 Model Choice (Phi-2 for fast inference)
+model_id = "microsoft/phi-2"
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"🔧 Using device: {device}")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32
 ).to(device)
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if device.type == "cuda" else -1)
+# 💬 Chat logic with OpenAI-style messages
 def chat_fn(message, history):
     history_text = ""
     for item in history:
             history_text += f"<|assistant|>\n{item['content']}\n"
     prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
+    response = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]['generated_text']
+    reply = response.split("<|assistant|>")[-1].strip()
+    if "```" not in reply and any(w in reply for w in ["def ", "class ", "import "]):
         reply = f"```\n{reply}\n```"
+    log_gpu_usage()  # 🔍 log usage per response
     return reply
+# 🖥️ Gradio app
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## 🤖 Chat with Phi-2 (Fast Lightweight Model)")
+    gr.Markdown("Ask questions or generate code. Powered by Microsoft's Phi-2 (2.7B).")
     gr.ChatInterface(
         fn=chat_fn,
+        chatbot=gr.Chatbot(type="messages"),
         examples=[
+            "What is a function in Python?",
+            "Write a for loop in JavaScript.",
+            "Explain how AI models are trained."
+        ]
     )
+# ✅ Launch safely without SSR for Hugging Face Spaces
 demo.launch(debug=True, ssr_mode=False)

requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
 transformers
-gradio>=4.16.0
 torch
 accelerate

 transformers
 torch
 accelerate
+gradio
+pynvml
+huggingface_hub>=0.20.0