Spaces:

FrederickSundeep
/

ChatMate

Sleeping

App Files Community

FrederickSundeep committed on Jun 26, 2025

Commit

b1d3d86

1 Parent(s): 314bc06

update commit with phi-3 mini 14

Browse files

Files changed (2) hide show

app.py +18 -31
requirements.txt +0 -1

app.py CHANGED Viewed

@@ -1,35 +1,22 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
-from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, nvmlDeviceGetUtilizationRates
-# ✅ Manually trigger GPU to keep ZeroGPU alive
-def force_gpu():
     if torch.cuda.is_available():
-        print("✅ GPU is available, allocating tensor...")
-        _ = torch.randn(1).to("cuda")
     else:
-        print("⚠️ GPU not available, using CPU.")
-force_gpu()
-# ✅ GPU usage logging
-def log_gpu_usage():
-    try:
-        nvmlInit()
-        handle = nvmlDeviceGetHandleByIndex(0)
-        mem = nvmlDeviceGetMemoryInfo(handle)
-        util = nvmlDeviceGetUtilizationRates(handle)
-        print(f"[GPU] Memory Used: {mem.used / 1024**2:.1f} MB / {mem.total / 1024**2:.1f} MB")
-        print(f"[GPU] Utilization: {util.gpu}%")
-    except Exception as e:
-        print(f"[GPU Monitor] Error: {e}")
-# ✅ Lightweight model for speed
-model_id = "microsoft/phi-2"
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"🚀 Using device: {device}")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
@@ -38,7 +25,7 @@ model = AutoModelForCausalLM.from_pretrained(
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if device.type == "cuda" else -1)
-# 💬 Chat function
 def chat_fn(message, history):
     history_text = ""
     for item in history:
@@ -48,28 +35,28 @@ def chat_fn(message, history):
             history_text += f"<|assistant|>\n{item['content']}\n"
     prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
-    result = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]["generated_text"]
-    reply = result.split("<|assistant|>")[-1].strip()
     if "```" not in reply and any(w in reply for w in ["def ", "class ", "import "]):
         reply = f"```\n{reply}\n```"
-    log_gpu_usage()
     return reply
-# Gradio interface
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("## 🤖 Chat with Phi-2")
-    gr.Markdown("Fast, privacy-friendly AI assistant powered by Phi-2 (2.7B).")
     gr.ChatInterface(
         fn=chat_fn,
         chatbot=gr.Chatbot(type="messages"),
         examples=[
-            "What is a function in Python?",
-            "Write a for loop in JavaScript.",
-            "Explain how AI models are trained."
         ]
     )
 demo.launch(debug=True, ssr_mode=False)

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
+# ✅ Force GPU allocation EARLY (ZeroGPU needs this before model load)
+try:
     if torch.cuda.is_available():
+        print("✅ CUDA is already available")
     else:
+        torch.randn(1).cuda()
+        print("✅ Triggered CUDA tensor to force GPU allocation")
+except Exception as e:
+    print(f"⚠️ GPU not available or failed to allocate: {e}")
+# ✅ Load model after GPU trigger
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"🚀 Using device: {device}")
+model_id = "microsoft/phi-2"  # Choose phi-2 for performance
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if device.type == "cuda" else -1)
+# 💬 Chat logic
 def chat_fn(message, history):
     history_text = ""
     for item in history:
             history_text += f"<|assistant|>\n{item['content']}\n"
     prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
+    response = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]["generated_text"]
+    reply = response.split("<|assistant|>")[-1].strip()
     if "```" not in reply and any(w in reply for w in ["def ", "class ", "import "]):
         reply = f"```\n{reply}\n```"
     return reply
+# 🖥️ Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("## 🤖 Chat with Phi-2")
+    gr.Markdown("Fast AI assistant powered by Microsoft’s Phi-2, optimized for ZeroGPU on Hugging Face Spaces.")
     gr.ChatInterface(
         fn=chat_fn,
         chatbot=gr.Chatbot(type="messages"),
         examples=[
+            "What is a Python generator?",
+            "Write a for loop in C++",
+            "Explain LLM training"
         ]
     )
+# Launch without SSR for ZeroGPU
 demo.launch(debug=True, ssr_mode=False)

requirements.txt CHANGED Viewed

@@ -2,4 +2,3 @@ gradio
 transformers
 torch
 accelerate
-pynvml

 transformers
 torch
 accelerate