FrederickSundeep committed on
Commit
d73c299
·
1 Parent(s): 4c61144

update commit with phi-3 mini 11

Browse files
Files changed (2) hide show
  1. app.py +41 -20
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,21 +1,43 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
  import torch
 
 
4
 
5
- # Force device based on availability
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
7
- print(f"Using device: {device}")
8
 
9
- # Load Phi-3 Mini model
10
- model_id = "microsoft/phi-3-mini-4k-instruct"
11
  tokenizer = AutoTokenizer.from_pretrained(model_id)
12
  model = AutoModelForCausalLM.from_pretrained(
13
- model_id, torch_dtype=torch.float16 if device.type == "cuda" else torch.float32
 
14
  ).to(device)
15
 
16
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if device.type == "cuda" else -1)
17
 
18
- # OpenAI-style messages (new format)
19
  def chat_fn(message, history):
20
  history_text = ""
21
  for item in history:
@@ -25,30 +47,29 @@ def chat_fn(message, history):
25
  history_text += f"<|assistant|>\n{item['content']}\n"
26
  prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
27
 
28
- result = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]['generated_text']
29
- reply = result.split("<|assistant|>")[-1].strip()
30
 
31
- # Format code blocks
32
- if "```" not in reply and any(word in reply for word in ["def ", "class ", "import "]):
33
  reply = f"```\n{reply}\n```"
34
 
 
35
  return reply
36
 
37
- # Gradio UI
38
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
39
- gr.Markdown("## 💬 Chat with Phi-3 Mini")
40
- gr.Markdown("Lightweight AI Assistant powered by Microsoft's Phi-3 Mini. Works best with short prompts. Ask away!")
41
 
42
  gr.ChatInterface(
43
  fn=chat_fn,
44
- title="",
45
  examples=[
46
- "What is Python?",
47
- "Write a JavaScript function to reverse a string.",
48
- "Explain how transformers work.",
49
- ],
50
- chatbot=gr.Chatbot(type="messages")
51
  )
52
 
53
- # Launch without SSR and share (for Spaces)
54
  demo.launch(debug=True, ssr_mode=False)
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
  import torch
4
+ from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, nvmlDeviceGetUtilizationRates
5
# The ZeroGPU decorator is provided by the standalone `spaces` package,
# not by `huggingface_hub`.
import spaces


# 🔐 Required for ZeroGPU to allocate GPU
@spaces.GPU
def trigger_gpu():
    """Run a GPU-decorated call and report whether CUDA is visible.

    Returns:
        bool: The value of ``torch.cuda.is_available()`` evaluated inside
        the ``@spaces.GPU``-decorated function.
    """
    print("✅ GPU requested")
    return torch.cuda.is_available()


# Invoked once at import time, as in the original script.
trigger_gpu()
14
+
15
+ # ✅ GPU Monitoring
16
def log_gpu_usage():
    """Print memory usage and utilization of GPU index 0 using NVML.

    This is a best-effort diagnostic: any failure (no driver, no GPU,
    NVML unavailable) is printed with a ``[GPU Monitor]`` prefix instead
    of being raised, so a monitoring problem never breaks a chat reply.

    Returns:
        None
    """
    nvml_ready = False
    try:
        nvmlInit()
        nvml_ready = True
        handle = nvmlDeviceGetHandleByIndex(0)
        mem = nvmlDeviceGetMemoryInfo(handle)
        util = nvmlDeviceGetUtilizationRates(handle)
        # NVML reports bytes; convert to MiB for readability.
        print(f"[GPU] Memory Used: {mem.used / 1024 ** 2:.1f} MB / {mem.total / 1024 ** 2:.1f} MB")
        print(f"[GPU] Utilization: {util.gpu}%")
    except Exception as e:
        print(f"[GPU Monitor] Error: {e}")
    finally:
        # This function runs once per response; pair every successful
        # nvmlInit() with a shutdown so initializations do not pile up.
        if nvml_ready:
            try:
                from pynvml import nvmlShutdown

                nvmlShutdown()
            except Exception as e:
                print(f"[GPU Monitor] Error: {e}")
26
+
27
+ # 📦 Model Choice (Phi-2 for fast inference)
28
model_id = "microsoft/phi-2"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"🔧 Using device: {device}")

# Evaluate the CUDA check once; it selects both the weight dtype and the
# pipeline device index below.
_on_cuda = device.type == "cuda"
_weights_dtype = torch.float16 if _on_cuda else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=_weights_dtype).to(device)

# Text-generation pipeline: device index 0 on CUDA, -1 on CPU.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if _on_cuda else -1,
)
39
 
40
+ # 💬 Chat logic with openai-style messages
41
  def chat_fn(message, history):
42
  history_text = ""
43
  for item in history:
 
47
  history_text += f"<|assistant|>\n{item['content']}\n"
48
  prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
49
 
50
+ response = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]['generated_text']
51
+ reply = response.split("<|assistant|>")[-1].strip()
52
 
53
+ if "```" not in reply and any(w in reply for w in ["def ", "class ", "import "]):
 
54
  reply = f"```\n{reply}\n```"
55
 
56
+ log_gpu_usage() # 🔍 log usage per response
57
  return reply
58
 
59
+ # 🖥️ Gradio app
60
# Gradio UI: a Soft-themed Blocks page with a heading, a short description,
# and a chat interface backed by `chat_fn`.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🤖 Chat with Phi-2 (Fast Lightweight Model)")
    gr.Markdown("Ask questions or generate code. Powered by Microsoft's Phi-2 (2.7B).")

    gr.ChatInterface(
        fn=chat_fn,
        # chat_fn reads history items by key (item['content']), i.e. the
        # OpenAI-style "messages" format. Declare it on the interface as
        # well as on the Chatbot so the history format does not depend on
        # the installed Gradio version's default.
        type="messages",
        chatbot=gr.Chatbot(type="messages"),
        examples=[
            "What is a function in Python?",
            "Write a for loop in JavaScript.",
            "Explain how AI models are trained.",
        ],
    )

# Launch safely without SSR for Hugging Face Spaces
demo.launch(debug=True, ssr_mode=False)
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  transformers
2
- gradio>=4.16.0
3
  torch
4
  accelerate
 
 
 
 
1
  transformers
 
2
  torch
3
  accelerate
4
+ gradio
5
+ pynvml
6
+ huggingface_hub>=0.20.0