FrederickSundeep committed on
Commit
16ebb52
·
1 Parent(s): 7b0bd94

update commit with phi-3 mini 113

Browse files
Files changed (1) hide show
  1. app.py +21 -22
app.py CHANGED
@@ -2,27 +2,25 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
  import torch
4
 
5
- # Set model
6
  model_id = "microsoft/phi-2"
7
 
8
- # Load tokenizer
9
- tokenizer = AutoTokenizer.from_pretrained(model_id)
 
10
 
11
- # Load model — this triggers GPU allocation in ZeroGPU
 
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_id,
14
- torch_dtype=torch.float16,
15
- device_map="auto"
16
  )
17
 
18
- # Create pipeline — device=0 will use CUDA if available
19
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
20
 
21
- # Detect actual device
22
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
- print(f"🚀 Using device: {device}")
24
-
25
- # 💬 Chat logic
26
  def chat_fn(message, history):
27
  history_text = ""
28
  for item in history:
@@ -32,28 +30,29 @@ def chat_fn(message, history):
32
  history_text += f"<|assistant|>\n{item['content']}\n"
33
  prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
34
 
35
- output = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]["generated_text"]
36
- reply = output.split("<|assistant|>")[-1].strip()
37
 
38
- if "```" not in reply and any(x in reply for x in ["def ", "class ", "import "]):
 
39
  reply = f"```\n{reply}\n```"
40
 
41
  return reply
42
 
43
- # 🎨 Gradio UI
44
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
45
  gr.Markdown("## 🤖 Chat with Phi-2")
46
- gr.Markdown("ZeroGPU Space powered by Phi-2")
47
 
48
  gr.ChatInterface(
49
  fn=chat_fn,
50
  chatbot=gr.Chatbot(type="messages"),
51
  examples=[
52
- "What is a transformer model?",
53
- "Write a C++ program to reverse a string.",
54
- "Explain binary search."
55
  ]
56
  )
57
 
58
- # 🚀 Run in HF Space with SSR off
59
- demo.launch(debug=True, ssr_mode=False)
 
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
  import torch
4
 
5
+ # Model ID
6
  model_id = "microsoft/phi-2"
7
 
8
+ # Log device availability
9
+ cuda_available = torch.cuda.is_available()
10
+ print("🧠 CUDA Available:", cuda_available)
11
 
12
+ # Load tokenizer and model with auto device map (ZeroGPU-compatible)
13
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
14
  model = AutoModelForCausalLM.from_pretrained(
15
  model_id,
16
+ device_map="auto", # Automatically use GPU if available
17
+ torch_dtype=torch.float16 if cuda_available else torch.float32
18
  )
19
 
20
+ # Initialize pipeline WITHOUT `device=` (to avoid conflict with Accelerate)
21
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
22
 
23
+ # Chat function
 
 
 
 
24
  def chat_fn(message, history):
25
  history_text = ""
26
  for item in history:
 
30
  history_text += f"<|assistant|>\n{item['content']}\n"
31
  prompt = f"{history_text}<|user|>\n{message}\n<|assistant|>\n"
32
 
33
+ result = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)[0]["generated_text"]
34
+ reply = result.split("<|assistant|>")[-1].strip()
35
 
36
+ # Wrap code in markdown if needed
37
+ if "```" not in reply and any(word in reply for word in ["def ", "class ", "import "]):
38
  reply = f"```\n{reply}\n```"
39
 
40
  return reply
41
 
42
+ # Gradio UI
43
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
44
  gr.Markdown("## 🤖 Chat with Phi-2")
45
+ gr.Markdown("ZeroGPU-compatible AI Assistant (GPU if available, fallback to CPU)")
46
 
47
  gr.ChatInterface(
48
  fn=chat_fn,
49
  chatbot=gr.Chatbot(type="messages"),
50
  examples=[
51
+ "What is Python?",
52
+ "Write a Java function to sort a list.",
53
+ "Explain how neural networks work."
54
  ]
55
  )
56
 
57
+ # Launch (ssr_mode=False avoids rendering issues in HF Spaces)
58
+ demo.launch(ssr_mode=False)