emon-islam committed on
Commit
63f5a4f
·
verified ·
1 Parent(s): 109b71f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -17
app.py CHANGED
@@ -2,31 +2,58 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
 
5
- # Load tokenizer and model
6
  model_name = "microsoft/phi-3-mini-4k-instruct"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
  model = AutoModelForCausalLM.from_pretrained(model_name)
 
9
 
10
- # Move to CPU (safe for Hugging Face Spaces)
11
- model.to("cpu")
12
 
13
- # Basic chat function
14
- def generate_response(prompt):
 
 
 
 
 
 
 
 
 
 
 
15
  inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
16
  outputs = model.generate(
17
  **inputs,
18
- max_new_tokens=256,
19
  do_sample=True,
20
  temperature=0.7,
21
- top_p=0.9
 
22
  )
23
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
24
-
25
- # Gradio UI
26
- gr.Interface(
27
- fn=generate_response,
28
- inputs=gr.Textbox(lines=5, label="Enter your question or prompt"),
29
- outputs="text",
30
- title="Phi-3 Mini Chat",
31
- description="Powered by microsoft/phi-3-mini-4k-instruct"
32
- ).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
 
5
+ # Load model and tokenizer
6
  model_name = "microsoft/phi-3-mini-4k-instruct"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
  model = AutoModelForCausalLM.from_pretrained(model_name)
9
+ model.to("cpu") # CPU for Hugging Face Spaces
10
 
11
+ # Optional: Custom system prompt (context for assistant behavior)
12
+ SYSTEM_PROMPT = "You are a friendly and knowledgeable assistant who answers clearly and helpfully."
13
 
14
+ # Chat formatting using Phi-3's expected format
15
+ def format_chat(system_prompt, history, user_input):
16
+ chat = ""
17
+ if system_prompt:
18
+ chat += f"<|system|>\n{system_prompt}\n"
19
+ for user, assistant in history:
20
+ chat += f"<|user|>\n{user}\n<|assistant|>\n{assistant}\n"
21
+ chat += f"<|user|>\n{user_input}\n<|assistant|>\n"
22
+ return chat
23
+
24
+ # Inference function
25
+ def chat_fn(message, chat_history):
26
+ prompt = format_chat(SYSTEM_PROMPT, chat_history, message)
27
  inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
28
  outputs = model.generate(
29
  **inputs,
30
+ max_new_tokens=512,
31
  do_sample=True,
32
  temperature=0.7,
33
+ top_p=0.9,
34
+ pad_token_id=tokenizer.eos_token_id
35
  )
36
+ output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
37
+
38
+ # Extract only the latest assistant reply (after the last <|assistant|>)
39
+ if "<|assistant|>" in output_text:
40
+ response = output_text.split("<|assistant|>")[-1].strip()
41
+ else:
42
+ response = output_text.strip()
43
+
44
+ chat_history.append((message, response))
45
+ return "", chat_history
46
+
47
+ # UI setup
48
+ with gr.Blocks(theme=gr.themes.Base()) as demo:
49
+ gr.Markdown("## 🤖 Phi-3 Mini Chatbot\nTalk to a compact AI assistant powered by Microsoft's Phi-3 model.")
50
+ chatbot = gr.Chatbot(show_label=False)
51
+ msg = gr.Textbox(label="Type your message here...", placeholder="Ask me anything!", lines=2)
52
+ clear = gr.Button("Clear Chat")
53
+
54
+ state = gr.State([]) # Keeps chat history
55
+
56
+ msg.submit(chat_fn, [msg, state], [msg, chatbot])
57
+ clear.click(lambda: ([], []), None, [state, chatbot])
58
+
59
+ demo.launch()