anabury committed on
Commit
0e4c2bd
·
verified ·
1 Parent(s): cc4c727

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -10
app.py CHANGED
@@ -1,29 +1,40 @@
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
- # Load your model from Hugging Face Hub
6
- model_id = "Anabury/My_finetuned_Phi-4"
 
 
 
 
7
  tokenizer = AutoTokenizer.from_pretrained(model_id)
8
- model = AutoModelForCausalLM.from_pretrained(
 
 
9
  model_id,
 
10
  torch_dtype=torch.float16,
11
- device_map="auto"
12
  )
13
 
 
 
 
14
  def chat(message, history):
15
- # encode input
16
  inputs = tokenizer(message, return_tensors="pt").to(model.device)
17
- # generate output
18
- outputs = model.generate(**inputs, max_new_tokens=200, pad_token_id=tokenizer.eos_token_id)
 
 
 
19
  reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
20
- # store history
21
  history.append((message, reply))
22
  return history, history
23
 
24
  with gr.Blocks() as demo:
25
  chatbot = gr.Chatbot()
26
- msg = gr.Textbox(placeholder="Ask me anything...")
27
  clear = gr.Button("Clear")
28
 
29
  msg.submit(chat, [msg, chatbot], [chatbot, chatbot])
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoConfig, PhiForCausalLM
3
  import torch
4
 
5
+ model_id = "Anabury/My_Finetuned_Phi-4"
6
+
7
+ # Load the config so its model type can be checked (it is not inspected anywhere in this hunk)
8
+ config = AutoConfig.from_pretrained(model_id)
9
+
10
+ # Load tokenizer
11
  tokenizer = AutoTokenizer.from_pretrained(model_id)
12
+
13
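+ # Load the weights with PhiForCausalLM. NOTE(review): confirm config.architectures names this class; upstream Phi-4 checkpoints appear to declare Phi3ForCausalLM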
14
+ model = PhiForCausalLM.from_pretrained(
15
  model_id,
16
+ device_map="auto",
17
  torch_dtype=torch.float16,
18
+ trust_remote_code=True # if needed for custom implementations
19
  )
20
 
21
+ model.config.use_cache = True # enables faster inference
22
+
23
+ # Define the chat interface
24
  def chat(message, history):
 
25
  inputs = tokenizer(message, return_tensors="pt").to(model.device)
26
+ outputs = model.generate(
27
+ **inputs,
28
+ max_new_tokens=200,
29
+ pad_token_id=tokenizer.eos_token_id
30
+ )
31
  reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
32
  history.append((message, reply))
33
  return history, history
34
 
35
  with gr.Blocks() as demo:
36
  chatbot = gr.Chatbot()
37
+ msg = gr.Textbox(placeholder="Type your message here...")
38
  clear = gr.Button("Clear")
39
 
40
  msg.submit(chat, [msg, chatbot], [chatbot, chatbot])