azan888 committed on
Commit
8581ba1
·
1 Parent(s): 6e1104c
Files changed (2) hide show
  1. app.py +16 -15
  2. requirements.txt +0 -1
app.py CHANGED
@@ -2,40 +2,41 @@ import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
 
5
- # Recommended model for CPU systems (2vCPU / 16GB RAM)
6
  model_id = "microsoft/phi-2"
7
 
8
- # Load tokenizer and model
9
  tokenizer = AutoTokenizer.from_pretrained(model_id)
10
  model = AutoModelForCausalLM.from_pretrained(
11
- model_id,
12
- device_map="auto",
13
- torch_dtype=torch.float32, # ✅ use float32 on CPU
14
  )
15
 
16
- # Create text generation pipeline
17
  generator = pipeline(
18
  "text-generation",
19
  model=model,
20
  tokenizer=tokenizer,
21
- do_sample=True, # ✅ enables temperature to apply
22
  temperature=0.7,
23
  )
24
 
25
 
26
- # Chat handler
27
- def chat(message, history=[]):
28
- prompt = f"<|user|>\n{message}\n<|assistant|>\n"
29
- response = generator(prompt, max_new_tokens=256)
30
- reply = response[0]["generated_text"].replace(prompt, "").strip()
31
- return reply
 
 
 
32
 
33
 
34
- # Gradio Chat UI
35
  iface = gr.ChatInterface(
36
  fn=chat,
 
37
  title="Phi-2 Chatbot",
38
- chatbot=gr.Chatbot(type="messages"), # ✅ future-proof Gradio
39
  )
40
 
41
  if __name__ == "__main__":
 
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
 
5
# Compact model that fits a 2vCPU / 16GB RAM CPU host.
model_id = "microsoft/phi-2"

# Tokenizer and weights; float32 because CPU inference lacks fp16 kernels.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float32,
)

# Generation pipeline; do_sample=True is required for temperature to apply.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    temperature=0.7,
    do_sample=True,
)
22
 
23
 
24
# Chat handler for gr.ChatInterface with a messages-type chatbot.
def chat(message, history):
    """Generate a reply to `message` given the prior chat `history`.

    Args:
        message: The latest user message (str).
        history: Prior turns in Gradio's `type="messages"` format — a list
            of dicts with "role" and "content" keys (OpenAI-style).

    Returns:
        The assistant's reply as a plain string; gr.ChatInterface renders
        it as the assistant message.
    """
    prompt = ""
    # BUG FIX: messages-format history entries carry "role"/"content" keys,
    # not "response" — the old turn['response'] raised KeyError on any
    # multi-turn conversation.
    for turn in history:
        tag = "user" if turn["role"] == "user" else "assistant"
        prompt += f"<|{tag}|>\n{turn['content']}\n"
    prompt += f"<|user|>\n{message}\n<|assistant|>\n"
    output = generator(prompt, max_new_tokens=256)[0]["generated_text"]
    # The HF pipeline echoes the prompt; strip it to keep only new text.
    reply = output.replace(prompt, "").strip()
    # BUG FIX: return a plain string — ChatInterface wraps it as the
    # assistant message; the old {"response": reply} dict is not a valid
    # messages-format reply and rendered incorrectly.
    return reply
33
 
34
 
35
# Chat UI wired to the handler; a messages-type chatbot makes Gradio pass
# history to `fn` as OpenAI-style {"role", "content"} dicts.
iface = gr.ChatInterface(
    fn=chat,
    title="Phi-2 Chatbot",
    chatbot=gr.Chatbot(type="messages"),  # matches the interface default
)
41
 
42
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- huggingface_hub==0.25.2
2
  transformers
3
  torch
4
  gradio
 
 
1
  transformers
2
  torch
3
  gradio