MindVR committed on
Commit
62d55b4
·
verified ·
1 Parent(s): fa83b44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -3,7 +3,7 @@ from huggingface_hub import login
3
  login(token=os.environ["HF_TOKEN"]) # Dùng biến môi trường để lấy token
4
 
5
  import torch
6
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
7
  import gradio as gr
8
 
9
  # Load model
@@ -13,11 +13,13 @@ tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ["HF_TOKEN"]
13
  model = AutoModelForCausalLM.from_pretrained(
14
  model_id,
15
  device_map="auto",
16
- torch_dtype=torch.float16, # ✅ Chạy nhanh hơn, không dùng 4bit
17
  low_cpu_mem_usage=True,
18
  token=os.environ["HF_TOKEN"]
19
  )
20
 
 
 
21
  # Hàm xử lý yêu cầu
22
  def chat(prompt):
23
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
 
3
  login(token=os.environ["HF_TOKEN"]) # Dùng biến môi trường để lấy token
4
 
5
  import torch
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM
7
  import gradio as gr
8
 
9
  # Load model
 
13
  model = AutoModelForCausalLM.from_pretrained(
14
  model_id,
15
  device_map="auto",
16
+ torch_dtype=torch.float16,
17
  low_cpu_mem_usage=True,
18
  token=os.environ["HF_TOKEN"]
19
  )
20
 
21
+
22
+
23
  # Hàm xử lý yêu cầu
24
  def chat(prompt):
25
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")