MindVR committed on
Commit
a24a633
·
verified ·
1 Parent(s): 5b514bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -8
app.py CHANGED
@@ -1,23 +1,31 @@
1
  import os
2
  from huggingface_hub import login
3
- login(token=os.environ["HF_TOKEN"]) # Dùng biến môi trường để lấy token
4
 
5
  import torch
6
- from transformers import AutoTokenizer, AutoModelForCausalLM
7
  import gradio as gr
8
 
9
  # Load model
10
  model_id = "MindVR/JohnTran_Fine-tune"
11
- tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)
 
 
 
 
 
 
 
 
 
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_id,
14
- torch_dtype=torch.float16,
15
- device_map="auto", # Tự động GPU
16
- low_cpu_mem_usage=True, # Tối ưu RAM
17
- token=os.environ["HF_TOKEN"] # ✅ thay vì use_auth_token=True
18
  )
19
 
20
-
21
  # Hàm xử lý yêu cầu
22
  def chat(prompt):
23
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
 
# --- Imports & Hugging Face Hub authentication -------------------------------
import os

from huggingface_hub import login

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import gradio as gr

# Read the access token from the environment so it never lands in source
# control.  Fail fast with an actionable message instead of the opaque
# KeyError that os.environ["HF_TOKEN"] would raise when the variable is unset.
_hf_token = os.environ.get("HF_TOKEN")
if not _hf_token:
    raise RuntimeError(
        "HF_TOKEN environment variable is not set; it is required to "
        "authenticate with the Hugging Face Hub."
    )
login(token=_hf_token)
8
 
9
# --- Tokenizer ---------------------------------------------------------------
# Hub repository id of the fine-tuned checkpoint.
model_id = "MindVR/JohnTran_Fine-tune"

# Fetch the tokenizer; the token authorizes access to the model repository.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=os.environ["HF_TOKEN"],
)
12
+
13
# 4-bit (NF4) quantization settings so the model fits in limited GPU memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # store weights in 4-bit precision
    bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization scheme
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
    bnb_4bit_compute_dtype=torch.float16,  # run the matmuls in fp16
)
20
+
21
# Load the causal-LM weights using the 4-bit quantization config above.
# device_map="auto" lets accelerate place layers across the available
# GPU(s)/CPU automatically; low_cpu_mem_usage keeps peak RAM down while
# the checkpoint shards are materialized.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=os.environ["HF_TOKEN"],  # auth for the model repository
    quantization_config=bnb_config,
    device_map="auto",
    low_cpu_mem_usage=True,
)
28
 
 
29
  # Hàm xử lý yêu cầu
30
  def chat(prompt):
31
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")