BorderCollieWei committed on
Commit
57fc729
·
verified ·
1 Parent(s): 8d36e9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # -*- coding: utf-8 -*-
2
- """Hugging Face Space App with Authentication"""
3
 
4
  import os
5
  import gradio as gr
@@ -15,10 +15,17 @@ if not HF_TOKEN:
15
 
16
  login(HF_TOKEN) # 使用訪問令牌進行身份驗證
17
 
18
- # 加載 Llama-2-13b-chat-hf 模型
19
  MODEL_NAME = "meta-llama/Llama-2-13b-chat-hf"
 
 
 
 
 
 
 
 
20
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
21
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
22
 
23
  # 定義推理函數
24
  def generate_text(prompt):
@@ -37,8 +44,8 @@ interface = gr.Interface(
37
  fn=generate_text,
38
  inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
39
  outputs="text",
40
- title="Llama 2 Text Generator",
41
- description="Generate text using the Llama-2-13b-chat-hf model hosted on Hugging Face Spaces."
42
  )
43
 
44
  # 啟動應用
 
1
  # -*- coding: utf-8 -*-
2
+ """Hugging Face Space App with INT8 Quantization"""
3
 
4
  import os
5
  import gradio as gr
 
15
 
16
  login(HF_TOKEN) # 使用訪問令牌進行身份驗證
17
 
18
+ # 加載量化的 Llama-2-13b-chat-hf 模型
19
  MODEL_NAME = "meta-llama/Llama-2-13b-chat-hf"
20
+
21
+ # 啟用量化選項
22
+ model = AutoModelForCausalLM.from_pretrained(
23
+ MODEL_NAME,
24
+ device_map="auto", # 自動分配設備(CPU/GPU)
25
+ load_in_8bit=True, # 啟用 INT8 量化
26
+ use_auth_token=HF_TOKEN # 使用 Hugging Face 訪問令牌
27
+ )
28
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
 
29
 
30
  # 定義推理函數
31
  def generate_text(prompt):
 
44
  fn=generate_text,
45
  inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
46
  outputs="text",
47
+ title="Llama 2 Text Generator (INT8 Quantized)",
48
+ description="Generate text using the INT8-quantized Llama-2-13b-chat-hf model hosted on Hugging Face Spaces."
49
  )
50
 
51
  # 啟動應用