Spaces:

BorderCollieWei
/

test

Runtime error

BorderCollieWei committed on Jan 6, 2025

Commit

170c6af

verified ·

1 Parent(s): da8f7f0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,31 +1,32 @@
 # -*- coding: utf-8 -*-
-"""Hugging Face Space App with INT8 Quantization"""
 import os
 import gradio as gr
 from huggingface_hub import login
-from transformers import AutoTokenizer, AutoModelForCausalLM
 # 登錄 Hugging Face，使用訪問令牌進行身份驗證
-HF_TOKEN = os.getenv("HF_TOKEN")  # 從環境變數中獲取訪問令牌
 if not HF_TOKEN:
     raise ValueError(
         "未找到 Hugging Face 訪問令牌！請設置環境變數 'HF_TOKEN'，或者直接提供有效的訪問令牌。"
     )
-login(HF_TOKEN)  # 使用訪問令牌進行身份驗證
-# 加載量化的 Llama-2-13b-chat-hf 模型
 MODEL_NAME = "meta-llama/Llama-2-13b-chat-hf"
-# 啟用量化選項
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    device_map="auto",       # 自動分配設備（CPU/GPU）
-    load_in_8bit=True,       # 啟用 INT8 量化
-    use_auth_token=HF_TOKEN  # 使用 Hugging Face 訪問令牌
 )
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
 # 定義推理函數
 def generate_text(prompt):
@@ -44,8 +45,8 @@ interface = gr.Interface(
     fn=generate_text,
     inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
     outputs="text",
-    title="Llama 2 Text Generator (INT8 Quantized)",
-    description="Generate text using the INT8-quantized Llama-2-13b-chat-hf model hosted on Hugging Face Spaces."
 )
 # 啟動應用

 # -*- coding: utf-8 -*-
+"""Hugging Face Space App with CPU Quantization"""
 import os
 import gradio as gr
 from huggingface_hub import login
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 # 登錄 Hugging Face，使用訪問令牌進行身份驗證
+HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     raise ValueError(
         "未找到 Hugging Face 訪問令牌！請設置環境變數 'HF_TOKEN'，或者直接提供有效的訪問令牌。"
     )
+login(HF_TOKEN)
+# 配置 4-bit 量化
 MODEL_NAME = "meta-llama/Llama-2-13b-chat-hf"
+quantization_config = BitsAndBytesConfig(load_in_4bit=True)
+# 加載量化模型
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
+    quantization_config=quantization_config,
+    device_map="auto",
+    token=HF_TOKEN
 )
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
 # 定義推理函數
 def generate_text(prompt):
     fn=generate_text,
     inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
     outputs="text",
+    title="Llama 2 Text Generator (CPU Quantized)",
+    description="Generate text using the Llama-2-13b-chat-hf model with CPU quantization hosted on Hugging Face Spaces."
 )
 # 啟動應用