BorderCollieWei committed on
Commit
57fc729
·
verified ·
1 Parent(s): 8d36e9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # -*- coding: utf-8 -*-
2
- """Hugging Face Space App with Authentication"""
3
 
4
  import os
5
  import gradio as gr
@@ -15,10 +15,17 @@ if not HF_TOKEN:
15
 
16
  login(HF_TOKEN) # 使用訪問令牌進行身份驗證
17
 
18
- # 加載 Llama-2-13b-chat-hf 模型
19
  MODEL_NAME = "meta-llama/Llama-2-13b-chat-hf"
 
 
 
 
 
 
 
 
20
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
21
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
22
 
23
  # 定義推理函數
24
  def generate_text(prompt):
@@ -37,8 +44,8 @@ interface = gr.Interface(
37
  fn=generate_text,
38
  inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
39
  outputs="text",
40
- title="Llama 2 Text Generator",
41
- description="Generate text using the Llama-2-13b-chat-hf model hosted on Hugging Face Spaces."
42
  )
43
 
44
  # 啟動應用
 
1
  # -*- coding: utf-8 -*-
2
+ """Hugging Face Space App with INT8 Quantization"""
3
 
4
  import os
5
  import gradio as gr
 
15
 
16
  login(HF_TOKEN) # 使用訪問令牌進行身份驗證
17
 
18
+ # 加載量化的 Llama-2-13b-chat-hf 模型
19
  MODEL_NAME = "meta-llama/Llama-2-13b-chat-hf"
20
+
21
+ # 啟用量化選項
22
+ model = AutoModelForCausalLM.from_pretrained(
23
+ MODEL_NAME,
24
+ device_map="auto", # 自動分配設備(CPU/GPU)
25
+ load_in_8bit=True, # 啟用 INT8 量化
26
+ use_auth_token=HF_TOKEN # 使用 Hugging Face 訪問令牌
27
+ )
28
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
 
29
 
30
  # 定義推理函數
31
  def generate_text(prompt):
 
44
  fn=generate_text,
45
  inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
46
  outputs="text",
47
+ title="Llama 2 Text Generator (INT8 Quantized)",
48
+ description="Generate text using the INT8-quantized Llama-2-13b-chat-hf model hosted on Hugging Face Spaces."
49
  )
50
 
51
  # 啟動應用