Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
-
"""Hugging Face Space App with
|
| 3 |
|
| 4 |
import os
|
| 5 |
import gradio as gr
|
|
@@ -15,10 +15,17 @@ if not HF_TOKEN:
|
|
| 15 |
|
| 16 |
login(HF_TOKEN) # 使用訪問令牌進行身份驗證
|
| 17 |
|
| 18 |
-
#
|
| 19 |
MODEL_NAME = "meta-llama/Llama-2-13b-chat-hf"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
|
| 21 |
-
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
|
| 22 |
|
| 23 |
# 定義推理函數
|
| 24 |
def generate_text(prompt):
|
|
@@ -37,8 +44,8 @@ interface = gr.Interface(
|
|
| 37 |
fn=generate_text,
|
| 38 |
inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
|
| 39 |
outputs="text",
|
| 40 |
-
title="Llama 2 Text Generator",
|
| 41 |
-
description="Generate text using the Llama-2-13b-chat-hf model hosted on Hugging Face Spaces."
|
| 42 |
)
|
| 43 |
|
| 44 |
# 啟動應用
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
+
"""Hugging Face Space App with INT8 Quantization"""
|
| 3 |
|
| 4 |
import os
|
| 5 |
import gradio as gr
|
|
|
|
| 15 |
|
| 16 |
login(HF_TOKEN) # 使用訪問令牌進行身份驗證
|
| 17 |
|
| 18 |
+
# 加載量化的 Llama-2-13b-chat-hf 模型
|
| 19 |
MODEL_NAME = "meta-llama/Llama-2-13b-chat-hf"
|
| 20 |
+
|
| 21 |
+
# 啟用量化選項
|
| 22 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 23 |
+
MODEL_NAME,
|
| 24 |
+
device_map="auto", # 自動分配設備(CPU/GPU)
|
| 25 |
+
load_in_8bit=True, # 啟用 INT8 量化
|
| 26 |
+
use_auth_token=HF_TOKEN # 使用 Hugging Face 訪問令牌
|
| 27 |
+
)
|
| 28 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
|
|
|
|
| 29 |
|
| 30 |
# 定義推理函數
|
| 31 |
def generate_text(prompt):
|
|
|
|
| 44 |
fn=generate_text,
|
| 45 |
inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
|
| 46 |
outputs="text",
|
| 47 |
+
title="Llama 2 Text Generator (INT8 Quantized)",
|
| 48 |
+
description="Generate text using the INT8-quantized Llama-2-13b-chat-hf model hosted on Hugging Face Spaces."
|
| 49 |
)
|
| 50 |
|
| 51 |
# 啟動應用
|