# Source: Hugging Face Space by CircleStar — "Update app.py", commit 32a9f05 (verified)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
# Load the model and tokenizer; the gated Llama-2 repo requires an HF access token.
model_name = "meta-llama/Llama-2-7b-hf"
token = os.getenv("HF_TOKEN")  # Hugging Face access token from the environment

# Prefer GPU when available (also selects half precision below).
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",  # let accelerate place layers across available devices
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        token=token,
    )
    print("Model and tokenizer loaded successfully.")
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    raise  # bare raise preserves the original traceback (raise e would truncate it)
# Generation handler wired to the Gradio UI below.
def generate_content_with_parameters(prompt, temperature, max_length):
    """Generate a continuation of *prompt* with the loaded model.

    Args:
        prompt: Input text to continue.
        temperature: Sampling temperature from the UI slider; 0 selects
            greedy (deterministic) decoding.
        max_length: Maximum total token length (prompt + generated tokens).

    Returns:
        The decoded generation as a string, or an error message string if
        generation fails (Gradio displays either in the output textbox).
    """
    try:
        # Move inputs to the model's device: with device_map="auto" the model
        # may be sharded, so the module-level `device` is not reliable here.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
        # `generate` defaults to greedy decoding (do_sample=False), under which
        # `temperature` is silently ignored — enable sampling when it is > 0.
        do_sample = temperature > 0
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=do_sample,
            temperature=temperature if do_sample else None,  # omit for greedy to avoid warnings
            num_return_sequences=1,
            repetition_penalty=1.2,  # discourage verbatim repetition
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error during generation: {e}"
# Build the Gradio UI: one prompt textbox plus two tuning sliders, all feeding
# the generation handler and rendering its string result.
prompt_box = gr.Textbox(label="Prompt", placeholder="Enter your prompt here.")
temperature_slider = gr.Slider(label="Temperature", minimum=0, maximum=1, step=0.1, value=0.7)
length_slider = gr.Slider(label="Max Length", minimum=10, maximum=2048, step=10, value=512)

interface = gr.Interface(
    fn=generate_content_with_parameters,
    inputs=[prompt_box, temperature_slider, length_slider],
    outputs="text",
    title="Customizable Text Generator",
    description="Enter a prompt, adjust the temperature and max length, and generate consistent outputs.",
)

# Start the web app only when executed as a script (not on import).
if __name__ == "__main__":
    interface.launch()