import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Load the model and tokenizer, authenticating with a token from the environment.
# Note: meta-llama/Llama-2-7b-hf is a gated model; the token must belong to an
# account that has been granted access to it.
model_name = "meta-llama/Llama-2-7b-hf"
token = os.getenv("HF_TOKEN")  # read the Hugging Face access token from the environment

# Check whether a GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",  # let transformers place the weights automatically
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        token=token,
    )
    print("Model and tokenizer loaded successfully.")
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    raise
# Text generation with user-adjustable sampling parameters
def generate_content_with_parameters(prompt, temperature, max_length):
    try:
        # Move the inputs to the same device as the model
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
        # Generate text; do_sample=True is required for temperature to take effect
        outputs = model.generate(
            **inputs,
            max_length=max_length,    # total length in tokens, prompt included
            do_sample=True,
            temperature=temperature,  # sampling temperature from the slider
            num_return_sequences=1,
            repetition_penalty=1.2,   # discourage repetitive output
        )
        # Decode the generated tokens back to text
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error during generation: {e}"
# Build the Gradio interface
interface = gr.Interface(
    fn=generate_content_with_parameters,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Enter your prompt here."),
        # Minimum is 0.1 rather than 0: with do_sample=True, a temperature of 0 is invalid
        gr.Slider(label="Temperature", minimum=0.1, maximum=1, step=0.1, value=0.7),
        gr.Slider(label="Max Length", minimum=10, maximum=2048, step=10, value=512),
    ],
    outputs="text",
    title="Customizable Text Generator",
    description="Enter a prompt, adjust the temperature and max length, and generate text.",
)
# Launch the Gradio app
if __name__ == "__main__":
    interface.launch()
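
# --- Optional sanity check (a minimal sketch; the prompt and parameter values
# below are illustrative, not part of the original Space) ---
# Calling the generation function directly verifies the model loads and
# generates without going through the UI. With do_sample=True the output
# varies from run to run. Uncomment to use:
# sample = generate_content_with_parameters(
#     "Explain photosynthesis in one paragraph.", temperature=0.7, max_length=128
# )
# print(sample)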