Spaces:
Runtime error
Runtime error
File size: 4,840 Bytes
f97bf68 69855af 64d6c72 aeec480 f97bf68 aeec480 b0413b4 aeec480 b0413b4 aeec480 b0413b4 00b74b1 aeec480 4c55de4 353fc0a e857352 4c55de4 21116ac aeec480 e6fb950 fb18417 aeec480 c435396 21116ac aeec480 c435396 21116ac c435396 f97bf68 5709663 7eddf91 fb18417 7eddf91 1aa631a a0c6e41 5709663 e6fb950 2431dd6 9a18b41 1aa631a 7e99bc1 7eddf91 1aa631a 7eddf91 1aa631a e857352 f97bf68 244f082 e8b0857 244f082 f97bf68 1aa631a f97bf68 353fc0a 78cc8b8 f97bf68 aeec480 f97bf68 65e6d9d aeec480 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import gradio as gr
import torch
import random
import transformers
from transformers import T5Tokenizer
from optimum.onnxruntime import ORTModelForSeq2SeqLM
# --- CUDA / Provider Setup ---
# Select the ONNX Runtime execution provider that matches the hardware.
use_cuda = torch.cuda.is_available()
device = "cuda" if use_cuda else "cpu"
provider = "CUDAExecutionProvider" if use_cuda else "CPUExecutionProvider"
if use_cuda:
    print(f"Using GPU with {provider}")
else:
    print("Using CPU")
# Load Model with ONNX Runtime for Execution Provider support
# Note: This requires optimum installed: pip install optimum[onnxruntime-gpu]
try:
    model = ORTModelForSeq2SeqLM.from_pretrained(
        "roborovski/superprompt-v1",
        provider=provider,
        export=False,  # Set True if you want to force generate ONNX files from pytorch
    )
    print(f"Model loaded successfully using {provider}")
except Exception as e:
    print(f"Failed to load ONNX model: {e}")
    print("Falling back to standard PyTorch model...")
    from transformers import T5ForConditionalGeneration

    # BUGFIX: the original combined device_map="auto" with a later
    # model.to(device). Once accelerate has dispatched the model,
    # .to() raises "You can't move a model that is dispatched using
    # accelerate hooks". Load normally and place the model explicitly
    # on the device selected above instead.
    model = T5ForConditionalGeneration.from_pretrained(
        "roborovski/superprompt-v1",
        legacy=False,
        torch_dtype="auto",
    ).to(device)

tokenizer = T5Tokenizer.from_pretrained("roborovski/superprompt-v1")
def generate(your_prompt, task_prefix, max_new_tokens, repetition_penalty, temperature, model_precision_type, top_p, top_k, seed):
    """Expand *your_prompt* into a more detailed prompt with the SuperPrompt model.

    Args:
        your_prompt: The prompt the user wants improved.
        task_prefix: Instruction prepended as ``"<task_prefix>: <your_prompt>"``.
        max_new_tokens: Upper bound on the number of generated tokens.
        repetition_penalty: Values > 1 penalize repeated tokens.
        temperature: Sampling temperature; higher values are more diverse.
        model_precision_type: Kept for UI compatibility only — the effective
            precision is determined by the weights/provider loaded at startup,
            so this value is not used here.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.
        seed: RNG seed; 0 means "pick a random seed".

    Returns:
        The decoded, improved prompt string.
    """
    if seed == 0:
        seed = random.randint(1, 2**32 - 1)
    transformers.set_seed(seed)
    repetition_penalty = float(repetition_penalty)  # sliders may hand back ints
    input_text = f"{task_prefix}: {your_prompt}"
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids
    # BUGFIX: the original moved input_ids to the global `device` and then,
    # when the model exposed .device, moved them a second time. The first
    # transfer was wasted work and could mismatch an ONNX model running on
    # CPU while torch.cuda is available. Do a single transfer to the device
    # the model actually reports (falling back to the global default).
    input_ids = input_ids.to(getattr(model, "device", device))
    outputs = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# --- Gradio UI ---
your_prompt = gr.Textbox(label="Your Prompt", info="Your Prompt that you wanna make better")
task_prefix = gr.Textbox(label="Task Prefix", info="The prompt prefix for how the AI should make yours better", value="Expand the following prompt to add more detail")
max_new_tokens = gr.Slider(value=512, minimum=25, maximum=512, step=1, label="Max New Tokens", info="The maximum numbers of new tokens, controls how long is the output")
# BUGFIX: minimum raised from 0 — transformers' RepetitionPenaltyLogitsProcessor
# requires a strictly positive penalty and raises ValueError at 0.
repetition_penalty = gr.Slider(value=1.2, minimum=0.05, maximum=2.0, step=0.05, label="Repetition Penalty", info="Penalize repeated tokens, making the AI repeat less itself")
# BUGFIX: minimum raised from 0 — temperature must be > 0 when do_sample=True,
# otherwise generation raises ValueError.
temperature = gr.Slider(value=0.7, minimum=0.05, maximum=1, step=0.05, label="Temperature", info="Higher values produce more diverse outputs")
model_precision_type = gr.Dropdown(["fp16", "fp32"], value="fp16", label="Model Precision Type", info="The precision type to load the model, like fp16 which is faster, or fp32 which is more precise but more resource consuming")
# BUGFIX: maximum lowered from 2 to 1 — nucleus sampling requires 0 < top_p <= 1;
# transformers' TopPLogitsWarper raises ValueError for top_p > 1.
top_p = gr.Slider(value=1, minimum=0.05, maximum=1, step=0.05, label="Top P", info="Higher values sample more low-probability tokens")
top_k = gr.Slider(value=50, minimum=1, maximum=100, step=1, label="Top K", info="Higher k means more diverse outputs by considering a range of tokens")
seed = gr.Slider(value=42, minimum=0, maximum=2**32 - 1, step=1, label="Seed", info="A starting point to initiate the generation process, put 0 for a random one")

examples = [
    ["A storefront with 'Text to Image' written on it.", "Expand the following prompt to add more detail", 512, 1.2, 0.5, "fp16", 1, 50, 42]
]

gr.Interface(
    fn=generate,
    inputs=[your_prompt, task_prefix, max_new_tokens, repetition_penalty, temperature, model_precision_type, top_p, top_k, seed],
    outputs=gr.Textbox(label="Better Prompt"),
    title="SuperPrompt-v1",
    # BUGFIX: user-facing typo "improved bu" -> "improved by"
    description='Make your prompts more detailed! <br> <br> Hugging Face Space made by Nick088 improved by NeoPy/BF667',
    examples=examples,
    theme="NeoPy/Soft",
).launch(share=True)