SuperPrompt-v1

Runtime error

File size: 4,840 Bytes

f97bf68
 
69855af
64d6c72
aeec480
 
f97bf68
aeec480
b0413b4
 
aeec480
 
b0413b4
 
aeec480
b0413b4
00b74b1
aeec480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c55de4
 
353fc0a
e857352
4c55de4
 
 
21116ac
aeec480
 
 
 
e6fb950
 
fb18417
aeec480
c435396
21116ac
aeec480
 
 
 
 
c435396
 
 
 
 
 
 
 
 
21116ac
c435396
 
f97bf68
5709663
7eddf91
fb18417
7eddf91
1aa631a
a0c6e41
5709663
e6fb950
2431dd6
9a18b41
1aa631a
7e99bc1
 
7eddf91
1aa631a
7eddf91
1aa631a
e857352
f97bf68
244f082
e8b0857
244f082
f97bf68
1aa631a
f97bf68
353fc0a
78cc8b8
f97bf68
aeec480
f97bf68
65e6d9d
aeec480

import gradio as gr
import torch
import random
import transformers
from transformers import T5Tokenizer
from optimum.onnxruntime import ORTModelForSeq2SeqLM

# --- CUDA / Provider Setup ---
# Choose the torch device and the matching ONNX Runtime execution provider once
# at import time. `device` is read again later when input tensors are moved.
if torch.cuda.is_available():
    device = "cuda"
    provider = "CUDAExecutionProvider"
    print(f"Using GPU with {provider}")
else:
    device = "cpu"
    provider = "CPUExecutionProvider"
    print("Using CPU")

# Load Model with ONNX Runtime for Execution Provider support
# Note: This requires optimum installed: pip install optimum[onnxruntime-gpu]
try:
    model = ORTModelForSeq2SeqLM.from_pretrained(
        "roborovski/superprompt-v1",
        provider=provider,
        export=False,  # Set True if you want to force generate ONNX files from pytorch
    )
    print(f"Model loaded successfully using {provider}")
except Exception as e:
    # Deliberately broad: any failure of the ONNX path (missing ONNX weights,
    # unavailable provider, ...) should fall through to the plain PyTorch model
    # instead of taking the whole app down.
    print(f"Failed to load ONNX model: {e}")
    print("Falling back to standard PyTorch model...")
    from transformers import T5ForConditionalGeneration

    # `legacy` is a tokenizer option, not a model option. Passing it here is
    # forwarded to the model constructor as an unknown keyword and makes the
    # fallback itself crash, so it is intentionally not passed.
    # NOTE(review): if `legacy=False` was meant for the tokenizer below, confirm
    # before enabling it there -- it changes how the input text is tokenized.
    model = T5ForConditionalGeneration.from_pretrained(
        "roborovski/superprompt-v1",
        device_map="auto",
        torch_dtype="auto",
    )
    # No explicit model.to(device): device_map="auto" already places the
    # weights, and moving a dispatched model again is redundant at best and can
    # raise when some modules were offloaded.

tokenizer = T5Tokenizer.from_pretrained("roborovski/superprompt-v1")

def generate(your_prompt, task_prefix, max_new_tokens, repetition_penalty, temperature, model_precision_type, top_p, top_k, seed):
    """Expand a prompt with the module-level model and return the decoded text.

    Args:
        your_prompt: The user's prompt to be improved.
        task_prefix: Instruction placed in front of the prompt; the model input
            is ``"<task_prefix>: <your_prompt>"``.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
        repetition_penalty: Repetition penalty, must be strictly positive.
        temperature: Sampling temperature. A value of 0 or below switches to
            greedy decoding, because sampling requires a positive temperature.
        model_precision_type: Accepted so the UI input list keeps its shape;
            this function does not read it (precision is fixed when the model
            is loaded).
        top_p: Nucleus sampling threshold, clamped into ``[0, 1]``.
        top_k: Top-k sampling cutoff (coerced to ``int``).
        seed: RNG seed; ``0`` means "pick a random seed".

    Returns:
        The generated text with special tokens stripped.

    Raises:
        ValueError: If ``repetition_penalty`` is not strictly positive.
    """
    # UI sliders may deliver whole numbers as floats; the seeding helpers and
    # the integer-valued generation arguments need real ints.
    seed = int(seed)
    if seed == 0:
        seed = random.randint(1, 2**32 - 1)
    transformers.set_seed(seed)

    max_new_tokens = int(max_new_tokens)
    top_k = int(top_k)
    temperature = float(temperature)
    # Values above 1 are rejected by the generation code; anything >= 1 means
    # "no nucleus filtering", so clamping keeps the intended meaning.
    top_p = min(max(float(top_p), 0.0), 1.0)

    repetition_penalty = float(repetition_penalty)
    if repetition_penalty <= 0:
        # Fail early with a readable message instead of deep inside generate().
        raise ValueError(
            f"repetition_penalty must be a strictly positive number, got {repetition_penalty}"
        )

    if temperature > 0:
        sampling_kwargs = {
            "do_sample": True,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
        }
    else:
        # Temperature 0 is the greedy limit of sampling; do_sample=True would
        # be rejected with a non-positive temperature.
        sampling_kwargs = {"do_sample": False}

    input_text = f"{task_prefix}: {your_prompt}"

    # Prefer the device the loaded model reports (covers the PyTorch fallback
    # with automatic placement); otherwise use the module-level `device`.
    target_device = getattr(model, "device", device)
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(target_device)

    outputs = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        repetition_penalty=repetition_penalty,
        **sampling_kwargs,
    )

    better_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return better_prompt


# --- Gradio UI ---
# Input components, listed in the same order as the parameters of `generate`.
your_prompt = gr.Textbox(label="Your Prompt", info="Your Prompt that you wanna make better")

task_prefix = gr.Textbox(label="Task Prefix", info="The prompt prefix for how the AI should make yours better", value="Expand the following prompt to add more detail")

max_new_tokens = gr.Slider(value=512, minimum=25, maximum=512, step=1, label="Max New Tokens", info="The maximum numbers of new tokens, controls how long is the output")

# The repetition penalty must be strictly positive, so the slider starts at one
# step above zero instead of at 0.
repetition_penalty = gr.Slider(value=1.2, minimum=0.05, maximum=2.0, step=0.05, label="Repetition Penalty", info="Penalize repeated tokens, making the AI repeat less itself")

# Sampling needs a strictly positive temperature, so 0 is not selectable.
temperature = gr.Slider(value=0.7, minimum=0.05, maximum=1, step=0.05, label="Temperature", info="Higher values produce more diverse outputs")

# Kept so the input list matches the signature of `generate`; `generate` does
# not read this value.
model_precision_type = gr.Dropdown(["fp16", "fp32"], value="fp16", label="Model Precision Type", info="The precision type to load the model, like fp16 which is faster, or fp32 which is more precise but more resource consuming")

# top_p is a probability mass, so its valid range ends at 1.
top_p = gr.Slider(value=1, minimum=0, maximum=1, step=0.05, label="Top P", info="Higher values sample more low-probability tokens")

top_k = gr.Slider(value=50, minimum=1, maximum=100, step=1, label="Top K", info="Higher k means more diverse outputs by considering a range of tokens")

seed = gr.Slider(value=42, minimum=0, maximum=2**32-1, step=1, label="Seed", info="A starting point to initiate the generation process, put 0 for a random one")

# One example row; values follow the order of `inputs` below.
examples = [
    ["A storefront with 'Text to Image' written on it.", "Expand the following prompt to add more detail", 512, 1.2, 0.5, "fp16", 1, 50, 42]
]

demo = gr.Interface(
    fn=generate,
    inputs=[your_prompt, task_prefix, max_new_tokens, repetition_penalty, temperature, model_precision_type, top_p, top_k, seed],
    outputs=gr.Textbox(label="Better Prompt"),
    title="SuperPrompt-v1",
    description='Make your prompts more detailed! <br> <br> Hugging Face Space made by Nick088 improved by NeoPy/BF667',
    examples=examples,
    theme="NeoPy/Soft",
)
demo.launch(share=True)