Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import random | |
| import transformers | |
| from transformers import T5Tokenizer | |
| from optimum.onnxruntime import ORTModelForSeq2SeqLM | |
| # --- CUDA / Provider Setup --- | |
| if torch.cuda.is_available(): | |
| device = "cuda" | |
| provider = "CUDAExecutionProvider" | |
| print(f"Using GPU with {provider}") | |
| else: | |
| device = "cpu" | |
| provider = "CPUExecutionProvider" | |
| print("Using CPU") | |
| # Load Model with ONNX Runtime for Execution Provider support | |
| # Note: This requires optimum installed: pip install optimum[onnxruntime-gpu] | |
| try: | |
| model = ORTModelForSeq2SeqLM.from_pretrained( | |
| "roborovski/superprompt-v1", | |
| provider=provider, | |
| export=False # Set True if you want to force generate ONNX files from pytorch | |
| ) | |
| print(f"Model loaded successfully using {provider}") | |
| except Exception as e: | |
| print(f"Failed to load ONNX model: {e}") | |
| print("Falling back to standard PyTorch model...") | |
| from transformers import T5ForConditionalGeneration | |
| model = T5ForConditionalGeneration.from_pretrained( | |
| "roborovski/superprompt-v1", legacy=False, | |
| device_map="auto", | |
| torch_dtype="auto" | |
| ) | |
| # Standard torch model doesn't use ExecutionProvider string, but we keep the logic intact | |
| if device == "cuda": | |
| model.to(device) | |
| tokenizer = T5Tokenizer.from_pretrained("roborovski/superprompt-v1") | |
| def generate(your_prompt, task_prefix, max_new_tokens, repetition_penalty, temperature, model_precision_type, top_p, top_k, seed): | |
| if seed == 0: | |
| seed = random.randint(1, 2**32-1) | |
| transformers.set_seed(seed) | |
| # ONNX Runtime models usually manage their own precision/quantization via the file loaded, | |
| # but we can leave the UI option for users to switch logic if they were swapping models. | |
| # For this specific implementation, the precision is largely determined by the loaded provider/weights. | |
| repetition_penalty = float(repetition_penalty) | |
| input_text = f"{task_prefix}: {your_prompt}" | |
| # ONNX models generally handle input tensors on the device they were initialized with automatically | |
| input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device) | |
| # ONNX Runtime generate function might differ slightly in arguments, but standard transformers args usually map over. | |
| # We ensure we pass the device properly for PyTorch fallback. | |
| if hasattr(model, 'device'): | |
| input_ids = input_ids.to(model.device) | |
| outputs = model.generate( | |
| input_ids, | |
| max_new_tokens=max_new_tokens, | |
| repetition_penalty=repetition_penalty, | |
| do_sample=True, | |
| temperature=temperature, | |
| top_p=top_p, | |
| top_k=top_k, | |
| ) | |
| better_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True) | |
| return better_prompt | |
| your_prompt = gr.Textbox(label="Your Prompt", info="Your Prompt that you wanna make better") | |
| task_prefix = gr.Textbox(label="Task Prefix", info="The prompt prefix for how the AI should make yours better",value="Expand the following prompt to add more detail") | |
| max_new_tokens = gr.Slider(value=512, minimum=25, maximum=512, step=1, label="Max New Tokens", info="The maximum numbers of new tokens, controls how long is the output") | |
| repetition_penalty = gr.Slider(value=1.2, minimum=0, maximum=2.0, step=0.05, label="Repetition Penalty", info="Penalize repeated tokens, making the AI repeat less itself") | |
| temperature = gr.Slider(value=0.7, minimum=0, maximum=1, step=0.05, label="Temperature", info="Higher values produce more diverse outputs") | |
| model_precision_type = gr.Dropdown(["fp16", "fp32"], value="fp16", label="Model Precision Type", info="The precision type to load the model, like fp16 which is faster, or fp32 which is more precise but more resource consuming") | |
| top_p = gr.Slider(value=1, minimum=0, maximum=2, step=0.05, label="Top P", info="Higher values sample more low-probability tokens") | |
| top_k = gr.Slider(value=50, minimum=1, maximum=100, step=1, label="Top K", info="Higher k means more diverse outputs by considering a range of tokens") | |
| seed = gr.Slider(value=42, minimum=0, maximum=2**32-1, step=1, label="Seed", info="A starting point to initiate the generation process, put 0 for a random one") | |
| examples = [ | |
| ["A storefront with 'Text to Image' written on it.", "Expand the following prompt to add more detail", 512, 1.2, 0.5, "fp16", 1, 50, 42] | |
| ] | |
| gr.Interface( | |
| fn=generate, | |
| inputs=[your_prompt, task_prefix, max_new_tokens, repetition_penalty, temperature, model_precision_type, top_p, top_k, seed], | |
| outputs=gr.Textbox(label="Better Prompt"), | |
| title="SuperPrompt-v1", | |
| description='Make your prompts more detailed! <br> <br> Hugging Face Space made by Nick088 improved bu NeoPy/BF667', | |
| examples=examples, | |
| theme="NeoPy/Soft" | |
| ).launch(share=True) |