NeoPy committed on
Commit
aeec480
·
verified ·
1 Parent(s): 7a46954

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -13
app.py CHANGED
@@ -2,18 +2,42 @@ import gradio as gr
2
  import torch
3
  import random
4
  import transformers
5
- from transformers import T5Tokenizer, T5ForConditionalGeneration
 
6
 
 
7
  if torch.cuda.is_available():
8
  device = "cuda"
9
- print("Using GPU")
 
10
  else:
11
  device = "cpu"
 
12
  print("Using CPU")
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  tokenizer = T5Tokenizer.from_pretrained("roborovski/superprompt-v1")
15
- model = T5ForConditionalGeneration.from_pretrained("roborovski/superprompt-v1", device_map="auto", torch_dtype="auto")
16
- model.to(device)
17
 
18
  def generate(your_prompt, task_prefix, max_new_tokens, repetition_penalty, temperature, model_precision_type, top_p, top_k, seed):
19
 
@@ -21,18 +45,21 @@ def generate(your_prompt, task_prefix, max_new_tokens, repetition_penalty, tempe
21
  seed = random.randint(1, 2**32-1)
22
  transformers.set_seed(seed)
23
 
24
- if model_precision_type == "fp16":
25
- dtype = torch.float16
26
- elif model_precision_type == "fp32":
27
- dtype = torch.float32
28
-
29
- model.to(dtype)
30
-
31
  repetition_penalty = float(repetition_penalty)
32
 
33
  input_text = f"{task_prefix}: {your_prompt}"
 
34
  input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
35
 
 
 
 
 
 
36
  outputs = model.generate(
37
  input_ids,
38
  max_new_tokens=max_new_tokens,
@@ -74,7 +101,7 @@ gr.Interface(
74
  inputs=[your_prompt, task_prefix, max_new_tokens, repetition_penalty, temperature, model_precision_type, top_p, top_k, seed],
75
  outputs=gr.Textbox(label="Better Prompt"),
76
  title="SuperPrompt-v1",
77
- description='Make your prompts more detailed! <br> <a href="https://github.com/Nick088Official/SuperPrompt-v1">Github Repository & Model used</a> <br> <a href="https://brianfitzgerald.xyz/prompt-augmentation/">Model Blog</a> <br> Hugging Face Space made by [Nick088](https://linktr.ee/Nick088)',
78
  examples=examples,
79
  theme="NeoPy/Soft"
80
- ).launch(share=True, mcp_mode=True)
 
2
  import torch
3
  import random
4
  import transformers
5
+ from transformers import T5Tokenizer
6
+ from optimum.onnxruntime import ORTModelForSeq2SeqLM
7
 
8
+ # --- CUDA / Provider Setup ---
9
  if torch.cuda.is_available():
10
  device = "cuda"
11
+ provider = "CUDAExecutionProvider"
12
+ print(f"Using GPU with {provider}")
13
  else:
14
  device = "cpu"
15
+ provider = "CPUExecutionProvider"
16
  print("Using CPU")
17
 
18
+ # Load Model with ONNX Runtime for Execution Provider support
19
+ # Note: This requires optimum installed: pip install optimum[onnxruntime-gpu]
20
+ try:
21
+ model = ORTModelForSeq2SeqLM.from_pretrained(
22
+ "roborovski/superprompt-v1",
23
+ provider=provider,
24
+ export=False # Set True if you want to force generate ONNX files from pytorch
25
+ )
26
+ print(f"Model loaded successfully using {provider}")
27
+ except Exception as e:
28
+ print(f"Failed to load ONNX model: {e}")
29
+ print("Falling back to standard PyTorch model...")
30
+ from transformers import T5ForConditionalGeneration
31
+ model = T5ForConditionalGeneration.from_pretrained(
32
+ "roborovski/superprompt-v1", legacy=False,
33
+ device_map="auto",
34
+ torch_dtype="auto"
35
+ )
36
+ # Standard torch model doesn't use ExecutionProvider string, but we keep the logic intact
37
+ if device == "cuda":
38
+ model.to(device)
39
+
40
  tokenizer = T5Tokenizer.from_pretrained("roborovski/superprompt-v1")
 
 
41
 
42
  def generate(your_prompt, task_prefix, max_new_tokens, repetition_penalty, temperature, model_precision_type, top_p, top_k, seed):
43
 
 
45
  seed = random.randint(1, 2**32-1)
46
  transformers.set_seed(seed)
47
 
48
+ # ONNX Runtime models usually manage their own precision/quantization via the file loaded,
49
+ # but we can leave the UI option for users to switch logic if they were swapping models.
50
+ # For this specific implementation, the precision is largely determined by the loaded provider/weights.
51
+
 
 
 
52
  repetition_penalty = float(repetition_penalty)
53
 
54
  input_text = f"{task_prefix}: {your_prompt}"
55
+ # ONNX models generally handle input tensors on the device they were initialized with automatically
56
  input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
57
 
58
+ # ONNX Runtime generate function might differ slightly in arguments, but standard transformers args usually map over.
59
+ # We ensure we pass the device properly for PyTorch fallback.
60
+ if hasattr(model, 'device'):
61
+ input_ids = input_ids.to(model.device)
62
+
63
  outputs = model.generate(
64
  input_ids,
65
  max_new_tokens=max_new_tokens,
 
101
  inputs=[your_prompt, task_prefix, max_new_tokens, repetition_penalty, temperature, model_precision_type, top_p, top_k, seed],
102
  outputs=gr.Textbox(label="Better Prompt"),
103
  title="SuperPrompt-v1",
104
+ description='Make your prompts more detailed! <br> <br> Hugging Face Space made by Nick088 improved by NeoPy/BF667',
105
  examples=examples,
106
  theme="NeoPy/Soft"
107
+ ).launch(share=True)