retwpay committed
Commit bd37b6c · verified · 1 parent: 49e16ba

Update app.py

Files changed (1): app.py (+287 -21)

app.py CHANGED
@@ -7,6 +7,8 @@ import random
 from diffusers import StableDiffusionXLPipeline
 from diffusers import EulerAncestralDiscreteScheduler
 import torch
+import re
+from compel import Compel, ReturnedEmbeddingsType

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@@ -27,44 +29,291 @@ pipe.text_encoder_2.to(torch.float16)
 pipe.vae.to(torch.float16)
 pipe.unet.to(torch.float16)

+# Initialize Compel for long prompt processing
+compel = Compel(
+    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
+    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
+    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+    requires_pooled=[False, True],
+    truncate_long_prompts=False  # Enable long prompt processing
+)
+
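+# Note (assumption based on compel's documented SDXL usage): with
+# requires_pooled=[False, True], a compel(...) call returns a
+# (prompt_embeds, pooled_prompt_embeds) pair, which is how infer() below consumes it.
+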
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1216
+
+# =====================================
+# Long Prompt Processing Functions
+# =====================================
+
+def parse_prompt_attention(text):
+    """Parse prompt with attention weights like (word:1.2) or [word:0.8]"""
+    re_attention = re.compile(r"""
+        \\\(|
+        \\\)|
+        \\\[|
+        \\]|
+        \\\\|
+        \\|
+        \(|
+        \[|
+        :([+-]?[.\d]+)\)|
+        \)|
+        ]|
+        [^\\()\[\]:]+|
+        :
+    """, re.X)
+
+    res = []
+    round_brackets = []
+    square_brackets = []
+
+    round_bracket_multiplier = 1.1
+    square_bracket_multiplier = 1 / 1.1
+
+    def multiply_range(start_position, multiplier):
+        for p in range(start_position, len(res)):
+            res[p][1] *= multiplier
+
+    for m in re_attention.finditer(text):
+        text = m.group(0)
+        weight = m.group(1)
+
+        if text.startswith('\\'):
+            res.append([text[1:], 1.0])
+        elif text == '(':
+            round_brackets.append(len(res))
+        elif text == '[':
+            square_brackets.append(len(res))
+        elif weight is not None and len(round_brackets) > 0:
+            multiply_range(round_brackets.pop(), float(weight))
+        elif text == ')' and len(round_brackets) > 0:
+            multiply_range(round_brackets.pop(), round_bracket_multiplier)
+        elif text == ']' and len(square_brackets) > 0:
+            multiply_range(square_brackets.pop(), square_bracket_multiplier)
+        else:
+            parts = re.split(re.compile(r"\s*\bBREAK\b\s*", re.S), text)
+            for i, part in enumerate(parts):
+                if i > 0:
+                    res.append(["BREAK", -1])
+                res.append([part, 1.0])
+
+    for pos in round_brackets:
+        multiply_range(pos, round_bracket_multiplier)
+
+    for pos in square_brackets:
+        multiply_range(pos, square_bracket_multiplier)
+
+    if len(res) == 0:
+        res = [["", 1.0]]
+
+    # Merge runs of identical weights
+    i = 0
+    while i + 1 < len(res):
+        if res[i][1] == res[i + 1][1]:
+            res[i][0] += res[i + 1][0]
+            res.pop(i + 1)
+        else:
+            i += 1
+
+    return res
+
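+# For example: parse_prompt_attention("a (red:1.2) ball")
+# returns [['a ', 1.0], ['red', 1.2], [' ball', 1.0]]
+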
+def prompt_attention_to_invoke_prompt(attention):
+    """Convert attention data back to compel syntax"""
+    tokens = []
+    for text, weight in attention:
+        weight = round(weight, 2)
+        if weight == 1.0:
+            tokens.append(text)
+        elif weight < 1.0:
+            if weight < 0.8:
+                tokens.append(f"({text}){weight}")
+            else:
+                tokens.append(f"({text})-" + "-" * int((1.0 - weight) * 10))
+        else:
+            if weight < 1.3:
+                tokens.append(f"({text})" + "+" * int((weight - 1.0) * 10))
+            else:
+                tokens.append(f"({text}){weight}")
+    return "".join(tokens)
+
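+# For example: [['a ', 1.0], ['red', 1.5], [' ball', 1.0]] becomes "a (red)1.5 ball";
+# weights of 1.3 and above are emitted numerically, milder ones as +/- marks
+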
+def tokenize_line(line, tokenizer):
+    """Split long prompts into chunks at appropriate boundaries"""
+    actual_prompt = line.lower().strip()
+    actual_tokens = tokenizer.tokenize(actual_prompt)
+    max_tokens = tokenizer.model_max_length - 2
+    comma_token = tokenizer.tokenize(',')[0]
+
+    chunks = []
+    chunk = []
+    for item in actual_tokens:
+        chunk.append(item)
+        if len(chunk) == max_tokens:
+            if chunk[-1] != comma_token:
+                # Prefer to break at the last comma inside the window
+                for i in range(max_tokens - 1, -1, -1):
+                    if chunk[i] == comma_token:
+                        actual_chunk, actual_prompt = detokenize(chunk[:i + 1], actual_prompt)
+                        chunks.append(actual_chunk)
+                        chunk = chunk[i + 1:]
+                        break
+                else:
+                    actual_chunk, actual_prompt = detokenize(chunk, actual_prompt)
+                    chunks.append(actual_chunk)
+                    chunk = []
+            else:
+                actual_chunk, actual_prompt = detokenize(chunk, actual_prompt)
+                chunks.append(actual_chunk)
+                chunk = []
+    if chunk:
+        actual_chunk, _ = detokenize(chunk, actual_prompt)
+        chunks.append(actual_chunk)
+
+    return chunks
+
+def detokenize(chunk, actual_prompt):
+    """Convert tokens back to text"""
+    chunk[-1] = chunk[-1].replace('</w>', '')
+    chunked_prompt = ''.join(chunk).strip()
+    while '</w>' in chunked_prompt:
+        if actual_prompt[chunked_prompt.find('</w>')] == ' ':
+            chunked_prompt = chunked_prompt.replace('</w>', ' ', 1)
+        else:
+            chunked_prompt = chunked_prompt.replace('</w>', '', 1)
+    actual_prompt = actual_prompt.replace(chunked_prompt, '')
+    return chunked_prompt.strip(), actual_prompt.strip()
+
+def merge_embeds(prompt_chunks, compel):
+    """Merge multiple prompt chunks with a weighted combination"""
+    num_chunks = len(prompt_chunks)
+    if num_chunks != 0:
+        power_prompt = 1 / (num_chunks * (num_chunks + 1) // 2)
+        prompt_embs = compel(prompt_chunks)
+        t_list = list(torch.split(prompt_embs, 1, dim=0))
+        for i in range(num_chunks):
+            t_list[-(i + 1)] = t_list[-(i + 1)] * ((i + 1) * power_prompt)
+        prompt_emb = torch.stack(t_list, dim=0).sum(dim=0)
+    else:
+        prompt_emb = compel('')
+    return prompt_emb
+
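+# Note: with n chunks, the i-th chunk from the start (0-indexed) is scaled by
+# (n - i) / (n * (n + 1) / 2), so the weights sum to 1 and earlier chunks
+# contribute most to the merged embedding
+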
+def process_long_prompt(prompt, pipeline, compel, only_convert_string=False):
+    """Main function to process long prompts with attention weights"""
+
+    # Fix excessive emphasis symbols
+    prompt = prompt.replace("((", "(").replace("))", ")").replace("\\", "\\\\\\")
+
+    # Parse attention weights
+    attention = parse_prompt_attention(prompt)
+    global_attention_chunks = []
+
+    for att in attention:
+        for chunk in att[0].split(','):
+            temp_prompt_chunks = tokenize_line(chunk, pipeline.tokenizer)
+            for small_chunk in temp_prompt_chunks:
+                temp_dict = {
+                    "weight": round(att[1], 2),
+                    "length": len(pipeline.tokenizer.tokenize(f'{small_chunk},')),
+                    "prompt": f'{small_chunk},'
+                }
+                global_attention_chunks.append(temp_dict)
+
+    max_tokens = pipeline.tokenizer.model_max_length - 2
+    global_prompt_chunks = []
+    current_list = []
+    current_length = 0
+
+    for item in global_attention_chunks:
+        if current_length + item['length'] > max_tokens:
+            global_prompt_chunks.append(current_list)
+            current_list = [[item['prompt'], item['weight']]]
+            current_length = item['length']
+        else:
+            if not current_list:
+                current_list.append([item['prompt'], item['weight']])
+            else:
+                if item['weight'] != current_list[-1][1]:
+                    current_list.append([item['prompt'], item['weight']])
+                else:
+                    current_list[-1][0] += f" {item['prompt']}"
+            current_length += item['length']
+
+    if current_list:
+        global_prompt_chunks.append(current_list)
+
+    if only_convert_string:
+        return ' '.join([prompt_attention_to_invoke_prompt(i) for i in global_prompt_chunks])
+
+    return merge_embeds([prompt_attention_to_invoke_prompt(i) for i in global_prompt_chunks], compel)
+
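+# Overall flow: parse attention weights -> split each piece into chunks of at
+# most (model_max_length - 2) tokens, preferring comma boundaries -> regroup
+# chunks by weight -> emit compel-syntax strings (or merged embeddings when
+# only_convert_string=False)
+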
 @spaces.GPU
-def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
-    # Check and truncate prompt if too long (CLIP can only handle 77 tokens)
-    if len(prompt.split()) > 60:  # Rough estimate to avoid exceeding the token limit
-        print("Warning: Prompt may be too long and will be truncated by the model")
-
+def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, enable_long_prompt):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)

     generator = torch.Generator(device=device).manual_seed(seed)

     try:
-        output_image = pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_inference_steps,
-            width=width,
-            height=height,
-            generator=generator
-        ).images[0]
+        if enable_long_prompt:
+            # Use advanced prompt processing
+            print("Using advanced long prompt processing...")
+
+            # Process prompts with attention weights and chunking
+            if not negative_prompt:
+                negative_prompt = ""
+
+            processed_prompt = process_long_prompt(prompt, pipe, compel, only_convert_string=True)
+            processed_negative = process_long_prompt(negative_prompt, pipe, compel, only_convert_string=True)
+
+            # Get embeddings
+            conditioning, pooled = compel([processed_prompt, processed_negative])
+
+            # Generate with embeddings
+            output_image = pipe(
+                prompt_embeds=conditioning[0:1],
+                pooled_prompt_embeds=pooled[0:1],
+                negative_prompt_embeds=conditioning[1:2],
+                negative_pooled_prompt_embeds=pooled[1:2],
+                guidance_scale=guidance_scale,
+                num_inference_steps=num_inference_steps,
+                width=width,
+                height=height,
+                generator=generator
+            ).images[0]
+        else:
+            # Use standard processing, with a warning for long prompts
+            if len(prompt.split()) > 60:
+                print("Warning: Prompt may be too long. Consider enabling 'Long Prompt Processing'")
+
+            output_image = pipe(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                guidance_scale=guidance_scale,
+                num_inference_steps=num_inference_steps,
+                width=width,
+                height=height,
+                generator=generator
+            ).images[0]

         return output_image
+
     except RuntimeError as e:
         print(f"Error during generation: {e}")
-        # Return a blank image with error message
         error_img = Image.new('RGB', (width, height), color=(0, 0, 0))
         return error_img

-
 css = """
 #col-container {
     margin: 0 auto;
     max-width: 520px;
 }
+.long-prompt-info {
+    background-color: #f0f8ff;
+    padding: 10px;
+    border-radius: 5px;
+    margin: 10px 0;
+    font-size: 12px;
+}
 """

 with gr.Blocks(css=css) as demo:
@@ -75,8 +324,8 @@ with gr.Blocks(css=css) as demo:
         prompt = gr.Text(
             label="Prompt",
             show_label=False,
-            max_lines=1,
-            placeholder="Enter your prompt (keep it under 60 words for best results)",
+            max_lines=3,  # Increased for longer prompts
+            placeholder="Enter your prompt. Use (word:1.2) for emphasis or [word:0.8] for de-emphasis",
             container=False,
         )

@@ -85,11 +334,28 @@ with gr.Blocks(css=css) as demo:
         result = gr.Image(label="Result", show_label=False)

         with gr.Accordion("Advanced Settings", open=False):
+            # Long prompt processing toggle
+            enable_long_prompt = gr.Checkbox(
+                label="Enable Long Prompt Processing",
+                value=True,
+                info="Process very long prompts with attention weights like (word:1.2) or [word:0.8]"
+            )
+
+            with gr.Column(elem_classes="long-prompt-info"):
+                gr.HTML("""
+                    <strong>Long Prompt Features:</strong><br>
+                    • <code>(word:1.2)</code> - weight 'word' by 1.2x<br>
+                    • <code>[word:0.8]</code> - weight 'word' by 0.8x<br>
+                    • <code>((word))</code> - strong emphasis (1.21x)<br>
+                    • <code>[[word]]</code> - strong de-emphasis (0.83x)<br>
+                    • No hard token limit - write detailed prompts!
+                """)

             negative_prompt = gr.Text(
                 label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
+                max_lines=2,  # Increased for longer negative prompts
+                placeholder="Enter a negative prompt (supports the same weight syntax)",
                 value="nsfw, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn"
             )

@@ -139,7 +405,7 @@ with gr.Blocks(css=css) as demo:

     run_button.click(
         fn=infer,
-        inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
+        inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, enable_long_prompt],
         outputs=[result]
     )
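
A minimal usage sketch of the new infer signature (illustrative values only; assumes app.py is importable as app and a GPU is available for the @spaces.GPU-wrapped function):

    from app import infer

    image = infer(
        prompt="masterpiece, best quality, (detailed eyes:1.3), flowing silver hair, [simple background:0.8]",
        negative_prompt="nsfw, (low quality, worst quality:1.2)",
        seed=0,
        randomize_seed=False,
        width=832,
        height=1216,
        guidance_scale=7.0,
        num_inference_steps=28,
        enable_long_prompt=True,  # route through the compel-based long-prompt path
    )
    image.save("output.png")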
411