retwpay committed
Commit 219e674 · verified · 1 Parent(s): 2c374c6

Update app.py

Files changed (1):
  app.py +49 -276
app.py CHANGED
@@ -7,7 +7,6 @@ import random
 from diffusers import StableDiffusionXLPipeline
 from diffusers import EulerAncestralDiscreteScheduler
 import torch
-import re
 from compel import Compel, ReturnedEmbeddingsType
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -29,223 +28,32 @@ pipe.text_encoder_2.to(torch.float16)
 pipe.vae.to(torch.float16)
 pipe.unet.to(torch.float16)
 
-# Initialize Compel for long prompt processing
+# Added: Initialize Compel for long prompt processing
 compel = Compel(
     tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
     text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
     returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
     requires_pooled=[False, True],
-    truncate_long_prompts=False  # Enable long prompt processing
+    truncate_long_prompts=False
 )
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1216
 
-# =====================================
-# Long Prompt Processing Functions
-# =====================================
-
-def parse_prompt_attention(text):
-    """Parse prompt with attention weights like (word:1.2) or [word:0.8]"""
-    re_attention = re.compile(r"""
-        \\\(|
-        \\\)|
-        \\\[|
-        \\]|
-        \\\\|
-        \\|
-        \(|
-        \[|
-        :([+-]?[.\d]+)\)|
-        \)|
-        ]|
-        [^\\()\[\]:]+|
-        :
-    """, re.X)
-
-    res = []
-    round_brackets = []
-    square_brackets = []
-
-    round_bracket_multiplier = 1.1
-    square_bracket_multiplier = 1 / 1.1
-
-    def multiply_range(start_position, multiplier):
-        for p in range(start_position, len(res)):
-            res[p][1] *= multiplier
-
-    for m in re_attention.finditer(text):
-        text = m.group(0)
-        weight = m.group(1)
-
-        if text.startswith('\\'):
-            res.append([text[1:], 1.0])
-        elif text == '(':
-            round_brackets.append(len(res))
-        elif text == '[':
-            square_brackets.append(len(res))
-        elif weight is not None and len(round_brackets) > 0:
-            multiply_range(round_brackets.pop(), float(weight))
-        elif text == ')' and len(round_brackets) > 0:
-            multiply_range(round_brackets.pop(), round_bracket_multiplier)
-        elif text == ']' and len(square_brackets) > 0:
-            multiply_range(square_brackets.pop(), square_bracket_multiplier)
-        else:
-            parts = re.split(re.compile(r"\s*\bBREAK\b\s*", re.S), text)
-            for i, part in enumerate(parts):
-                if i > 0:
-                    res.append(["BREAK", -1])
-                res.append([part, 1.0])
-
-    for pos in round_brackets:
-        multiply_range(pos, round_bracket_multiplier)
-
-    for pos in square_brackets:
-        multiply_range(pos, square_bracket_multiplier)
-
-    if len(res) == 0:
-        res = [["", 1.0]]
-
-    # merge runs of identical weights
-    i = 0
-    while i + 1 < len(res):
-        if res[i][1] == res[i + 1][1]:
-            res[i][0] += res[i + 1][0]
-            res.pop(i + 1)
-        else:
-            i += 1
-
-    return res
-
-def prompt_attention_to_invoke_prompt(attention):
-    """Convert attention data back to compel format"""
-    tokens = []
-    for text, weight in attention:
-        weight = round(weight, 2)
-        if weight == 1.0:
-            tokens.append(text)
-        elif weight < 1.0:
-            if weight < 0.8:
-                tokens.append(f"({text}){weight}")
-            else:
-                tokens.append(f"({text})-" + "-" * int((1.0 - weight) * 10))
-        else:
-            if weight < 1.3:
-                tokens.append(f"({text})" + "+" * int((weight - 1.0) * 10))
-            else:
-                tokens.append(f"({text}){weight}")
-    return "".join(tokens)
-
-def tokenize_line(line, tokenizer):
-    """Split long prompts into chunks at appropriate boundaries"""
-    actual_prompt = line.lower().strip()
-    actual_tokens = tokenizer.tokenize(actual_prompt)
-    max_tokens = tokenizer.model_max_length - 2
-    comma_token = tokenizer.tokenize(',')[0]
-
-    chunks = []
-    chunk = []
-    for item in actual_tokens:
-        chunk.append(item)
-        if len(chunk) == max_tokens:
-            if chunk[-1] != comma_token:
-                for i in range(max_tokens-1, -1, -1):
-                    if chunk[i] == comma_token:
-                        actual_chunk, actual_prompt = detokenize(chunk[:i+1], actual_prompt)
-                        chunks.append(actual_chunk)
-                        chunk = chunk[i+1:]
-                        break
-                else:
-                    actual_chunk, actual_prompt = detokenize(chunk, actual_prompt)
-                    chunks.append(actual_chunk)
-                    chunk = []
-            else:
-                actual_chunk, actual_prompt = detokenize(chunk, actual_prompt)
-                chunks.append(actual_chunk)
-                chunk = []
-    if chunk:
-        actual_chunk, _ = detokenize(chunk, actual_prompt)
-        chunks.append(actual_chunk)
-
-    return chunks
-
-def detokenize(chunk, actual_prompt):
-    """Convert tokens back to text"""
-    chunk[-1] = chunk[-1].replace('</w>', '')
-    chanked_prompt = ''.join(chunk).strip()
-    while '</w>' in chanked_prompt:
-        if actual_prompt[chanked_prompt.find('</w>')] == ' ':
-            chanked_prompt = chanked_prompt.replace('</w>', ' ', 1)
-        else:
-            chanked_prompt = chanked_prompt.replace('</w>', '', 1)
-    actual_prompt = actual_prompt.replace(chanked_prompt, '')
-    return chanked_prompt.strip(), actual_prompt.strip()
-
-def merge_embeds(prompt_chunks, compel):
-    """Merge multiple prompt chunks with weighted combination"""
-    num_chunks = len(prompt_chunks)
-    if num_chunks != 0:
-        power_prompt = 1/(num_chunks*(num_chunks+1)//2)
-        prompt_embs = compel(prompt_chunks)
-        t_list = list(torch.split(prompt_embs, 1, dim=0))
-        for i in range(num_chunks):
-            t_list[-(i+1)] = t_list[-(i+1)] * ((i+1)*power_prompt)
-        prompt_emb = torch.stack(t_list, dim=0).sum(dim=0)
-    else:
-        prompt_emb = compel('')
-    return prompt_emb
-
-def process_long_prompt(prompt, pipeline, compel, only_convert_string=False):
-    """Main function to process long prompts with attention weights"""
-
-    # Fix excessive emphasis symbols
-    prompt = prompt.replace("((", "(").replace("))", ")").replace("\\", "\\\\\\")
-
-    # Parse attention weights
-    attention = parse_prompt_attention(prompt)
-    global_attention_chunks = []
-
-    for att in attention:
-        for chunk in att[0].split(','):
-            temp_prompt_chunks = tokenize_line(chunk, pipeline.tokenizer)
-            for small_chunk in temp_prompt_chunks:
-                temp_dict = {
-                    "weight": round(att[1], 2),
-                    "length": len(pipeline.tokenizer.tokenize(f'{small_chunk},')),
-                    "prompt": f'{small_chunk},'
-                }
-                global_attention_chunks.append(temp_dict)
-
-    max_tokens = pipeline.tokenizer.model_max_length - 2
-    global_prompt_chunks = []
-    current_list = []
-    current_length = 0
-
-    for item in global_attention_chunks:
-        if current_length + item['length'] > max_tokens:
-            global_prompt_chunks.append(current_list)
-            current_list = [[item['prompt'], item['weight']]]
-            current_length = item['length']
-        else:
-            if not current_list:
-                current_list.append([item['prompt'], item['weight']])
-            else:
-                if item['weight'] != current_list[-1][1]:
-                    current_list.append([item['prompt'], item['weight']])
-                else:
-                    current_list[-1][0] += f" {item['prompt']}"
-            current_length += item['length']
-
-    if current_list:
-        global_prompt_chunks.append(current_list)
-
-    if only_convert_string:
-        return ' '.join([prompt_attention_to_invoke_prompt(i) for i in global_prompt_chunks])
-
-    return merge_embeds([prompt_attention_to_invoke_prompt(i) for i in global_prompt_chunks], compel)
-
+# Added: Simple long prompt processing function
+def process_long_prompt(prompt, negative_prompt=""):
+    """Simple long prompt processing using Compel"""
+    try:
+        conditioning, pooled = compel([prompt, negative_prompt])
+        return conditioning, pooled
+    except Exception as e:
+        print(f"Long prompt processing failed: {e}, falling back to standard processing")
+        return None, None
 
 @spaces.GPU
-def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, enable_long_prompt):
+def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
+    # Changed: Remove the 60-word limit warning and add a long-prompt check
+    use_long_prompt = len(prompt.split()) > 60 or len(prompt) > 300
 
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
@@ -253,67 +61,49 @@ def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance
     generator = torch.Generator(device=device).manual_seed(seed)
 
     try:
-        if enable_long_prompt:
-            # Use advanced prompt processing
-            print("Using advanced long prompt processing...")
-
-            # Process prompts with attention weights and chunking
-            if not negative_prompt:
-                negative_prompt = ""
-
-            processed_prompt = process_long_prompt(prompt, pipe, compel, only_convert_string=True)
-            processed_negative = process_long_prompt(negative_prompt, pipe, compel, only_convert_string=True)
-
-            # Get embeddings
-            conditioning, pooled = compel([processed_prompt, processed_negative])
-
-            # Generate with embeddings
-            output_image = pipe(
-                prompt_embeds=conditioning[0:1],
-                pooled_prompt_embeds=pooled[0:1],
-                negative_prompt_embeds=conditioning[1:2],
-                negative_pooled_prompt_embeds=pooled[1:2],
-                guidance_scale=guidance_scale,
-                num_inference_steps=num_inference_steps,
-                width=width,
-                height=height,
-                generator=generator
-            ).images[0]
-
-        else:
-            # Use standard processing with warning for long prompts
-            if len(prompt.split()) > 60:
-                print("Warning: Prompt may be too long. Consider enabling 'Long Prompt Processing'")
-
-            output_image = pipe(
-                prompt=prompt,
-                negative_prompt=negative_prompt,
-                guidance_scale=guidance_scale,
-                num_inference_steps=num_inference_steps,
-                width=width,
-                height=height,
-                generator=generator
-            ).images[0]
-
-        return output_image
+        # Added: Try long prompt processing first if the prompt is long
+        if use_long_prompt:
+            print("Using long prompt processing...")
+            conditioning, pooled = process_long_prompt(prompt, negative_prompt)
+
+            if conditioning is not None:
+                output_image = pipe(
+                    prompt_embeds=conditioning[0:1],
+                    pooled_prompt_embeds=pooled[0:1],
+                    negative_prompt_embeds=conditioning[1:2],
+                    negative_pooled_prompt_embeds=pooled[1:2],
+                    guidance_scale=guidance_scale,
+                    num_inference_steps=num_inference_steps,
+                    width=width,
+                    height=height,
+                    generator=generator
+                ).images[0]
+                return output_image
+
+        # Fall back to standard processing
+        output_image = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_inference_steps,
+            width=width,
+            height=height,
+            generator=generator
+        ).images[0]
+
+        return output_image
     except RuntimeError as e:
         print(f"Error during generation: {e}")
+        # Return a blank image with error message
         error_img = Image.new('RGB', (width, height), color=(0, 0, 0))
         return error_img
 
+
 css = """
 #col-container {
     margin: 0 auto;
     max-width: 520px;
 }
-.long-prompt-info {
-    background-color: #f0f8ff;
-    padding: 10px;
-    border-radius: 5px;
-    margin: 10px 0;
-    font-size: 12px;
-}
 """
 
 with gr.Blocks(css=css) as demo:
@@ -324,8 +114,8 @@ with gr.Blocks(css=css) as demo:
         prompt = gr.Text(
             label="Prompt",
            show_label=False,
-            max_lines=3,  # Increased for longer prompts
-            placeholder="Enter your prompt. Use (word:1.2) for emphasis or [word:0.8] for de-emphasis",
+            max_lines=3,  # Changed: Increased from 1 to 3 for longer prompts
+            placeholder="Enter your prompt (long prompts are automatically supported)",  # Changed: Updated placeholder
             container=False,
         )
@@ -334,28 +124,11 @@ with gr.Blocks(css=css) as demo:
         result = gr.Image(label="Result", show_label=False)
 
         with gr.Accordion("Advanced Settings", open=False):
-
-            # Long prompt processing toggle
-            enable_long_prompt = gr.Checkbox(
-                label="Enable Long Prompt Processing",
-                value=True,
-                info="Process very long prompts with attention weights like (word:1.2) or [word:0.8]"
-            )
-
-            with gr.Column(elem_class="long-prompt-info"):
-                gr.HTML("""
-                    <strong>Long Prompt Features:</strong><br>
-                    • <code>(word:1.2)</code> - Increase attention to 'word' by 1.2x<br>
-                    • <code>[word:0.8]</code> - Decrease attention to 'word' by 0.8x<br>
-                    • <code>((word))</code> - Strong emphasis (1.21x)<br>
-                    • <code>[[word]]</code> - Strong de-emphasis (0.83x)<br>
-                    • No token limit - write detailed prompts!
-                """)
 
             negative_prompt = gr.Text(
                 label="Negative prompt",
-                max_lines=2,  # Increased for longer negative prompts
-                placeholder="Enter a negative prompt (supports same weight syntax)",
+                max_lines=2,  # Changed: Increased from 1 to 2
+                placeholder="Enter a negative prompt",
                 value="nsfw, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn"
             )
@@ -405,7 +178,7 @@ with gr.Blocks(css=css) as demo:
 
     run_button.click(
         fn=infer,
-        inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, enable_long_prompt],
+        inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
         outputs=[result]
    )
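For context, the added path leans on Compel's handling of prompts longer than CLIP's 77-token window: with truncate_long_prompts=False, Compel encodes the prompt in 77-token segments and concatenates the resulting embeddings, and batching the positive and negative prompt in one call keeps both embedding tensors the same length, which is what the new infer() relies on when it slices conditioning[0:1] and conditioning[1:2]. A minimal standalone sketch of that path, mirroring the code added in this commit; the checkpoint id below is a placeholder, since these hunks do not show which model app.py loads, and a GPU is assumed as in the Space:

import torch
from compel import Compel, ReturnedEmbeddingsType
from diffusers import StableDiffusionXLPipeline

# Placeholder checkpoint, not taken from this commit.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
).to("cuda")

compel = Compel(
    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
    truncate_long_prompts=False,  # keep tokens beyond the 77-token CLIP window
)

# One batched call, as the new process_long_prompt() does, so positive and
# negative embeddings come back with matching sequence lengths.
prompt = ", ".join(40 * ["an ornate clockwork owl on a mossy branch"])  # well over 77 tokens
conditioning, pooled = compel([prompt, "low quality, watermark"])

image = pipe(
    prompt_embeds=conditioning[0:1],
    pooled_prompt_embeds=pooled[0:1],
    negative_prompt_embeds=conditioning[1:2],
    negative_pooled_prompt_embeds=pooled[1:2],
    num_inference_steps=28,
    guidance_scale=7.0,
).images[0]
image.save("long_prompt_test.png")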
184