Update app_local.py

app_local.py  CHANGED  (+80 -56)
@@ -28,6 +28,7 @@ logger = logging.getLogger(__name__)
 REWRITER_MODEL = "Qwen/Qwen1.5-4B-Chat" # Upgraded to 4B for better JSON handling
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
+MAX_SEED = np.iinfo(np.int32).max
 
 # Quantization configuration
 bnb_config = BitsAndBytesConfig(
@@ -82,29 +83,40 @@ Please provide the rewritten instruction in a clean `json` format as:
 }
 '''
 
+
 def extract_json_response(model_output: str) -> str:
     """Extract rewritten instruction from potentially messy JSON output"""
-    #
+    # Remove code block markers first
     model_output = re.sub(r'```(?:json)?\s*', '', model_output)
-
     try:
-        #
+        # Find the JSON portion in the output
        start_idx = model_output.find('{')
        end_idx = model_output.rfind('}')
-        if start_idx == -1 or end_idx == -1:
-            return None
 
+        # Fix the condition - check if brackets were found
+        if start_idx == -1 or end_idx == -1 or start_idx >= end_idx:
+            print(f"No valid JSON structure found in output. Start: {start_idx}, End: {end_idx}")
+            return None
+
         # Expand to the full object including outer braces
         end_idx += 1 # Include the closing brace
-
         json_str = model_output[start_idx:end_idx]
 
-        #
-        json_str =
-        json_str = re.sub(r':\s*([^"\s{[]+)', r': "\1"', json_str) # Quote unquoted string values
+        # Handle potential markdown or other formatting
+        json_str = json_str.strip()
 
-        #
-
+        # Try to parse JSON directly first
+        try:
+            data = json.loads(json_str)
+        except json.JSONDecodeError as e:
+            print(f"Direct JSON parsing failed: {e}")
+            # If direct parsing fails, try cleanup
+            # Quote keys properly
+            json_str = re.sub(r'([^{}[\],\s"]+)(?=\s*:)', r'"\1"', json_str)
+            # Remove any trailing commas that might cause issues
+            json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
+            # Try parsing again
+            data = json.loads(json_str)
 
         # Extract rewritten prompt from possible key variations
         possible_keys = [
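As an illustration of what the reworked extract_json_response is aiming for, the sketch below replays its two-stage parse (direct json.loads, then regex cleanup) on a made-up model output; the sample string is an assumption for illustration, not part of the commit.

```python
# Illustration only: intended behaviour of the two-stage parse in extract_json_response.
import json
import re

raw = '```json\n{Rewritten: "Add a red hat to the cat", }\n```'  # hypothetical messy output

# Strip code fences, then isolate the outermost {...} block
text = re.sub(r'```(?:json)?\s*', '', raw)
start, end = text.find('{'), text.rfind('}') + 1
json_str = text[start:end].strip()

try:
    data = json.loads(json_str)  # fails here: unquoted key, trailing comma
except json.JSONDecodeError:
    json_str = re.sub(r'([^{}[\],\s"]+)(?=\s*:)', r'"\1"', json_str)  # quote bare keys
    json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)                # drop trailing commas
    data = json.loads(json_str)

print(data)  # {'Rewritten': 'Add a red hat to the cat'}
```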
@@ -124,20 +136,20 @@ def extract_json_response(model_output: str) -> str:
         for value in data.values():
             if isinstance(value, dict) and "Rewritten" in value:
                 return value["Rewritten"].strip()
-
+
         # Try to find any string value that looks like an instruction
         str_values = [v for v in data.values() if isinstance(v, str) and 10 < len(v) < 500]
         if str_values:
             return str_values[0].strip()
-
+
     except Exception as e:
         print(f"JSON parse error: {str(e)}")
-
+        print(f"Model output was: {model_output}")
         return None
 
+
 def polish_prompt(original_prompt: str) -> str:
     """Enhanced prompt rewriting using original system prompt with JSON handling"""
-    # load_rewriter()
 
     # Format as Qwen chat
     messages = [
@@ -156,11 +168,11 @@ def polish_prompt(original_prompt: str) -> str:
     with torch.no_grad():
         generated_ids = rewriter_model.generate(
             **model_inputs,
-            max_new_tokens=256,
+            max_new_tokens=256,
             do_sample=True,
-            temperature=0.5,
+            temperature=0.5,
             top_p=0.8,
-            repetition_penalty=
+            repetition_penalty=1.1,
             no_repeat_ngram_size=3,
             pad_token_id=rewriter_tokenizer.eos_token_id
         )
@@ -171,41 +183,34 @@ def polish_prompt(original_prompt: str) -> str:
         skip_special_tokens=True
     ).strip()
 
-
-    json_str = enhanced
-    if '```' in enhanced:
-        parts = enhanced.split('```')
-        if len(parts) >= 3:
-            json_str = parts[1] # Take content between first set of ```
+    print(f"Model raw output: {enhanced}") # Debug logging
 
     # Try to extract JSON content
-    rewritten_prompt = extract_json_response(
+    rewritten_prompt = extract_json_response(enhanced)
 
     if rewritten_prompt:
         # Clean up remaining artifacts
         rewritten_prompt = re.sub(r'(Replace|Change|Add) "(.*?)"', r'\1 \2', rewritten_prompt)
         rewritten_prompt = rewritten_prompt.replace('\\"', '"').replace('\\n', ' ')
         return rewritten_prompt
-
-
-
-
-
-
-
+    else:
+        # Fallback: try to extract from code blocks or just return cleaned content
+        if '```' in enhanced:
+            parts = enhanced.split('```')
+            if len(parts) >= 2:
+                rewritten_prompt = parts[1].strip()
+            else:
+                rewritten_prompt = enhanced
         else:
             rewritten_prompt = enhanced
-
-
-
-
-
-
-
-
-
-    return rewritten_prompt[:200] # Ensure reasonable length
-
+
+    # Basic cleanup
+    rewritten_prompt = re.sub(r'\s\s+', ' ', rewritten_prompt).strip()
+    if ': ' in rewritten_prompt:
+        rewritten_prompt = rewritten_prompt.split(': ', 1)[-1].strip()
+
+    return rewritten_prompt[:200] if rewritten_prompt else original_prompt
+
 # Load main image editing pipeline
 pipe = QwenImageEditPipeline.from_pretrained(
     "Qwen/Qwen-Image-Edit",
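The fallback branch added to polish_prompt can be hard to follow inside the diff; this small sketch traces only the final cleanup path on a hypothetical non-JSON model reply (the enhanced string below is invented for illustration).

```python
# Illustration only: the cleanup/fallback path in polish_prompt when no JSON is found.
import re

original_prompt = "make the sky purple"
enhanced = "Rewritten: Change the sky to a vivid purple  while keeping the rest of the scene unchanged."

rewritten_prompt = enhanced                                          # no ``` block, no JSON extracted
rewritten_prompt = re.sub(r'\s\s+', ' ', rewritten_prompt).strip()   # collapse repeated whitespace
if ': ' in rewritten_prompt:
    rewritten_prompt = rewritten_prompt.split(': ', 1)[-1].strip()   # drop a "Rewritten:" style prefix

print(rewritten_prompt[:200] if rewritten_prompt else original_prompt)
# Change the sky to a vivid purple while keeping the rest of the scene unchanged.
```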
@@ -233,7 +238,6 @@ else:
     # rewriter_model = None
     # torch.cuda.empty_cache()
     # gc.collect()
-
 @spaces.GPU()
 def infer(
     image,
@@ -246,6 +250,11 @@ def infer(
     num_images_per_prompt=1,
 ):
     """Image editing endpoint with optimized prompt handling"""
+    # Clear cache at start
+    if device == "cuda":
+        torch.cuda.empty_cache()
+        gc.collect()
+
     original_prompt = prompt
     prompt_info = ""
 
@@ -253,15 +262,24 @@ def infer(
     if rewrite_prompt:
         try:
             enhanced_instruction = polish_prompt(original_prompt)
-
-
-
-
-
-
-
-
+            if enhanced_instruction and enhanced_instruction != original_prompt:
+                prompt_info = (
+                    f"<div style='margin:10px; padding:15px; border-radius:8px; border-left:4px solid #4CAF50; background: #f5f9fe'>"
+                    f"<h4 style='margin-top: 0;'>🚀 Prompt Enhancement</h4>"
+                    f"<p><strong>Original:</strong> {original_prompt}</p>"
+                    f"<p><strong style='color:#2E7D32;'>Enhanced:</strong> {enhanced_instruction}</p>"
+                    f"</div>"
+                )
+                prompt = enhanced_instruction
+            else:
+                prompt_info = (
+                    f"<div style='margin:10px; padding:15px; border-radius:8px; border-left:4px solid #FF9800; background: #fff8f0'>"
+                    f"<h4 style='margin-top: 0;'>📝 Prompt Enhancement</h4>"
+                    f"<p>No enhancement applied or enhancement failed</p>"
+                    f"</div>"
+                )
         except Exception as e:
+            print(f"Prompt enhancement error: {str(e)}") # Debug logging
             gr.Warning(f"Prompt enhancement failed: {str(e)}")
             prompt_info = (
                 f"<div style='margin:10px; padding:15px; border-radius:8px; border-left:4px solid #FF5252; background: #fef5f5'>"
@@ -277,7 +295,6 @@ def infer(
                 f"</div>"
             )
 
-
     # Set seed for reproducibility
     seed_val = seed if not randomize_seed else random.randint(0, MAX_SEED)
     generator = torch.Generator(device=device).manual_seed(seed_val)
@@ -293,9 +310,18 @@ def infer(
             true_cfg_scale=true_guidance_scale,
             num_images_per_prompt=num_images_per_prompt
         ).images
-        return edited_images, seed_val, prompt_info
 
+        # Clear cache after generation
+        if device == "cuda":
+            torch.cuda.empty_cache()
+            gc.collect()
+
+        return edited_images, seed_val, prompt_info
     except Exception as e:
+        # Clear cache on error
+        if device == "cuda":
+            torch.cuda.empty_cache()
+            gc.collect()
         gr.Error(f"Image generation failed: {str(e)}")
         return [], seed_val, (
             f"<div style='margin:10px; padding:15px; border-radius:8px; border-left:4px solid #dd2c00; background: #fef5f5'>"
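Not part of the commit: the cache-clearing block now appears three times inside infer (at entry, after generation, and on error). A possible follow-up, sketched below under that assumption, would be to factor it into a small helper; the function name is hypothetical.

```python
# Sketch only: shared helper for the repeated cache-clearing pattern in infer().
import gc

import torch


def free_cuda_memory(device: str) -> None:
    """Release cached CUDA allocations and run the garbage collector."""
    if device == "cuda":
        torch.cuda.empty_cache()
        gc.collect()
```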
@@ -304,8 +330,6 @@ def infer(
             f"</div>"
         )
 
-MAX_SEED = np.iinfo(np.int32).max
-
 with gr.Blocks(title="Qwen Image Editor Fast") as demo:
     gr.Markdown("""
     <div style="text-align: center; background: linear-gradient(to right, #3a7bd5, #00d2ff); color: white; padding: 20px; border-radius: 8px;">