Spaces:

Manireddy1508
/

imagetoimage

Paused

App Files Community

Manireddy1508 committed on Apr 7, 2025

Commit

045059f

verified ·

1 Parent(s): 8cafce9

Update utils/planner.py

Browse files

Files changed (1) hide show

utils/planner.py +13 -15

utils/planner.py CHANGED Viewed

@@ -1,11 +1,12 @@
 import os
 import json
 from dotenv import load_dotenv
 from openai import OpenAI
 from PIL import Image
 import torch
-from transformers import BlipProcessor, BlipForConditionalGeneration
-from transformers import CLIPTokenizer
 # ----------------------------
 # 🔐 Load Environment & GPT Client
@@ -14,17 +15,16 @@ load_dotenv()
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 # ----------------------------
-# 🧠 Load BLIP Captioning Model
 # ----------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
-tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
 # ----------------------------
-# 📁 Log File
 # ----------------------------
 LOG_PATH = "logs/prompt_log.jsonl"
 os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
@@ -92,7 +92,7 @@ def generate_prompt_variations_from_scene(scene_plan: dict, base_prompt: str, n:
         system_msg = f"""
 You are a creative prompt variation generator for an AI image generation system.
 Given a base user prompt and its structured scene plan, generate {n} diverse image generation prompts.
-Each prompt should:
 - Be visually rich and descriptive
 - Include stylistic or contextual variation
 - Reference the same product and environment
@@ -117,14 +117,13 @@ Respond ONLY with a JSON array of strings. No explanations.
         content = response.choices[0].message.content
         all_prompts = json.loads(content)
-        # Enforce token limit using CLIP tokenizer
         filtered = []
         for p in all_prompts:
-            tokens = clip_tokenizer(p)["input_ids"]
-            if len(tokens) <= 77:
                 filtered.append(p)
-        print("🧠 Filtered Prompts (<=77 tokens):", filtered)
         return filtered or [base_prompt]
     except Exception as e:
@@ -140,8 +139,7 @@ def generate_negative_prompt_from_scene(scene_plan: dict) -> str:
 You are an assistant that generates negative prompts for an image generation model.
 Based on the structured scene plan, return a list of things that should NOT appear in the image,
 such as incorrect objects, extra limbs, distorted hands, text, watermark, etc.
-Return a single negative prompt string (comma-separated values).
-No explanations.
 """
         response = client.chat.completions.create(
@@ -162,9 +160,8 @@ No explanations.
         print("❌ generate_negative_prompt_from_scene() Error:", e)
         return "deformed hands, extra limbs, text, watermark, signature"
 # ----------------------------
-# 📝 Save Logs for Analysis
 # ----------------------------
 def save_generation_log(caption, scene_plan, prompts, negative_prompt):
     log = {
@@ -175,3 +172,4 @@ def save_generation_log(caption, scene_plan, prompts, negative_prompt):
     }
     with open(LOG_PATH, "a") as f:
         f.write(json.dumps(log, indent=2) + "\n")

+# utils/planner.py
 import os
 import json
 from dotenv import load_dotenv
 from openai import OpenAI
 from PIL import Image
 import torch
+from transformers import BlipProcessor, BlipForConditionalGeneration, CLIPTokenizer
 # ----------------------------
 # 🔐 Load Environment & GPT Client
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 # ----------------------------
+# 🧠 Load BLIP & CLIP Tokenizer
 # ----------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
+clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
 # ----------------------------
+# 📁 Log Path
 # ----------------------------
 LOG_PATH = "logs/prompt_log.jsonl"
 os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
         system_msg = f"""
 You are a creative prompt variation generator for an AI image generation system.
 Given a base user prompt and its structured scene plan, generate {n} diverse image generation prompts.
+Each prompt must:
 - Be visually rich and descriptive
 - Include stylistic or contextual variation
 - Reference the same product and environment
         content = response.choices[0].message.content
         all_prompts = json.loads(content)
         filtered = []
         for p in all_prompts:
+            token_count = len(clip_tokenizer(p)["input_ids"])
+            if token_count <= 77:
                 filtered.append(p)
+        print("🧠 Filtered Prompts (<=77 tokens):", filtered)
         return filtered or [base_prompt]
     except Exception as e:
 You are an assistant that generates negative prompts for an image generation model.
 Based on the structured scene plan, return a list of things that should NOT appear in the image,
 such as incorrect objects, extra limbs, distorted hands, text, watermark, etc.
+Return a single negative prompt string (comma-separated values). No explanations.
 """
         response = client.chat.completions.create(
         print("❌ generate_negative_prompt_from_scene() Error:", e)
         return "deformed hands, extra limbs, text, watermark, signature"
 # ----------------------------
+# 📝 Save Logs
 # ----------------------------
 def save_generation_log(caption, scene_plan, prompts, negative_prompt):
     log = {
     }
     with open(LOG_PATH, "a") as f:
         f.write(json.dumps(log, indent=2) + "\n")