Update app.py
app.py CHANGED
@@ -14,20 +14,21 @@ import plotly.express as px
 # --------------------
 # Setup Models
 # --------------------
+
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
 # CLIP for prompt alignment & aesthetics
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
-# BLIP-2 for caption generation
-blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
+# BLIP-2 for caption generation (processor without .to)
+blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
 blip_model = Blip2ForConditionalGeneration.from_pretrained(
     "Salesforce/blip2-flan-t5-xl", torch_dtype=torch.float16
 ).to(device)
 
-# LPIPS for diversity
-lpips_model = lpips.LPIPS(net='alex').to(device)
+# LPIPS for diversity
+lpips_model = lpips.LPIPS(net='alex').to(device)
 
 # --------------------
 # Helper Functions
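This hunk sets up the three scoring models but never shows them in use. As a minimal sketch of how the LPIPS model above could score the diversity of a set of generated images, the helper below is hypothetical (not part of app.py) and assumes image tensors already scaled to the [-1, 1] range LPIPS expects:

# Hypothetical helper (not in app.py): mean pairwise LPIPS distance as a
# diversity score. lpips expects (N, 3, H, W) tensors scaled to [-1, 1].
import itertools
import torch

@torch.no_grad()
def compute_diversity(images):
    """images: list of (3, H, W) tensors in [-1, 1]; higher = more diverse."""
    pairs = list(itertools.combinations(images, 2))
    if not pairs:
        return 0.0  # fewer than two images: no pairwise distances to average
    dists = [
        lpips_model(a.unsqueeze(0).to(device), b.unsqueeze(0).to(device)).item()
        for a, b in pairs
    ]
    return sum(dists) / len(dists)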
@@ -36,20 +37,23 @@ lpips_model = lpips.LPIPS(net='alex').to(device)
 def extract_metadata(image_bytes):
     """Extracts prompt and model name from image bytes using sd-parsers."""
     parser = ParserManager()
-
+    tmp_path = "temp.png"
+    with open(tmp_path, 'wb') as tmp:
         tmp.write(image_bytes)
-    info = parser.parse(
+    info = parser.parse(tmp_path)
     prompt = info.prompts[0].value if info.prompts else ''
     model_name = info.model_name or ''
-    os.remove(
+    os.remove(tmp_path)
     return prompt, model_name
 
-# Image preprocessing
+# Image preprocessing transform
 preprocess = transforms.Compose([
     transforms.Resize((224, 224)),
     transforms.ToTensor(),
-    transforms.Normalize(
-
+    transforms.Normalize(
+        (0.48145466, 0.4578275, 0.40821073),
+        (0.26862954, 0.26130258, 0.27577711)
+    )
 ])
 
 def compute_clip_score(img: Image.Image, text: str) -> float:
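The tuples completed inside transforms.Normalize are the standard CLIP image mean and std. For the repaired extract_metadata, a minimal usage sketch follows; the file name is a placeholder, not something from app.py:

# Hypothetical usage of extract_metadata (file name is a placeholder):
with open("generated.png", "rb") as f:
    image_bytes = f.read()

prompt, model_name = extract_metadata(image_bytes)
print(f"prompt={prompt!r}, model={model_name!r}")  # both '' when no SD metadata is embedded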
@@ -60,7 +64,7 @@ def compute_clip_score(img: Image.Image, text: str) -> float:
 
 @torch.no_grad()
 def compute_caption_similarity(img: Image.Image, prompt: str) -> float:
-    inputs = blip_processor(images=img, return_tensors="pt").to(device
+    inputs = blip_processor(images=img, return_tensors="pt").to(device)
     out = blip_model.generate(**inputs)
     caption = blip_processor.decode(out[0], skip_special_tokens=True)
     return compute_clip_score(img, caption)
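With the missing parenthesis restored, compute_caption_similarity captions the image with BLIP-2 and then scores that caption against the image via CLIP; note that the prompt argument is unused in the body shown, and since blip_model was loaded in float16, on GPU the processor output may additionally need a cast such as .to(device, torch.float16), which this diff does not add. A usage sketch with placeholder path and prompt:

# Hypothetical usage (path and prompt are placeholders):
from PIL import Image

img = Image.open("generated.png").convert("RGB")
score = compute_caption_similarity(img, "a castle at sunset")
print(f"caption-image CLIP score: {score:.3f}")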
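Taken together, the repaired helpers compose into a per-image evaluation. The wrapper below is a hypothetical sketch, not part of app.py, built only from functions shown in this diff:

# Hypothetical wrapper (not in app.py) combining the helpers above:
from PIL import Image

def evaluate_image(path: str) -> dict:
    with open(path, "rb") as f:
        image_bytes = f.read()
    prompt, model_name = extract_metadata(image_bytes)
    img = Image.open(path).convert("RGB")
    return {
        "model": model_name,
        "prompt_alignment": compute_clip_score(img, prompt) if prompt else None,
        "caption_similarity": compute_caption_similarity(img, prompt),
    }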
|