Update app.py
Browse files
app.py
CHANGED
|
@@ -18,12 +18,10 @@ import plotly.express as px
|
|
| 18 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 19 |
|
| 20 |
# CLIP for prompt alignment & aesthetics
|
| 21 |
-
clip_model = CLIPModel.from_pretrained(
|
| 22 |
-
"openai/clip-vit-base-patch32"
|
| 23 |
-
).to(device)
|
| 24 |
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
| 25 |
|
| 26 |
-
# BLIP-2 for caption generation: 8-bit if GPU available, else
|
| 27 |
blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
|
| 28 |
if torch.cuda.is_available():
|
| 29 |
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
|
|
@@ -33,12 +31,10 @@ if torch.cuda.is_available():
|
|
| 33 |
device_map="auto"
|
| 34 |
)
|
| 35 |
else:
|
| 36 |
-
# CPU-only environment: load half precision
|
| 37 |
blip_model = Blip2ForConditionalGeneration.from_pretrained(
|
| 38 |
"Salesforce/blip2-flan-t5-xl",
|
| 39 |
torch_dtype=torch.float16
|
| 40 |
-
)
|
| 41 |
-
blip_model.to(device)
|
| 42 |
|
| 43 |
# LPIPS for diversity
|
| 44 |
lpips_model = lpips.LPIPS(net='alex').to(device)
|
|
@@ -51,11 +47,8 @@ def extract_metadata(file):
|
|
| 51 |
"""Extract prompt and model name using sd-parsers from file path."""
|
| 52 |
parser = ParserManager()
|
| 53 |
info = parser.parse(file.name)
|
| 54 |
-
# prompts list
|
| 55 |
prompt = info.prompts[0].value if info.prompts else ''
|
| 56 |
-
# models list may contain model identifiers
|
| 57 |
if hasattr(info, 'models') and info.models:
|
| 58 |
-
# info.models may be list of strings or objects
|
| 59 |
first = info.models[0]
|
| 60 |
model_name = first.name if hasattr(first, 'name') else str(first)
|
| 61 |
else:
|
|
@@ -63,7 +56,7 @@ def extract_metadata(file):
|
|
| 63 |
return prompt, model_name
|
| 64 |
|
| 65 |
# Image preprocessing transform
|
| 66 |
-
preprocess = transforms.Compose([
|
| 67 |
transforms.Resize((224, 224)),
|
| 68 |
transforms.ToTensor(),
|
| 69 |
transforms.Normalize(
|
|
@@ -72,6 +65,10 @@ preprocess = transforms.Compose([ transforms.Compose([
|
|
| 72 |
)
|
| 73 |
])
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
def compute_clip_score(img: Image.Image, text: str) -> float:
|
| 76 |
inputs = clip_processor(text=[text], images=img, return_tensors="pt", padding=True).to(device)
|
| 77 |
outputs = clip_model(**inputs)
|
|
@@ -110,7 +107,7 @@ def analyze_images(files):
|
|
| 110 |
img = Image.open(f.name).convert('RGB')
|
| 111 |
prompt, model = extract_metadata(f)
|
| 112 |
|
| 113 |
-
|
| 114 |
cap_sim = compute_caption_similarity(img, prompt)
|
| 115 |
brisque, niqe = compute_iqa_metrics(img)
|
| 116 |
aesthetic = compute_clip_score(img, "a beautiful high quality image")
|
|
@@ -118,7 +115,7 @@ def analyze_images(files):
|
|
| 118 |
records.append({
|
| 119 |
'model': model,
|
| 120 |
'prompt': prompt,
|
| 121 |
-
'clip_score':
|
| 122 |
'caption_sim': cap_sim,
|
| 123 |
'brisque': brisque,
|
| 124 |
'niqe': niqe,
|
|
@@ -153,14 +150,13 @@ def analyze_images(files):
|
|
| 153 |
# --------------------
|
| 154 |
|
| 155 |
def plot_metrics(agg: pd.DataFrame):
|
| 156 |
-
|
| 157 |
agg,
|
| 158 |
x='model',
|
| 159 |
y=['aesthetic_mean', 'clip_score_mean', 'caption_sim_mean', 'diversity'],
|
| 160 |
barmode='group',
|
| 161 |
title='Сравнение моделей по метрикам'
|
| 162 |
)
|
| 163 |
-
return fig
|
| 164 |
|
| 165 |
# --------------------
|
| 166 |
# Gradio Interface
|
|
@@ -178,7 +174,10 @@ with gr.Blocks() as demo:
|
|
| 178 |
with gr.Row():
|
| 179 |
input_files = gr.File(file_count="multiple", label="Выберите PNG файлы")
|
| 180 |
output_table = gr.Dataframe(
|
| 181 |
-
headers=[
|
|
|
|
|
|
|
|
|
|
| 182 |
label="Сводная таблица"
|
| 183 |
)
|
| 184 |
|
|
|
|
| 18 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 19 |
|
| 20 |
# CLIP for prompt alignment & aesthetics
|
| 21 |
+
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
|
|
|
|
|
|
|
| 22 |
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
| 23 |
|
| 24 |
+
# BLIP-2 for caption generation: 8-bit if GPU available, else half precision
|
| 25 |
blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
|
| 26 |
if torch.cuda.is_available():
|
| 27 |
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
|
|
|
|
| 31 |
device_map="auto"
|
| 32 |
)
|
| 33 |
else:
|
|
|
|
| 34 |
blip_model = Blip2ForConditionalGeneration.from_pretrained(
|
| 35 |
"Salesforce/blip2-flan-t5-xl",
|
| 36 |
torch_dtype=torch.float16
|
| 37 |
+
).to(device)
|
|
|
|
| 38 |
|
| 39 |
# LPIPS for diversity
|
| 40 |
lpips_model = lpips.LPIPS(net='alex').to(device)
|
|
|
|
| 47 |
"""Extract prompt and model name using sd-parsers from file path."""
|
| 48 |
parser = ParserManager()
|
| 49 |
info = parser.parse(file.name)
|
|
|
|
| 50 |
prompt = info.prompts[0].value if info.prompts else ''
|
|
|
|
| 51 |
if hasattr(info, 'models') and info.models:
|
|
|
|
| 52 |
first = info.models[0]
|
| 53 |
model_name = first.name if hasattr(first, 'name') else str(first)
|
| 54 |
else:
|
|
|
|
| 56 |
return prompt, model_name
|
| 57 |
|
| 58 |
# Image preprocessing transform
|
| 59 |
+
preprocess = transforms.Compose([
|
| 60 |
transforms.Resize((224, 224)),
|
| 61 |
transforms.ToTensor(),
|
| 62 |
transforms.Normalize(
|
|
|
|
| 65 |
)
|
| 66 |
])
|
| 67 |
|
| 68 |
+
# --------------------
|
| 69 |
+
# Metric Computations
|
| 70 |
+
# --------------------
|
| 71 |
+
|
| 72 |
def compute_clip_score(img: Image.Image, text: str) -> float:
|
| 73 |
inputs = clip_processor(text=[text], images=img, return_tensors="pt", padding=True).to(device)
|
| 74 |
outputs = clip_model(**inputs)
|
|
|
|
| 107 |
img = Image.open(f.name).convert('RGB')
|
| 108 |
prompt, model = extract_metadata(f)
|
| 109 |
|
| 110 |
+
cs = compute_clip_score(img, prompt)
|
| 111 |
cap_sim = compute_caption_similarity(img, prompt)
|
| 112 |
brisque, niqe = compute_iqa_metrics(img)
|
| 113 |
aesthetic = compute_clip_score(img, "a beautiful high quality image")
|
|
|
|
| 115 |
records.append({
|
| 116 |
'model': model,
|
| 117 |
'prompt': prompt,
|
| 118 |
+
'clip_score': cs,
|
| 119 |
'caption_sim': cap_sim,
|
| 120 |
'brisque': brisque,
|
| 121 |
'niqe': niqe,
|
|
|
|
| 150 |
# --------------------
|
| 151 |
|
| 152 |
def plot_metrics(agg: pd.DataFrame):
    """Build a grouped bar chart comparing models across the aggregated metrics.

    `agg` is expected to have one row per model with the aggregate columns
    listed below — presumably produced by analyze_images (TODO confirm against
    caller, which is outside this view).
    """
    # Columns plotted side by side for each model on the x axis.
    metric_columns = [
        'aesthetic_mean',
        'clip_score_mean',
        'caption_sim_mean',
        'diversity',
    ]
    fig = px.bar(
        agg,
        x='model',
        y=metric_columns,
        barmode='group',
        title='Сравнение моделей по метрикам',
    )
    return fig
|
|
|
|
| 160 |
|
| 161 |
# --------------------
|
| 162 |
# Gradio Interface
|
|
|
|
| 174 |
with gr.Row():
|
| 175 |
input_files = gr.File(file_count="multiple", label="Выберите PNG файлы")
|
| 176 |
output_table = gr.Dataframe(
|
| 177 |
+
headers=[
|
| 178 |
+
"model", "clip_score_mean", "caption_sim_mean", "brisque_mean",
|
| 179 |
+
"niqe_mean", "aesthetic_mean", "diversity"
|
| 180 |
+
],
|
| 181 |
label="Сводная таблица"
|
| 182 |
)
|
| 183 |
|