Reorganize code (e.g., import tools and libraries at the start). Edit and tweak.
app.py CHANGED
@@ -1,90 +1,59 @@
+# Import gradio - app framework
 import gradio as gr
 
-
-# Load vision capability to support image display
-##pip install datasets
-
-# Load pandas for grid display
-##pip install pandas
+# Import pandas, datasets, transformers, torch
 import pandas as pd
-
-# Load first 20 rows of dataset (merve/coco)
 from datasets import load_dataset
-dataset = load_dataset("merve/coco", split='train', stream=True)
-
-# Reduce dataset to 20 rows
-df = pd.dataset.iloc[0:19]
-
-print ("Print to show the 20 images available.")
-print ("The app will then select an image for further exploration.")
-print(df.head(20))
-
-# Use the sample command
-selected_image = df.sample(n=1)
-
-# Get url for image
-def parse_url(df):
-    for index, row in df.iterrows():
-        parsed = urlparse(str(row)).query # <- Notice the change here
-        parsed = parse_qs(parsed)
-        for k, v in parsed.items(): #use items() in Python3 and iteritems() in Python2
-            df.loc[index, k.strip()] = v[0].strip().lower()
-    return df
-
-image_url = parse_url(df['image'])
-
-print (selected_image)
 
+from transformers import BlipProcessor, BlipForConditionalGeneration
+from PIL import Image
+import torch
+
+# Get merve/coco dataset as a stream
+dataset = load_dataset("merve/coco", split='train', streaming=True)
+
+# Reduce dataset to 20 rows, i.e., get a sample
+samples = list(dataset.take(20))
+
+# Convert to dataframe
+df = pd.DataFrame(samples)
+
+## print ("Print to show the 20 images available.")
+## print ("The app will then select an image for further exploration.")
+## print(df.head(20))
+
+# Load the image captioning model (Salesforce/blip-image-captioning-large)
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
 
-#
-inputs = processor(raw_image, text, return_tensors="pt")
-out = model.generate(**inputs)
-
-inputs = processor(raw_image, return_tensors="pt")
-out = model.generate(**inputs)
-print(processor.decode(out[0], skip_special_tokens=True))
-
-# Get image database
-##curl -X GET \
-## "https://datasets-server.huggingface.co/first-rows?dataset=merve%2Fcoco&config=default&split=validation"
-
-# Load transformer Salesforce/blip image captioning
-# Load model directly
-##from transformers import AutoProcessor, AutoModelForVision2Seq
-
-##processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
-##model = AutoModelForVision2Seq.from_pretrained("Salesforce/blip-image-captioning-large")
+# Configure captioning function
+def caption_random_image():
+    # Pick a random row from the 20-row sample
+    sample = df.sample(1).iloc[0]
+
+    # The 'image' field contains an actual PIL image
+    image = sample["image"]
+
+    # Unconditional image captioning
+    inputs = processor(image, return_tensors="pt")
+    out = model.generate(**inputs)
+    caption = processor.decode(out[0], skip_special_tokens=True)
+
+    return image, caption
+
+demo = gr.Interface(
+    fn=caption_random_image,
+    inputs=None,
+    outputs=["image", "text"],
+    title="Image Captioning",
+    description="Pulls a random image from merve/coco and captions it using BLIP.",
+)
+
+demo.launch()
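Note: the removed code also sketched BLIP's conditional captioning mode, where a text prefix steers the generated caption; the new app keeps only the unconditional path. Below is a minimal standalone sketch of the conditional variant, mirroring the usage on the BLIP model card. The image path ("example.jpg") and the prompt string ("a photography of") are illustrative assumptions, not part of app.py.

# Conditional image captioning with BLIP: the caption is generated as a
# continuation of a text prefix. Image path and prompt are hypothetical.
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

raw_image = Image.open("example.jpg").convert("RGB")  # hypothetical local image
text = "a photography of"  # assumed caption prefix
inputs = processor(raw_image, text, return_tensors="pt")
out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))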