# clip / text_calibration.py
# Koushik Dutta
# calibration
# 45d0854
import os
from PIL import Image
from transformers import CLIPProcessor
import os
text_input_calibration_samples = [
# People descriptions
"man with black backpack",
"woman with red hair",
"person wearing hoodie",
"tall man in suit",
"woman with blonde hair",
"person in wheelchair",
"man with beard",
"woman carrying purse",
"person wearing cap",
"elderly man with cane",
"young woman jogging",
"man in uniform",
"person with sunglasses",
"woman in dress",
"man carrying briefcase",
"person on bicycle",
"woman with shopping bags",
"man in casual clothes",
"person wearing mask",
"woman with ponytail",
# Clothing descriptions
"person in red shirt",
"man wearing blue jeans",
"woman in black coat",
"person in white sneakers",
"man with yellow jacket",
"woman wearing scarf",
"person in green hoodie",
"man in brown boots",
"woman with pink top",
"person wearing hat",
"man in dark clothing",
"woman in light colored dress",
"person with striped shirt",
"man wearing tie",
"woman in high heels",
"person with leather jacket",
"man in work clothes",
"woman carrying umbrella",
"person in sports uniform",
"man with reflective vest",
# Activities and behaviors
"person running",
"man walking quickly",
"woman looking around",
"person standing still",
"man sitting on bench",
"woman using phone",
"person entering building",
"man leaving premises",
"woman walking dog",
"person climbing fence",
"man opening door",
"woman waiting outside",
"person pacing back and forth",
"man looking over shoulder",
"woman checking watch",
"person hiding behind object",
"man crouching down",
"woman pointing at something",
"person waving hands",
"man shaking head",
# Multiple people
"two people talking",
"group of three men",
"couple walking together",
"family with children",
"two women arguing",
"group of teenagers",
"men in conversation",
"people shaking hands",
"crowd gathering",
"two people embracing",
"group walking together",
"people standing in line",
"multiple people running",
"group of workers",
"people sitting together",
"two men fighting",
"women laughing together",
"group carrying items",
"people looking up",
"multiple suspicious individuals",
# Vehicle related
"person getting out of car",
"man approaching vehicle",
"woman walking to car",
"person by motorcycle",
"man loading truck",
"woman on scooter",
"person near van",
"man washing car",
"woman in parking lot",
"person crossing street",
"man directing traffic",
"woman hailing taxi",
"person with bicycle",
"man fixing car",
"woman in driver seat",
"person walking between cars",
"man checking vehicle",
"woman backing out car",
"person on skateboard",
"man with car keys",
# Suspicious activities
"person looking through window",
"man trying door handle",
"woman peering around corner",
"person with crowbar",
"man in restricted area",
"woman acting nervously",
"person avoiding cameras",
"man covering face",
"woman dropping something",
"person moving stealthily",
"man with suspicious package",
"woman looking over fence",
"person tampering with lock",
"man hiding object",
"woman checking surroundings",
"person in dark alley",
"man wearing all black",
"woman with large bag",
"person acting erratically",
"man breaking window",
# Time-based descriptions
"person at night",
"man during daylight",
"woman in early morning",
"person at dusk",
"man in bright sunlight",
"woman in shadows",
"person during storm",
"man in fog",
"woman in rain",
"person at dawn",
"man during lunch hour",
"woman after hours",
"person on weekend",
"man during business hours",
"woman late at night",
"person in darkness",
"man under streetlight",
"woman in well-lit area",
"person during sunset",
"man in twilight",
# Location-based
"person near entrance",
"man by back door",
"woman at front gate",
"person in parking garage",
"man on rooftop",
"woman in stairwell",
"person near fence",
"man in courtyard",
"woman by dumpster",
"person on sidewalk",
"man in lobby",
"woman near elevator",
"person in hallway",
"man by loading dock",
"woman near mailbox",
"person at bus stop",
"man in garden area",
"woman by playground",
"person near storage unit",
"man at security booth",
# Physical characteristics
"short person walking",
"tall man standing",
"heavy set woman",
"thin person running",
"muscular man lifting",
"petite woman entering",
"bald man approaching",
"person with long hair",
"man with tattoos",
"woman with glasses",
"person limping",
"man with mustache",
"woman in heels",
"person using crutches",
"man with gray hair",
"young person skateboarding",
"middle-aged woman",
"elderly person sitting",
"child with adult",
"teenager alone",
# Carrying objects
"person with laptop bag",
"man carrying box",
"woman with suitcase",
"person holding tools",
"man with shopping cart",
"woman carrying flowers",
"person with sports equipment",
"man holding papers",
"woman with coffee cup",
"person carrying ladder",
"man with toolbox",
"woman holding keys",
"person with camera",
"man carrying rope",
"woman with baby stroller",
"person holding flashlight",
"man with garbage bag",
"woman carrying groceries",
"person with musical instrument",
"man holding phone",
# Emergency situations
"person calling for help",
"man running from scene",
"woman appearing distressed",
"person on ground",
"man assisting someone",
"woman fleeing area",
"person waving for attention",
"man performing CPR",
"woman directing emergency responders",
"person administering first aid",
"man evacuating building",
"woman in medical emergency",
"person calling police",
"man reporting incident",
"woman seeking shelter",
"person in accident",
"man helping injured person",
"woman escaping danger",
"person signaling distress",
"man during emergency",
# Delivery and service
"delivery person at door",
"man in uniform with package",
"woman signing clipboard",
"postal worker walking",
"repair technician working",
"cleaning person entering",
"security guard patrolling",
"maintenance worker outside",
"food delivery person",
"man reading meter",
"woman inspecting property",
"service vehicle driver",
"contractor with equipment",
"landscaper working",
"painter carrying supplies",
"electrician with tools",
"plumber at entrance",
"pest control worker",
"cable technician arriving",
"utility worker climbing",
# Facial expressions and gestures
"person smiling",
"man frowning",
"woman gesturing angrily",
"person nodding head",
"man shrugging shoulders",
"woman covering mouth",
"person pointing direction",
"man wiping forehead",
"woman clapping hands",
"person scratching head",
"man thumbs up gesture",
"woman waving goodbye",
"person looking confused",
"man appearing angry",
"woman seeming worried",
"person showing surprise",
"man looking tired",
"woman appearing happy",
"person checking time",
"man stretching arms",
# Weather conditions
"person in heavy coat",
"man without jacket in cold",
"woman with snow boots",
"person holding umbrella open",
"man squinting in sun",
"woman bundled up warmly",
"person running from rain",
"man in summer clothes",
"woman wearing raincoat",
"person slipping on ice",
"man sweating heavily",
"woman shivering outside",
"person in winter hat",
"man in shorts and sandals",
"woman with wet hair",
"person seeking shade",
"man wiping rain off",
"woman in light summer dress",
"person with frost breath",
"man removing wet clothes",
# Technology interaction
"person using ATM",
"man scanning keycard",
"woman typing on keypad",
"person talking on phone",
"man taking photo",
"woman checking smartphone",
"person using tablet",
"man operating machine",
"woman at computer terminal",
"person with headphones",
"man charging device",
"woman using GPS",
"person with smart watch",
"man installing equipment",
"woman troubleshooting device",
"person with bluetooth earpiece",
"man using remote control",
"woman at self-checkout",
"person scanning QR code",
"man with virtual reality headset"
]
def get_text_calibration_data(model_name="openai/clip-vit-base-patch32"):
    """Tokenize every calibration prompt and collect the resulting tensors.

    Each string in ``text_input_calibration_samples`` is passed to the
    processor on its own (as a one-element batch) with
    ``padding="max_length"`` and ``truncation=True``, so every sample is
    padded or truncated rather than sized by its own token count.

    Args:
        model_name: Checkpoint name or path handed to
            ``CLIPProcessor.from_pretrained``. Defaults to the checkpoint
            that was previously hard-coded, so existing callers are
            unaffected.

    Returns:
        A list with one ``(input_ids, attention_mask)`` tuple per prompt, in
        the same order as ``text_input_calibration_samples``. The values are
        the PyTorch tensors produced by ``return_tensors="pt"``.
    """
    # Load the processor once; it is reused for every prompt below.
    processor = CLIPProcessor.from_pretrained(model_name)

    calibration_data = []
    for text in text_input_calibration_samples:
        # One prompt per call keeps each calibration sample a separate
        # batch-of-one entry instead of a single stacked batch.
        inputs = processor(
            text=[text],
            return_tensors="pt",
            padding="max_length",
            truncation=True,
        )
        calibration_data.append(
            (inputs.data["input_ids"], inputs.data["attention_mask"])
        )
    return calibration_data