CinemaCLIP / CinemaNetSchema.json
rsomani95's picture
initial push to hf-hub -- CinemaNetSchema.json
40c6d9f verified
{
"version": "1.0.0",
"model": "CinemaCLIP: A state-of-the-art CLIP model excelling in cinematic analysis while also maintaining robust general-purpose capabilities",
"license": "Commercial",
"meta": "",
"concepts": [],
"concepts_meta": {},
"categories": [
"color.contrast",
"color.key",
"color.saturation",
"color.theory",
"color.tones",
"shot.angle",
"shot.composition",
"shot.composition.symmetric",
"shot.focus",
"shot.framing",
"shot.height",
"shot.lens-size",
"shot.level.tilted",
"shot.lighting.cast",
"shot.lighting.contrast",
"shot.lighting.direction",
"shot.lighting.edge",
"shot.lighting.silhouette",
"shot.location",
"shot.time-of-day",
"shot.type",
"shot.type.crowd",
"shot.type.over-the-shoulder"
],
"categories_meta": {
"color.contrast": {
"title": "Color Contrast",
"description": "Determine the degree of color contrast in the image",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"low",
"neutral",
"high"
]
},
"color.key": {
"title": "Color Key",
"description": "Determine if the image is meant to be keyed",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"blue",
"green",
"luma",
"matte",
"na"
]
},
"color.saturation": {
"title": "Color Saturation",
"description": "Determine the overall saturation of the image",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"desaturated",
"neutral",
"saturated"
]
},
"color.theory": {
"title": "Color Theory",
"description": "Determine the color theory in the image",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"analagous",
"complementary",
"monochrome"
]
},
"color.tones": {
"title": "Color Tones",
"description": "Determine the overall image color tonality",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"blackwhite",
"cool",
"mixed",
"neutral",
"warm"
]
},
"shot.angle": {
"title": "Shot Angle",
"description": "Determine the camera angle relative to the subject (Vertical axis / Pitch)",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"overhead",
"high-obvious",
"high-subtle",
"level",
"low-subtle",
"low-obvious",
"reverse-overhead",
"na"
]
},
"shot.composition": {
"title": "Shot Composition",
"description": "Determine which part of the image the eye is naturally directed towards",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"balanced",
"center",
"left",
"right",
"na"
]
},
"shot.composition.symmetric": {
"title": "Shot Composition Symmetric",
"description": "Determine if the frame composition is symmetric or not",
"type": "classifier",
"activation": "sigmoid",
"suggested_thresh": 0.85,
"top_n": null,
"labels": [
"likelihood"
]
},
"shot.focus": {
"title": "Shot Focus",
"description": "Determine the nature of lens focus in the frame",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"deep",
"neutral",
"shallow",
"out",
"na"
]
},
"shot.framing": {
"title": "Shot Framing",
"description": "Determine the frame size of the shot",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"extreme-wide",
"wide",
"full",
"medium-wide",
"medium",
"medium-closeup",
"closeup",
"extreme-closeup-face",
"extreme-closeup-face-macro-eyes-dual",
"extreme-closeup-face-macro-eye-single",
"extreme-closeup-face-macro-mouth",
"extreme-closeup-hands",
"extreme-closeup-body",
"extreme-closeup-prop",
"na"
]
},
"shot.height": {
"title": "Shot Height",
"description": "Determine the height of the camera relative to the subject",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"aerial",
"elevated",
"above-subject",
"eye-level",
"below-subject",
"ground-level",
"na"
]
},
"shot.lens-size": {
"title": "Shot Lens Size",
"description": "Determine the size of the lens used to shoot the frame",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"fisheye-ultrawide",
"wide",
"medium",
"long",
"telephoto",
"na"
]
},
"shot.level.tilted": {
"title": "Shot Level Tilted",
"description": "Determine if the camera is tilted relative to the horizon",
"type": "classifier",
"activation": "sigmoid",
"suggested_thresh": 0.5,
"top_n": null,
"labels": [
"likelihood"
]
},
"shot.lighting.cast": {
"title": "Shot Lighting Cast",
"description": "Determine the harshness of shadows and lit areas in the frame",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"soft",
"neutral",
"hard",
"na"
]
},
"shot.lighting.contrast": {
"title": "Shot Lighting Contrast",
"description": "Determine the overall lighting contrast in the frame",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"low",
"neutral",
"high",
"na"
]
},
"shot.lighting.direction": {
"title": "Shot Lighting Direction",
"description": "Determine the direction of the main lighting source",
"type": "classifier",
"activation": "sigmoid",
"suggested_thresh": 0.67,
"top_n": null,
"labels": [
"ambient",
"back",
"front",
"left",
"right",
"top",
"under"
]
},
"shot.lighting.edge": {
"title": "Shot Lighting Edge",
"description": "Determine if the frame has edge lighting",
"type": "classifier",
"activation": "sigmoid",
"suggested_thresh": 0.8,
"top_n": null,
"labels": [
"likelihood"
]
},
"shot.lighting.silhouette": {
"title": "Shot Lighting Silhouette",
"description": "Determine if the frame has silhouette lighting",
"type": "classifier",
"activation": "sigmoid",
"suggested_thresh": 0.82,
"top_n": null,
"labels": [
"likelihood"
]
},
"shot.location": {
"title": "Shot Location",
"description": "Determine whether the frame was shot interior / exterior",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"exterior",
"interior",
"na"
]
},
"shot.time-of-day": {
"title": "Shot Time-Of-Day",
"description": "Determine the time of day the frame takes place in",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"dawn-dusk",
"day",
"night",
"sunrise-sunset",
"na"
]
},
"shot.type": {
"title": "Shot Type",
"description": "Determine the number of subjects in the frame",
"type": "classifier",
"activation": "softmax",
"suggested_thresh": null,
"top_n": 1,
"labels": [
"cleansingle",
"twoshot",
"threeshot",
"fourshot",
"group",
"insert",
"na"
]
},
"shot.type.crowd": {
"title": "Shot Type Crowd",
"description": "Determine if the frame has a crowd of people in it",
"type": "classifier",
"activation": "sigmoid",
"suggested_thresh": 0.85,
"top_n": null,
"labels": [
"likelihood"
]
},
"shot.type.over-the-shoulder": {
"title": "Shot Type Over-The-Shoulder",
"description": "Determine if the frame was shot from an over the shoulder perspective",
"type": "classifier",
"activation": "sigmoid",
"suggested_thresh": 0.77,
"top_n": null,
"labels": [
"likelihood"
]
}
},
"probabilities_labels": [
"color.contrast.low",
"color.contrast.neutral",
"color.contrast.high",
"color.key.blue",
"color.key.green",
"color.key.luma",
"color.key.matte",
"color.key.na",
"color.saturation.desaturated",
"color.saturation.neutral",
"color.saturation.saturated",
"color.theory.analagous",
"color.theory.complementary",
"color.theory.monochrome",
"color.tones.blackwhite",
"color.tones.cool",
"color.tones.mixed",
"color.tones.neutral",
"color.tones.warm",
"shot.angle.overhead",
"shot.angle.high-obvious",
"shot.angle.high-subtle",
"shot.angle.level",
"shot.angle.low-subtle",
"shot.angle.low-obvious",
"shot.angle.reverse-overhead",
"shot.angle.na",
"shot.composition.balanced",
"shot.composition.center",
"shot.composition.left",
"shot.composition.right",
"shot.composition.na",
"shot.composition.symmetric.likelihood",
"shot.focus.deep",
"shot.focus.neutral",
"shot.focus.shallow",
"shot.focus.out",
"shot.focus.na",
"shot.framing.extreme-wide",
"shot.framing.wide",
"shot.framing.full",
"shot.framing.medium-wide",
"shot.framing.medium",
"shot.framing.medium-closeup",
"shot.framing.closeup",
"shot.framing.extreme-closeup-face",
"shot.framing.extreme-closeup-face-macro-eyes-dual",
"shot.framing.extreme-closeup-face-macro-eye-single",
"shot.framing.extreme-closeup-face-macro-mouth",
"shot.framing.extreme-closeup-hands",
"shot.framing.extreme-closeup-body",
"shot.framing.extreme-closeup-prop",
"shot.framing.na",
"shot.height.aerial",
"shot.height.elevated",
"shot.height.above-subject",
"shot.height.eye-level",
"shot.height.below-subject",
"shot.height.ground-level",
"shot.height.na",
"shot.lens-size.fisheye-ultrawide",
"shot.lens-size.wide",
"shot.lens-size.medium",
"shot.lens-size.long",
"shot.lens-size.telephoto",
"shot.lens-size.na",
"shot.level.tilted.likelihood",
"shot.lighting.cast.soft",
"shot.lighting.cast.neutral",
"shot.lighting.cast.hard",
"shot.lighting.cast.na",
"shot.lighting.contrast.low",
"shot.lighting.contrast.neutral",
"shot.lighting.contrast.high",
"shot.lighting.contrast.na",
"shot.lighting.direction.ambient",
"shot.lighting.direction.back",
"shot.lighting.direction.front",
"shot.lighting.direction.left",
"shot.lighting.direction.right",
"shot.lighting.direction.top",
"shot.lighting.direction.under",
"shot.lighting.edge.likelihood",
"shot.lighting.silhouette.likelihood",
"shot.location.exterior",
"shot.location.interior",
"shot.location.na",
"shot.time-of-day.dawn-dusk",
"shot.time-of-day.day",
"shot.time-of-day.night",
"shot.time-of-day.sunrise-sunset",
"shot.time-of-day.na",
"shot.type.cleansingle",
"shot.type.twoshot",
"shot.type.threeshot",
"shot.type.fourshot",
"shot.type.group",
"shot.type.insert",
"shot.type.na",
"shot.type.crowd.likelihood",
"shot.type.over-the-shoulder.likelihood"
],
"probabilities_meta": {
"color.contrast.low": {
"title": "Low Contrast Colors",
"aliases": [],
"description": ""
},
"color.contrast.neutral": {
"title": "Neutral Contrast Colors",
"aliases": [],
"description": ""
},
"color.contrast.high": {
"title": "High Contrast Colors",
"aliases": [],
"description": ""
},
"color.key.blue": {
"title": "Blue Screen",
"aliases": [],
"description": ""
},
"color.key.green": {
"title": "Green Screen",
"aliases": [],
"description": ""
},
"color.key.luma": {
"title": "Luma Key",
"aliases": [],
"description": ""
},
"color.key.matte": {
"title": "Matte Key",
"aliases": [],
"description": ""
},
"color.key.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"color.saturation.desaturated": {
"title": "Desaturated",
"aliases": [
"Muted"
],
"description": ""
},
"color.saturation.neutral": {
"title": "Neutral Saturation",
"aliases": [
"neutral colors"
],
"description": ""
},
"color.saturation.saturated": {
"title": "Saturated",
"aliases": [],
"description": ""
},
"color.theory.analagous": {
"title": "Analogous Colors",
"aliases": [],
"description": ""
},
"color.theory.complementary": {
"title": "Complementary Colors",
"aliases": [],
"description": ""
},
"color.theory.monochrome": {
"title": "Monochromatic Colors",
"aliases": [],
"description": ""
},
"color.tones.blackwhite": {
"title": "Black & White",
"aliases": [
"Grayscale"
],
"description": ""
},
"color.tones.cool": {
"title": "Cool Tones",
"aliases": [],
"description": ""
},
"color.tones.mixed": {
"title": "Mixed Tones",
"aliases": [],
"description": ""
},
"color.tones.neutral": {
"title": "Neutral Tones",
"aliases": [],
"description": ""
},
"color.tones.warm": {
"title": "Warm Tones",
"aliases": [],
"description": ""
},
"shot.angle.overhead": {
"title": "Overhead Angle",
"aliases": [
"Bird's Eye View",
"God's Eye View"
],
"description": ""
},
"shot.angle.high-obvious": {
"title": "High Angle",
"aliases": [],
"description": ""
},
"shot.angle.high-subtle": {
"title": "Subtle High Angle",
"aliases": [],
"description": ""
},
"shot.angle.level": {
"title": "Level Angle",
"aliases": [],
"description": ""
},
"shot.angle.low-subtle": {
"title": "Subtle Low Angle",
"aliases": [
"subtle low camera angle"
],
"description": ""
},
"shot.angle.low-obvious": {
"title": "Low Angle",
"aliases": [
"low camera angle"
],
"description": ""
},
"shot.angle.reverse-overhead": {
"title": "Reverse Overhead Angle",
"aliases": [],
"description": ""
},
"shot.angle.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"shot.composition.balanced": {
"title": "Balanced Composition",
"aliases": [],
"description": ""
},
"shot.composition.center": {
"title": "Centered Composition",
"aliases": [],
"description": ""
},
"shot.composition.left": {
"title": "Left-Heavy Composition",
"aliases": [],
"description": ""
},
"shot.composition.right": {
"title": "Right-Heavy Composition",
"aliases": [],
"description": ""
},
"shot.composition.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"shot.composition.symmetric.likelihood": {
"title": "Symmetric Composition",
"aliases": [],
"description": ""
},
"shot.focus.deep": {
"title": "Deep Focus",
"aliases": [
"Deep Depth of Field"
],
"description": ""
},
"shot.focus.neutral": {
"title": "Neutral Focus",
"aliases": [
"Neutral Depth of Field",
"Normal Focus"
],
"description": ""
},
"shot.focus.shallow": {
"title": "Shallow Focus",
"aliases": [
"Shallow Depth of Field",
"Narrow Focus",
"Narrow Depth of Field"
],
"description": ""
},
"shot.focus.out": {
"title": "Out of Focus",
"aliases": [],
"description": ""
},
"shot.focus.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"shot.framing.extreme-wide": {
"title": "Extreme-Wide Framing",
"aliases": [],
"description": ""
},
"shot.framing.wide": {
"title": "Wide Framing",
"aliases": [],
"description": ""
},
"shot.framing.full": {
"title": "Full Framing",
"aliases": [],
"description": ""
},
"shot.framing.medium-wide": {
"title": "Medium-Wide Framing",
"aliases": [
"Cowboy Shot"
],
"description": ""
},
"shot.framing.medium": {
"title": "Medium Framing",
"aliases": [],
"description": ""
},
"shot.framing.medium-closeup": {
"title": "Medium Close-Up Framing",
"aliases": [],
"description": ""
},
"shot.framing.closeup": {
"title": "Close-Up Framing",
"aliases": [
"closeup"
],
"description": ""
},
"shot.framing.extreme-closeup-face": {
"title": "Extreme Close-Up Framing",
"aliases": [],
"description": ""
},
"shot.framing.extreme-closeup-face-macro-eyes-dual": {
"title": "Extreme Close-Up Framing",
"aliases": [],
"description": ""
},
"shot.framing.extreme-closeup-face-macro-eye-single": {
"title": "Extreme Close-Up Framing",
"aliases": [],
"description": ""
},
"shot.framing.extreme-closeup-face-macro-mouth": {
"title": "Extreme Close-Up Framing",
"aliases": [],
"description": ""
},
"shot.framing.extreme-closeup-hands": {
"title": "Extreme Close-Up Framing",
"aliases": [],
"description": ""
},
"shot.framing.extreme-closeup-body": {
"title": "Extreme Close-Up Framing",
"aliases": [],
"description": ""
},
"shot.framing.extreme-closeup-prop": {
"title": "Extreme Close-Up Framing",
"aliases": [],
"description": ""
},
"shot.framing.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"shot.height.aerial": {
"title": "Aerial Framing",
"aliases": [
"Helicopter Shot",
"Drone Shot"
],
"description": ""
},
"shot.height.elevated": {
"title": "Elevated Camera",
"aliases": [],
"description": ""
},
"shot.height.above-subject": {
"title": "Above Subject Camera",
"aliases": [],
"description": ""
},
"shot.height.eye-level": {
"title": "Eye Level Camera",
"aliases": [],
"description": ""
},
"shot.height.below-subject": {
"title": "Below Subject Camera",
"aliases": [],
"description": ""
},
"shot.height.ground-level": {
"title": "Ground Level Camera",
"aliases": [],
"description": ""
},
"shot.height.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"shot.lens-size.fisheye-ultrawide": {
"title": "Fisheye / Ultrawide Lens",
"aliases": [],
"description": ""
},
"shot.lens-size.wide": {
"title": "Wide Lens",
"aliases": [],
"description": ""
},
"shot.lens-size.medium": {
"title": "Medium Lens",
"aliases": [],
"description": ""
},
"shot.lens-size.long": {
"title": "Long Lens",
"aliases": [],
"description": ""
},
"shot.lens-size.telephoto": {
"title": "Telephoto Lens",
"aliases": [
"Zoom Lens"
],
"description": ""
},
"shot.lens-size.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"shot.level.tilted.likelihood": {
"title": "Dutch Angle",
"aliases": [
"Canted shot"
],
"description": ""
},
"shot.lighting.cast.soft": {
"title": "Soft Lighting",
"aliases": [],
"description": ""
},
"shot.lighting.cast.neutral": {
"title": "Neutral Lighting",
"aliases": [],
"description": ""
},
"shot.lighting.cast.hard": {
"title": "Hard Lighting",
"aliases": [
"Harsh Lighting"
],
"description": ""
},
"shot.lighting.cast.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"shot.lighting.contrast.high": {
"title": "High Contrast Lighting",
"aliases": [],
"description": ""
},
"shot.lighting.contrast.low": {
"title": "Low Contrast Lighting",
"aliases": [],
"description": ""
},
"shot.lighting.contrast.neutral": {
"title": "Neutral Contrast Lighting",
"aliases": [],
"description": ""
},
"shot.lighting.contrast.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"shot.lighting.direction.ambient": {
"title": "Ambient Lighting",
"aliases": [],
"description": ""
},
"shot.lighting.direction.back": {
"title": "Back Lighting",
"aliases": [
"Backlit"
],
"description": ""
},
"shot.lighting.direction.front": {
"title": "Front Lighting",
"aliases": [
"Frontlit"
],
"description": ""
},
"shot.lighting.direction.left": {
"title": "Left Lighting",
"aliases": [
"Side Lighting"
],
"description": ""
},
"shot.lighting.direction.right": {
"title": "Right Lighting",
"aliases": [
"Side Lighting"
],
"description": ""
},
"shot.lighting.direction.top": {
"title": "Top Lighting",
"aliases": [
"Top-lit"
],
"description": ""
},
"shot.lighting.direction.under": {
"title": "Bottom Lighting",
"aliases": [],
"description": ""
},
"shot.lighting.edge.likelihood": {
"title": "Edge Lighting",
"aliases": [
"Edge-lit"
],
"description": ""
},
"shot.lighting.silhouette.likelihood": {
"title": "Silhouette Lighting",
"aliases": [],
"description": ""
},
"shot.location.exterior": {
"title": "Exterior",
"aliases": [
"Outdoors"
],
"description": ""
},
"shot.location.interior": {
"title": "Interior",
"aliases": [
"Indoors"
],
"description": ""
},
"shot.location.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"shot.time-of-day.dawn-dusk": {
"title": "Dawn / Dusk",
"aliases": [
"Twilight"
],
"description": ""
},
"shot.time-of-day.day": {
"title": "Day",
"aliases": [
"Daytime"
],
"description": ""
},
"shot.time-of-day.night": {
"title": "Night",
"aliases": [
"Night-time"
],
"description": ""
},
"shot.time-of-day.sunrise-sunset": {
"title": "Sunrise / Sunset",
"aliases": [],
"description": ""
},
"shot.time-of-day.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"shot.type.cleansingle": {
"title": "Clean Single Shot",
"aliases": [
"Cleansingle",
"Single"
],
"description": ""
},
"shot.type.twoshot": {
"title": "Two Shot",
"aliases": [],
"description": ""
},
"shot.type.threeshot": {
"title": "Three Shot",
"aliases": [],
"description": ""
},
"shot.type.fourshot": {
"title": "Four Shot",
"aliases": [],
"description": ""
},
"shot.type.group": {
"title": "Group Shot",
"aliases": [],
"description": ""
},
"shot.type.insert": {
"title": "Insert Shot",
"aliases": [],
"description": ""
},
"shot.type.na": {
"title": "NA",
"aliases": [],
"description": ""
},
"shot.type.crowd.likelihood": {
"title": "Crowd Shot",
"aliases": [],
"description": ""
},
"shot.type.over-the-shoulder.likelihood": {
"title": "Over-The-Shoulder Shot",
"aliases": [
"OTS"
],
"description": ""
}
},
"preprocessing": {
"input_height": 256,
"input_width": 256,
"normalisation_mean": [
0.0,
0.0,
0.0
],
"normalisation_stdev": [
1.0,
1.0,
1.0
],
"interpolation": "bilinear",
"output_format": "RGB",
"resize_method": "squish",
"pad_fill": null
}
}