{ "version": "1.0.0", "model": "CinemaCLIP: A state-of-the-art CLIP model excelling in cinematic analysis while also maintaining robust general-purpose capabilities", "license": "Commercial", "meta": "", "concepts": [], "concepts_meta": {}, "categories": [ "color.contrast", "color.key", "color.saturation", "color.theory", "color.tones", "shot.angle", "shot.composition", "shot.composition.symmetric", "shot.focus", "shot.framing", "shot.height", "shot.lens-size", "shot.level.tilted", "shot.lighting.cast", "shot.lighting.contrast", "shot.lighting.direction", "shot.lighting.edge", "shot.lighting.silhouette", "shot.location", "shot.time-of-day", "shot.type", "shot.type.crowd", "shot.type.over-the-shoulder" ], "categories_meta": { "color.contrast": { "title": "Color Contrast", "description": "Determine the degree of color contrast in the image", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "low", "neutral", "high" ] }, "color.key": { "title": "Color Key", "description": "Determine if image is meant to be keyed", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "blue", "green", "luma", "matte", "na" ] }, "color.saturation": { "title": "Color Saturation", "description": "Determine the overall saturation of the image", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "desaturated", "neutral", "saturated" ] }, "color.theory": { "title": "Color Theory", "description": "Determine the color theory in the image", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "analagous", "complementary", "monochrome" ] }, "color.tones": { "title": "Color Tones", "description": "Determine the overall image color tonality", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "blackwhite", "cool", "mixed", "neutral", "warm" ] }, "shot.angle": { "title": "Shot 
Angle", "description": "Determine the camera angle relative to the subject (Vertical axis / Pitch)", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "overhead", "high-obvious", "high-subtle", "level", "low-subtle", "low-obvious", "reverse-overhead", "na" ] }, "shot.composition": { "title": "Shot Composition", "description": "Determine which part of the image the eye is naturally directed towards", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "balanced", "center", "left", "right", "na" ] }, "shot.composition.symmetric": { "title": "Shot Composition Symmetric", "description": "Determine if the frame composition is symmetric or not", "type": "classifier", "activation": "sigmoid", "suggested_thresh": 0.85, "top_n": null, "labels": [ "likelihood" ] }, "shot.focus": { "title": "Shot Focus", "description": "Determine the nature of lens focus in the frame", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "deep", "neutral", "shallow", "out", "na" ] }, "shot.framing": { "title": "Shot Framing", "description": "Determine the frame size of the shot", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "extreme-wide", "wide", "full", "medium-wide", "medium", "medium-closeup", "closeup", "extreme-closeup-face", "extreme-closeup-face-macro-eyes-dual", "extreme-closeup-face-macro-eye-single", "extreme-closeup-face-macro-mouth", "extreme-closeup-hands", "extreme-closeup-body", "extreme-closeup-prop", "na" ] }, "shot.height": { "title": "Shot Height", "description": "Determine the height of the camera relative to the subject", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "aerial", "elevated", "above-subject", "eye-level", "below-subject", "ground-level", "na" ] }, "shot.lens-size": { "title": "Shot Lens Size", "description": "Determine the 
size of the lens used to shoot the frame", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "fisheye-ultrawide", "wide", "medium", "long", "telephoto", "na" ] }, "shot.level.tilted": { "title": "Shot Level Tilted", "description": "Determine the angle of the camera relative to the horizon", "type": "classifier", "activation": "sigmoid", "suggested_thresh": 0.5, "top_n": null, "labels": [ "likelihood" ] }, "shot.lighting.cast": { "title": "Shot Lighting Cast", "description": "Determine the harshness of shadows and lit areas in the frame", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "soft", "neutral", "hard", "na" ] }, "shot.lighting.contrast": { "title": "Shot Lighting Contrast", "description": "Determine the overall lighting contrast in the frame", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "low", "neutral", "high", "na" ] }, "shot.lighting.direction": { "title": "Shot Lighting Direction", "description": "Determine the direction of the main lighting source", "type": "classifier", "activation": "sigmoid", "suggested_thresh": 0.67, "top_n": null, "labels": [ "ambient", "back", "front", "left", "right", "top", "under" ] }, "shot.lighting.edge": { "title": "Shot Lighting Edge", "description": "Determine if the frame has edge lighting", "type": "classifier", "activation": "sigmoid", "suggested_thresh": 0.8, "top_n": null, "labels": [ "likelihood" ] }, "shot.lighting.silhouette": { "title": "Shot Lighting Silhouette", "description": "Determine if the frame has silhouette lighting", "type": "classifier", "activation": "sigmoid", "suggested_thresh": 0.82, "top_n": null, "labels": [ "likelihood" ] }, "shot.location": { "title": "Shot Location", "description": "Determine whether the frame was shot interior / exterior", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ 
"exterior", "interior", "na" ] }, "shot.time-of-day": { "title": "Shot Time-Of-Day", "description": "Determine the time of day the frame takes place in", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "dawn-dusk", "day", "night", "sunrise-sunset", "na" ] }, "shot.type": { "title": "Shot Type", "description": "Determine the number of subjects in the frame", "type": "classifier", "activation": "softmax", "suggested_thresh": null, "top_n": 1, "labels": [ "cleansingle", "twoshot", "threeshot", "fourshot", "group", "insert", "na" ] }, "shot.type.crowd": { "title": "Shot Type Crowd", "description": "Determine if the frame has a crowd of people in it", "type": "classifier", "activation": "sigmoid", "suggested_thresh": 0.85, "top_n": null, "labels": [ "likelihood" ] }, "shot.type.over-the-shoulder": { "title": "Shot Type Over-The-Shoulder", "description": "Determine if the frame was shot from an over the shoulder perspective", "type": "classifier", "activation": "sigmoid", "suggested_thresh": 0.77, "top_n": null, "labels": [ "likelihood" ] } }, "probabilities_labels": [ "color.contrast.low", "color.contrast.neutral", "color.contrast.high", "color.key.blue", "color.key.green", "color.key.luma", "color.key.matte", "color.key.na", "color.saturation.desaturated", "color.saturation.neutral", "color.saturation.saturated", "color.theory.analagous", "color.theory.complementary", "color.theory.monochrome", "color.tones.blackwhite", "color.tones.cool", "color.tones.mixed", "color.tones.neutral", "color.tones.warm", "shot.angle.overhead", "shot.angle.high-obvious", "shot.angle.high-subtle", "shot.angle.level", "shot.angle.low-subtle", "shot.angle.low-obvious", "shot.angle.reverse-overhead", "shot.angle.na", "shot.composition.balanced", "shot.composition.center", "shot.composition.left", "shot.composition.right", "shot.composition.na", "shot.composition.symmetric.likelihood", "shot.focus.deep", "shot.focus.neutral", 
"shot.focus.shallow", "shot.focus.out", "shot.focus.na", "shot.framing.extreme-wide", "shot.framing.wide", "shot.framing.full", "shot.framing.medium-wide", "shot.framing.medium", "shot.framing.medium-closeup", "shot.framing.closeup", "shot.framing.extreme-closeup-face", "shot.framing.extreme-closeup-face-macro-eyes-dual", "shot.framing.extreme-closeup-face-macro-eye-single", "shot.framing.extreme-closeup-face-macro-mouth", "shot.framing.extreme-closeup-hands", "shot.framing.extreme-closeup-body", "shot.framing.extreme-closeup-prop", "shot.framing.na", "shot.height.aerial", "shot.height.elevated", "shot.height.above-subject", "shot.height.eye-level", "shot.height.below-subject", "shot.height.ground-level", "shot.height.na", "shot.lens-size.fisheye-ultrawide", "shot.lens-size.wide", "shot.lens-size.medium", "shot.lens-size.long", "shot.lens-size.telephoto", "shot.lens-size.na", "shot.level.tilted.likelihood", "shot.lighting.cast.soft", "shot.lighting.cast.neutral", "shot.lighting.cast.hard", "shot.lighting.cast.na", "shot.lighting.contrast.low", "shot.lighting.contrast.neutral", "shot.lighting.contrast.high", "shot.lighting.contrast.na", "shot.lighting.direction.ambient", "shot.lighting.direction.back", "shot.lighting.direction.front", "shot.lighting.direction.left", "shot.lighting.direction.right", "shot.lighting.direction.top", "shot.lighting.direction.under", "shot.lighting.edge.likelihood", "shot.lighting.silhouette.likelihood", "shot.location.exterior", "shot.location.interior", "shot.location.na", "shot.time-of-day.dawn-dusk", "shot.time-of-day.day", "shot.time-of-day.night", "shot.time-of-day.sunrise-sunset", "shot.time-of-day.na", "shot.type.cleansingle", "shot.type.twoshot", "shot.type.threeshot", "shot.type.fourshot", "shot.type.group", "shot.type.insert", "shot.type.na", "shot.type.crowd.likelihood", "shot.type.over-the-shoulder.likelihood" ], "probabilities_meta": { "color.contrast.low": { "title": "Low Contrast Colors", "aliases": [], "description": "" 
}, "color.contrast.neutral": { "title": "Neutral Contrast Colors", "aliases": [], "description": "" }, "color.contrast.high": { "title": "High Contrast Colors", "aliases": [], "description": "" }, "color.key.blue": { "title": "Blue Screen", "aliases": [], "description": "" }, "color.key.green": { "title": "Green Screen", "aliases": [], "description": "" }, "color.key.luma": { "title": "Luma Key", "aliases": [], "description": "" }, "color.key.matte": { "title": "Matte Key", "aliases": [], "description": "" }, "color.key.na": { "title": "NA", "aliases": [], "description": "" }, "color.saturation.desaturated": { "title": "Desaturated", "aliases": [ "Muted" ], "description": "" }, "color.saturation.neutral": { "title": "Neutral Saturation", "aliases": [ "neutral colors" ], "description": "" }, "color.saturation.saturated": { "title": "Saturated", "aliases": [], "description": "" }, "color.theory.analagous": { "title": "Analogous Colors", "aliases": [], "description": "" }, "color.theory.complementary": { "title": "Complementary Colors", "aliases": [], "description": "" }, "color.theory.monochrome": { "title": "Monochromatic Colors", "aliases": [], "description": "" }, "color.tones.blackwhite": { "title": "Black & White", "aliases": [ "Grayscale" ], "description": "" }, "color.tones.cool": { "title": "Cool Tones", "aliases": [], "description": "" }, "color.tones.mixed": { "title": "Mixed Tones", "aliases": [], "description": "" }, "color.tones.neutral": { "title": "Neutral Tones", "aliases": [], "description": "" }, "color.tones.warm": { "title": "Warm Tones", "aliases": [], "description": "" }, "shot.angle.overhead": { "title": "Overhead Angle", "aliases": [ "Bird's Eye View", "God's Eye View" ], "description": "" }, "shot.angle.high-obvious": { "title": "High Angle", "aliases": [], "description": "" }, "shot.angle.high-subtle": { "title": "Subtle High Angle", "aliases": [], "description": "" }, "shot.angle.level": { "title": "Level Angle", "aliases": [], 
"description": "" }, "shot.angle.low-subtle": { "title": "Subtle Low Angle", "aliases": [ "subtle low camera angle" ], "description": "" }, "shot.angle.low-obvious": { "title": "Low Angle", "aliases": [ "low camera angle" ], "description": "" }, "shot.angle.reverse-overhead": { "title": "Reverse Overhead Angle", "aliases": [], "description": "" }, "shot.angle.na": { "title": "NA", "aliases": [], "description": "" }, "shot.composition.balanced": { "title": "Balanced Composition", "aliases": [], "description": "" }, "shot.composition.center": { "title": "Centered Composition", "aliases": [], "description": "" }, "shot.composition.left": { "title": "Left-Heavy Composition", "aliases": [], "description": "" }, "shot.composition.right": { "title": "Right-Heavy Composition", "aliases": [], "description": "" }, "shot.composition.na": { "title": "NA", "aliases": [], "description": "" }, "shot.composition.symmetric.likelihood": { "title": "Symmetric Composition", "aliases": [], "description": "" }, "shot.focus.deep": { "title": "Deep Focus", "aliases": [ "Deep Depth of Field" ], "description": "" }, "shot.focus.neutral": { "title": "Neutral Focus", "aliases": [ "Neutral Depth of Field", "Normal Focus" ], "description": "" }, "shot.focus.shallow": { "title": "Shallow Focus", "aliases": [ "Shallow Depth of Field", "Narrow Focus", "Narrow Depth of Field" ], "description": "" }, "shot.focus.out": { "title": "Out of Focus", "aliases": [], "description": "" }, "shot.focus.na": { "title": "NA", "aliases": [], "description": "" }, "shot.framing.extreme-wide": { "title": "Extreme-Wide Framing", "aliases": [], "description": "" }, "shot.framing.wide": { "title": "Wide Framing", "aliases": [], "description": "" }, "shot.framing.full": { "title": "Full Framing", "aliases": [], "description": "" }, "shot.framing.medium-wide": { "title": "Medium-Wide Framing", "aliases": [ "Cowboy Shot" ], "description": "" }, "shot.framing.medium": { "title": "Medium Framing", "aliases": [], 
"description": "" }, "shot.framing.medium-closeup": { "title": "Medium Close-Up Framing", "aliases": [], "description": "" }, "shot.framing.closeup": { "title": "Close-Up Framing", "aliases": [ "closeup" ], "description": "" }, "shot.framing.extreme-closeup-face": { "title": "Extreme Close-Up Framing", "aliases": [], "description": "" }, "shot.framing.extreme-closeup-face-macro-eyes-dual": { "title": "Extreme Close-Up Framing", "aliases": [], "description": "" }, "shot.framing.extreme-closeup-face-macro-eye-single": { "title": "Extreme Close-Up Framing", "aliases": [], "description": "" }, "shot.framing.extreme-closeup-face-macro-mouth": { "title": "Extreme Close-Up Framing", "aliases": [], "description": "" }, "shot.framing.extreme-closeup-hands": { "title": "Extreme Close-Up Framing", "aliases": [], "description": "" }, "shot.framing.extreme-closeup-body": { "title": "Extreme Close-Up Framing", "aliases": [], "description": "" }, "shot.framing.extreme-closeup-prop": { "title": "Extreme Close-Up Framing", "aliases": [], "description": "" }, "shot.framing.na": { "title": "NA", "aliases": [], "description": "" }, "shot.height.aerial": { "title": "Aerial Framing", "aliases": [ "Helicopter Shot", "Drone Shot" ], "description": "" }, "shot.height.elevated": { "title": "Elevated Camera", "aliases": [], "description": "" }, "shot.height.above-subject": { "title": "Above Subject Camera", "aliases": [], "description": "" }, "shot.height.eye-level": { "title": "Eye Level Camera", "aliases": [], "description": "" }, "shot.height.below-subject": { "title": "Below Subject Camera", "aliases": [], "description": "" }, "shot.height.ground-level": { "title": "Ground Level Camera", "aliases": [], "description": "" }, "shot.height.na": { "title": "NA", "aliases": [], "description": "" }, "shot.lens-size.fisheye-ultrawide": { "title": "Fisheye / Ultrawide Lens", "aliases": [], "description": "" }, "shot.lens-size.wide": { "title": "Wide Lens", "aliases": [], "description": "" }, 
"shot.lens-size.medium": { "title": "Medium Lens", "aliases": [], "description": "" }, "shot.lens-size.long": { "title": "Long Lens", "aliases": [], "description": "" }, "shot.lens-size.telephoto": { "title": "Telephoto Lens", "aliases": [ "Zoom Lens" ], "description": "" }, "shot.lens-size.na": { "title": "NA", "aliases": [], "description": "" }, "shot.level.tilted.likelihood": { "title": "Dutch Angle", "aliases": [ "Canted shot" ], "description": "" }, "shot.lighting.cast.soft": { "title": "Soft Lighting", "aliases": [], "description": "" }, "shot.lighting.cast.neutral": { "title": "Neutral Lighting", "aliases": [], "description": "" }, "shot.lighting.cast.hard": { "title": "Hard Lighting", "aliases": [ "Harsh Lighting" ], "description": "" }, "shot.lighting.cast.na": { "title": "NA", "aliases": [], "description": "" }, "shot.lighting.contrast.high": { "title": "High Contrast Lighting", "aliases": [], "description": "" }, "shot.lighting.contrast.low": { "title": "Low Contrast Lighting", "aliases": [], "description": "" }, "shot.lighting.contrast.neutral": { "title": "Neutral Contrast Lighting", "aliases": [], "description": "" }, "shot.lighting.contrast.na": { "title": "NA", "aliases": [], "description": "" }, "shot.lighting.direction.ambient": { "title": "Ambient Lighting", "aliases": [], "description": "" }, "shot.lighting.direction.back": { "title": "Back Lighting", "aliases": [ "Backlit" ], "description": "" }, "shot.lighting.direction.front": { "title": "Front Lighting", "aliases": [ "Frontlit" ], "description": "" }, "shot.lighting.direction.left": { "title": "Left Lighting", "aliases": [ "Side Lighting" ], "description": "" }, "shot.lighting.direction.right": { "title": "Right Lighting", "aliases": [ "Side Lighting" ], "description": "" }, "shot.lighting.direction.top": { "title": "Top Lighting", "aliases": [ "Top-lit" ], "description": "" }, "shot.lighting.direction.under": { "title": "Bottom Lighting", "aliases": [], "description": "" }, 
"shot.lighting.edge.likelihood": { "title": "Edge Lighting", "aliases": [ "Edge-lit" ], "description": "" }, "shot.lighting.silhouette.likelihood": { "title": "Silhouette Lighting", "aliases": [], "description": "" }, "shot.location.exterior": { "title": "Exterior", "aliases": [ "Outdoors" ], "description": "" }, "shot.location.interior": { "title": "Interior", "aliases": [ "Indoors" ], "description": "" }, "shot.location.na": { "title": "NA", "aliases": [], "description": "" }, "shot.time-of-day.dawn-dusk": { "title": "Dawn / Dusk", "aliases": [ "Twilight" ], "description": "" }, "shot.time-of-day.day": { "title": "Day", "aliases": [ "Daytime" ], "description": "" }, "shot.time-of-day.night": { "title": "Night", "aliases": [ "Night-time" ], "description": "" }, "shot.time-of-day.sunrise-sunset": { "title": "Sunrise / Sunset", "aliases": [], "description": "" }, "shot.time-of-day.na": { "title": "NA", "aliases": [], "description": "" }, "shot.type.cleansingle": { "title": "Clean Single Shot", "aliases": [ "Cleansingle", "Single" ], "description": "" }, "shot.type.twoshot": { "title": "Two Shot", "aliases": [], "description": "" }, "shot.type.threeshot": { "title": "Three Shot", "aliases": [], "description": "" }, "shot.type.fourshot": { "title": "Four Shot", "aliases": [], "description": "" }, "shot.type.group": { "title": "Group Shot", "aliases": [], "description": "" }, "shot.type.insert": { "title": "Insert Shot", "aliases": [], "description": "" }, "shot.type.na": { "title": "NA", "aliases": [], "description": "" }, "shot.type.crowd.likelihood": { "title": "Crowd Shot", "aliases": [], "description": "" }, "shot.type.over-the-shoulder.likelihood": { "title": "Over-The-Shoulder Shot", "aliases": [ "OTS" ], "description": "" } }, "preprocessing": { "input_height": 256, "input_width": 256, "normalisation_mean": [ 0.0, 0.0, 0.0 ], "normalisation_stdev": [ 1.0, 1.0, 1.0 ], "interpolation": "bilinear", "output_format": "RGB", "resize_method": "squish", "pad_fill": 
null } }