Spaces:
Sleeping
Sleeping
| """ | |
Amazon Trailer Inspector — app.py
HuggingFace Spaces · FastAPI · Google Gemini Vision API
REST API that accepts 6 labeled images and runs all 6 aspect inspections
in parallel, returning a structured JSON inspection report.
| Endpoint: POST /inspect | |
| """ | |
| import base64 | |
| import concurrent.futures | |
| import io | |
| import json | |
| import os | |
| import random | |
| import re | |
| import time | |
| import traceback | |
| from typing import Optional | |
| import requests | |
| import uvicorn | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import JSONResponse | |
| from PIL import Image | |
| from pydantic import BaseModel, Field | |
# ──────────────────────────────────────────────────────────────────────────────
# GEMINI MODELS (tried in order — first success wins)
# ──────────────────────────────────────────────────────────────────────────────
# analyze_one() walks this list front to back and returns the first model
# whose reply parses and validates, so list order expresses preference.
MODELS = [
    "gemini-2.0-flash",  # Primary — best quality, fast, free tier
    "gemini-2.0-flash-lite",  # Fallback 1 — lighter 2.0 variant, free tier
    "gemini-2.5-flash-lite",  # Fallback 2 — 2.5 series lite, free tier
]
# Gemini API base URL; call_gemini() appends "/{model}:generateContent" to it.
GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta/models"
# ──────────────────────────────────────────────────────────────────────────────
# ASPECT PROMPTS
# ──────────────────────────────────────────────────────────────────────────────
| PROMPTS = { | |
| "front": """You are a precise visual inspector for Amazon trailer fleets. | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| STEP 1 β IMAGE VALIDATION (do this BEFORE anything else) | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| Determine whether this is a valid FRONT LEFT or FRONT RIGHT image of an Amazon trailer. | |
| A VALID front-aspect image shows the trailer from the FRONT or FRONT-CORNER area: | |
| - The main subject is the SIDE PANEL of the trailer β the large blue/white body with branding | |
| - The image is shot from the FRONT HALF looking toward the rear, OR from the front corner | |
| - The rear dual-axle truck tires are NOT visible (or are tiny/distant in the far background) | |
| - Components like sensors, GPS, Prime logo, and the green Trailer ID label are the focus | |
| An INVALID image is one where: | |
| - The trailer's REAR DUAL-AXLE TRUCK TIRES are LARGE, PROMINENT, and CLEARLY VISIBLE | |
| - These are specifically: large inflated rubber truck tires on the REAR BOGIE AXLES, | |
| appearing as 4 large grouped tires (2 axles Γ 2 tires each = 4 tires together) | |
| at the REAR UNDERCARRIAGE of the trailer body | |
| - They appear in the foreground or mid-frame, bottom-center of the image, large in size | |
| - The shot is clearly taken from behind or the rear half of the trailer | |
| β οΈ CRITICAL β DO NOT CONFUSE THESE WITH REAR TIRES: | |
| β LANDING GEAR / SUPPORT LEGS: The retractable metal support struts/legs under the | |
| front of the trailer when it is parked (not attached to a truck). These are METAL | |
| POLES/STRUTS, not rubber tires. They hold up the front of a parked trailer. | |
| β DO NOT flag landing gear as rear tires. | |
| β SINGLE FRONT STEER AXLE: If a truck cab is attached, its single front steering wheel | |
| (one tire on each side, much smaller than rear bogie) is NOT the rear dual axle. | |
| β DO NOT flag single front steer wheels as rear dual-axle tires. | |
| β TRAILER DOLLIES / SMALL WHEELS: Any small wheels used for maneuvering a parked | |
| trailer are not the rear axle tires. | |
| POSITIVE IDENTIFICATION β only flag as INVALID if you see ALL of these: | |
| β Large inflated RUBBER TRUCK TIRES (clearly rubber, round, with tread) | |
| β DUAL AXLE configuration β two sets of large tires grouped together (4 tires total) | |
| β Located at the REAR of the trailer body / rear undercarriage | |
| β LARGE in the frame β prominent, not a tiny distant element | |
| DECISION: | |
| β If rear dual-axle RUBBER TRUCK TIRES (4 grouped) are LARGE AND PROMINENT in frame: | |
| Set image_valid = "missing", Set ALL other components to "missing" | |
| β In ALL other cases (no tires, landing gear visible, single wheels, distant tires, etc.): | |
| Set image_valid = "detected" | |
| Proceed to STEP 2 below. | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| STEP 2 β COMPONENT DETECTION (only if image_valid = "detected") | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| This image shows the FRONT-LEFT or FRONT-RIGHT corner of an Amazon trailer β the rear corner | |
| area is visible from the side/front angle showing the side panels and rear corner post. | |
| Carefully locate all 4 components described below. | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| COMPONENT 1 β SENSORS | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| WHERE: On the REAR DOOR FACE or the lower area of the trailer near the rear corner. | |
| Look at the lower-middle or lower-left area of the rear panel visible in this image. | |
| WHAT: Exactly TWO metal plates shaped like DIAMONDS (rotated squares / rhombuses). | |
| - Each plate has diagonal cross-bracing visible on its face (an X pattern of raised ridges) | |
| - They are mounted SIDE BY SIDE, touching or close together | |
| - Color: beige, gold, tan, or silver-gray metallic | |
| - Size: roughly the size of a dinner plate each | |
| - They appear as a PAIR β two identical diamond shapes next to each other | |
| - May be on the rear face of the trailer or on the lower panel near the door area | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| COMPONENT 2 β GPS_DEVICE | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| β οΈ THIS IS THE MOST COMMONLY MISSED COMPONENT β READ CAREFULLY β οΈ | |
| WHERE: At the VERY TOP of the REAR CORNER POST. The corner post is the narrow vertical | |
| aluminum pillar/column at the rear corner of the trailer β where the SIDE WALL meets | |
| the rear face. Look at the TOP of this post, right at or just below the ROOF LINE. | |
| CRITICAL SEARCH STRATEGY β do this before answering: | |
| 1. First locate the GREEN TRAILER ID STRIP (component 4 β the lime-green vertical label) | |
| 2. Look DIRECTLY ABOVE that green strip, on the SAME vertical corner post | |
| 3. Search for a small white or light-gray rectangular box mounted there | |
| 4. Also check the VERY TOP CORNER where the corner post meets the roof rail | |
| WHAT IT LOOKS LIKE: | |
| - A small white, off-white, or light gray rectangular electronic housing/box | |
| - Roughly the size of a large book or small tablet (wider than tall, or square) | |
| - Has a visible FRONT FACE β may show a small digital display, sensor window, or LED | |
| - Mounted FLUSH to or BRACKETED onto the corner post or roof/top rail junction | |
| CONFIDENCE GUIDANCE: If you see ANY small rectangular box or housing at the top of the | |
| corner post, even if partially visible or unclear, mark "detected". Only mark "missing" | |
| if you can clearly confirm there is NO box/device at the top of the corner post. | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| COMPONENT 3 β PRIME_LOGO | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| WHERE: On the main side panels of the trailer body β the large blue (or white) surface. | |
| WHAT: Any Amazon Prime branding β ANY of the following counts: | |
| - The word "prime" in white letters on the trailer body | |
| - The word "amazon" with or without the arrow/smile logo | |
| - The Amazon arrow/smile swoosh logo alone (curved arrow shape) | |
| - Any partial visibility of the above β even one letter or partial arrow | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| COMPONENT 4 β TRAILER_ID | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| WHERE: On the REAR VERTICAL CORNER POST β the narrow vertical aluminum pillar/column | |
| at the rear corner of the trailer, where the side panel meets the rear face. | |
| WHAT: A fluorescent GREEN or LIME-GREEN vertical label strip affixed to this corner post. | |
| - The strip runs VERTICALLY down a section of the corner post | |
| - Displays an alphanumeric code running vertically: e.g. "SV2602705", "AZNG..." | |
| - The green background color is very distinctive β bright lime-green | |
| - Located roughly at mid-height to upper-middle of the corner post | |
| IMPORTANT: Even if only PART of the green strip is visible β still mark "detected". | |
| Reply ONLY with a single flat JSON object β no extra text, no markdown fences, no nested objects: | |
| { | |
| "image_valid": "detected", | |
| "sensors": "missing", | |
| "gps_device": "missing", | |
| "prime_logo": "detected", | |
| "trailer_id": "detected" | |
| } | |
| Each value must be exactly "detected" or "missing". Nothing else.""", | |
| "rear": """You are a precise visual inspector for Amazon trailer fleets. | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| STEP 1 β IMAGE VALIDATION: IS THIS A VALID REAR-SIDE VIEW? | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| Your FIRST task is to determine whether this image shows the REAR HALF / REAR SIDE of an Amazon | |
| trailer. This is critical β FRONT-SIDE views of the trailer must be rejected. | |
| THE SINGLE MOST RELIABLE RULE β TIRE PROXIMITY TEST: | |
| Look at the BOTTOM of the image, near the side of the trailer CLOSEST TO THE CAMERA: | |
| REAR-SIDE IMAGE (VALID): | |
| β The trailer's REAR DUAL-AXLE TIRES are on the NEAR SIDE β CLOSE to the camera, | |
| appearing LARGE and PROMINENT in the lower portion of the image. | |
| β "Rear dual axle" = a GROUP of 4 large rubber truck tires (2 axles Γ 2 tires each), | |
| all packed together at the rear undercarriage. | |
| β The trailer's REAR DOORS / REAR FACE is also visible in this view. | |
| FRONT-SIDE IMAGE (INVALID β must reject): | |
| β The area CLOSEST TO THE CAMERA shows NO LARGE TIRES β only: | |
| β’ Metal support legs / landing gear struts | |
| β’ Open undercarriage with no dominant tire group visible on the near side | |
| β The rear dual-axle tires, IF visible at all, appear SMALL and FAR AWAY. | |
| VALIDATION DECISION: | |
| Q1: Are large rubber truck tires (dual-axle group) visible CLOSE TO THE CAMERA? | |
| β YES β image_valid = "detected" β proceed to STEP 2 | |
| β NO β image_valid = "missing", set ALL other components to "missing", STOP. | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| STEP 2 β COMPONENT DETECTION (only if image_valid = "detected") | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| COMPONENT 1 β SIDE SKIRT / FIN | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| WHERE: Directly below the trailer body floor, along the BOTTOM SIDE of the trailer. | |
| Just below the horizontal red-and-white reflective tape stripe at the trailer bottom. | |
| WHAT: A flat, solid rectangular panel hanging vertically below the trailer chassis. | |
| - Fills the gap between the trailer floor underside and the ground level, beside the axles | |
| - May be dark gray, charcoal, black, silver, or metallic in color | |
| - IN SHADOW: look for its RECTANGULAR OUTLINE and STRAIGHT EDGES instead of color | |
| - Look for a SOLID FLAT SURFACE blocking the view through to the undercarriage | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| COMPONENT 2 β EDGE KIT | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| WHERE: On the SIDE SURFACE of the trailer body, near the REAR END. | |
| Located at roughly mid-to-upper height on the side panel, just before the rear corner. | |
| WHAT: | |
| - A BODY-COLORED rectangular panel β the SAME COLOR as the trailer body | |
| - Has VISIBLE BOLT HOLES or screw holes (several dots/holes visible in the panel) | |
| - Taller than it is wide β roughly portrait-orientation rectangle | |
| - Mounted flush against the trailer side near the rear-door corner post area | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| DETECTION SCOPE β THIS IMAGE ONLY | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| - You are looking at ONE side of the trailer. Inspect what is VISIBLE IN THIS IMAGE. | |
| - The side skirt/fin will be visible along the bottom of the trailer side in frame. | |
| - The edge kit will be visible near the rear corner post area on the side in frame. | |
| - Mark "detected" if the component IS PRESENT anywhere in this image. | |
| - Mark "missing" ONLY if you have looked carefully and it is genuinely absent. | |
| β οΈ IMPORTANT: The side skirt may appear LIGHT GRAY, SILVER, or METALLIC in bright light, | |
| or DARK / IN SHADOW depending on lighting. Look for the rectangular flat panel shape, | |
| not just a specific color. | |
| β οΈ IMPORTANT: The edge kit is a body-colored (BLUE or matching trailer color) rectangular | |
| panel near the rear corner with visible bolt/screw holes. It sits flush against the side | |
| panel near the rear corner post, roughly upper-half height. | |
| Reply ONLY with a single flat JSON object β no extra text, no markdown fences, no nested objects: | |
| { | |
| "image_valid": "detected", | |
| "side_skirts": "detected", | |
| "edge_kit": "detected" | |
| } | |
| Each value must be exactly "detected" or "missing". Nothing else.""", | |
| "inside": """You are a precise visual inspector for Amazon trailer fleets. | |
| Examine this image of an Amazon trailer interior. | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| STEP 1 β DOOR STATUS CHECK (do this FIRST) | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| DOORS ARE OPEN if you can see INTO the trailer cargo area: | |
| - A long dark tunnel/corridor extending into the trailer depth | |
| - Corrugated ribbed metal side walls running into the distance | |
| - A wooden or composite floor surface at the entrance threshold | |
| DOORS ARE CLOSED if the image shows flat door panel surfaces as the main subject. | |
| If doors are CLOSED β set BOTH components to "missing" | |
| If doors are OPEN β proceed to STEP 2. | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| STEP 2 β COMPONENT DETECTION (only if doors are OPEN) | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| COMPONENT 1 β SIDE_GUARDS | |
| WHERE: Along the LEFT and RIGHT interior side walls of the trailer cargo area. | |
| WHAT: Corrugated or ribbed protective panels lining the inside walls β typically silver/gray | |
| metal with horizontal or diagonal ribbing/corrugation. They run from near the floor upward | |
| along both interior side walls. Mark "detected" if visible on at least one side wall. | |
| COMPONENT 2 β FLOORING | |
| WHERE: At the BOTTOM of the trailer interior opening β the floor surface at the entrance. | |
| WHAT: Wooden plank flooring β individual wooden planks running parallel lengthwise. | |
| - Color: brown, amber, tan, or light brown wood tone | |
| - The planks span the full width of the trailer floor | |
| - ONLY mark "detected" if you can clearly see the brown wooden plank surface INSIDE the trailer | |
| - Do NOT count asphalt/concrete ground outside the trailer | |
| Reply ONLY with a single flat JSON object β no extra text, no markdown fences, no nested objects: | |
| { | |
| "side_guards": "detected", | |
| "flooring": "missing" | |
| } | |
| Each value must be exactly "detected" or "missing". Nothing else.""", | |
| "door": """You are a precise visual inspector for Amazon trailer fleets. | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| STEP 1 β IMAGE VALIDATION (do this BEFORE anything else) | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| A VALID door-details image has ALL of the following: | |
| β The REAR SWING DOORS of the trailer are the main subject β both door panels visible face-on | |
| β The doors are CLOSED (flat white/gray/metal door panels visible β NOT an open interior view) | |
| β The BOTTOM of the door frame is visible | |
| β The image is taken straight-on or slightly angled from the REAR of the trailer | |
| An INVALID image: | |
| - A FRONT or SIDE view of the trailer | |
| - Doors are OPEN | |
| - Not showing the rear swing door panels as the main subject | |
| - Bottom of door frame is cut off | |
| DECISION: | |
| β If NOT a valid door-details image: | |
| Set image_valid = "missing", Set BOTH other components to "missing" | |
| β If IS a valid closed rear-door image: | |
| Set image_valid = "detected", Proceed to STEP 2. | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| STEP 2 β COMPONENT DETECTION (only if image_valid = "detected") | |
| ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| COMPONENT 1 β LATCH_KIT_LASH_LINKS | |
| Door securing hardware β ANY of the following: | |
| a) LATCH KIT: Metal door latching/locking mechanism β horizontal latch bars, vertical locking | |
| rods, T-handles, cam locks, keeper plates, door handle assemblies, lock rod brackets, | |
| or any hardware that keeps the door closed. | |
| b) LASH LINKS: Metal chain links, D-rings, anchor hooks, or tie-down rings on door/inner frame. | |
| Mark "detected" if ANY latch hardware OR lash link hardware is visible. | |
| COMPONENT 2 β GROTE_LED_LIGHTS | |
| LED light fixtures at the bottom of the door frame: | |
| - Look specifically at the BOTTOM CORNERS of the rear door frame / underside of the trailer | |
| - Grote lights appear as rectangular or square metal housing boxes (silver, black, or chrome) | |
| with LED lenses inside β typically red but may be white or amber | |
| - They are mounted at the lower edge of the door frame, one on each bottom corner | |
| - Even if only one side is visible, mark "detected" | |
| - Do NOT count reflective tape or passive reflectors β only active LED light fixtures | |
| Reply ONLY with a single flat JSON object β no extra text, no markdown fences, no nested objects: | |
| { | |
| "image_valid": "detected", | |
| "latch_kit_lash_links": "detected", | |
| "grote_led_lights": "missing" | |
| } | |
| Each value must be exactly "detected" or "missing". Nothing else.""" | |
| } | |
# ──────────────────────────────────────────────────────────────────────────────
# ASPECT METADATA
# ──────────────────────────────────────────────────────────────────────────────
# JSON keys each aspect's prompt asks the model to return. validate_result()
# rejects a reply that lacks any of the keys listed for its aspect.
ASPECT_KEYS = {
    "front": ["image_valid", "sensors", "gps_device", "prime_logo", "trailer_id"],
    "rear": ["image_valid", "side_skirts", "edge_kit"],
    "inside": ["side_guards", "flooring"],
    "door": ["image_valid", "latch_kit_lash_links", "grote_led_lights"],
}
# Ordering used by merge_results() to keep the highest confidence seen;
# unknown or empty confidence values rank lowest (0).
CONF_RANK = {"high": 3, "medium": 2, "low": 1, "": 0}
# Valid label names accepted by the API
VALID_LABELS = {"front_right", "front_left", "rear_right", "rear_left", "inside", "door"}
# Map each label to its inspection aspect (left/right labels share one prompt)
LABEL_TO_ASPECT = {
    "front_right": "front",
    "front_left": "front",
    "rear_right": "rear",
    "rear_left": "rear",
    "inside": "inside",
    "door": "door",
}
# ──────────────────────────────────────────────────────────────────────────────
# GEMINI API CALL
# ──────────────────────────────────────────────────────────────────────────────
def call_gemini(b64_image: str, prompt: str, model: str, api_key: str) -> str:
    """
    Call the Google Gemini vision API for one image and return the raw text.

    Args:
        b64_image: Base64-encoded image bytes, sent inline as ``image/jpeg``.
        prompt: Inspection prompt sent after the image.
        model: Gemini model name, appended to GEMINI_API_BASE.
        api_key: Gemini API key.

    Returns:
        The text of the first part of the first candidate.

    Raises:
        requests.HTTPError: on a non-2xx API response.
        ValueError: when the response body carries no candidate text.
    """
    url = f"{GEMINI_API_BASE}/{model}:generateContent"
    # The key travels in a header rather than the query string: requests puts
    # the full request URL into its exception messages, and callers copy those
    # messages into error strings, which would otherwise expose the key.
    headers = {"x-goog-api-key": api_key}
    payload = {
        "system_instruction": {
            "parts": [{
                "text": (
                    "You are a JSON-only API for trailer inspection. "
                    "You MUST respond with a single valid flat JSON object and absolutely "
                    "nothing else β no explanation, no preamble, no markdown fences, "
                    "no reasoning text, no nested objects. "
                    "Every value must be exactly the string \"detected\" or \"missing\". "
                    "Start your response with '{' and end with '}'."
                )
            }]
        },
        "contents": [{
            "parts": [
                {
                    "inline_data": {
                        "mime_type": "image/jpeg",
                        "data": b64_image,
                    }
                },
                {
                    "text": prompt,
                }
            ]
        }],
        "generationConfig": {
            "temperature": 0.05,
            "maxOutputTokens": 120,
        },
    }
    resp = requests.post(url, headers=headers, json=payload, timeout=45)
    resp.raise_for_status()
    data = resp.json()
    try:
        return data["candidates"][0]["content"]["parts"][0]["text"]
    except (KeyError, IndexError, TypeError) as exc:
        # A 200 response can still lack candidate text; surface whatever
        # feedback the API attached instead of a bare KeyError/IndexError.
        feedback = data.get("promptFeedback") if isinstance(data, dict) else None
        raise ValueError(
            f"Gemini response contained no candidate text (promptFeedback={feedback})"
        ) from exc
# ──────────────────────────────────────────────────────────────────────────────
# IMAGE HELPERS
# ──────────────────────────────────────────────────────────────────────────────
def pil_to_b64(img: Image.Image, max_side: int = 1024) -> str:
    """
    Encode a PIL image as a base64 JPEG string.

    The image is copied and converted to RGB, shrunk in place (aspect ratio
    kept) when its longer side exceeds ``max_side``, then saved as JPEG at
    quality 82. The caller's image object is not modified.
    """
    rgb = img.copy().convert("RGB")
    longest_side = max(rgb.size)
    if longest_side > max_side:
        rgb.thumbnail((max_side, max_side), Image.LANCZOS)
    with io.BytesIO() as out:
        rgb.save(out, format="JPEG", quality=82)
        jpeg_bytes = out.getvalue()
    return base64.b64encode(jpeg_bytes).decode("utf-8")
def decode_b64_image(b64_str: str) -> Image.Image:
    """Turn a base64 string, optionally carrying a data-URI prefix, into an RGB PIL image."""
    # Everything up to and including the first comma is treated as a
    # data-URI header ("data:image/jpeg;base64,") and discarded.
    _, comma, remainder = b64_str.partition(",")
    encoded = remainder if comma else b64_str
    image_bytes = base64.b64decode(encoded)
    stream = io.BytesIO(image_bytes)
    return Image.open(stream).convert("RGB")
def fetch_image_from_url(
    url: str,
    timeout: int = 20,
    max_bytes: int = 25 * 1024 * 1024,
) -> Image.Image:
    """
    Download an image from a URL and return it as an RGB PIL Image.

    Args:
        url: HTTP(S) URL to fetch.
        timeout: Timeout in seconds passed to ``requests.get``.
        max_bytes: Upper bound on the downloaded body size; larger bodies
            are abandoned mid-download.

    Raises:
        requests.HTTPError: on a non-2xx response.
        ValueError: when the Content-Type is present but not ``image/*``,
            or when the body exceeds ``max_bytes``.
    """
    # NOTE(review): load_image() passes caller-supplied strings straight in,
    # so this is a server-side fetch of an arbitrary URL — consider limiting
    # allowed hosts if the API is exposed publicly.
    body = io.BytesIO()
    # Stream the body so an oversized response is rejected before it is fully
    # held in memory; the context manager releases the connection either way.
    with requests.get(url, timeout=timeout, stream=True) as resp:
        resp.raise_for_status()
        content_type = resp.headers.get("Content-Type", "")
        if content_type and not content_type.lower().startswith("image/"):
            raise ValueError(f"URL did not return an image (Content-Type: {content_type})")
        for chunk in resp.iter_content(chunk_size=64 * 1024):
            body.write(chunk)
            if body.tell() > max_bytes:
                raise ValueError(f"Image at URL exceeds the {max_bytes}-byte download limit")
    body.seek(0)
    return Image.open(body).convert("RGB")
def load_image(image_url: str) -> Image.Image:
    """
    Resolve an image reference to a PIL Image.

    After trimming surrounding whitespace, a value beginning with "http://"
    or "https://" is downloaded; anything else is decoded as a base64
    string / data-URI. This is the single entry point for all image loading
    in the inspect route.
    """
    source = image_url.strip()
    is_remote = source.startswith(("http://", "https://"))
    if not is_remote:
        return decode_b64_image(source)
    return fetch_image_from_url(source)
# ──────────────────────────────────────────────────────────────────────────────
# JSON EXTRACTION
# ──────────────────────────────────────────────────────────────────────────────
| def extract_json(text: str, keys: list) -> dict | None: | |
| if not text: | |
| return None | |
| text = re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.IGNORECASE) | |
| text = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).replace("```", "") | |
| brace = text.find("{") | |
| if brace > 0: | |
| text = text[brace:] | |
| text = text.strip() | |
| m = re.search(r"\{[\s\S]*\}", text) | |
| if not m: | |
| return None | |
| raw = m.group() | |
| try: | |
| return json.loads(raw) | |
| except json.JSONDecodeError: | |
| pass | |
| fixed = re.sub(r",\s*([}\]])", r"\1", raw) | |
| try: | |
| return json.loads(fixed) | |
| except json.JSONDecodeError: | |
| pass | |
| try: | |
| rebuilt = {} | |
| for key in keys: | |
| m_str = re.search(rf'"{key}"\s*:\s*"([^"]+)"', raw) | |
| if m_str: | |
| rebuilt[key] = m_str.group(1) | |
| continue | |
| m_obj = re.search(rf'"{key}"\s*:\s*(\{{[^}}]+\}})', raw, re.DOTALL) | |
| if m_obj: | |
| try: | |
| rebuilt[key] = json.loads(m_obj.group(1)) | |
| except Exception: | |
| pass | |
| if rebuilt: | |
| return rebuilt | |
| except Exception: | |
| pass | |
| return None | |
| def validate_result(data: dict, keys: list) -> dict | None: | |
| if not data: | |
| return None | |
| out = {} | |
| for key in keys: | |
| item = data.get(key) | |
| if item is None: | |
| return None | |
| if isinstance(item, str): | |
| found = item.strip().lower() == "detected" | |
| elif isinstance(item, dict): | |
| found = item.get("found", False) | |
| if isinstance(found, str): | |
| found = found.lower() in ("true", "yes", "1") | |
| found = bool(found) | |
| else: | |
| return None | |
| out[key] = {"found": found, "confidence": "high", "notes": ""} | |
| return out | |
# ──────────────────────────────────────────────────────────────────────────────
# PER-IMAGE ANALYSIS
# ──────────────────────────────────────────────────────────────────────────────
def analyze_one(img: Image.Image, aspect: str, token: str) -> tuple:
    """
    Try Gemini MODELS in order for a single image.
    Returns (result_dict, model_name) on success,
    (None, joined_error_string) on total failure.
    Image is encoded once and reused across all fallback attempts.
    token = GEMINI_API_KEY environment variable value.

    Only HTTP 429 is retried on the same model (up to 3 attempts, sleeping
    between them); every other failure records one error string and moves
    on to the next model. Error strings are joined with " | ".
    """
    b64 = pil_to_b64(img)
    keys = ASPECT_KEYS[aspect]
    prompt = PROMPTS[aspect]
    errors = []
    for model in MODELS:
        # Retry up to 3 attempts on rate-limit (429) before falling to next model
        for attempt in range(3):
            try:
                raw_content = call_gemini(b64, prompt, model, token)
                print(f"[{model}][{aspect}] raw: {raw_content[:300]}")
                data = extract_json(raw_content, keys)
                result = validate_result(data, keys)
                if result is not None:
                    return result, model
                errors.append(f"{model}: JSON parse failed. Raw: {raw_content[:150]}")
                break  # parse fail is not retryable
            except requests.HTTPError as e:
                # "?" stands in when the exception carries no response object.
                status = e.response.status_code if e.response is not None else "?"
                if status == 400:
                    errors.append(f"{model}: bad request β check image or prompt ({str(e)[:120]})")
                    break
                elif status in (401, 403):
                    errors.append(f"{model}: invalid API key β check GEMINI_API_KEY")
                    break
                elif status == 429:
                    if attempt < 2:
                        # Wait grows per attempt (5s, then 10s) plus up to 2s of jitter.
                        wait = 5 * (attempt + 1) + random.uniform(0, 2)
                        print(f"[{model}][{aspect}] rate limited, retrying in {wait:.1f}s (attempt {attempt+1}/3)")
                        time.sleep(wait)
                        continue
                    errors.append(f"{model}: rate limited after 3 attempts β trying next model")
                    break
                elif status == 503:
                    errors.append(f"{model}: service unavailable β retrying next model")
                    break
                else:
                    errors.append(f"{model}: HTTP {status} β {str(e)[:150]}")
                    break
            except requests.Timeout:
                errors.append(f"{model}: request timed out β retrying next model")
                break
            except Exception as e:
                # Catch-all so one model's unexpected failure cannot abort
                # the fallback chain; the message is truncated to 180 chars.
                errors.append(f"{model}: {str(e)[:180]}")
                break
    return None, " | ".join(errors)
def merge_results(results: list, aspect: str) -> dict:
    """OR-merge multiple image results: if any image detected it, it's found.

    Starts every key of the aspect at found=False / confidence="low" and
    skips falsy entries in ``results``. For a key that a result reports as
    found, the merged confidence is raised to that result's confidence when
    it ranks higher in CONF_RANK.
    """
    keys = ASPECT_KEYS[aspect]
    merged = {k: {"found": False, "confidence": "low", "notes": ""} for k in keys}
    for res in results:
        if not res:
            continue
        for k in keys:
            src = res.get(k, {})
            if src.get("found"):
                merged[k]["found"] = True
                # NOTE(review): this confidence update is reconstructed as
                # nested under the "found" check; the original indentation
                # was lost — confirm against the upstream file.
                if CONF_RANK.get(src.get("confidence", ""), 0) > CONF_RANK.get(merged[k]["confidence"], 0):
                    merged[k]["confidence"] = src["confidence"]
    return merged
# ──────────────────────────────────────────────────────────────────────────────
# REPORT BUILDERS
# ──────────────────────────────────────────────────────────────────────────────
| def build_front_report(merged_left: dict | None, merged_right: dict | None) -> dict: | |
| """ | |
| Combine front_left and front_right results. | |
| For image_valid: if either side's image is invalid, note it. | |
| For components: detected if found in EITHER side (OR logic). | |
| """ | |
| components = {} | |
| comp_keys = ["sensors", "gps_device", "prime_logo", "trailer_id"] | |
| comp_names = { | |
| "sensors": "Sensors", | |
| "gps_device": "GPS Device", | |
| "prime_logo": "Prime Logo", | |
| "trailer_id": "Trailer ID Label", | |
| } | |
| for key in comp_keys: | |
| left_found = merged_left.get(key, {}).get("found", False) if merged_left else False | |
| right_found = merged_right.get(key, {}).get("found", False) if merged_right else False | |
| detected = left_found or right_found | |
| components[comp_names[key]] = "detected" if detected else "missing" | |
| # Image validity notes | |
| notes = [] | |
| if merged_left is None: | |
| notes.append("front_left: image missing from input") | |
| elif not merged_left.get("image_valid", {}).get("found", True): | |
| notes.append("front_left: invalid image (wrong angle)") | |
| if merged_right is None: | |
| notes.append("front_right: image missing from input") | |
| elif not merged_right.get("image_valid", {}).get("found", True): | |
| notes.append("front_right: invalid image (wrong angle)") | |
| return {"components": components, "notes": notes} | |
| def build_rear_report(merged_left: dict | None, merged_right: dict | None) -> dict: | |
| """ | |
| Combine rear_left and rear_right results. | |
| Each image independently inspects the side facing the camera. | |
| A component is detected (2/2) if BOTH images found it, partially (1/2) if only one did. | |
| """ | |
| components = {} | |
| notes = [] | |
| # Check image validity | |
| left_valid = merged_left is not None and merged_left.get("image_valid", {}).get("found", True) | |
| right_valid = merged_right is not None and merged_right.get("image_valid", {}).get("found", True) | |
| if merged_left is None: | |
| notes.append("rear_left: image missing from input") | |
| elif not left_valid: | |
| notes.append("rear_left: invalid image (wrong angle/side)") | |
| if merged_right is None: | |
| notes.append("rear_right: image missing from input") | |
| elif not right_valid: | |
| notes.append("rear_right: invalid image (wrong angle/side)") | |
| # Each key is now simply "side_skirts" / "edge_kit" per image (no left/right split) | |
| comp_keys = [ | |
| ("side_skirts", "Side Skirts / Fins"), | |
| ("edge_kit", "Edge Kit"), | |
| ] | |
| for key, display_name in comp_keys: | |
| left_found = merged_left.get(key, {}).get("found", False) if (merged_left and left_valid) else False | |
| right_found = merged_right.get(key, {}).get("found", False) if (merged_right and right_valid) else False | |
| count = int(left_found) + int(right_found) | |
| if count == 2: | |
| result = "detected" | |
| count_str = "2/2" | |
| elif count == 1: | |
| which = "rear_left image" if left_found else "rear_right image" | |
| result = f"partially detected ({which} only)" | |
| count_str = "1/2" | |
| else: | |
| result = "missing" | |
| count_str = "0/2" | |
| components[display_name] = { | |
| "status": result, | |
| "count": count_str, | |
| } | |
| return {"components": components, "notes": notes} | |
| def build_inside_report(merged: dict | None) -> dict: | |
| if merged is None: | |
| return { | |
| "components": { | |
| "Side Guards": "missing", | |
| "Flooring": "missing", | |
| }, | |
| "notes": ["inside: image missing from input"], | |
| } | |
| return { | |
| "components": { | |
| "Side Guards": "detected" if merged.get("side_guards", {}).get("found") else "missing", | |
| "Flooring": "detected" if merged.get("flooring", {}).get("found") else "missing", | |
| }, | |
| "notes": [], | |
| } | |
| def build_door_report(merged: dict | None) -> dict: | |
| if merged is None: | |
| return { | |
| "components": { | |
| "Latch Kit & Lash Links": "missing", | |
| "Grote LED Lights": "missing", | |
| }, | |
| "notes": ["door: image missing from input"], | |
| } | |
| notes = [] | |
| if not merged.get("image_valid", {}).get("found", True): | |
| notes.append("door: invalid image (not a valid rear door view)") | |
| return { | |
| "components": { | |
| "Latch Kit & Lash Links": "detected" if merged.get("latch_kit_lash_links", {}).get("found") else "missing", | |
| "Grote LED Lights": "detected" if merged.get("grote_led_lights", {}).get("found") else "missing", | |
| }, | |
| "notes": notes, | |
| } | |
# ──────────────────────────────────────────────────────────────────────────────
# FASTAPI APP
# ──────────────────────────────────────────────────────────────────────────────
# Application object; title, description and version are passed straight to
# FastAPI as the service metadata.
app = FastAPI(
    title="Amazon Trailer Inspector API",
    description=" ".join(
        (
            "AI-powered trailer inspection API.",
            "Submit up to 6 labeled images (front_right, front_left, rear_right, rear_left, inside, door)",
            "and receive a structured component detection report.",
        )
    ),
    version="2.0.0",
)

# CORS is left fully open: any origin, any method, any header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── Pydantic models ──────────────────────────────────────────────────────────
class ImageInput(BaseModel):
    """One labeled image submitted for inspection.

    Schema examples are given with ``examples=[...]``: passing ``example=``
    to ``Field`` is a deprecated extra keyword argument in Pydantic v2.
    """

    # Which trailer view the image shows. The value is a free-form string
    # here; the /inspect handler rejects anything not in VALID_LABELS.
    label: str = Field(
        ...,
        description="One of: front_right, front_left, rear_right, rear_left, inside, door",
        examples=["front_left"],
    )
    # Where the image bytes come from: a URL or an inline base64 payload.
    image_url: str = Field(
        ...,
        description=(
            "Image source — either a public/signed HTTPS URL "
            "(e.g. a Firebase Storage download URL) "
            "OR a base64-encoded string / data-URI "
            "(e.g. 'data:image/jpeg;base64,...'). "
            "Supported formats: JPEG, PNG, WEBP."
        ),
        examples=["https://firebasestorage.googleapis.com/..."],
    )
class InspectRequest(BaseModel):
    """Request body for the inspection endpoint: one to six labeled images.

    The list length is enforced by the field constraints. Label validity and
    uniqueness are not enforced here; the /inspect handler checks them.
    """

    images: list[ImageInput] = Field(
        ...,
        min_length=1,
        max_length=6,
        description="List of labeled images. Each label may appear at most once.",
        # ``examples`` takes a list of example values, so the single example
        # payload (itself a list of images) is wrapped in an outer list.
        examples=[
            [
                {"label": "front_left", "image_url": "https://firebasestorage.googleapis.com/..."},
                {"label": "front_right", "image_url": "https://firebasestorage.googleapis.com/..."},
                {"label": "rear_left", "image_url": "https://firebasestorage.googleapis.com/..."},
                {"label": "rear_right", "image_url": "https://firebasestorage.googleapis.com/..."},
                {"label": "inside", "image_url": "https://firebasestorage.googleapis.com/..."},
                {"label": "door", "image_url": "https://firebasestorage.googleapis.com/..."},
            ],
        ],
    )
# ── Routes ───────────────────────────────────────────────────────────────────
# NOTE(review): no route decorator was present on this handler, so it was
# never registered on `app`. The "/" path is inferred from the function name;
# confirm it matches what clients expect.
@app.get("/")
def root():
    """Return a static descriptor of the service and its main endpoint."""
    return {
        "status": "ok",
        "service": "Amazon Trailer Inspector API",
        "version": "2.0.0",
        "endpoint": "POST /inspect",
    }
# NOTE(review): no route decorator was present on this handler, so it was
# never registered on `app`. The "/health" path is inferred from the function
# name; confirm it matches what clients and monitors expect.
@app.get("/health")
def health():
    """Report liveness, whether a Gemini API key is configured, and the model list.

    Only the presence of the key is exposed, never its value. A key made
    solely of whitespace counts as not set.
    """
    api_key = os.environ.get("GEMINI_API_KEY", "").strip()
    return {
        "status": "ok",
        "gemini_api_key_set": bool(api_key),
        "models": MODELS,
    }
# Labels that are analysed together. Each batch issues at most two concurrent
# Gemini requests, keeping bursts small relative to the free-tier RPM limit.
_INSPECT_BATCHES = (
    ("front_left", "front_right"),
    ("rear_left", "rear_right"),
    ("inside", "door"),
)

# Seconds to wait between two consecutive batches that both have work to do.
_INSPECT_BATCH_PAUSE_SECONDS = 5


def _inspect_run_batches(decoded: dict, token: str) -> dict:
    """Analyse every decoded image batch by batch; return label -> result (or None)."""

    def run_label(label: str) -> tuple[str, dict | None]:
        result, meta = analyze_one(decoded[label], LABEL_TO_ASPECT[label], token)
        if result is not None:
            print(f"[API] {label} → success via {meta}")
        else:
            print(f"[API] {label} → all models failed: {meta}")
        return label, result

    # Drop empty batches up front so the pause below only ever happens between
    # two batches that actually call the API (no trailing or wasted sleeps).
    pending = [
        [label for label in batch if label in decoded]
        for batch in _INSPECT_BATCHES
    ]
    pending = [labels for labels in pending if labels]

    results: dict[str, dict | None] = {}
    for index, labels in enumerate(pending):
        if index:
            time.sleep(_INSPECT_BATCH_PAUSE_SECONDS)
        if len(labels) == 1:
            # A single label needs no thread pool.
            label, result = run_label(labels[0])
            results[label] = result
            continue
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
            for label, result in pool.map(run_label, labels):
                results[label] = result
    return results


def _inspect_section(title: str, labels: tuple, built: dict, decoded: dict, results: dict) -> dict:
    """Assemble one report section, correcting notes for images whose analysis failed."""
    provided = [label for label in labels if label in decoded]
    notes = list(built["notes"])
    for label in provided:
        if results.get(label) is not None:
            continue
        # The report builders receive None both for "not submitted" and for
        # "submitted but every model failed". This image WAS submitted, so
        # replace the misleading "missing from input" note with the real cause.
        misleading = f"{label}: image missing from input"
        notes = [note for note in notes if note != misleading]
        notes.append(f"{label}: analysis failed (all models failed)")
    return {
        "label": title,
        "images_provided": provided,
        "components": built["components"],
        "notes": notes,
    }


# The module docstring and root() both advertise this handler as POST /inspect.
@app.post("/inspect")
def inspect(request: InspectRequest):
    """
    Run full trailer inspection on all submitted images in parallel.

    **Input:** Up to 6 labeled images — each as a signed/public HTTPS URL or base64 string.

    **Output:** Per-label report with component detection results.

    Labels accepted: `front_right`, `front_left`, `rear_right`, `rear_left`, `inside`, `door`

    **Errors:** `503` when `GEMINI_API_KEY` is not configured; `422` for an
    invalid or duplicate label, or an image that cannot be loaded.
    """
    token = os.environ.get("GEMINI_API_KEY", "").strip()
    if not token:
        raise HTTPException(
            status_code=503,
            detail=(
                "GEMINI_API_KEY not configured. "
                "Set it in Space Settings → Repository Secrets. "
                "Get a free key at https://aistudio.google.com/apikey"
            ),
        )

    # Validate labels and reject duplicates before doing any expensive work.
    seen_labels: dict[str, str] = {}
    for item in request.images:
        if item.label not in VALID_LABELS:
            raise HTTPException(
                status_code=422,
                detail=f"Invalid label '{item.label}'. Must be one of: {sorted(VALID_LABELS)}",
            )
        if item.label in seen_labels:
            raise HTTPException(
                status_code=422,
                detail=f"Duplicate label '{item.label}'. Each label may only appear once.",
            )
        seen_labels[item.label] = item.image_url

    # Load all images (URL download or base64 decode). Any failure aborts the
    # whole request with a 422 naming the offending label.
    decoded = {}
    for label, image_url in seen_labels.items():
        try:
            decoded[label] = load_image(image_url)
        except Exception as e:
            raise HTTPException(
                status_code=422,
                detail=f"Could not load image for label '{label}': {e}",
            ) from e

    label_results = _inspect_run_batches(decoded, token)

    # Build the final report. A section is emitted only when at least one of
    # its images was submitted.
    report = {}
    if "front_left" in decoded or "front_right" in decoded:
        report["front"] = _inspect_section(
            "Front Left / Right",
            ("front_left", "front_right"),
            build_front_report(
                label_results.get("front_left"),
                label_results.get("front_right"),
            ),
            decoded,
            label_results,
        )
    if "rear_left" in decoded or "rear_right" in decoded:
        report["rear"] = _inspect_section(
            "Rear Left / Right",
            ("rear_left", "rear_right"),
            build_rear_report(
                label_results.get("rear_left"),
                label_results.get("rear_right"),
            ),
            decoded,
            label_results,
        )
    if "inside" in decoded:
        report["inside"] = _inspect_section(
            "Inside Trailer",
            ("inside",),
            build_inside_report(label_results.get("inside")),
            decoded,
            label_results,
        )
    if "door" in decoded:
        report["door"] = _inspect_section(
            "Door Details",
            ("door",),
            build_door_report(label_results.get("door")),
            decoded,
            label_results,
        )

    return JSONResponse(content={
        "status": "success",
        "images_received": list(decoded),
        # Labels that were never submitted.
        "labels_missing": sorted(VALID_LABELS - set(decoded)),
        # Labels that were submitted but for which no model returned a result.
        "labels_failed": [label for label in decoded if label_results.get(label) is None],
        "report": report,
    })
# ──────────────────────────────────────────────────────────────────────────────
# STARTUP
# ──────────────────────────────────────────────────────────────────────────────
# Startup banner, printed whenever this module is imported or run.
# The key is stripped exactly as the /health and /inspect handlers do, so a
# whitespace-only value is reported as NOT SET here as well. Only the key's
# length is printed, never the key itself.
_tok = os.environ.get("GEMINI_API_KEY", "").strip()
if _tok:
    _key_status = f"SET ({len(_tok)} chars)"
else:
    _key_status = "NOT SET ⚠️ — get free key at aistudio.google.com/apikey"

print("=" * 60)
print(" Amazon Trailer Inspector — API mode (Gemini)")
print(f" GEMINI_API_KEY : {_key_status}")
print(f" Models : {MODELS}")
print("=" * 60)

# Serve with uvicorn when executed directly as a script.
if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)