"""Gradio Space: detect green tree guards in an uploaded image via a Groq
vision model, comparing against fixed reference images bundled in the repo."""

import base64
import io
import os

import gradio as gr
from groq import Groq
from PIL import Image

# 1. Setup Groq client (GROQ_API_KEY must be set in the Space secrets).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


# 2. Convert a PIL image to a base64 JPEG string.
def encode_image(image):
    """Return the base64-encoded JPEG bytes of *image*.

    Converts to RGB first: JPEG cannot encode RGBA/P modes, so user
    uploads such as PNGs with an alpha channel would otherwise make
    ``Image.save(format="JPEG")`` raise an OSError.
    """
    buffered = io.BytesIO()
    image.convert("RGB").save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


# 3. Load and encode the fixed reference images from the local folder.
#    Missing files are skipped silently so the app still launches.
REF_PATHS = ["references/reference_1.jpeg", "references/reference_2.jpeg"]
FIXED_BASE64 = []
for path in REF_PATHS:
    if os.path.exists(path):
        # encode_image handles the RGB conversion.
        FIXED_BASE64.append(encode_image(Image.open(path)))


def detect_covering(query_image):
    """Ask the vision model whether *query_image* shows the same green
    tree guard as the reference images.

    Args:
        query_image: PIL image from the Gradio widget, or None.

    Returns:
        The model's answer upper-cased (expected "YES" or "NO"), or a
        prompt string when no image was uploaded.
    """
    if query_image is None:
        return "Please upload an image."

    # Encode the user's query image.
    query_b64 = encode_image(query_image)

    # Build the multi-image message: prompt text, then the reference
    # images, then the query image last (the prompt refers to "the LAST
    # image", so ordering matters).
    content = [{
        "type": "text",
        "text": "First two reference images show a green pole around the tree. Determine if the LAST image contains the SAME type of green pole. Answer ONLY YES or NO.",
    }]
    for b64 in FIXED_BASE64:
        content.append({
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
        })
    content.append({
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{query_b64}"},
    })

    # Call the Groq vision chat API (the model accepts multiple images
    # per request).
    completion = client.chat.completions.create(
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        messages=[{"role": "user", "content": content}],
        temperature=0.0,  # deterministic output for a YES/NO classifier
        max_tokens=10,    # answer is a single word
    )
    return completion.choices[0].message.content.strip().upper()


# Gradio interface wiring.
demo = gr.Interface(
    fn=detect_covering,
    inputs=gr.Image(type="pil", label="Upload Query Image"),
    outputs="text",
    title="Tree Guard Detector",
    description="Uses pre-set reference images from the repo to detect tree guards via Groq.",
)

demo.launch()