"""Gradio app: identify a product and its condition from an image via Gemini.

The user supplies a local path or URL; the image is sent to a Gemini vision
model which returns a structured (Pydantic-validated) JSON answer.

Local setup:
    python -m venv eccomercespace
    source eccomercespace/Scripts/activate   # on Windows (Git Bash)
    pip install -r requirements.txt
    python app.py
"""

import base64
import json
import os
from io import BytesIO
from typing import Literal, Optional

import gradio as gr
import requests
from dotenv import load_dotenv
from google import genai
from google.genai import types
from openai import OpenAI  # kept: an alternate OpenAI path may be re-enabled later
from PIL import Image
from pydantic import BaseModel, Field

# Load environment variables from .env (API keys, HF token).
load_dotenv()

# google-genai picks up its API key from the environment automatically.
clientGemini = genai.Client()

# Fetched but not currently used; kept for future Hugging Face integration.
HF_TOKEN = os.getenv("HF_TOKEN")


class ProductDetails(BaseModel):
    """Structured output schema the vision model must fill in."""

    # Optional: the field descriptions instruct the model to return None when
    # it cannot identify the product, so the schema must accept null.
    product_name_specific: Optional[str] = Field(
        ...,
        description=(
            "the specific name of the product in the image, if you can "
            "identify it. If you can't, return None"
        ),
    )
    product_name_general: Optional[str] = Field(
        ...,
        description=(
            "the name of the product in the image which the user uploaded. "
            "If you can't identify it, return None"
        ),
    )
    product_identified: bool = Field(
        ...,
        description=(
            "a True or False bool response of whether you were able to "
            "identify the product from the image or not. If you are able to "
            "identify one or both of product_name_specific and "
            "product_name_general, return True. Otherwise, if both are None, "
            "then you must return False"
        ),
    )
    condition: Literal["new", "like new", "good", "fair", "poor"] = Field(
        ...,
        description="the condition of the product in the image which the user uploaded",
    )


def sniff_image_mime(data: bytes) -> str:
    """Return the MIME type of *data* by inspecting its magic bytes.

    Supports JPEG, PNG, GIF and WEBP.

    Raises:
        ValueError: if the bytes match none of the supported signatures.
    """
    if data[:3] == b"\xff\xd8\xff":  # JPEG starts with FF D8 FF
        return "image/jpeg"
    if data[:8] == b"\x89PNG\r\n\x1a\n":  # PNG signature
        return "image/png"
    if data[:6] in (b"GIF87a", b"GIF89a"):  # GIF87a / GIF89a
        return "image/gif"
    if data[:4] == b"RIFF" and data[8:12] == b"WEBP":  # WEBP is RIFF....WEBP
        return "image/webp"
    raise ValueError("Downloaded bytes don't look like a supported image (jpeg/png/gif/webp).")


def url_to_data_url_allow_octet(url: str) -> str:
    """Download *url* and return its contents as a base64 ``data:`` URL.

    The MIME type is sniffed from the bytes themselves, so servers that
    mislabel images as application/octet-stream still work. Used by the
    (currently disabled) OpenAI code path.
    """
    resp = requests.get(url, timeout=30, allow_redirects=True)
    resp.raise_for_status()
    mime = sniff_image_mime(resp.content)
    b64 = base64.b64encode(resp.content).decode("utf-8")
    return f"data:{mime};base64,{b64}"


def pil_to_bytes(img: Image.Image) -> tuple[bytes, str]:
    """Re-encode a PIL image as JPEG and return ``(bytes, mime_type)``."""
    img = img.convert("RGB")  # JPEG has no alpha channel
    buf = BytesIO()
    img.save(buf, format="JPEG", quality=92)
    return buf.getvalue(), "image/jpeg"


def product_identification_response(
    image_path: str = r"C:\Users\JoeJo\Downloads\XyAaqBEtYtb8YffjKZ68Gb.jpg",
) -> dict:
    """Identify the product shown in an image and assess its condition.

    Args:
        image_path: Local file path or HTTP(S) URL of the image. Surrounding
            double quotes (e.g. from a copied Windows path) are stripped.

    Returns:
        A plain dict matching the ``ProductDetails`` schema, suitable for
        Gradio's ``"json"`` output component.

    Raises:
        requests.HTTPError: if a URL download fails.
        OSError: if a local file cannot be opened as an image.
    """
    clean_path = image_path.strip('"')
    if clean_path.startswith("http"):
        response = requests.get(clean_path, timeout=30)
        response.raise_for_status()  # Throw error if download fails
        image = Image.open(BytesIO(response.content))
    else:
        image = Image.open(clean_path)

    image_bytes, mime_type = pil_to_bytes(image)

    resp = clientGemini.models.generate_content(
        model="gemini-2.5-flash-lite",
        contents=[
            types.Part.from_text(
                text="What product is in this image, and what is the condition of the product?"
            ),
            types.Part.from_bytes(data=image_bytes, mime_type=mime_type),
        ],
        config=types.GenerateContentConfig(
            response_mime_type="application/json",  # force JSON output
            response_schema=ProductDetails,  # constrain to the Pydantic schema
        ),
    )

    # Parse the JSON text into the typed object, then dump back to a dict.
    details = ProductDetails.model_validate_json(resp.text)
    print(f"value of speak score and reasoning from Gemini returned is: {details}")
    data = details.model_dump()
    print(f"data after pushing response into JSON is: {data}")
    return data


demo = gr.Interface(
    fn=product_identification_response,
    inputs="text",
    outputs="json",
    title="identify product and condition",
    description="finds info about a product",
)

if __name__ == "__main__":
    # Guarded so the module (and `demo`) can be imported without launching.
    demo.launch(share=True)