Spaces:
Sleeping
Sleeping
File size: 6,757 Bytes
5f0eb5c 7fa3709 5f0eb5c 46e2937 42ba277 6d1b873 7fa3709 5f0eb5c 7fa3709 5f0eb5c 6d1b873 5f0eb5c 7fa3709 5f0eb5c 7fa3709 5f0eb5c 98e61eb 7fa3709 5f0eb5c 7fa3709 6d1b873 7fa3709 6d1b873 7fa3709 6d1b873 7fa3709 6d1b873 7fa3709 b6f01f7 6d1b873 5f0eb5c 7fa3709 5f0eb5c 76bc704 5f0eb5c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
from dotenv import load_dotenv
import os
#import google.generativeai as genai
from google import genai
from google.genai import types
from PIL import Image
import gradio as gr
import requests
from io import BytesIO
import json
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import Literal
# Load environment variables from .env into the process environment, so that
# they are available to the client construction and os.getenv() call below.
load_dotenv()
# Legacy google.generativeai configuration, kept for reference:
#genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
# Module-level Gemini client used by product_identification_response().
# NOTE(review): no api_key is passed here, so the client presumably picks up
# its credentials from the environment -- confirm the expected variable name.
clientGemini = genai.Client()
# Local development notes (the app is run inside a virtual environment):
#   python -m venv eccomercespace
#   source eccomercespace/Scripts/activate   (for Windows)
#   pip install -r requirements.txt
#   python app.py
# Hugging Face token read from the environment. It is not used by any code
# visible in this file; the login call below is commented out.
HF_TOKEN = os.getenv("HF_TOKEN")
#login(token=HF_TOKEN)
import base64
import requests
def sniff_image_mime(data: bytes) -> str:
    """Return the image MIME type implied by the leading bytes of *data*.

    Recognised formats are JPEG, PNG, GIF (87a and 89a) and WEBP.

    Raises:
        ValueError: if the bytes match none of the supported signatures.
    """
    # Formats identified purely by a fixed prefix: (magic bytes, MIME type).
    prefix_signatures = (
        (b"\xff\xd8\xff", "image/jpeg"),
        (b"\x89PNG\r\n\x1a\n", "image/png"),
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
    )
    for magic, mime in prefix_signatures:
        if data[:len(magic)] == magic:
            return mime

    # WEBP is a RIFF container: "RIFF", four size bytes, then "WEBP".
    riff_tag = data[:4]
    form_tag = data[8:12]
    if riff_tag == b"RIFF" and form_tag == b"WEBP":
        return "image/webp"

    raise ValueError("Downloaded bytes don't look like a supported image (jpeg/png/gif/webp).")
def url_to_data_url_allow_octet(url: str) -> str:
    """Download *url* and return its body as a base64 ``data:`` URL.

    The MIME type is derived from the downloaded bytes via
    ``sniff_image_mime`` rather than from the response headers.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the body is not a supported image format.
    """
    reply = requests.get(url, timeout=30, allow_redirects=True)
    reply.raise_for_status()

    payload = reply.content
    detected_mime = sniff_image_mime(payload)
    encoded_payload = base64.b64encode(payload).decode("utf-8")
    return "data:" + detected_mime + ";base64," + encoded_payload
def pil_to_bytes(img: Image.Image) -> tuple[bytes, str]:
    """Encode *img* as an RGB JPEG and return ``(encoded_bytes, mime_type)``.

    Every input is converted to RGB and saved as JPEG (quality 92), so the
    returned MIME type is always ``"image/jpeg"``.
    """
    output_mime = "image/jpeg"
    rgb_image = img.convert("RGB")
    sink = BytesIO()
    rgb_image.save(sink, format="JPEG", quality=92)
    return sink.getvalue(), output_mime
def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqBEtYtb8YffjKZ68Gb.jpg"):
    """Identify the product in an image and rate its condition using Gemini.

    Args:
        image_path: Local file path or http(s) URL of the image. Surrounding
            whitespace and quote characters are stripped first, so a path
            pasted with quotes around it still works.

    Returns:
        dict with the keys ``product_name_specific`` (str or None),
        ``product_name_general`` (str or None), ``product_identified`` (bool)
        and ``condition`` (one of "new", "like new", "good", "fair", "poor").

    Raises:
        requests.HTTPError: if downloading a URL fails.
        ValueError: if Gemini returns no text, or (as pydantic's
            ValidationError) text that does not match the expected schema.
    """
    # Local import: Optional is not among the names this file imports from
    # typing at module level.
    from typing import Optional

    # TODO: revisit and flesh out the prompt given to the model.
    class ProductDetails(BaseModel):
        # The name fields are Optional because the descriptions tell the model
        # to return null when it cannot identify the product; a plain `str`
        # field would make that answer fail validation.
        product_name_specific: Optional[str] = Field(
            ...,
            description="the specific name of the product in the image, if you can identify it. If you can't, return null",
        )
        product_name_general: Optional[str] = Field(
            ...,
            description="the name of the product in the image which the user uploaded. If you can't identify it, return null",
        )
        product_identified: bool = Field(
            ...,
            description="a True or False bool response of whether you were able to identify the product from the image or not. If you are able to identify one or both of product_name_specific and product_name_general, return True. Otherwise, if both are null, then you must return False",
        )
        condition: Literal["new", "like new", "good", "fair", "poor"] = Field(
            ...,
            description="the condition of the product in the image which the user uploaded",
        )

    clean_path = image_path.strip().strip("\"'")

    if clean_path.startswith(("http://", "https://")):
        # Bound the download so a stalled server cannot hang the request.
        download = requests.get(clean_path, timeout=30)
        download.raise_for_status()  # Raise if the download failed
        image_source = BytesIO(download.content)
    else:
        image_source = clean_path

    # The context manager releases the underlying file once the image has been
    # re-encoded to JPEG bytes.
    with Image.open(image_source) as image:
        image_bytes, mime_type = pil_to_bytes(image)

    resp = clientGemini.models.generate_content(
        model="gemini-2.5-flash-lite",
        contents=[
            types.Part.from_text(text="What product is in this image, and what is the condition of the product?"),
            types.Part.from_bytes(data=image_bytes, mime_type=mime_type),
        ],
        config=types.GenerateContentConfig(
            response_mime_type="application/json",  # force JSON output
            response_schema=ProductDetails,  # structured output schema
        ),
    )

    if not resp.text:
        # Fail with a clear message instead of an opaque validation error.
        raise ValueError("Gemini returned no text for the product identification request.")

    # Parse the JSON reply into the typed model.
    response = ProductDetails.model_validate_json(resp.text)
    print(f"product details returned from Gemini: {response}")

    # NOTE: an OpenAI Responses API variant of this call (gpt-4.1-mini, fed by
    # url_to_data_url_allow_octet) was prototyped here previously; see version
    # control history if it needs to be revived.

    data = response.model_dump()
    print(f"data after pushing response into JSON is: {data}")
    return data
# Ask Gemini
#response = model.generate_content(
#["What product is in this image, and what is the condition of the product?", image]
#)
#print(f"gemini-2.5-flash answer is: {response.text}")
#product_identification_response()
# Gradio UI: a single text box (image path or URL) in, the result dict shown
# as JSON out.
demo = gr.Interface(
    fn=product_identification_response,
    inputs="text",
    outputs="json",
    title="identify product and condition",
    description="finds info about a product",
)
# Start the web server; share=True is passed through to Gradio's launch()
# (see the Gradio docs for how share links behave in hosted environments).
demo.launch(share=True)