# SellerMVPPython / app.py
# Author: Derfel2025 — "updated hf space image identification logic" (commit 7fa3709)
# --- Imports: standard library -------------------------------------------
import base64
import json
import os
from io import BytesIO
from typing import Literal

# --- Imports: third-party ------------------------------------------------
import gradio as gr
import requests
from dotenv import load_dotenv
from google import genai
from google.genai import types
from openai import OpenAI
from PIL import Image
from pydantic import BaseModel, Field

# Load environment variables from .env (Gemini API key, HF_TOKEN, ...)
load_dotenv()

# Gemini client; picks up its API key from the environment.
clientGemini = genai.Client()

# Local dev notes (virtual environment):
#   python -m venv eccomercespace
#   source eccomercespace/Scripts/activate   (for windows)
#   pip install -r requirements.txt
#   python app.py

# Hugging Face token (currently unused; the login() call was removed)
HF_TOKEN = os.getenv("HF_TOKEN")
def sniff_image_mime(data: bytes) -> str:
    """Detect the MIME type of raw image bytes from their magic numbers.

    Supports JPEG, PNG, GIF (87a/89a) and WEBP; raises ValueError for
    anything else. Only the first few bytes are inspected.
    """
    header = data[:12]
    # JPEG: FF D8 FF
    if header.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    # PNG: 89 50 4E 47 0D 0A 1A 0A
    if header.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    # GIF: "GIF87a" or "GIF89a"
    if header[:6] in (b"GIF87a", b"GIF89a"):
        return "image/gif"
    # WEBP: RIFF container with "WEBP" at offset 8
    if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        return "image/webp"
    raise ValueError("Downloaded bytes don't look like a supported image (jpeg/png/gif/webp).")
def url_to_data_url_allow_octet(url: str) -> str:
    """Download *url* and return its bytes as a base64 ``data:`` URL.

    The MIME type is sniffed from the downloaded bytes themselves, so
    servers that mislabel images (e.g. application/octet-stream) still
    produce a usable data URL. Raises on HTTP errors.
    """
    resp = requests.get(url, timeout=30, allow_redirects=True)
    resp.raise_for_status()
    payload = resp.content
    encoded = base64.b64encode(payload).decode("utf-8")
    return f"data:{sniff_image_mime(payload)};base64,{encoded}"
def pil_to_bytes(img: Image.Image) -> tuple[bytes, str]:
    """Serialize a PIL image to JPEG bytes.

    Returns ``(jpeg_bytes, "image/jpeg")``. The image is converted to RGB
    first so palette/alpha modes encode to JPEG without errors, giving a
    single consistent mime_type downstream.
    """
    buffer = BytesIO()
    img.convert("RGB").save(buffer, format="JPEG", quality=92)
    return buffer.getvalue(), "image/jpeg"
def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqBEtYtb8YffjKZ68Gb.jpg"):
    """Identify the product shown in an image, plus its condition, via Gemini.

    Parameters
    ----------
    image_path : str
        An http(s) URL or a local file path to an image. Surrounding
        double quotes (as pasted from Windows "Copy as path") are stripped.

    Returns
    -------
    dict
        Keys: ``product_name_specific`` (str | None),
        ``product_name_general`` (str | None),
        ``product_identified`` (bool), and ``condition`` (one of
        "new", "like new", "good", "fair", "poor").

    Raises
    ------
    requests.HTTPError
        If the image URL cannot be downloaded.
    pydantic.ValidationError
        If the model reply does not match the expected schema.
    """
    from typing import Optional

    # Accept paths pasted with surrounding quotes.
    clean_path = image_path.strip('"')

    # Load the image either from the web or from local disk.
    if clean_path.startswith("http"):
        response = requests.get(clean_path, timeout=30)
        response.raise_for_status()  # fail fast if the download failed
        image = Image.open(BytesIO(response.content))
    else:
        image = Image.open(clean_path)

    # Normalize to JPEG bytes so the mime type sent to Gemini is consistent.
    image_bytes, mime_type = pil_to_bytes(image)

    # Structured-output schema. The name fields must be Optional: the model
    # is explicitly instructed to return null when it cannot identify the
    # product, and a plain `str` field would fail validation on null.
    class ProductDetails(BaseModel):
        product_name_specific: Optional[str] = Field(
            ...,
            description="the specific name of the product in the image, if you can identify it. If you can't, return None",
        )
        product_name_general: Optional[str] = Field(
            ...,
            description="the name of the product in the image which the user uploaded. If you can't identify it, return None",
        )
        product_identified: bool = Field(
            ...,
            description="a True or False bool response of whether you were able to identify the product from the image or not. If you are able to identify one or both of product_name_specific and product_name_general, return True. Otherwise, if both are None, then you must return False",
        )
        condition: Literal["new", "like new", "good", "fair", "poor"] = Field(
            ...,
            description="the condition of the product in the image which the user uploaded",
        )

    resp = clientGemini.models.generate_content(
        model="gemini-2.5-flash-lite",
        contents=[
            types.Part.from_text(text="What product is in this image, and what is the condition of the product?"),
            types.Part.from_bytes(data=image_bytes, mime_type=mime_type),
        ],
        config=types.GenerateContentConfig(
            response_mime_type="application/json",  # force a JSON reply
            response_schema=ProductDetails,         # schema from the Pydantic model
        ),
    )

    # Parse the JSON reply into the typed model, then hand back a plain dict
    # (Gradio's "json" output component expects JSON-serializable data).
    details = ProductDetails.model_validate_json(resp.text)
    print(f"value of speak score and reasoning from Gemini returned is: {details}")
    data = details.model_dump()
    print(f"data after pushing response into JSON is: {data}")
    return data
# Gradio UI: one text box in (image URL or local path), JSON out.
interface_config = dict(
    fn=product_identification_response,
    inputs="text",
    outputs="json",
    title="identify product and condition",
    description="finds info about a product",
)
demo = gr.Interface(**interface_config)

# share=True exposes a temporary public link in addition to the local server.
demo.launch(share=True)