# SellerMVPPython / app.py
# Author: Derfel2025 — "updated hf space image identification logic" (commit 7fa3709)
# --- Imports: standard library -------------------------------------------
import base64
import json
import os
from io import BytesIO
from typing import Literal

# --- Imports: third-party ------------------------------------------------
import gradio as gr
import requests
from dotenv import load_dotenv
from google import genai
from google.genai import types
from openai import OpenAI
from PIL import Image
from pydantic import BaseModel, Field

# Load environment variables from .env (Gemini API key, HF_TOKEN, ...)
load_dotenv()

# Gemini client; picks up its API key from the environment.
clientGemini = genai.Client()

# Local dev notes (virtual environment):
#   python -m venv eccomercespace
#   source eccomercespace/Scripts/activate   (for windows)
#   pip install -r requirements.txt
#   python app.py

# Hugging Face token (currently unused; the login() call was removed)
HF_TOKEN = os.getenv("HF_TOKEN")
def sniff_image_mime(data: bytes) -> str:
    """Detect the MIME type of raw image bytes from their magic numbers.

    Supports JPEG, PNG, GIF (87a/89a) and WEBP; raises ValueError for
    anything else. Only the first few bytes are inspected.
    """
    header = data[:12]
    # JPEG: FF D8 FF
    if header.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    # PNG: 89 50 4E 47 0D 0A 1A 0A
    if header.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    # GIF: "GIF87a" or "GIF89a"
    if header[:6] in (b"GIF87a", b"GIF89a"):
        return "image/gif"
    # WEBP: RIFF container with "WEBP" at offset 8
    if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        return "image/webp"
    raise ValueError("Downloaded bytes don't look like a supported image (jpeg/png/gif/webp).")
def url_to_data_url_allow_octet(url: str) -> str:
    """Download *url* and return its bytes as a base64 ``data:`` URL.

    The MIME type is sniffed from the downloaded bytes themselves, so
    servers that mislabel images (e.g. application/octet-stream) still
    produce a usable data URL. Raises on HTTP errors.
    """
    resp = requests.get(url, timeout=30, allow_redirects=True)
    resp.raise_for_status()
    payload = resp.content
    encoded = base64.b64encode(payload).decode("utf-8")
    return f"data:{sniff_image_mime(payload)};base64,{encoded}"
def pil_to_bytes(img: Image.Image) -> tuple[bytes, str]:
    """Serialize a PIL image to JPEG bytes.

    Returns ``(jpeg_bytes, "image/jpeg")``. The image is converted to RGB
    first so palette/alpha modes encode to JPEG without errors, giving a
    single consistent mime_type downstream.
    """
    buffer = BytesIO()
    img.convert("RGB").save(buffer, format="JPEG", quality=92)
    return buffer.getvalue(), "image/jpeg"
def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqBEtYtb8YffjKZ68Gb.jpg"):
    """Identify the product shown in an image, plus its condition, via Gemini.

    Parameters
    ----------
    image_path : str
        An http(s) URL or a local file path to an image. Surrounding
        double quotes (as pasted from Windows "Copy as path") are stripped.

    Returns
    -------
    dict
        Keys: ``product_name_specific`` (str | None),
        ``product_name_general`` (str | None),
        ``product_identified`` (bool), and ``condition`` (one of
        "new", "like new", "good", "fair", "poor").

    Raises
    ------
    requests.HTTPError
        If the image URL cannot be downloaded.
    pydantic.ValidationError
        If the model reply does not match the expected schema.
    """
    from typing import Optional

    # Accept paths pasted with surrounding quotes.
    clean_path = image_path.strip('"')

    # Load the image either from the web or from local disk.
    if clean_path.startswith("http"):
        response = requests.get(clean_path, timeout=30)
        response.raise_for_status()  # fail fast if the download failed
        image = Image.open(BytesIO(response.content))
    else:
        image = Image.open(clean_path)

    # Normalize to JPEG bytes so the mime type sent to Gemini is consistent.
    image_bytes, mime_type = pil_to_bytes(image)

    # Structured-output schema. The name fields must be Optional: the model
    # is explicitly instructed to return null when it cannot identify the
    # product, and a plain `str` field would fail validation on null.
    class ProductDetails(BaseModel):
        product_name_specific: Optional[str] = Field(
            ...,
            description="the specific name of the product in the image, if you can identify it. If you can't, return None",
        )
        product_name_general: Optional[str] = Field(
            ...,
            description="the name of the product in the image which the user uploaded. If you can't identify it, return None",
        )
        product_identified: bool = Field(
            ...,
            description="a True or False bool response of whether you were able to identify the product from the image or not. If you are able to identify one or both of product_name_specific and product_name_general, return True. Otherwise, if both are None, then you must return False",
        )
        condition: Literal["new", "like new", "good", "fair", "poor"] = Field(
            ...,
            description="the condition of the product in the image which the user uploaded",
        )

    resp = clientGemini.models.generate_content(
        model="gemini-2.5-flash-lite",
        contents=[
            types.Part.from_text(text="What product is in this image, and what is the condition of the product?"),
            types.Part.from_bytes(data=image_bytes, mime_type=mime_type),
        ],
        config=types.GenerateContentConfig(
            response_mime_type="application/json",  # force a JSON reply
            response_schema=ProductDetails,         # schema from the Pydantic model
        ),
    )

    # Parse the JSON reply into the typed model, then hand back a plain dict
    # (Gradio's "json" output component expects JSON-serializable data).
    details = ProductDetails.model_validate_json(resp.text)
    print(f"value of speak score and reasoning from Gemini returned is: {details}")
    data = details.model_dump()
    print(f"data after pushing response into JSON is: {data}")
    return data
# Gradio UI: one text box in (image URL or local path), JSON out.
interface_config = dict(
    fn=product_identification_response,
    inputs="text",
    outputs="json",
    title="identify product and condition",
    description="finds info about a product",
)
demo = gr.Interface(**interface_config)

# share=True exposes a temporary public link in addition to the local server.
demo.launch(share=True)