Spaces:

Baon2024
/

SellerMVPPython

Sleeping

File size: 6,757 Bytes

from dotenv import load_dotenv
import os
#import google.generativeai as genai
from google import genai
from google.genai import types
from PIL import Image
import gradio as gr
import requests
from io import BytesIO
import json
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import Literal

# Load environment variables from .env before any client below is created.
load_dotenv()


# Legacy google.generativeai configuration, kept for reference only:
#genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
# Module-level Gemini client used by product_identification_response.
# It is constructed without an explicit API key.
# NOTE(review): presumably the SDK reads its key from the environment loaded
# above; the legacy line used "GENAI_API_KEY" -- confirm which variable name
# the new client actually expects.
clientGemini = genai.Client()

# Local development setup (the author runs this inside a virtual environment):
#python -m venv eccomercespace
#source eccomercespace/Scripts/activate  (for windows)
#pip install -r requirements.txt
#python app.py


# Hugging Face token read from the environment. Nothing visible in this file
# uses it now that the login() call below is commented out.
HF_TOKEN = os.getenv("HF_TOKEN")


# Disabled Hugging Face login, kept for reference:
#login(token=HF_TOKEN)

import base64
import requests

def sniff_image_mime(data: bytes) -> str:
    """Return the image MIME type implied by the leading bytes of *data*.

    Recognises JPEG, PNG, GIF (87a and 89a) and WEBP by their file
    signatures.

    Raises:
        ValueError: if none of the known signatures match.
    """
    # Fixed-prefix signatures: (magic bytes, mime type).
    prefix_signatures = (
        (b"\xff\xd8\xff", "image/jpeg"),
        (b"\x89PNG\r\n\x1a\n", "image/png"),
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
    )
    for magic, mime in prefix_signatures:
        if data[:len(magic)] == magic:
            return mime

    # WEBP is a RIFF container: "RIFF", a 4-byte size, then "WEBP".
    is_riff = data[:4] == b"RIFF"
    if is_riff and data[8:12] == b"WEBP":
        return "image/webp"

    raise ValueError("Downloaded bytes don't look like a supported image (jpeg/png/gif/webp).")

def url_to_data_url_allow_octet(url: str) -> str:
    """Download *url* and return its body as a base64 ``data:`` URL.

    The MIME type is taken from the downloaded bytes via sniff_image_mime,
    not from the response headers. The request follows redirects and uses a
    30 second timeout; raise_for_status() is called on the response, and
    sniff_image_mime raises ValueError for unrecognised content.
    """
    reply = requests.get(url, timeout=30, allow_redirects=True)
    reply.raise_for_status()

    payload = reply.content
    mime = sniff_image_mime(payload)
    encoded = base64.b64encode(payload).decode("utf-8")
    return "data:" + mime + ";base64," + encoded

def pil_to_bytes(img: Image.Image) -> tuple[bytes, str]:
    """Encode *img* as JPEG and return ``(jpeg_bytes, "image/jpeg")``.

    The image is converted to RGB first so every input ends up with the
    same mime type, then saved at quality 92 into an in-memory buffer.
    """
    jpeg_mime = "image/jpeg"
    sink = BytesIO()
    rgb_image = img.convert("RGB")
    rgb_image.save(sink, format="JPEG", quality=92)
    encoded = sink.getvalue()
    return encoded, jpeg_mime

class ProductDetails(BaseModel):
    """Structured answer requested from Gemini for one product image.

    Defined once at module level so the model is not rebuilt on every call.
    """

    # NOTE(review): the descriptions ask the model to "return None" when it
    # cannot identify the product, yet both name fields are typed ``str``, so
    # a JSON null would fail validation. Confirm whether these should be
    # optional before changing the type (it would alter the returned shape).
    product_name_specific: str = Field(
        ...,
        description="the specific name of the product in the image, if you can identify it. If you can't, return None "
    )
    product_name_general: str = Field(
        ...,
        description="the name of the product in the image which the user uploaded. If you can't identify it, return None"
    )
    product_identified: bool = Field(
        ...,
        description="a True or False bool response of whether you were able to identify the product from the image or not. If you are able to identify one or both of product_name_specific and product_name_generic, return True. Otherwise, if both are None, then you must return False"
    )
    condition: Literal["new", "like new", "good", "fair", "poor"] = Field(
        ...,
        description="the condition of the product in the image which the user uploaded"
    )


def _load_image_bytes(image_path: str) -> tuple[bytes, str]:
    """Load an image from a local path or http(s) URL as JPEG bytes + mime type."""
    # Text pasted into the UI often carries stray whitespace or wrapping quotes.
    clean_path = image_path.strip().strip('"')
    if not clean_path:
        raise ValueError("No image path or URL was provided.")

    # Match the scheme explicitly so a local file such as "http_logo.png" is
    # not mistaken for a URL; schemes are case-insensitive.
    if clean_path.lower().startswith(("http://", "https://")):
        # A timeout keeps a stalled server from hanging the request forever.
        download = requests.get(clean_path, timeout=30)
        download.raise_for_status()  # Throw error if download fails
        source = BytesIO(download.content)
    else:
        source = clean_path

    # The context manager releases the underlying file once it is re-encoded.
    with Image.open(source) as image:
        return pil_to_bytes(image)


def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqBEtYtb8YffjKZ68Gb.jpg"):
    """Ask Gemini which product is shown in an image and what condition it is in.

    Args:
        image_path: Local file path or http(s) URL of the image. Surrounding
            whitespace and double quotes are ignored.

    Returns:
        A dict with the keys ``product_name_specific``,
        ``product_name_general``, ``product_identified`` and ``condition``
        (the dumped ProductDetails model).

    Raises:
        ValueError: if no path is given or Gemini returns no text. A response
            that does not match ProductDetails raises pydantic's validation
            error.
        requests.HTTPError: if downloading the image fails.
    """
    image_bytes, mime_type = _load_image_bytes(image_path)

    # TODO: revisit and flesh out the prompt given to the model.
    resp = clientGemini.models.generate_content(
        model="gemini-2.5-flash-lite",
        contents=[
            types.Part.from_text(text="What product is in this image, and what is the condition of the product?"),
            types.Part.from_bytes(data=image_bytes, mime_type=mime_type),
        ],  # user prompt
        config=types.GenerateContentConfig(
            response_mime_type="application/json",  # force JSON
            response_schema=ProductDetails,  # schema (Pydantic model)
        ),
    )

    # Fail with a clear message instead of an opaque validation error when
    # the response carries no text at all.
    if not resp.text:
        raise ValueError("Gemini returned no text for the image; cannot parse product details.")

    # Parse the JSON text into the typed model.
    details = ProductDetails.model_validate_json(resp.text)
    print(f"product details returned from Gemini: {details}")

    # NOTE: an OpenAI variant was sketched here previously (client.responses.parse
    # with model "gpt-4.1-mini", the image passed as a data URL built by
    # url_to_data_url_allow_octet, and text_format=ProductDetails).

    data = details.model_dump()
    print(f"data after pushing response into JSON is: {data}")
    return data
    
    
    



#product_identification_response()

# Gradio UI: one text box (image path or URL) in, the product details as JSON out.
interface_settings = {
    "fn": product_identification_response,
    "inputs": "text",
    "outputs": "json",
    "title": "identify product and condition",
    "description": "finds info about a product",
}
demo = gr.Interface(**interface_settings)

# Start the app as soon as the module runs, requesting a public share link.
demo.launch(share=True)