Spaces:

Baon2024
/

SellerMVPPython

Sleeping

App Files Files Community

Derfel2025 committed on Jan 17

Commit

7fa3709

1 Parent(s): 76bc704

updated hf space image identification logic

Browse files

Files changed (2) hide show

app.py +118 -42
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -1,22 +1,23 @@
 from dotenv import load_dotenv
 import os
-import google.generativeai as genai
-from groq import Groq
 from PIL import Image
 import gradio as gr
 import requests
 from io import BytesIO
 import json
 # Load environment variables from .env
 load_dotenv()
-from groq import Groq
-client = Groq(
-    api_key=os.environ.get("GROQ_API_KEY"),
-)
-genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
 #I'm using a virtual environment for this locally
 #python -m venv eccomercespace
@@ -31,11 +32,42 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 #login(token=HF_TOKEN)
 def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqBEtYtb8YffjKZ68Gb.jpg"):
     # Load Gemini Pro Vision
-    model = genai.GenerativeModel('gemini-2.5-flash')
     # Load your image
     clean_path = image_path.strip('"')
@@ -47,27 +79,93 @@ def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqB
         image = Image.open(BytesIO(response.content))
     else:
         image = Image.open(clean_path)
     #structured output
     schema = {
         "type": "object",
         "properties": {
-            "product": {"type": "string", "description": "Name of the product in the image"},
             "condition": {"type": "string", "enum": ["new", "like new", "good", "fair", "poor"], "description": "Condition of the product"},
             },
-            "required": ["product", "condition"]
         }
-    response = model.generate_content(
-        contents=["What product is in this image, and what is the condition of the product?", image],
-        generation_config={
-            "response_mime_type": "application/json",
-            "response_schema": schema
-        }
-    )
-    print(f"response is: {response}")
-    data = json.loads(response.text)
     print(f"data after pushing response into JSON is: {data}")
     return data
@@ -79,29 +177,7 @@ def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqB
     #print(f"gemini-2.5-flash answer is: {response.text}")
-    prompt2 = f"""Your task is to returned structured JSON of product and condition in the following format: {{ "product": "the identity of the product", "condition": "the condition of the product"}}.
-    The condition of the product must be one of the following: (*) New or (*) Used.
-    Use the data from {response} as the source for your response"""
-    #this is a second LLM call, to Llama using Groq, to format identified image data - need to remove this unnecessary call
-    chat_completion = client.chat.completions.create(
-        messages=[
-            {
-                "role": "system",
-                "content": prompt2
-            },
-            {
-                "role": "user",
-                "content": response.text,
-            }
-            ],
-            model="llama-3.3-70b-versatile",
-            response_format={"type": "json_object"},#and include word 'json' in messages/prompt
-        )
-    print(chat_completion.choices[0].message.content)
-    return chat_completion.choices[0].message.content

 from dotenv import load_dotenv
 import os
+#import google.generativeai as genai
+from google import genai
+from google.genai import types
 from PIL import Image
 import gradio as gr
 import requests
 from io import BytesIO
 import json
+from openai import OpenAI
+from pydantic import BaseModel, Field
+from typing import Literal
 # Load environment variables from .env
 load_dotenv()
+#genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
+clientGemini = genai.Client()
 #I'm using a virtual environment for this locally
 #python -m venv eccomercespace
 #login(token=HF_TOKEN)
+import base64
+import requests
+def sniff_image_mime(data: bytes) -> str:  # Map the leading "magic" bytes of `data` to an image MIME type string; raises ValueError if none match.
+    # JPEG starts with FF D8 FF (only the first three bytes are compared)
+    if data[:3] == b"\xff\xd8\xff":
+        return "image/jpeg"
+    # PNG starts with the 8-byte signature 89 50 4E 47 0D 0A 1A 0A
+    if data[:8] == b"\x89PNG\r\n\x1a\n":
+        return "image/png"
+    # GIF starts with the ASCII version tag GIF87a or GIF89a
+    if data[:6] in (b"GIF87a", b"GIF89a"):
+        return "image/gif"
+    # WEBP is a RIFF container: "RIFF" at bytes 0-3, four bytes that are not inspected here, then "WEBP" at bytes 8-11
+    if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
+        return "image/webp"
+    raise ValueError("Downloaded bytes don't look like a supported image (jpeg/png/gif/webp).")  # no signature matched; empty or too-short input also ends up here because the slices compare unequal
+def url_to_data_url_allow_octet(url: str) -> str:  # Download `url` and return its body as a "data:<mime>;base64,<payload>" string.
+    r = requests.get(url, timeout=30, allow_redirects=True)  # 30-second timeout; redirects are followed
+    r.raise_for_status()  # surfaces HTTP error statuses as an exception instead of encoding an error page
+    mime = sniff_image_mime(r.content)  # MIME type is taken from the bytes, not the Content-Type header (presumably the "allow_octet" in the name — confirm); raises ValueError for unrecognised data
+    b64 = base64.b64encode(r.content).decode("utf-8")
+    return f"data:{mime};base64,{b64}"
+def pil_to_bytes(img: Image.Image) -> tuple[bytes, str]:  # Re-encode `img` as JPEG in memory; returns (encoded bytes, "image/jpeg").
+    # Convert to RGB and encode as JPEG so the returned mime_type is always the same
+    img = img.convert("RGB")  # NOTE(review): this conversion discards any alpha channel — confirm that is acceptable for transparent uploads
+    buf = BytesIO()
+    img.save(buf, format="JPEG", quality=92)
+    return buf.getvalue(), "image/jpeg"
 def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqBEtYtb8YffjKZ68Gb.jpg"):
     # Load Gemini Pro Vision
+    #model = genai.GenerativeModel('gemini-2.5-flash')
     # Load your image
     clean_path = image_path.strip('"')
         image = Image.open(BytesIO(response.content))
     else:
         image = Image.open(clean_path)
+    image_bytes, mime_type = pil_to_bytes(image)
     #structured output
     schema = {
         "type": "object",
         "properties": {
+            "product_name_specific": {"type": ["string", "null"], "description": "the specific name of the product in the image, if you can identify it. If you can't, return None"},
+            "product_name_general": {"type": ["string", "null"], "description": "the name of the product in the image which the user uploaded. If you can't identify it, return None"},
+            "product_identified": {"type": "boolean", "description": "a True or False bool response of whether you were able to identify the product from the image or not. If you are able to identify one or both of product_name_specific and product_name_generic, return True. Otherwise, if both are None, then you must return False"},
             "condition": {"type": "string", "enum": ["new", "like new", "good", "fair", "poor"], "description": "Condition of the product"},
             },
+            "required": ["product_name_specific", "product_name_general", "product_identified", "condition"]
         }
+    #I probably need to revisit this code, and flesh-out the prompt it's given.
+    class ProductDetails(BaseModel):  # Pydantic model passed as response_schema to the Gemini call and used to validate the JSON text it returns.
+        product_name_specific: str = Field(  # NOTE(review): annotated as plain str although the description asks for None when unknown — likely needs an optional type; confirm
+            ...,
+            description="the specific name of the product in the image, if you can identify it. If you can't, return None "
+        )
+        product_name_general: str = Field(  # NOTE(review): same str-vs-None mismatch as product_name_specific — confirm
+            ...,
+            description="the name of the product in the image which the user uploaded. If you can't identify it, return None"
+        )
+        product_identified: bool = Field(  # NOTE(review): the description refers to "product_name_generic" but the field is named product_name_general
+            ...,
+            description="a True or False bool response of whether you were able to identify the product from the image or not. If you are able to identify one or both of product_name_specific and product_name_generic, return True. Otherwise, if both are None, then you must return False"
+        )
+        condition: Literal["new", "like new", "good", "fair", "poor"] = Field(  # restricted to these five values by the Literal type
+            ...,
+            description="the condition of the product in the image which the user uploaded"
+        )
+    resp = clientGemini.models.generate_content(
+            model="gemini-2.5-flash-lite",
+            contents=[
+                types.Part.from_text(text="What product is in this image, and what is the condition of the product?"),
+                types.Part.from_bytes(data=image_bytes, mime_type=mime_type),
+            ],  # user prompt
+            config=types.GenerateContentConfig(         # system prompt
+                response_mime_type="application/json",    # force JSON
+                response_schema=ProductDetails,   # schema (Pydantic model)
+            ),
+        )
+        # 3) Parse into your typed object
+    response = ProductDetails.model_validate_json(resp.text)
+    print(f"value of speak score and reasoning from Gemini returned is: {response}")
+    ##openai version
+    #add in new product response schema
+    #client = OpenAI()
+    #image = url_to_data_url_allow_octet(clean_path)
+    #response = client.responses.parse(
+       # model="gpt-4.1-mini",
+       # input=[{
+            #"role": "user",
+            #"content": [
+               # {"type": "input_text", "text": "What product is in this image, and what is the condition of the product?"},
+                #{
+                   # "type": "input_image",
+                    #"detail": "high", #this param should boost performance
+                    #"image_url": image,
+                #},
+            #],
+        ##}],
+        #text_format=ProductDetails #should also be possible to pass pydantic schema
+    #)
+    #print(response.output_text)
+    data = response.model_dump()
     print(f"data after pushing response into JSON is: {data}")
     return data
     #print(f"gemini-2.5-flash answer is: {response.text}")

requirements.txt CHANGED Viewed

@@ -1,6 +1,8 @@
 google-generativeai>=0.8.0
-groq>=0.2.1
 Pillow>=10.0.0
 gradio>=4.28.0
 python-dotenv>=1.0.0
 requests

 google-generativeai>=0.8.0
 Pillow>=10.0.0
 gradio>=4.28.0
 python-dotenv>=1.0.0
 requests
+pydantic
+openai
+google-genai