Derfel2025 committed on
Commit
7fa3709
·
1 Parent(s): 76bc704

updated hf space image identification logic

Browse files
Files changed (2) hide show
  1. app.py +118 -42
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,22 +1,23 @@
1
  from dotenv import load_dotenv
2
  import os
3
- import google.generativeai as genai
4
- from groq import Groq
 
5
  from PIL import Image
6
  import gradio as gr
7
  import requests
8
  from io import BytesIO
9
  import json
 
 
 
10
 
11
  # Load environment variables from .env
12
  load_dotenv()
13
 
14
- from groq import Groq
15
 
16
- client = Groq(
17
- api_key=os.environ.get("GROQ_API_KEY"),
18
- )
19
- genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
20
 
21
  #I'm using a virtual environment for this locally
22
  #python -m venv eccomercespace
@@ -31,11 +32,42 @@ HF_TOKEN = os.getenv("HF_TOKEN")
31
 
32
  #login(token=HF_TOKEN)
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqBEtYtb8YffjKZ68Gb.jpg"):
36
 
37
  # Load Gemini Pro Vision
38
- model = genai.GenerativeModel('gemini-2.5-flash')
39
 
40
  # Load your image
41
  clean_path = image_path.strip('"')
@@ -47,27 +79,93 @@ def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqB
47
  image = Image.open(BytesIO(response.content))
48
  else:
49
  image = Image.open(clean_path)
 
50
 
 
 
 
51
  #structured output
52
  schema = {
53
  "type": "object",
54
  "properties": {
55
- "product": {"type": "string", "description": "Name of the product in the image"},
 
 
56
  "condition": {"type": "string", "enum": ["new", "like new", "good", "fair", "poor"], "description": "Condition of the product"},
57
  },
58
- "required": ["product", "condition"]
59
  }
60
 
61
- response = model.generate_content(
62
- contents=["What product is in this image, and what is the condition of the product?", image],
63
- generation_config={
64
- "response_mime_type": "application/json",
65
- "response_schema": schema
66
- }
67
- )
68
- print(f"response is: {response}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- data = json.loads(response.text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  print(f"data after pushing response into JSON is: {data}")
72
  return data
73
 
@@ -79,29 +177,7 @@ def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqB
79
  #print(f"gemini-2.5-flash answer is: {response.text}")
80
 
81
 
82
- prompt2 = f"""Your task is to returned structured JSON of product and condition in the following format: {{ "product": "the identity of the product", "condition": "the condition of the product"}}.
83
- The condition of the product must be one of the following: (*) New or (*) Used.
84
- Use the data from {response} as the source for your response"""
85
-
86
-
87
- #this is a second LLM call, to LLama using Grok, to format identified image data - need to remove this unneccesary call
88
- chat_completion = client.chat.completions.create(
89
- messages=[
90
- {
91
- "role": "system",
92
- "content": prompt2
93
- },
94
- {
95
- "role": "user",
96
- "content": response.text,
97
- }
98
- ],
99
- model="llama-3.3-70b-versatile",
100
- response_format={"type": "json_object"},#and include word 'json' in messages/prompt
101
- )
102
-
103
- print(chat_completion.choices[0].message.content)
104
- return chat_completion.choices[0].message.content
105
 
106
 
107
 
 
1
  from dotenv import load_dotenv
2
  import os
3
+ #import google.generativeai as genai
4
+ from google import genai
5
+ from google.genai import types
6
  from PIL import Image
7
  import gradio as gr
8
  import requests
9
  from io import BytesIO
10
  import json
11
+ from openai import OpenAI
12
+ from pydantic import BaseModel, Field
13
+ from typing import Literal
14
 
15
  # Load environment variables from .env
16
  load_dotenv()
17
 
 
18
 
19
+ #genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
20
+ clientGemini = genai.Client()
 
 
21
 
22
  #I'm using a virtual environment for this locally
23
  #python -m venv eccomercespace
 
32
 
33
  #login(token=HF_TOKEN)
34
 
35
+ import base64
36
+ import requests
37
+
38
def sniff_image_mime(data: bytes) -> str:
    """Return the MIME type implied by the leading magic bytes of *data*.

    Recognised formats are JPEG, PNG, GIF (87a and 89a) and WEBP.

    Raises:
        ValueError: if the bytes match none of the recognised signatures.
    """
    # Formats identified purely by a fixed prefix at offset 0, checked in order.
    fixed_prefixes = (
        (b"\xff\xd8\xff", "image/jpeg"),       # JPEG: FF D8 FF
        (b"\x89PNG\r\n\x1a\n", "image/png"),   # PNG: 89 50 4E 47 0D 0A 1A 0A
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
    )
    for magic, mime in fixed_prefixes:
        if data[:len(magic)] == magic:
            return mime

    # WEBP is a RIFF container: "RIFF", four more bytes, then "WEBP" at bytes 8-11.
    if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
        return "image/webp"

    raise ValueError("Downloaded bytes don't look like a supported image (jpeg/png/gif/webp).")
52
+
53
def url_to_data_url_allow_octet(url: str) -> str:
    """Download *url* and return its body as a base64 ``data:`` URL.

    The response's Content-Type header is not consulted; the MIME type is
    taken from the downloaded bytes via ``sniff_image_mime``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the body is not a recognised image format.
    """
    reply = requests.get(url, timeout=30, allow_redirects=True)
    reply.raise_for_status()

    payload = reply.content
    mime_type = sniff_image_mime(payload)
    encoded = base64.b64encode(payload).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"
59
+
60
def pil_to_bytes(img: Image.Image) -> tuple[bytes, str]:
    """Encode a PIL image as JPEG bytes for upload.

    Args:
        img: the image to encode; it is not modified.

    Returns:
        A ``(jpeg_bytes, "image/jpeg")`` pair, so the caller always has a
        consistent MIME type regardless of the source format.
    """
    # Function-scope import: ImageOps ships with Pillow, which the file
    # already depends on, but only `Image` is imported at module level.
    from PIL import ImageOps

    # The JPEG written below carries no EXIF block, so an Orientation tag on
    # the source (typical for phone photos) would be lost and the picture
    # would be sent rotated. Bake the orientation into the pixels first.
    upright = ImageOps.exif_transpose(img)

    # JPEG cannot store alpha or palette modes, so normalise to RGB.
    rgb = upright.convert("RGB")

    buf = BytesIO()
    rgb.save(buf, format="JPEG", quality=92)
    return buf.getvalue(), "image/jpeg"
66
 
67
  def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\XyAaqBEtYtb8YffjKZ68Gb.jpg"):
68
 
69
  # Load Gemini Pro Vision
70
+ #model = genai.GenerativeModel('gemini-2.5-flash')
71
 
72
  # Load your image
73
  clean_path = image_path.strip('"')
 
79
  image = Image.open(BytesIO(response.content))
80
  else:
81
  image = Image.open(clean_path)
82
+
83
 
84
+ image_bytes, mime_type = pil_to_bytes(image)
85
+
86
+
87
  #structured output
88
  schema = {
89
  "type": "object",
90
  "properties": {
91
+ "product_name_specific": {"type": ["string", "null"], "description": "the specific name of the product in the image, if you can identify it. If you can't, return None"},
92
+ "product_name_general": {"type": ["string", "null"], "description": "the name of the product in the image which the user uploaded. If you can't identify it, return None"},
93
+ "product_identified": {"type": "boolean", "description": "a True or False bool response of whether you were able to identify the product from the image or not. If you are able to identify one or both of product_name_specific and product_name_generic, return True. Otherwise, if both are None, then you must return False"},
94
  "condition": {"type": "string", "enum": ["new", "like new", "good", "fair", "poor"], "description": "Condition of the product"},
95
  },
96
+ "required": ["product_name_specific", "product_name_general", "product_identified", "condition"]
97
  }
98
 
99
+ #I probably need to revisit this code and flesh out the prompt it's given.
100
+
101
+
102
+
103
# Local import: the file only imports `Literal` from typing.
from typing import Optional


# Structured-output model for the product identification call.
# The two name fields are nullable because their descriptions tell the model
# to return None when it cannot identify the product; typing them as plain
# `str` would contradict that. All four fields remain required, so a null
# must be stated explicitly rather than omitted.
# NOTE: documented with comments rather than a class docstring, because a
# pydantic model's docstring is emitted into its JSON schema.
class ProductDetails(BaseModel):
    # Specific product name (e.g. brand/model), or None if not identifiable.
    product_name_specific: Optional[str] = Field(
        ...,
        description="the specific name of the product in the image, if you can identify it. If you can't, return None "
    )
    # Generic product name, or None if not identifiable.
    product_name_general: Optional[str] = Field(
        ...,
        description="the name of the product in the image which the user uploaded. If you can't identify it, return None"
    )
    # Whether at least one of the two names above was identified.
    product_identified: bool = Field(
        ...,
        description="a True or False bool response of whether you were able to identify the product from the image or not. If you are able to identify one or both of product_name_specific and product_name_generic, return True. Otherwise, if both are None, then you must return False"
    )
    # Condition of the product, restricted to a fixed vocabulary.
    condition: Literal["new", "like new", "good", "fair", "poor"] = Field(
        ...,
        description="the condition of the product in the image which the user uploaded"
    )
120
+
121
 
122
+ resp = clientGemini.models.generate_content(
123
+ model="gemini-2.5-flash-lite",
124
+ contents=[
125
+ types.Part.from_text(text="What product is in this image, and what is the condition of the product?"),
126
+ types.Part.from_bytes(data=image_bytes, mime_type=mime_type),
127
+ ], # user prompt
128
+ config=types.GenerateContentConfig( # system prompt
129
+ response_mime_type="application/json", # force JSON
130
+ response_schema=ProductDetails, # schema (Pydantic model)
131
+ ),
132
+ )
133
+
134
+ # 3) Parse into your typed object
135
+ response = ProductDetails.model_validate_json(resp.text)
136
+
137
+ print(f"value of speak score and reasoning from Gemini returned is: {response}")
138
+
139
+ ##openai version
140
+
141
+ #add in new product response schema
142
+
143
+
144
+ #client = OpenAI()
145
+
146
+ #image = url_to_data_url_allow_octet(clean_path)
147
+
148
+ #response = client.responses.parse(
149
+ # model="gpt-4.1-mini",
150
+ # input=[{
151
+ #"role": "user",
152
+ #"content": [
153
+ # {"type": "input_text", "text": "What product is in this image, and what is the condition of the product?"},
154
+ #{
155
+ # "type": "input_image",
156
+ #"detail": "high", #this param should boost performance
157
+ #"image_url": image,
158
+ #},
159
+ #],
160
+ ##}],
161
+ #text_format=ProductDetails #should also be possible to pass pydantic schema
162
+ #)
163
+
164
+ #print(response.output_text)
165
+
166
+
167
+
168
+ data = response.model_dump()
169
  print(f"data after pushing response into JSON is: {data}")
170
  return data
171
 
 
177
  #print(f"gemini-2.5-flash answer is: {response.text}")
178
 
179
 
180
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
 
183
 
requirements.txt CHANGED
@@ -1,6 +1,8 @@
1
  google-generativeai>=0.8.0
2
- groq>=0.2.1
3
  Pillow>=10.0.0
4
  gradio>=4.28.0
5
  python-dotenv>=1.0.0
6
  requests
 
 
 
 
1
  google-generativeai>=0.8.0
 
2
  Pillow>=10.0.0
3
  gradio>=4.28.0
4
  python-dotenv>=1.0.0
5
  requests
6
+ pydantic
7
+ openai
8
+ google-genai