File size: 4,317 Bytes
adae09f
 
 
 
 
 
 
 
d85086a
 
b4df4ef
adae09f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4df4ef
 
 
 
 
adae09f
d85086a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4df4ef
 
 
adae09f
 
 
 
4e3c349
adae09f
d85086a
 
b4df4ef
 
 
 
 
 
 
adae09f
 
 
b4df4ef
 
 
 
 
adae09f
 
 
b4df4ef
 
 
 
 
 
adae09f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4df4ef
adae09f
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
from dotenv import load_dotenv
import os
import google.generativeai as genai
from groq import Groq
from PIL import Image
import gradio as gr
import requests
from io import BytesIO
import base64
import re
import json

# Read key/value pairs from a local .env file into the process environment.
load_dotenv()

from groq import Groq

# Module-wide Groq client; api_key is None when GROQ_API_KEY is not set.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Hugging Face token taken from the environment (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")


#login(token=HF_TOKEN)

# Local setup (run inside a virtual environment):
#   python -m venv getReg
#   source getReg/Scripts/activate  (for Windows)
#   pip install -r requirements.txt
#   python app.py



def is_base64(s):
    """Return True when *s* looks like a base64-encoded payload.

    This is a rough heuristic, not a decoder: the value must be a string
    longer than 100 characters and, once surrounding whitespace is stripped,
    consist only of base64 alphabet characters (A-Z, a-z, 0-9, +, /),
    ``=`` padding and line breaks.

    Args:
        s: Candidate value. Non-string values are rejected.

    Returns:
        bool: A real ``True``/``False`` rather than a match object or None,
        so the result is safe to compare, log or serialize.
    """
    if not isinstance(s, str) or len(s) <= 100:
        return False
    # fullmatch anchors both ends, so no explicit ^...$ is needed.
    return re.fullmatch(r'[A-Za-z0-9+/=\n\r]+', s.strip()) is not None

def load_image(image_input):
    """Open an image given as a URL, a base64 string, or a local file path.

    The input is tried in that order: http(s) URL, then base64 payload
    (as judged by ``is_base64``), then an existing path on disk.

    Args:
        image_input: String describing where the image comes from.
            Surrounding whitespace and wrapping double quotes are ignored.

    Returns:
        The image object produced by ``Image.open``.

    Raises:
        TypeError: If ``image_input`` is not a string.
        ValueError: If the string is not a URL, base64 data, or an existing
            file path.
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
    """
    if not isinstance(image_input, str):
        raise TypeError("Expected image input to be a string")

    # Pasted values often carry stray whitespace or wrapping quotes
    # (e.g. Windows "Copy as path"), so drop both before classifying.
    clean_input = image_input.strip().strip('"')

    # Require a real scheme: a bare "http" prefix would also match local
    # paths such as "httpdocs/car.jpg" and send them to the network branch.
    if clean_input.lower().startswith(("http://", "https://")):
        print("📡 Loading image from URL")
        # Without a timeout requests waits forever on an unresponsive host.
        response = requests.get(clean_input, timeout=30)
        response.raise_for_status()
        return Image.open(BytesIO(response.content))

    if is_base64(clean_input):
        print("📦 Loading image from base64 string")
        image_data = base64.b64decode(clean_input)
        return Image.open(BytesIO(image_data))

    if os.path.exists(clean_input):
        print("📁 Loading image from local file path")
        return Image.open(clean_input)

    raise ValueError("Invalid image input string — not URL, base64, or file path")


def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\batty car front.jpg"):
    """Ask Gemini for the registration number of the vehicle in an image.

    Args:
        image_path: URL, base64 string, or local file path of the image;
            it is handed to ``load_image`` unchanged. The default points at
            a file on the original developer's machine and is kept only so
            the signature stays the same.

    Returns:
        The model's JSON answer parsed with ``json.loads``. The request asks
        for an object with a single required string key
        ``"registration_number"``.

    Raises:
        json.JSONDecodeError: If the response text is not valid JSON.
        Exceptions raised by ``load_image`` or by the Gemini client are
        not caught here.
    """
    # Single source of truth for the model id so the log line below cannot
    # drift from the model that is actually called.
    model_name = 'gemini-2.5-flash'

    # Authenticate
    genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
    model = genai.GenerativeModel(model_name)

    image = load_image(image_path)

    # JSON schema passed as response_schema for the structured reply.
    schema = {
        "type": "object",
        "properties": {
            "registration_number": {"type": "string", "description": "registration number of the vehicle in this image"}
        },
        "required": ["registration_number"],
    }

    # Ask Gemini
    response = model.generate_content(
        contents=["What is the registration number of the vehicle in this image", image],
        generation_config={
            "response_mime_type": "application/json",
            "response_schema": schema,
        },
    )

    print(f"{model_name} answer is: {response.text}")

    data = json.loads(response.text)
    print(f"data after pushing response into JSON is: {data}")
    return data



#product_identification_response()

# Gradio UI: one text box in (passed to product_identification_response),
# JSON viewer out.
demo = gr.Interface(
    product_identification_response,
    inputs="text",
    outputs="json",
    title="identify registration number",
    description="finds info about a product",
)

# share=True asks Gradio to also create a public share link.
demo.launch(share=True)