File size: 4,317 Bytes
adae09f d85086a b4df4ef adae09f b4df4ef adae09f d85086a b4df4ef adae09f 4e3c349 adae09f d85086a b4df4ef adae09f b4df4ef adae09f b4df4ef adae09f b4df4ef adae09f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
from dotenv import load_dotenv
import os
import google.generativeai as genai
from groq import Groq
from PIL import Image
import gradio as gr
import requests
from io import BytesIO
import base64
import re
import json
# Read settings from the local .env file into the process environment
# before any API client is constructed.
load_dotenv()
from groq import Groq

# Module-level Groq client, keyed from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Hugging Face token taken from the environment; the login call that would
# consume it is currently disabled.
HF_TOKEN = os.getenv("HF_TOKEN")
# login(token=HF_TOKEN)

# Running locally inside a virtual environment:
#   python -m venv getReg
#   source getReg/Scripts/activate   (for Windows)
#   pip install -r requirements.txt
#   python app.py
def is_base64(s):
    """Heuristically decide whether *s* looks like a base64 payload.

    This is a rough plausibility check, not a validation. The value must be
    a ``str`` longer than 100 characters and, once surrounding whitespace is
    stripped, consist only of base64 alphabet characters (``A-Z``, ``a-z``,
    ``0-9``, ``+``, ``/``), ``=`` padding and CR/LF line breaks.

    Args:
        s: Any object; non-string values are rejected.

    Returns:
        bool: ``True`` when the heuristic matches, otherwise ``False``.
    """
    # Short strings are far more likely to be paths or junk than image data.
    if not isinstance(s, str) or len(s) <= 100:
        return False
    # fullmatch anchors both ends, and comparing with None yields a real bool
    # instead of leaking a Match object / None to callers.
    return re.fullmatch(r'[A-Za-z0-9+/=\n\r]+', s.strip()) is not None
def load_image(image_input, timeout=30):
    """Open an image supplied as a URL, a base64 string or a local file path.

    The input is tried, in order, as an ``http(s)://`` URL, as a base64
    payload (according to ``is_base64``) and finally as an existing path on
    the local filesystem.

    Args:
        image_input: String describing where the image comes from. Surrounding
            whitespace and double quotes are ignored.
        timeout: Seconds to wait for the HTTP server when downloading from a
            URL, so an unresponsive host cannot block the caller forever.

    Returns:
        The image object produced by ``Image.open``.

    Raises:
        TypeError: If ``image_input`` is not a string.
        ValueError: If the string is not a URL, base64 data or an existing
            file path.
        requests.RequestException: If the download fails, times out or
            returns an error status.
    """
    if not isinstance(image_input, str):
        raise TypeError("Expected image input to be a string")

    # Values are often pasted with wrapping quotes and stray whitespace.
    clean_input = image_input.strip().strip('"')

    # NOTE(review): the URL and file-path branches fetch/open whatever the
    # caller supplies. If this input can come from untrusted users, consider
    # restricting allowed hosts and directories.

    # Require a real scheme so names such as "http_photo.png" are not
    # mistaken for URLs.
    if clean_input.lower().startswith(("http://", "https://")):
        print("📡 Loading image from URL")
        response = requests.get(clean_input, timeout=timeout)
        response.raise_for_status()
        return Image.open(BytesIO(response.content))

    if is_base64(clean_input):
        print("📦 Loading image from base64 string")
        image_data = base64.b64decode(clean_input)
        return Image.open(BytesIO(image_data))

    if os.path.exists(clean_input):
        print("📁 Loading image from local file path")
        return Image.open(clean_input)

    raise ValueError("Invalid image input string — not URL, base64, or file path")
def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\batty car front.jpg"):
    """Ask Gemini for the registration number of the vehicle in an image.

    The image is loaded with ``load_image`` and sent to the Gemini model
    together with a JSON response schema, and the JSON reply is decoded and
    returned.

    Args:
        image_path: URL, base64 string or local file path of the image, in
            any form ``load_image`` accepts. The default is a Windows path on
            the original developer's machine, so real callers should always
            pass a value.

    Returns:
        The JSON-decoded model reply. The requested schema is an object with
        a single required string field, ``registration_number``.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
        Exception: Anything raised by ``load_image`` or by the Gemini client.
    """
    # Single source of truth for the model id so the log line below cannot
    # drift from the model that is actually called.
    model_name = 'gemini-2.5-flash'

    # Authenticate
    genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
    model = genai.GenerativeModel(model_name)

    image = load_image(image_path)

    # Constrain the reply to one required string field.
    schema = {
        "type": "object",
        "properties": {
            "registration_number": {
                "type": "string",
                "description": "registration number of the vehicle in this image",
            },
        },
        "required": ["registration_number"],
    }

    # Ask Gemini
    response = model.generate_content(
        contents=["What is the registration number of the vehicle in this image", image],
        generation_config={
            "response_mime_type": "application/json",
            "response_schema": schema,
        },
    )
    print(f"{model_name} answer is: {response.text}")

    data = json.loads(response.text)
    print(f"data after pushing response into JSON is: {data}")
    return data
#product_identification_response()
# Gradio UI: one text box in (image URL, base64 string or file path), and the
# JSON returned by product_identification_response out.
demo = gr.Interface(
    fn=product_identification_response,
    inputs="text",
    outputs="json",
    title="identify registration number",
    description="finds the registration number of the vehicle in an image",
)
# share=True asks Gradio for a public share link in addition to the local
# server (see the Gradio launch documentation).
demo.launch(share=True)