# getReg / app.py
# Derfel2025's picture
# changed getReg from 2 LLM calls to 1 LLM call
# b4df4ef
from dotenv import load_dotenv
import os
import google.generativeai as genai
from groq import Groq
from PIL import Image
import gradio as gr
import requests
from io import BytesIO
import base64
import re
import json
# Read key/value pairs from a local .env file into the process environment
# before any of the API keys below are looked up.
load_dotenv()
from groq import Groq

# Module-level Groq client, authenticated with GROQ_API_KEY from the environment.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Hugging Face token; only read here, the login call below is disabled.
HF_TOKEN = os.getenv("HF_TOKEN")
# login(token=HF_TOKEN)

# Local setup notes (a virtual environment is used locally):
#   python -m venv getReg
#   source getReg/Scripts/activate (for windows)
#   pip install -r requirements.txt
#   python app.py
def is_base64(s) -> bool:
    """Return True when *s* looks like a base64-encoded payload.

    This is a rough heuristic, not a validation: the value must be a string
    longer than 100 characters and, once surrounding whitespace is stripped,
    consist only of the base64 alphabet (A-Z, a-z, 0-9, ``+``, ``/``),
    padding (``=``) and line breaks.

    Args:
        s: Any value; non-strings are rejected without raising.

    Returns:
        A real ``bool`` (never a ``re.Match`` or ``None``), so the result is
        safe to compare, log, or serialize.
    """
    if not isinstance(s, str) or len(s) <= 100:
        return False
    # fullmatch anchors both ends, so no explicit ^...$ is needed.
    return re.fullmatch(r'[A-Za-z0-9+/=\n\r]+', s.strip()) is not None
def load_image(image_input):
    """Open an image supplied as a URL, a base64 string, or a local file path.

    The input is tried in that order: an ``http://`` / ``https://`` URL is
    downloaded, a string accepted by ``is_base64`` is decoded, and anything
    else must be an existing path on disk.

    Args:
        image_input: String describing the image. Surrounding whitespace and
            double quotes are ignored.

    Returns:
        The image object returned by ``Image.open``.

    Raises:
        TypeError: If ``image_input`` is not a string.
        ValueError: If the string is not a URL, base64 data, or an existing
            file path.
        requests.HTTPError: If the download returns an error status
            (raised by ``raise_for_status``).
    """
    if not isinstance(image_input, str):
        raise TypeError("Expected image input to be a string")

    # Pasted values may be padded with whitespace or wrapped in double quotes.
    clean_input = image_input.strip().strip('"')

    # Require a full scheme so that e.g. a local file named "http_photo.jpg"
    # is not mistaken for a URL.
    if clean_input.lower().startswith(("http://", "https://")):
        print("📡 Loading image from URL")
        # A timeout keeps a stalled server from blocking the request forever.
        response = requests.get(clean_input, timeout=30)
        response.raise_for_status()
        return Image.open(BytesIO(response.content))

    if is_base64(clean_input):
        print("📦 Loading image from base64 string")
        image_data = base64.b64decode(clean_input)
        return Image.open(BytesIO(image_data))

    if os.path.exists(clean_input):
        print("📁 Loading image from local file path")
        return Image.open(clean_input)

    raise ValueError("Invalid image input string — not URL, base64, or file path")
def product_identification_response(image_path=r"C:\Users\JoeJo\Downloads\batty car front.jpg"):
    """Ask Gemini for the registration number of the vehicle in an image.

    The image is loaded with ``load_image`` and sent to the Gemini model in a
    single call, together with a JSON response schema, so the reply can be
    parsed directly.

    Args:
        image_path: Image reference forwarded to ``load_image`` (URL, base64
            string, or local file path). The default is a sample file path
            on the original author's machine.

    Returns:
        The model's reply parsed from JSON. The schema sent with the request
        asks for an object with one required string key,
        ``"registration_number"``.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON.
        Exception: Anything raised by ``load_image`` or the Gemini client is
            propagated unchanged.
    """
    # Single source of truth for the model name, so the log line below cannot
    # drift from the model that is actually called.
    model_name = 'gemini-2.5-flash'

    # Authenticate with the key taken from the environment.
    genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
    model = genai.GenerativeModel(model_name)

    image = load_image(image_path)

    # Structured-output schema: one required string field.
    schema = {
        "type": "object",
        "properties": {
            "registration_number": {
                "type": "string",
                "description": "registration number of the vehicle in this image",
            }
        },
        "required": ["registration_number"],
    }

    # Ask Gemini, requesting a JSON reply that follows the schema.
    response = model.generate_content(
        contents=["What is the registration number of the vehicle in this image", image],
        generation_config={
            "response_mime_type": "application/json",
            "response_schema": schema,
        },
    )
    print(f"{model_name} answer is: {response.text}")

    data = json.loads(response.text)
    print(f"data after pushing response into JSON is: {data}")
    return data
# Gradio UI: a single text box carries the image reference, and the parsed
# model reply is rendered as JSON.
demo = gr.Interface(
    fn=product_identification_response,
    inputs="text",
    outputs="json",
    title="identify registration number",
    # Tell the user what the app does and what the text box expects.
    description=(
        "Finds the registration number of the vehicle in an image. "
        "Enter an image URL, a base64-encoded image, or a file path."
    ),
)

# share=True asks Gradio to also create a public share link.
demo.launch(share=True)