Image2Network

Sleeping

App Files Files Community

Image2Network / CV2Net.py

Ifeanyi

Upload 3 files

d83f3b2 verified 5 months ago

raw

history blame contribute delete

4.13 kB

	# define analysis engine
	from google.genai.types import Tool, GenerateContentConfig, GoogleSearch
	from google.genai import types
	from google import genai
	from io import BytesIO
	from PIL import Image
	import pandas as pd
	import gradio as gr
	import base64
	import json
	import os


	def cv2net(image_path, api_key):
	    """Extract an entity-relationship table from an image using Gemini.

	    The file at ``image_path`` is read as bytes and sent, together with a
	    fixed instruction prompt, to the ``gemini-2.0-flash`` model with the
	    Google Search tool attached and a JSON response requested. The reply
	    text is decoded as JSON and loaded into a DataFrame.

	    Args:
	        image_path: Path of the image file to analyse.
	        api_key: API key used to create the ``genai.Client``.

	    Returns:
	        A ``pandas.DataFrame`` built from the decoded JSON reply, or
	        ``None`` when the reply carries no text or is not valid JSON.
	    """
	    # Imported here so this function stays self-contained with respect to
	    # the file-level import list.
	    import mimetypes

	    # authenticate gemini client
	    client = genai.Client(api_key=api_key)

	    # call Google Search tool
	    google_search_tool = Tool(google_search=GoogleSearch())

	    with open(image_path, "rb") as f:
	        image_data = f.read()

	    # Label the bytes with the type implied by the file name instead of
	    # always claiming JPEG; fall back to the previous hard-coded value when
	    # the extension is unknown or does not map to an image type.
	    guessed_type, _ = mimetypes.guess_type(image_path)
	    if guessed_type and guessed_type.startswith("image/"):
	        mime_type = guessed_type
	    else:
	        mime_type = "image/jpeg"

	    # Raw string: the text contains "\|", which is not a valid escape
	    # sequence in a normal string literal. The runtime text is unchanged.
	    prompt = r"""
	I want you to carefully analyze the image(s) and map the functional relationship between every single identified entity in the image.
	Do not ignore small or partially visible items. Collect the following information from the image(s) and DO NOT include items, objects, or things that are not in the image(s):
	- Specific object name or person
	- Precise functional relationship verb
	- Class: object, person, animal, environment, text, brand
	- Primary function or role
	- Dominant color
	- Small, medium, large, tiny, huge
	- Material type
	- Location description
	- Current condition
	- Spatial context
	- Setting or environment
	- Relationship strength: strong, medium, weak
	- Spatial context
	- Scene context
	- Confidence: high, medium, low
	- Today's date (YYYY-MM-DD)
	Ignore what a person in an image is wearing. Return the results as one JSON file with the following structure exactly:
	```json
	[
	{
	"Vertex1": "specific_object_name_or_person",
	"Vertex2": "specific_object_name_or_person",
	"Relationship": "precise_functional_relationship_verb",
	"Vertex1_class": "Object\|Person\|Animal\|Environment\|Text\|Brand",
	"Vertex1_purpose": "primary_function_or_role",
	"Vertex1_size": "small\|medium\|large\|tiny\|huge",
	"Vertex1_position": "location_description",
	"Vertex1_state": "current_condition",
	"Vertex2_class": "Object\|Person\|Animal\|Environment\|Text\|Brand",
	"Vertex2_purpose": "primary_function_or_role",
	"Vertex2_size": "small\|medium\|large\|tiny\|huge",
	"Vertex2_position": "location_description",
	"Vertex2_state": "current_condition",
	"Relationship_type": "spatial\|functional\|contextual\|interactive",
	"Relationship_strength": "strong\|medium\|weak",
	"Spatial_context": "detailed_spatial_description",
	"Scene_context": "setting_or_environment",
	"Confidence": "high\|medium\|low",
	"Date": "today's_date"
	}
	]
	```
	Here is an example JSON output:
	```json
	[
	{
	"Vertex1": "Man",
	"Vertex2": "Bench",
	"Relationship": "Sits on",
	"Vertex1_class": "Person",
	"Vertex1_purpose": "Posing for photo",
	"Vertex1_size": "Medium",
	"Vertex1_position": "Left foreground",
	"Vertex1_state": "Visible",
	"Vertex2_class": "Object",
	"Vertex2_purpose": "A seat",
	"Vertex2_size": "Medium",
	"Vertex2_position": "Middle ground",
	"Vertex2_state": "Visible",
	"Relationship_type": "Functional",
	"Relationship_strength": "Strong",
	"Spatial_context": "Man is sitting on bench",
	"Scene_context": "Outdoor scene in the park",
	"Confidence": "High",
	"Date": "2025-07-16"
	}
	]
	```
	"""

	    # NOTE(review): this request combines a tool with
	    # response_mime_type="application/json"; confirm against the Gemini API
	    # docs that gemini-2.0-flash accepts both together.
	    image_part = types.Part.from_bytes(data=image_data, mime_type=mime_type)
	    response = client.models.generate_content(
	        model="gemini-2.0-flash",
	        contents=[image_part, prompt],
	        config=GenerateContentConfig(
	            tools=[google_search_tool],
	            response_modalities=["TEXT"],
	            response_mime_type="application/json",
	        ),
	    )

	    # A reply without text cannot be decoded; json.loads(None) would raise
	    # TypeError, which the JSONDecodeError handler below does not cover.
	    raw_text = response.text
	    if not raw_text:
	        print(f"Error decoding JSON for image: {image_path} - empty response")
	        return None

	    try:
	        # convert response from string to JSON
	        records = json.loads(raw_text)
	    except json.JSONDecodeError as e:
	        # Report the path, not the raw image bytes.
	        print(f"Error decoding JSON for image: {image_path} - {e}")
	        return None

	    # A single JSON object (dict of scalars) makes pd.DataFrame raise
	    # ValueError; treat it as a one-row table.
	    if isinstance(records, dict):
	        records = [records]

	    # convert JSON into a DataFrame
	    return pd.DataFrame(records)