# animal-tracking-v2 / animal_classifier.py
# (Hugging Face upload metadata: pvanand — "Upload 14 files", commit d3f35ed verified)
import dspy
import base64
import requests
import os
from dotenv import load_dotenv
# Load environment variables from the local .env file (expects OPENROUTER_API_KEY).
load_dotenv(".env")
# May be None if the variable is missing; the remote LM will then fail to authenticate.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
# --- 1. Configuration & Helper Functions ---
def encode_image(image_source):
    """
    Accept a URL or a local file path and return a Base64 string.

    This is required because local Ollama instances generally expect
    embedded base64 data rather than fetching URLs directly.

    Args:
        image_source: An http(s) URL or a local filesystem path to an image.

    Returns:
        The Base64-encoded image bytes decoded as a UTF-8 string, or None
        when the image cannot be fetched/read (the error is printed, not
        raised — callers treat None as "could not process image").
    """
    try:
        # If it's a URL
        if image_source.startswith(('http://', 'https://')):
            # timeout prevents the whole script from hanging on a dead host;
            # requests.get has no default timeout.
            response = requests.get(image_source, timeout=30)
            response.raise_for_status()
            image_data = response.content
        # If it's a local file
        else:
            if not os.path.exists(image_source):
                raise FileNotFoundError(f"File not found: {image_source}")
            with open(image_source, "rb") as image_file:
                image_data = image_file.read()
        return base64.b64encode(image_data).decode('utf-8')
    except Exception as e:
        # Deliberate best-effort: swallow and report, returning None.
        print(f"Error encoding image: {e}")
        return None
# Configure DSPy to use the local Ollama instance
# We use a higher timeout because local inference on images can be slow
# NOTE(review): no timeout argument is actually passed to either LM below —
# confirm the default is sufficient, or pass one explicitly.
lm_local = dspy.LM("ollama_chat/qwen3-vl:2b", api_base="http://localhost:11434", api_key="")
# Remote OpenRouter-hosted VLM; authenticates with OPENROUTER_API_KEY from the environment.
lm_remote = dspy.LM(model="openrouter/qwen/qwen3-vl-8b-thinking", api_base="https://openrouter.ai/api/v1/chat/completions", api_key=OPENROUTER_API_KEY)
# The remote model is the active LM; lm_local is defined but not selected here.
dspy.configure(lm=lm_remote)
# --- 2. Define the Signature ---
# DSPy Signature: the docstring below is sent to the model as the task prompt,
# so its exact wording is part of runtime behavior.
class AnimalToScientificName(dspy.Signature):
    """
    You are a biologist. Analyze the visual features of the animal in the image
    and identify its species. Return ONLY the scientific name (Genus species).
    Do not add conversational filler.
    """
    # Input: The base64 string of the image
    # NOTE(review): this is a plain text field — the model may receive the base64
    # as raw text rather than a decoded image; confirm whether dspy.Image should
    # be used for true multimodal input.
    image_base64 = dspy.InputField(desc="Base64 encoded string of the animal image.")
    # Output: The Latin scientific name
    scientific_name = dspy.OutputField(desc="The scientific name in Latin (e.g., 'Panthera leo').")
# --- 3. Define the Module ---
class LocalAnimalClassifier(dspy.Module):
    """Classify an animal image (URL or local path) into its Latin scientific name."""

    def __init__(self):
        super().__init__()
        # ChainOfThought encourages the model to describe features (spots, stripes, ear shape)
        # before concluding the name, which drastically improves accuracy for VLMs.
        self.predictor = dspy.ChainOfThought(AnimalToScientificName)

    def forward(self, image_input):
        """
        Args:
            image_input: URL or local file path of the animal image.

        Returns:
            dspy.Prediction carrying `scientific_name` (and `reasoning`,
            either from ChainOfThought or a stub on the error path).
        """
        # 1. Convert input (URL or Path) to Base64
        encoded_img = encode_image(image_input)
        if not encoded_img:
            # Include `reasoning` so callers that read it (e.g. the __main__
            # driver) don't hit a missing attribute on the error path.
            return dspy.Prediction(
                scientific_name="Error: Could not process image.",
                reasoning="Image could not be encoded.",
            )
        # 2. Call the predictor
        # DSPy automatically handles the prompting structure for the VLM
        return self.predictor(image_base64=encoded_img)
# --- 4. Execution ---
if __name__ == "__main__":
    # Create the classifier
    classifier = LocalAnimalClassifier()
    # -- TEST CASE 1: Using a URL --
    # The sample URL is a black scorpion photo, so the label reflects that
    # (the previous "Red Panda" label did not match the image).
    print("--- Test Case 1: URL (Black Scorpion) ---")
    url = "https://moxieservices.com/app/uploads/2024/11/What-Is-a-Black-Scorpion-940.jpg.webp"
    print(f"Processing: {url}...")
    response_url = classifier(image_input=url)
    # getattr guards the failure path, where the Prediction may lack `reasoning`.
    print(f"\nModel Reasoning: {getattr(response_url, 'reasoning', '')}")
    print(f"Scientific Name: {response_url.scientific_name}")
    print("-" * 30)
    # -- TEST CASE 2: Using a Local File (Optional) --
    # Uncomment and change path to test a local file
    # local_path = "my_cat.jpg"
    # if os.path.exists(local_path):
    #     print(f"--- Test Case 2: Local File ({local_path}) ---")
    #     response_local = classifier(image_input=local_path)
    #     print(f"Scientific Name: {response_local.scientific_name}")