# animal-tracking-v2 / animal_classifier.py
# (Hugging Face upload metadata: pvanand — "Upload 14 files", commit d3f35ed verified)
import dspy
import base64
import requests
import os
from dotenv import load_dotenv
# Load environment variables from the local .env file (expects OPENROUTER_API_KEY).
load_dotenv(".env")
# May be None if the variable is missing; the remote LM will then fail to authenticate.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
# --- 1. Configuration & Helper Functions ---
def encode_image(image_source):
    """
    Accept a URL or a local file path and return a Base64 string.

    This is required because local Ollama instances generally expect
    embedded base64 data rather than fetching URLs directly.

    Args:
        image_source: An http(s) URL or a local filesystem path to an image.

    Returns:
        The Base64-encoded image bytes decoded as a UTF-8 string, or None
        when the image cannot be fetched/read (the error is printed, not
        raised — callers treat None as "could not process image").
    """
    try:
        # If it's a URL
        if image_source.startswith(('http://', 'https://')):
            # timeout prevents the whole script from hanging on a dead host;
            # requests.get has no default timeout.
            response = requests.get(image_source, timeout=30)
            response.raise_for_status()
            image_data = response.content
        # If it's a local file
        else:
            if not os.path.exists(image_source):
                raise FileNotFoundError(f"File not found: {image_source}")
            with open(image_source, "rb") as image_file:
                image_data = image_file.read()
        return base64.b64encode(image_data).decode('utf-8')
    except Exception as e:
        # Deliberate best-effort: swallow and report, returning None.
        print(f"Error encoding image: {e}")
        return None
# Configure DSPy to use the local Ollama instance
# We use a higher timeout because local inference on images can be slow
# NOTE(review): no timeout argument is actually passed to either LM below —
# confirm the default is sufficient, or pass one explicitly.
lm_local = dspy.LM("ollama_chat/qwen3-vl:2b", api_base="http://localhost:11434", api_key="")
# Remote OpenRouter-hosted VLM; authenticates with OPENROUTER_API_KEY from the environment.
lm_remote = dspy.LM(model="openrouter/qwen/qwen3-vl-8b-thinking", api_base="https://openrouter.ai/api/v1/chat/completions", api_key=OPENROUTER_API_KEY)
# The remote model is the active LM; lm_local is defined but not selected here.
dspy.configure(lm=lm_remote)
# --- 2. Define the Signature ---
# DSPy Signature: the docstring below is sent to the model as the task prompt,
# so its exact wording is part of runtime behavior.
class AnimalToScientificName(dspy.Signature):
    """
    You are a biologist. Analyze the visual features of the animal in the image
    and identify its species. Return ONLY the scientific name (Genus species).
    Do not add conversational filler.
    """
    # Input: The base64 string of the image
    # NOTE(review): this is a plain text field — the model may receive the base64
    # as raw text rather than a decoded image; confirm whether dspy.Image should
    # be used for true multimodal input.
    image_base64 = dspy.InputField(desc="Base64 encoded string of the animal image.")
    # Output: The Latin scientific name
    scientific_name = dspy.OutputField(desc="The scientific name in Latin (e.g., 'Panthera leo').")
# --- 3. Define the Module ---
class LocalAnimalClassifier(dspy.Module):
    """Classify an animal image (URL or local path) into its Latin scientific name."""

    def __init__(self):
        super().__init__()
        # ChainOfThought encourages the model to describe features (spots, stripes, ear shape)
        # before concluding the name, which drastically improves accuracy for VLMs.
        self.predictor = dspy.ChainOfThought(AnimalToScientificName)

    def forward(self, image_input):
        """
        Args:
            image_input: URL or local file path of the animal image.

        Returns:
            dspy.Prediction carrying `scientific_name` (and `reasoning`,
            either from ChainOfThought or a stub on the error path).
        """
        # 1. Convert input (URL or Path) to Base64
        encoded_img = encode_image(image_input)
        if not encoded_img:
            # Include `reasoning` so callers that read it (e.g. the __main__
            # driver) don't hit a missing attribute on the error path.
            return dspy.Prediction(
                scientific_name="Error: Could not process image.",
                reasoning="Image could not be encoded.",
            )
        # 2. Call the predictor
        # DSPy automatically handles the prompting structure for the VLM
        return self.predictor(image_base64=encoded_img)
# --- 4. Execution ---
if __name__ == "__main__":
    # Create the classifier
    classifier = LocalAnimalClassifier()
    # -- TEST CASE 1: Using a URL --
    # The sample URL is a black scorpion photo, so the label reflects that
    # (the previous "Red Panda" label did not match the image).
    print("--- Test Case 1: URL (Black Scorpion) ---")
    url = "https://moxieservices.com/app/uploads/2024/11/What-Is-a-Black-Scorpion-940.jpg.webp"
    print(f"Processing: {url}...")
    response_url = classifier(image_input=url)
    # getattr guards the failure path, where the Prediction may lack `reasoning`.
    print(f"\nModel Reasoning: {getattr(response_url, 'reasoning', '')}")
    print(f"Scientific Name: {response_url.scientific_name}")
    print("-" * 30)
    # -- TEST CASE 2: Using a Local File (Optional) --
    # Uncomment and change path to test a local file
    # local_path = "my_cat.jpg"
    # if os.path.exists(local_path):
    #     print(f"--- Test Case 2: Local File ({local_path}) ---")
    #     response_local = classifier(image_input=local_path)
    #     print(f"Scientific Name: {response_local.scientific_name}")