animal-tracking-v2 / ai_module_local.py
pvanand's picture
Upload 14 files
d3f35ed verified
"""
Local AI module using Ollama with qwen2.5:3b for image detection
Compatible with ai_module.py interface
"""
import json
import base64
import requests
from typing import Optional, Dict, Any
from pydantic import BaseModel, Field
# Ollama native API endpoint
OLLAMA_BASE_URL = "http://localhost:11434"
OLLAMA_CHAT_ENDPOINT = f"{OLLAMA_BASE_URL}/api/chat"
MODEL_NAME = "ministral-3:3b"
class Wildlife(BaseModel):
"""Wildlife information model."""
id: Optional[str] = Field(default=None, description="Unique identifier")
detected_class: str = Field(description="YOLO detection class name (e.g., buffalo, elephant)")
is_animal: bool = Field(description="Whether the detected object is an animal")
commonName: Optional[str] = None
scientificName: Optional[str] = None
description: Optional[str] = None
habitat: Optional[str] = None
behavior: Optional[str] = None
safetyInfo: Optional[str] = None
conservationStatus: Optional[str] = Field(default=None, description="LC, NT, VU, EN, or CR")
isDangerous: bool = Field(default=False)
def test_ollama_connection() -> Dict[str, Any]:
"""
Test if Ollama is running and the model is available.
Returns:
Dictionary with connection status
"""
try:
# Try a simple text-only request first
test_payload = {
"model": MODEL_NAME,
"messages": [
{
"role": "user",
"content": "Hello"
}
],
"stream": False
}
test_response = requests.post(OLLAMA_CHAT_ENDPOINT, json=test_payload, timeout=10)
test_response.raise_for_status()
return {
"success": True,
"message": "Ollama connection successful",
"endpoint": OLLAMA_CHAT_ENDPOINT
}
except Exception as e:
return {
"success": False,
"error": str(e),
"endpoint": OLLAMA_CHAT_ENDPOINT,
"suggestion": "Make sure Ollama is running and the model is installed: ollama pull qwen3-vl:2b"
}
def identify_wildlife(detected_class: str, base64_image: Optional[str] = None, mime_type: str = "image/jpeg", recent_context: Optional[list] = None) -> Dict[str, Any]:
"""
Identify wildlife from YOLO detection class name and return detailed information.
Compatible with ai_module.py interface.
Args:
detected_class: YOLO detection class name (e.g., "buffalo", "elephant", "bear")
base64_image: Optional base64-encoded image string
mime_type: MIME type of the image (default: "image/jpeg")
recent_context: Optional list of recently identified Wildlife objects for context
Returns:
Dictionary containing wildlife information matching Wildlife model structure
"""
# Build context string from recent identifications with full information
context_str = ""
if recent_context and len(recent_context) > 0:
context_str = "\n\nRecently detected animals in the area (use this context to help identify similar or related species):\n"
for idx, wildlife in enumerate(recent_context[-2:], 1): # Last 2 only
context_str += f"\n--- Animal {idx} ---\n"
if wildlife.commonName:
context_str += f"Common Name: {wildlife.commonName}\n"
if wildlife.scientificName:
context_str += f"Scientific Name: {wildlife.scientificName}\n"
if wildlife.description:
context_str += f"Description: {wildlife.description}\n"
if wildlife.habitat:
context_str += f"Habitat: {wildlife.habitat}\n"
if wildlife.behavior:
context_str += f"Behavior: {wildlife.behavior}\n"
if wildlife.conservationStatus:
context_str += f"Conservation Status: {wildlife.conservationStatus}\n"
if wildlife.isDangerous:
context_str += f"Dangerous: Yes\n"
if wildlife.safetyInfo:
context_str += f"Safety Info: {wildlife.safetyInfo}\n"
# Build user message content
base_prompt = "First, determine if the detected object is an animal. If it is an animal, provide detailed information including: common name, scientific name, description, habitat, behavior, safety information, conservation status (LC, NT, VU, EN, or CR), and whether it is dangerous to humans. If it is NOT an animal (e.g., person, vehicle, object), set is_animal to false and set all other fields to null."
if context_str:
base_prompt += context_str
# Add detected class context
if detected_class:
base_prompt = f"The YOLO detection system identified this object as: {detected_class}. {base_prompt}"
# Add JSON format requirement
prompt = f"""{base_prompt}
You must respond with ONLY a valid JSON object, no other text. The JSON must have this exact structure:
{{
"is_animal": true or false,
"commonName": "string or null",
"scientificName": "string or null",
"description": "string or null",
"habitat": "string or null",
"behavior": "string or null",
"safetyInfo": "string or null",
"conservationStatus": "LC, NT, VU, EN, CR, or null",
"isDangerous": true or false
}}
Return ONLY the JSON object, no markdown, no code blocks, no explanations."""
# For Ollama native API, images are passed as base64 strings in the images array
# Prepare messages for Ollama native API format
messages = [
{
"role": "system",
"content": "You are a wildlife expert that provides accurate, detailed information about animals. Output only valid JSON."
}
]
# Add user message with image if provided
user_message = {
"role": "user",
"content": prompt
}
if base64_image:
user_message["images"] = [base64_image] # Ollama native API uses images array with base64 strings
messages.append(user_message)
try:
# Call Ollama native API with format: json
payload = {
"model": MODEL_NAME,
"messages": messages,
"stream": False,
"format": "json" # Request JSON format response
}
api_response = requests.post(OLLAMA_CHAT_ENDPOINT, json=payload, timeout=120)
api_response.raise_for_status()
# Extract response content
response_data = api_response.json()
content = response_data.get("message", {}).get("content", "")
# Parse JSON from response
# With format: "json", Ollama should return valid JSON, but we'll handle edge cases
json_data = None
try:
# Clean the content
content_clean = content.strip()
# Remove markdown code blocks if present (shouldn't happen with format: json, but just in case)
if content_clean.startswith("```json"):
content_clean = content_clean[7:]
elif content_clean.startswith("```"):
content_clean = content_clean[3:]
if content_clean.endswith("```"):
content_clean = content_clean[:-3]
content_clean = content_clean.strip()
# Try parsing directly (format: json should return valid JSON)
json_data = json.loads(content_clean)
except (json.JSONDecodeError, ValueError) as e:
# If JSON parsing fails, try to extract JSON object from the response
try:
# Find JSON object in the response
start_idx = content_clean.find("{")
end_idx = content_clean.rfind("}") + 1
if start_idx >= 0 and end_idx > start_idx:
json_str = content_clean[start_idx:end_idx]
json_data = json.loads(json_str)
else:
raise ValueError("No JSON object found in response") from e
except (json.JSONDecodeError, ValueError) as e2:
# If JSON parsing fails, raise exception with debugging info
error_msg = f"Failed to parse JSON response: {str(e2)}"
error_msg += f"\nRaw response: {content}"
error_msg += f"\nCleaned response: {content_clean}"
error_msg += f"\nOriginal error: {str(e)}"
raise ValueError(error_msg) from e2
# Validate and structure the response
# Ensure all required fields are present
wildlife_data = {
"is_animal": json_data.get("is_animal", False),
"commonName": json_data.get("commonName"),
"scientificName": json_data.get("scientificName"),
"description": json_data.get("description"),
"habitat": json_data.get("habitat"),
"behavior": json_data.get("behavior"),
"safetyInfo": json_data.get("safetyInfo"),
"conservationStatus": json_data.get("conservationStatus"),
"isDangerous": json_data.get("isDangerous", False)
}
# Add detected_class back to the data (it's an input parameter, not from VLM)
wildlife_data["detected_class"] = detected_class
# If not an animal, ensure all fields are None/null
if not wildlife_data.get("is_animal", False):
wildlife_data["commonName"] = None
wildlife_data["scientificName"] = None
wildlife_data["description"] = None
wildlife_data["habitat"] = None
wildlife_data["behavior"] = None
wildlife_data["safetyInfo"] = None
wildlife_data["conservationStatus"] = None
wildlife_data["isDangerous"] = False
return wildlife_data
except Exception as e:
# Re-raise as a more specific error for compatibility
raise RuntimeError(f"Failed to identify wildlife: {str(e)}") from e
def get_wildlife_info(detected_class: str, base64_image: Optional[str] = None, mime_type: str = "image/jpeg", recent_context: Optional[list] = None) -> Wildlife:
"""
Get wildlife information and return as Wildlife model instance.
Compatible with ai_module.py interface.
Args:
detected_class: YOLO detection class name
base64_image: Optional base64-encoded image string
mime_type: MIME type of the image (default: "image/jpeg")
recent_context: Optional list of recently identified Wildlife objects for context
Returns:
Wildlife model instance with all information populated
"""
data = identify_wildlife(detected_class, base64_image, mime_type, recent_context)
return Wildlife(**data)
if __name__ == "__main__":
# Test with a sample image
print("=" * 70)
print("Wildlife Identification System - Test (Local Ollama)")
print("=" * 70)
# Test image URL (you can change this to any image URL)
test_image_url = "http://cdn.britannica.com/16/234216-050-C66F8665/beagle-hound-dog.jpg"
print(f"Image URL: {test_image_url}")
print(f"Model: {MODEL_NAME}")
print(f"Ollama Endpoint: {OLLAMA_CHAT_ENDPOINT}\n")
# Test Ollama connection first
print("Testing Ollama connection...")
connection_test = test_ollama_connection()
if not connection_test["success"]:
print(f"βœ— Connection test failed: {connection_test.get('error', 'Unknown error')}")
if 'suggestion' in connection_test:
print(f" Suggestion: {connection_test['suggestion']}")
print(f" Endpoint: {connection_test['endpoint']}")
exit(1)
print(f"βœ“ {connection_test['message']}\n")
try:
# Download and encode image to base64
print("Downloading image...")
response = requests.get(test_image_url, timeout=30)
response.raise_for_status()
base64_image = base64.b64encode(response.content).decode('utf-8')
print(f"βœ“ Image encoded ({len(base64_image)} characters)\n")
# Get wildlife information
print("Querying LLM for wildlife information...")
wildlife = get_wildlife_info(
detected_class="dog",
base64_image=base64_image,
mime_type="image/jpeg"
)
print("βœ“ Information retrieved\n")
# Display results
print("=" * 70)
print("RESULTS")
print("=" * 70)
print(f"Detected Class: {wildlife.detected_class}")
print(f"Is Animal: {wildlife.is_animal}")
if wildlife.is_animal:
print(f"Common Name: {wildlife.commonName}")
print(f"Scientific Name: {wildlife.scientificName}")
print(f"\nDescription:\n {wildlife.description}")
print(f"\nHabitat:\n {wildlife.habitat}")
print(f"\nBehavior:\n {wildlife.behavior}")
print(f"\nSafety Information:\n {wildlife.safetyInfo}")
print(f"\nConservation Status: {wildlife.conservationStatus}")
print(f"Is Dangerous: {wildlife.isDangerous}")
print("=" * 70)
# JSON output
print("\nJSON Output:")
print(json.dumps(wildlife.model_dump(), indent=2))
except requests.RequestException as e:
print(f"βœ— Network error: {e}")
except Exception as e:
print(f"βœ— Error: {e}")
import traceback
traceback.print_exc()