# DSPy image-to-species classifier: sends an animal photo (URL or local file)
# to a vision-language model and prints the Latin scientific name.
# Third-party dependencies.
import dspy
import base64
import requests
import os

from dotenv import load_dotenv

# Pull secrets (e.g. the OpenRouter key) from a local .env file so they
# never live in the source tree.
load_dotenv(".env")
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
# --- 1. Configuration & Helper Functions ---
def encode_image(image_source):
    """
    Return the Base64-encoded contents of an image, or None on failure.

    Accepts either an HTTP(S) URL or a local file path. This is required
    because local Ollama instances generally expect embedded base64 data
    rather than fetching URLs directly.

    Args:
        image_source: An http:// or https:// URL, or a local filesystem path.

    Returns:
        The image bytes encoded as a base64 UTF-8 string, or None if the
        image could not be fetched or read (the error is printed).
    """
    try:
        # If it's a URL
        if image_source.startswith(('http://', 'https://')):
            # Bound the request so an unresponsive host cannot hang the
            # whole pipeline indefinitely.
            response = requests.get(image_source, timeout=30)
            response.raise_for_status()
            image_data = response.content
        # If it's a local file
        else:
            if not os.path.exists(image_source):
                raise FileNotFoundError(f"File not found: {image_source}")
            with open(image_source, "rb") as image_file:
                image_data = image_file.read()
        return base64.b64encode(image_data).decode('utf-8')
    except Exception as e:
        # Best-effort contract: callers test for None instead of handling
        # exceptions, so report and swallow here.
        print(f"Error encoding image: {e}")
        return None
# Model endpoints. Two LMs are defined — a local Ollama instance (image
# inference there can be slow, hence it stays optional) and a remote
# OpenRouter-hosted model — with the remote one activated below.
lm_local = dspy.LM(
    "ollama_chat/qwen3-vl:2b",
    api_base="http://localhost:11434",
    api_key="",
)
lm_remote = dspy.LM(
    model="openrouter/qwen/qwen3-vl-8b-thinking",
    api_base="https://openrouter.ai/api/v1/chat/completions",
    api_key=OPENROUTER_API_KEY,
)
dspy.configure(lm=lm_remote)
# --- 2. Define the Signature ---
class AnimalToScientificName(dspy.Signature):
    """
    You are a biologist. Analyze the visual features of the animal in the image
    and identify its species. Return ONLY the scientific name (Genus species).
    Do not add conversational filler.
    """

    # NOTE: in DSPy the class docstring above becomes the task instructions
    # sent to the model, so its wording is runtime behavior — edit with care.

    # Input: base64 string of the image (see encode_image).
    image_base64 = dspy.InputField(desc="Base64 encoded string of the animal image.")
    # Output: the Latin binomial name.
    scientific_name = dspy.OutputField(desc="The scientific name in Latin (e.g., 'Panthera leo').")
# --- 3. Define the Module ---
class LocalAnimalClassifier(dspy.Module):
    """Pipeline turning an image URL or file path into a species name."""

    def __init__(self):
        super().__init__()
        # ChainOfThought pushes the VLM to describe visible features
        # (spots, stripes, ear shape) before committing to a name, which
        # drastically improves accuracy.
        self.predictor = dspy.ChainOfThought(AnimalToScientificName)

    def forward(self, image_input):
        # Step 1: normalize the input (URL or local path) into base64.
        encoded_img = encode_image(image_input)
        if not encoded_img:
            # encode_image already printed the error; surface a sentinel.
            return dspy.Prediction(scientific_name="Error: Could not process image.")
        # Step 2: let DSPy build the VLM prompt and run the predictor.
        return self.predictor(image_base64=encoded_img)
# --- 4. Execution ---
if __name__ == "__main__":
    # Create the classifier
    classifier = LocalAnimalClassifier()

    # -- TEST CASE 1: Using a URL --
    # Fix: the label previously said "Red Panda", but the URL points at a
    # black scorpion photo; the printed label now matches the image.
    print("--- Test Case 1: URL (Black Scorpion) ---")
    url = "https://moxieservices.com/app/uploads/2024/11/What-Is-a-Black-Scorpion-940.jpg.webp"
    print(f"Processing: {url}...")
    response_url = classifier(image_input=url)
    print(f"\nModel Reasoning: {response_url.reasoning}")
    print(f"Scientific Name: {response_url.scientific_name}")
    print("-" * 30)

    # -- TEST CASE 2: Using a Local File (Optional) --
    # Uncomment and change path to test a local file
    # local_path = "my_cat.jpg"
    # if os.path.exists(local_path):
    #     print(f"--- Test Case 2: Local File ({local_path}) ---")
    #     response_local = classifier(image_input=local_path)
    #     print(f"Scientific Name: {response_local.scientific_name}")