import os
import requests
import uuid
from PIL import Image
from io import BytesIO
from dotenv import load_dotenv
from cora_vision import CoraVision
from cora_memory import CoraMemory

# Load variables from a local .env file into the process environment, then
# read the Smithsonian API key. SI_API_KEY is None when the variable is unset;
# search_and_index() prints a warning in that case and still attempts the call.
load_dotenv()
SI_API_KEY = os.environ.get("SI_API_KEY")

class SmithsonianLoader:
    """Search the Smithsonian Open Access API and index the images it returns.

    For every search hit that exposes online media, the first media file is
    downloaded, written to ``archive_dir`` as a JPEG, passed to
    ``CoraVision.embed_image`` / ``CoraVision.detect_tags``, and stored via
    ``CoraMemory.save``. The API key is read from the module-level
    ``SI_API_KEY``.
    """

    def __init__(self, archive_dir="archive_images", timeout=30):
        """Create the vision/memory helpers and store loader options.

        Args:
            archive_dir: Directory that downloaded images are written to. It
                is created on demand the first time an image is saved.
            timeout: Seconds to wait on each HTTP request before giving up.
                Passed straight to ``requests.get``.
        """
        self.vision = CoraVision()
        self.memory = CoraMemory()
        self.base_url = "https://api.si.edu/openaccess/api/v1.0/search"
        self.archive_dir = archive_dir
        self.timeout = timeout

    def search_and_index(self, query, limit=5):
        """
        Searches Smithsonian API and indexes results into CoraMemory.

        Args:
            query: Search text sent as the ``q`` parameter.
            limit: Number of rows requested from the API (``rows`` parameter).
                Hits without usable media are skipped, so fewer items than
                this may end up indexed.

        Returns:
            None. Progress and failures are reported with ``print``; errors
            from the search request or from individual items are caught
            here rather than raised to the caller.
        """
        print(f"🏛️  Searching Smithsonian for: '{query}'...")

        if not SI_API_KEY:
            print("⚠️  Warning: SI_API_KEY not found in .env. API calls might fail if key is required.")

        params = {
            "q": query,
            "rows": limit,
            "api_key": SI_API_KEY,
        }

        # Broad catch on purpose: this is the loader's top-level boundary and
        # the method reports problems instead of propagating them.
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(self.base_url, params=params, timeout=self.timeout)
            if response.status_code != 200:
                print(f"❌ API Error: {response.text}")
                return

            data = response.json()
            rows = data.get('response', {}).get('rows', [])

            print(f"Found {len(rows)} artifacts. Processing...")

            for item in rows:
                # One bad item must not abort the rest of the batch.
                try:
                    self._index_item(item)
                except Exception as e:
                    print(f"⚠️ Failed to process item: {e}")

        except Exception as e:
            print(f"Critical Loader Error: {e}")

    @staticmethod
    def _first_media_url(item):
        """Return the ``content`` URL of the item's first media entry, or None."""
        content = item.get('content', {})
        media = content.get('descriptiveNonRepeating', {}).get('online_media', {}).get('media', [])
        if not media:
            return None
        return media[0].get('content')

    def _index_item(self, item):
        """Download, save, embed, tag and store one search hit.

        Returns without doing anything when the hit has no media URL. Any
        exception is left to the caller, which logs it and moves on.
        """
        title = item.get('title', 'Unknown Artifact')
        image_url = self._first_media_url(item)
        if not image_url:
            return

        print(f"📥 Downloading: {title}...")

        img_resp = requests.get(image_url, timeout=self.timeout)
        # Fail with the HTTP error instead of handing an error page to PIL.
        img_resp.raise_for_status()
        img = Image.open(BytesIO(img_resp.content))

        # The file is written with a .jpg name, and JPEG cannot store alpha or
        # palette modes (e.g. RGBA / P from PNG or GIF sources), so normalise
        # everything that is not already RGB or greyscale.
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")

        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(self.archive_dir, exist_ok=True)
        local_path = os.path.join(self.archive_dir, f"si_{uuid.uuid4()}.jpg")
        img.save(local_path)

        # Embed and tag the same image that was written to disk.
        emb = self.vision.embed_image(img)
        tags = self.vision.detect_tags(img)

        # Mark where the record came from.
        tags.append("smithsonian_open_access")

        self.memory.save(local_path, emb, title, tags)
        print(f"✅ Indexed: {title}")

if __name__ == "__main__":
    # Manual smoke test: request three rows for a sample query and index
    # whichever of them expose an image.
    SmithsonianLoader().search_and_index("Roman Armor", limit=3)