| import os
|
| import requests
|
| import uuid
|
| from PIL import Image
|
| from io import BytesIO
|
| from dotenv import load_dotenv
|
| from cora_vision import CoraVision
|
| from cora_memory import CoraMemory
|
|
|
|
|
# Load settings via python-dotenv (the warning emitted later refers to a
# ".env" file), then read the Smithsonian API key from the environment.
# The key is None when the variable is not set.
load_dotenv()
SI_API_KEY = os.environ.get("SI_API_KEY")
|
|
|
class SmithsonianLoader:
    """Fetch Smithsonian Open Access search results and index their images.

    For every search row that exposes an online image, the image is
    downloaded, written under ``archive_images/``, passed to ``CoraVision``
    for an embedding and tags, and recorded through ``CoraMemory.save``.
    """

    # Directory (relative to the working directory) that receives downloads.
    ARCHIVE_DIR = "archive_images"
    # Seconds to wait on a single HTTP request before giving up; without a
    # timeout a stalled connection would block the loader forever.
    REQUEST_TIMEOUT = 30
    # Image modes Pillow's JPEG writer accepts without conversion.
    _JPEG_MODES = ("RGB", "L", "CMYK")

    def __init__(self):
        self.vision = CoraVision()
        self.memory = CoraMemory()
        self.base_url = "https://api.si.edu/openaccess/api/v1.0/search"

    def search_and_index(self, query, limit=5):
        """Search the Smithsonian API and index the results into CoraMemory.

        Args:
            query: Free-text search string, sent as the ``q`` parameter.
            limit: Number of rows requested from the API (``rows`` parameter).

        Returns:
            None. Progress and failures are printed to stdout. Errors are
            caught and reported rather than raised, and a failure on one
            artifact does not stop the remaining ones from being processed.
        """
        print(f"🏛️ Searching Smithsonian for: '{query}'...")

        if not SI_API_KEY:
            print("⚠️ Warning: SI_API_KEY not found in .env. API calls might fail if key is required.")

        params = {
            "q": query,
            "rows": limit,
            "api_key": SI_API_KEY,
        }

        try:
            response = requests.get(
                self.base_url,
                params=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                print(f"❌ API Error: {response.text}")
                return

            data = response.json()
            rows = data.get('response', {}).get('rows', [])

            print(f"Found {len(rows)} artifacts. Processing...")

            for item in rows:
                # Best effort per artifact: report the failure and move on.
                try:
                    self._index_item(item)
                except Exception as e:
                    print(f"⚠️ Failed to process item: {e}")

        except Exception as e:
            # Top-level boundary: the loader reports problems, never raises.
            print(f"Critical Loader Error: {e}")

    @staticmethod
    def _first_image_url(item):
        """Return the ``content`` URL of the row's first media entry, or None.

        Explicit ``null`` values anywhere along the nested path are treated
        the same as missing keys.
        """
        content = item.get('content') or {}
        descriptive = content.get('descriptiveNonRepeating') or {}
        online_media = descriptive.get('online_media') or {}
        media = online_media.get('media') or []
        if not media:
            return None
        return media[0].get('content')

    def _index_item(self, item):
        """Download, store, embed and record one search row.

        Rows without an image URL are skipped silently. Any error raised
        here is handled by the caller's per-item ``except``.
        """
        title = item.get('title', 'Unknown Artifact')
        image_url = self._first_image_url(item)
        if not image_url:
            return

        print(f"📥 Downloading: {title}...")

        img_resp = requests.get(image_url, timeout=self.REQUEST_TIMEOUT)
        # Surface HTTP errors directly instead of letting an error page
        # reach Image.open and fail with an unrelated message.
        img_resp.raise_for_status()
        img = Image.open(BytesIO(img_resp.content))

        # The file is always written as .jpg, and the JPEG writer rejects
        # modes such as RGBA or P. Convert up front so those artifacts are
        # indexed, and so the saved file and the embedded image match.
        if img.mode not in self._JPEG_MODES:
            img = img.convert("RGB")

        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(self.ARCHIVE_DIR, exist_ok=True)
        local_path = os.path.join(self.ARCHIVE_DIR, f"si_{uuid.uuid4()}.jpg")
        img.save(local_path)

        emb = self.vision.embed_image(img)
        # Copy before appending so a list owned by the vision component is
        # never mutated. NOTE(review): detect_tags' return type is not
        # visible here; None is tolerated defensively.
        tags = list(self.vision.detect_tags(img) or [])
        tags.append("smithsonian_open_access")

        self.memory.save(local_path, emb, title, tags)
        print(f"✅ Indexed: {title}")
|
|
|
if __name__ == "__main__":
    # Script entry point: run a small sample indexing job.
    SmithsonianLoader().search_and_index("Roman Armor", limit=3)
|
|
|