Spaces:
Sleeping
Sleeping
File size: 7,296 Bytes
76bc9a7 1476642 76bc9a7 1476642 5441644 1476642 5441644 1476642 5441644 1476642 76bc9a7 5441644 76bc9a7 8acf7d6 76bc9a7 c550680 76bc9a7 c550680 f78d74e 76bc9a7 f78d74e 76bc9a7 f78d74e 76bc9a7 f78d74e 8acf7d6 76bc9a7 c550680 5441644 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 | import time
import requests
import random
import pandas as pd
# Base endpoint of the Met Museum public collection API (v1).
# NOTE: the value already ends with a trailing "/", so paths appended to it
# should not start with another "/".
URL = "https://collectionapi.metmuseum.org/public/collection/v1/"
'''
Need to add error handling
'''
# gets information on a single object
def get_object(objectID, timeout=15):
    """
    Fetch the metadata record of a single object.

    - objectID: the Met object ID to look up
    - timeout: seconds to wait for the server before giving up

    Returns the decoded JSON body, or None if the request fails or the
    response is not valid JSON (the error is printed).
    """
    # URL already ends with "/"; strip it so the path has exactly one slash.
    endpoint = f"{URL.rstrip('/')}/objects/{objectID}"
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(endpoint, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on older requests versions.
        print(f"Error fetching object {objectID}: {e}")
        return None
# gets all the objects that have some sort of image
def get_objectsWithImages(timeout=30):
    """
    Search for every object that has some sort of image.

    - timeout: seconds to wait for the server before giving up

    Returns a tuple (total, objectIDs). On any request or decoding failure
    the error is printed and (0, []) is returned. objectIDs is always a
    list, even when the API reports null for an empty result.
    """
    try:
        response = requests.get(
            f"{URL.rstrip('/')}/search?hasImages=true&q=*",
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json() or {}
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching objects with images: {e}")
        return 0, []

    # The API sends "objectIDs": null when nothing matches, so a plain
    # .get(key, []) would hand None back to the caller.
    objectIDs = data.get("objectIDs") or []
    total = data.get("total") or 0
    return total, objectIDs
# gets the urls for random objects with images
def get_images(totalObjects, objectIDs, limit):
    """
    Pick random objects and collect the image URL and title of up to `limit`
    of them.

    - totalObjects: number of objects the caller reports as available
    - objectIDs: list of object IDs to choose from
    - limit: maximum number of images to return

    Returns a list of (primaryImage, title) tuples; the list may be shorter
    than `limit` when not enough sampled objects have a primary image.
    Returns [] when there is nothing to sample, when limit is not positive,
    or when an unexpected error occurs (the error is printed).
    """
    try:
        # Never index past the list that was actually supplied, whatever
        # total the caller reports.
        pool_size = min(totalObjects, len(objectIDs))
        if limit <= 0 or pool_size <= 0:
            return []

        # Grab extra in case a primary image is blank (works best on small
        # limits), but never ask for more samples than the pool holds:
        # random.sample raises ValueError in that case.
        sample_size = min(pool_size, limit + 20)

        images = []
        for i in random.sample(range(pool_size), sample_size):
            obj = get_object(objectIDs[i])
            if obj and obj.get("primaryImage"):
                images.append((obj["primaryImage"], obj.get("title", "Untitled")))
                if len(images) >= limit:
                    break
        return images
    except Exception as e:
        # Deliberately best-effort: callers expect a list, never an exception.
        print(f"Error in get_images: {e}")
        return []
def department_counts(q="*", max_ids=200):
    """
    Analytic: return a list of (department, count) for search results.

    - q: search query (default '*' = anything)
    - max_ids: cap how many object IDs to inspect (keeps it fast)

    Uses the Met search endpoint (images only), then tallies the 'department'
    field from each object's metadata. Objects whose metadata cannot be
    fetched are skipped; objects without a department are counted under
    "(unknown)". Returns [] if the search itself fails (the error is printed).
    The result is sorted by count, highest first.
    """
    try:
        resp = requests.get(f"{URL}search", params={"q": q, "hasImages": True}, timeout=15)
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on older requests versions.
        print(f"Error fetching search results: {e}")
        return []

    if not isinstance(payload, dict):
        return []
    # "objectIDs" is null when nothing matches.
    ids = (payload.get("objectIDs") or [])[:max_ids]

    counts = {}
    for oid in ids:
        try:
            obj = get_object(oid)
        except Exception:
            # Best-effort tally: one bad object must not abort the analytic.
            continue
        # get_object returns None on failure; skip it explicitly rather than
        # relying on an AttributeError being swallowed.
        if not isinstance(obj, dict):
            continue
        dep = obj.get("department") or "(unknown)"
        counts[dep] = counts.get(dep, 0) + 1

    # return sorted (department, count) pairs, highest first
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
"""
Get a list of departments
"""
def list_met_departments():
    """
    Return a DataFrame of all Met departments (id + name) to help you choose.

    The frame always has the columns "departmentId" and "displayName". If the
    request fails or the response cannot be decoded, the error is printed and
    an empty frame with those columns is returned.
    """
    columns = ["departmentId", "displayName"]
    try:
        # URL already ends with "/"; strip it so the path has exactly one slash.
        r = requests.get(f"{URL.rstrip('/')}/departments", timeout=15)
        r.raise_for_status()
        payload = r.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching departments: {e}")
        return pd.DataFrame(columns=columns)

    depts = payload.get("departments") if isinstance(payload, dict) else None
    # Passing `columns` keeps the frame well-formed even when the list is
    # empty, instead of failing on a missing-column lookup.
    return pd.DataFrame(depts or [], columns=columns)
"""
Returns a DataFrame of objects that match the search term and have images, together with their metadata.
To be polite to the API, at most 5 random results (the default) are taken from each department;
this limit can be changed with a parameter. The departments to search can also be chosen.
"""
def search_for_images(query,
                      max_per_department=5,
                      departments=None):
    """
    Search the Met collection for objects with images that match `query`.

    - query: search term; must not be empty or whitespace only
    - max_per_department: maximum number of randomly sampled objects to
      fetch from each department
    - departments: iterable of department IDs to search, or None to search
      every department the API lists

    Returns a DataFrame with one row per object that has a primary image,
    sorted by department and title. The frame has the same columns even when
    no object was found.

    Raises ValueError if `query` is empty, and requests.RequestException if
    the department list cannot be fetched (only when `departments` is None).
    Failures while searching a department or fetching a single object are
    skipped.
    """
    if not query or not query.strip():
        raise ValueError("Please provide a query.")

    # URL already ends with "/"; strip it so every path has exactly one slash.
    base = URL.rstrip("/")
    timeout = 15
    columns = [
        "objectID",
        "title",
        "artistDisplayName",
        "objectDate",
        "culture",
        "medium",
        "department",
        "objectName",
        "classification",
        "primaryImageSmall",
        "primaryImage",
        "objectURL",
        "isPublicDomain",
    ]

    # 1) Departments
    if departments is None:
        resp = requests.get(f"{base}/departments", timeout=timeout)
        resp.raise_for_status()
        dept_list = resp.json().get("departments", [])
        dept_ids = [d["departmentId"] for d in dept_list]
        dept_id_to_name = {d["departmentId"]: d["displayName"] for d in dept_list}
    else:
        dept_ids = list(departments)
        # Names will be filled from object details; provide a generic fallback
        dept_id_to_name = {d: f"Department {d}" for d in dept_ids}

    # 2) Per-department search -> random sample of IDs -> fetch details
    rows = []
    print("Searching for", query)
    print("Across departments:", dept_ids)
    for dept_id in dept_ids:
        # limit to only those with images
        params = {
            "q": query,
            "hasImages": "true",
            "departmentId": dept_id,
        }
        try:
            r = requests.get(f"{base}/search", params=params, timeout=timeout)
            r.raise_for_status()
            # "objectIDs" is null when nothing matches.
            object_ids = (r.json() or {}).get("objectIDs") or []
        except (requests.RequestException, ValueError):
            continue
        if not object_ids:
            continue

        # Clamp at 0 so a negative max_per_department samples nothing
        # instead of making random.sample raise.
        sample_size = max(0, min(max_per_department, len(object_ids)))
        for oid in random.sample(object_ids, k=sample_size):
            try:
                detail = requests.get(f"{base}/objects/{oid}", timeout=timeout)
                # An error status carries a body without the object fields;
                # treat it as a failed fetch.
                detail.raise_for_status()
                obj = detail.json()
            except (requests.RequestException, ValueError):
                continue
            # skip if no image
            if not isinstance(obj, dict) or not obj.get("primaryImage"):
                continue
            rows.append({
                "objectID": obj.get("objectID"),
                "title": obj.get("title"),
                "artistDisplayName": obj.get("artistDisplayName"),
                "objectDate": obj.get("objectDate"),
                "culture": obj.get("culture"),
                "medium": obj.get("medium"),
                "department": obj.get("department") or dept_id_to_name.get(dept_id),
                "objectName": obj.get("objectName"),
                "classification": obj.get("classification"),
                "primaryImageSmall": obj.get("primaryImageSmall"),
                "primaryImage": obj.get("primaryImage"),
                "objectURL": obj.get("objectURL"),
                "isPublicDomain": obj.get("isPublicDomain"),
            })

    # Passing `columns` gives an empty result the same schema as a full one.
    df = pd.DataFrame(rows, columns=columns)
    if not df.empty:
        df = df.sort_values(["department", "title"]).reset_index(drop=True)
    return df
"""
Command-line interface, used when this file is run directly.
Displays the departments so the user can choose one by its ID,
then searches the chosen department(s) for the query and prints the results, including their image URLs.
"""
def main():
    """
    Interactive command-line loop for searching the Met collection.

    Prints the department table, then repeatedly asks for a department ID
    (empty input = all departments, 'q' or 'quit' = exit) and a search term,
    and prints the matching objects as a table. At most 2 objects are
    fetched per department. A failed search is reported and the loop
    continues with the next prompt.
    """
    print("Welcome to Met Search.")
    departments = list_met_departments()
    print(departments.to_string(index=False))
    random.seed(time.time())
    while True:
        dept_no = input("Choose a departmentId #: (Type 'q', 'quit' to stop the program, or enter for all) ").strip()
        if dept_no.lower() in ['q', 'quit']:
            break
        if dept_no == '':
            # None tells search_for_images to search every department.
            dept = None
        elif dept_no.isdigit():
            dept = [int(dept_no)]
        else:
            print("Invalid input. Please try again.")
            continue

        query = input("Search the Met for: ").strip()
        if query == '':
            print("Please enter a query.")
            continue

        try:
            results = search_for_images(query, 2, departments=dept)
        except requests.RequestException as e:
            # search_for_images lets a failed department-list request
            # propagate; report it instead of ending the session.
            print(f"Search failed: {e}")
            continue

        if results.empty:
            print("No results found.")
            continue
        print(results.to_string(index=False))
# Start the interactive CLI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|