File size: 7,296 Bytes
76bc9a7
1476642
 
76bc9a7
1476642
 
 
 
 
 
 
 
 
5441644
 
 
 
 
 
 
 
1476642
 
 
5441644
 
 
 
 
 
 
 
 
 
1476642
 
 
5441644
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1476642
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76bc9a7
 
 
 
 
 
 
 
5441644
 
 
 
 
 
 
 
76bc9a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8acf7d6
76bc9a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c550680
76bc9a7
 
 
 
 
 
 
 
 
 
 
 
 
 
c550680
f78d74e
76bc9a7
 
 
 
f78d74e
 
 
 
76bc9a7
 
f78d74e
 
 
76bc9a7
f78d74e
8acf7d6
 
 
 
 
76bc9a7
 
 
c550680
 
 
5441644
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import time
import requests
import random
import pandas as pd

# Base URL of the Met Museum collection API (v1). The value ends with a trailing "/".
URL = "https://collectionapi.metmuseum.org/public/collection/v1/"

'''
TODO: error handling is still incomplete (for example, failed requests are not retried).
'''

# gets information on a single object
def get_object(objectID):
    """Fetch the metadata record for a single Met object.

    Args:
        objectID: Identifier of the object to look up.

    Returns:
        The decoded JSON body of the response, or ``None`` when the request
        fails, times out, returns an HTTP error status, or the body is not
        valid JSON. Failures are reported on stdout rather than raised.
    """
    # URL already ends with "/"; strip it so the path has no doubled slash.
    endpoint = f"{URL.rstrip('/')}/objects/{objectID}"
    try:
        # A timeout keeps one stalled connection from hanging the caller.
        response = requests.get(endpoint, timeout=15)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on older requests versions.
        print(f"Error fetching object {objectID}: {e}")
        return None


# gets all the objects that have some sort of image
def get_objectsWithImages():
    """Fetch the IDs of every object that has at least one image.

    Returns:
        A ``(total, objectIDs)`` tuple taken from the search response.
        ``objectIDs`` is always a list: a missing or null field becomes ``[]``.
        On any request or decoding failure the error is printed and
        ``(0, [])`` is returned.
    """
    # URL already ends with "/"; strip it so the path is built the same way everywhere.
    endpoint = f"{URL.rstrip('/')}/search?hasImages=true&q=*"
    try:
        # A timeout keeps one stalled connection from hanging the caller.
        response = requests.get(endpoint, timeout=15)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on older requests versions.
        print(f"Error fetching objects with images: {e}")
        return 0, []

    if not isinstance(data, dict):
        print("Error fetching objects with images: unexpected response format")
        return 0, []

    # "or" (not a .get default) so an explicit JSON null is normalised as well.
    total = data.get("total") or 0
    objectIDs = data.get("objectIDs") or []
    return total, objectIDs

# gets the urls for random objects with images
def get_images(totalObjects, objectIDs, limit):
    """Collect image URLs and titles for randomly chosen objects.

    Args:
        totalObjects: Number of leading entries of ``objectIDs`` that may be
            sampled. Values larger than ``len(objectIDs)`` are clamped to it.
        objectIDs: Sequence of object IDs to sample from.
        limit: Maximum number of images to return.

    Returns:
        A list of ``(primaryImage, title)`` tuples, at most ``limit`` long.
        The title falls back to ``"Untitled"`` when the record has none.
        The list is shorter than ``limit`` (possibly empty) when too few of
        the sampled objects have a primary image, and empty when ``limit``
        is not positive or there is nothing to sample.
    """
    if limit <= 0:
        return []

    ids = objectIDs or []
    usable = len(ids)
    if isinstance(totalObjects, int):
        # Never index past the end of the ID list, whatever total was reported.
        usable = min(usable, max(totalObjects, 0))
    if usable == 0:
        return []

    # Over-sample by 20 because some objects have a blank primary image, but
    # never request more indexes than exist (random.sample would raise).
    sample_size = min(usable, limit + 20)

    images = []
    for index in random.sample(range(usable), sample_size):
        obj = get_object(ids[index])
        if obj and obj.get("primaryImage"):
            images.append((obj["primaryImage"], obj.get("title", "Untitled")))
            if len(images) == limit:
                break

    return images

def department_counts(q="*", max_ids=200):
    """Tally how many search results belong to each department.

    Runs an images-only search, fetches the metadata of up to ``max_ids``
    results through ``get_object`` and counts their ``department`` field.

    Args:
        q: Search query (default ``"*"``).
        max_ids: Cap on how many object IDs are inspected, to keep the call
            fast. ``None`` inspects every result; negative values are
            treated as 0.

    Returns:
        A list of ``(department, count)`` pairs sorted by count, highest
        first. Objects without a department are counted under
        ``"(unknown)"``. Returns ``[]`` when the search itself fails.
    """
    cap = None if max_ids is None else max(max_ids, 0)
    try:
        resp = requests.get(
            f"{URL.rstrip('/')}/search",
            # Lower-case "true" matches the other search calls in this module.
            params={"q": q, "hasImages": "true"},
            timeout=15,
        )
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on older requests versions.
        print(f"Error searching for {q!r}: {e}")
        return []

    if not isinstance(payload, dict):
        return []
    ids = (payload.get("objectIDs") or [])[:cap]

    counts = {}
    for oid in ids:
        obj = get_object(oid)
        # get_object returns None on failure; skip such objects explicitly.
        if not isinstance(obj, dict):
            continue
        dep = obj.get("department") or "(unknown)"
        counts[dep] = counts.get(dep, 0) + 1

    # Sorted (department, count) pairs, highest count first.
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)

"""
Get a list of departments
"""
def list_met_departments():
    """Return a DataFrame listing all Met departments.

    Returns:
        A DataFrame with the columns ``departmentId`` and ``displayName``.
        When the request fails, or the response lists no departments, the
        frame is empty but still has both columns. Request failures are
        printed rather than raised.
    """
    columns = ["departmentId", "displayName"]
    try:
        # URL already ends with "/"; strip it so the path has no doubled slash.
        r = requests.get(f"{URL.rstrip('/')}/departments", timeout=15)
        r.raise_for_status()
        payload = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on older requests versions.
        print(f"Error fetching departments: {e}")
        return pd.DataFrame(columns=columns)

    depts = payload.get("departments") if isinstance(payload, dict) else None
    # Passing columns= keeps the frame well-formed even when the list is empty.
    return pd.DataFrame(depts or [], columns=columns)

"""
Returns a DataFrame of objects that have images, together with their metadata,
matching the search term. To be polite to the API, at most 5 random results (the default)
are fetched from each department; this limit can be changed with a parameter.
The departments to search can also be chosen.
"""
def search_for_images(query,
                    max_per_department=5,
                    departments=None):
    """Search the Met collection for objects with images matching ``query``.

    For every department the search endpoint is queried, a random sample of
    the hits is drawn, and the metadata of each sampled object is fetched.

    Args:
        query: Search term; must contain at least one non-blank character.
        max_per_department: Maximum number of randomly sampled objects
            fetched per department. Non-positive values fetch nothing.
        departments: Department IDs to search, as an iterable or a single
            int. ``None`` searches every department listed by the API.

    Returns:
        A DataFrame with one row per object that has a primary image, sorted
        by department and title. It is empty when nothing matched.

    Raises:
        ValueError: If ``query`` is empty or blank.
        requests.RequestException: If ``departments`` is ``None`` and the
            department list cannot be fetched. Failures of individual search
            or object requests are skipped instead.
    """
    if not query or not query.strip():
        raise ValueError("Please provide a query.")

    # URL already ends with "/"; strip it so paths have no doubled slash.
    base = URL.rstrip("/")
    timeout = 15
    # ValueError covers JSON decoding errors on older requests versions.
    fetch_errors = (requests.RequestException, ValueError)

    # Accept a single department id as well as an iterable of ids.
    if isinstance(departments, int):
        departments = [departments]

    fields = (
        "objectID",
        "title",
        "artistDisplayName",
        "objectDate",
        "culture",
        "medium",
        "department",
        "objectName",
        "classification",
        "primaryImageSmall",
        "primaryImage",
        "objectURL",
        "isPublicDomain",
    )

    rows = []
    # One session reuses the connection across the many requests made below.
    with requests.Session() as session:
        # 1) Departments
        if departments is None:
            resp = session.get(f"{base}/departments", timeout=timeout)
            resp.raise_for_status()
            dept_list = resp.json().get("departments") or []
            dept_ids = [d["departmentId"] for d in dept_list]
            dept_id_to_name = {d["departmentId"]: d["displayName"] for d in dept_list}
        else:
            dept_ids = list(departments)
            # Names normally come from the object details; this is only a fallback.
            dept_id_to_name = {d: f"Department {d}" for d in dept_ids}

        print("Searching for", query)
        print("Across departments:", dept_ids)

        # 2) Per-department search -> random sample of IDs -> fetch details
        for dept_id in dept_ids:
            # Restrict the search to objects with images in this department.
            params = {
                "q": query,
                "hasImages": "true",
                "departmentId": dept_id,
            }
            try:
                r = session.get(f"{base}/search", params=params, timeout=timeout)
                r.raise_for_status()
                object_ids = (r.json() or {}).get("objectIDs") or []
            except fetch_errors:
                continue

            sample_size = min(max_per_department, len(object_ids))
            if sample_size <= 0:
                continue

            for oid in random.sample(object_ids, k=sample_size):
                try:
                    obj_resp = session.get(f"{base}/objects/{oid}", timeout=timeout)
                    # Error payloads carry no object fields; skip them.
                    obj_resp.raise_for_status()
                    obj = obj_resp.json()
                except fetch_errors:
                    continue

                # Skip malformed records and objects without a primary image.
                if not isinstance(obj, dict) or not obj.get("primaryImage"):
                    continue

                row = {name: obj.get(name) for name in fields}
                if not row["department"]:
                    row["department"] = dept_id_to_name.get(dept_id)
                rows.append(row)

    df = pd.DataFrame(rows)
    if not df.empty:
        df = df.sort_values(["department", "title"]).reset_index(drop=True)
    return df

"""
    Command-line interface, used when this file is run as a script.
    Displays the departments so the user can choose one (or all of them),
    then searches for the entered query and prints the matching objects, including their image URLs.
"""
def main():
    """Run the interactive command-line search loop.

    Prints the department list, then repeatedly asks for a department id
    (blank for all departments, 'q' or 'quit' to stop) and a search query,
    and prints up to two matching objects per department. The loop also
    ends cleanly on end-of-input or Ctrl-C.
    """
    print("Welcome to Met Search.")
    departments = list_met_departments()
    print(departments.to_string(index=False))
    random.seed(time.time())
    try:
        while True:
            dept_no = input("Choose a departmentId #: (Type 'q', 'quit' to stop the program, or enter for all) ").strip()
            if dept_no.lower() in ['q', 'quit']:
                break
            if dept_no == '':
                dept = None
            elif dept_no.isdecimal():
                # isdecimal() (unlike isdigit()) only accepts text int() can parse.
                dept = [int(dept_no)]
            else:
                print("Invalid input. Please try again.")
                continue

            query = input("Search the Met for: ").strip()
            if query == '':
                print("Please enter a query.")
                continue

            try:
                results = search_for_images(query, 2, departments=dept)
            except requests.RequestException as e:
                # A network failure should not end the whole session.
                print(f"Search failed: {e}")
                continue

            if results.empty:
                print("No results found.")
                continue
            print(results.to_string(index=False))
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or user pressed Ctrl-C: leave without a traceback.
        print()

# Start the interactive CLI only when this file is executed as a script.
if __name__ == '__main__':
    main()