hanwang committed on
Commit
76bc9a7
·
1 Parent(s): 1476642

add search_for_images function and CLI

Browse files
Files changed (2) hide show
  1. .gitignore +2 -1
  2. met_api.py +120 -0
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  venv
2
- final_project
 
 
1
  venv
2
+ final_project
3
+ .idea
met_api.py CHANGED
@@ -1,5 +1,7 @@
 
1
  import requests
2
  import random
 
3
 
4
  URL = "https://collectionapi.metmuseum.org/public/collection/v1/"
5
 
@@ -59,3 +61,121 @@ def department_counts(q="*", max_ids=200):
59
 
60
  # return sorted (department, count) pairs, highest first
61
  return sorted(counts.items(), key=lambda x: x[1], reverse=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
  import requests
3
  import random
4
+ import pandas as pd
5
 
6
  URL = "https://collectionapi.metmuseum.org/public/collection/v1/"
7
 
 
61
 
62
  # return sorted (department, count) pairs, highest first
63
  return sorted(counts.items(), key=lambda x: x[1], reverse=True)
64
+
65
+ """
66
+ Get a list of departments
67
+ """
68
def list_met_departments(timeout=10):
    """Return a DataFrame of all Met departments (id + name) to help you choose.

    Args:
        timeout: Seconds to wait for the Met API before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        A DataFrame with the columns ``departmentId`` and ``displayName``.
        It is empty (but still has both columns) when the API reports no
        departments.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            API answers with an HTTP error status.
    """
    # URL already ends with "/", so strip it to avoid "//" in the request path.
    r = requests.get(f"{URL.rstrip('/')}/departments", timeout=timeout)
    r.raise_for_status()
    depts = r.json().get("departments") or []
    # Passing `columns` selects the two fields and keeps the frame well-formed
    # for an empty list, where `DataFrame(depts)[[...]]` would raise KeyError.
    return pd.DataFrame(depts, columns=["departmentId", "displayName"])
76
+
77
+ """
78
+ returns a dataframe of list of objects with images and metadata that matches search term
79
+ To be polite, will only get default max 5 random results from each department,
80
+ but can be specified as parameter. Also, can choose the departments to search from.
81
+ """
82
def search_for_images(query,
                      max_per_department=5,
                      departments=None,
                      timeout=10):
    """Search the Met collection for objects with images, department by department.

    For every department the search endpoint is queried with ``hasImages=true``,
    a random sample of at most ``max_per_department`` object IDs is drawn, and
    the details of each sampled object are fetched. Sampling keeps the number
    of requests small.

    Args:
        query: Search term; must contain at least one non-whitespace character.
        max_per_department: Maximum number of objects fetched per department.
        departments: Iterable of department IDs to search. ``None`` searches
            every department returned by the ``departments`` endpoint.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        A DataFrame with one row per object that has a primary image, sorted
        by department and title. It is empty when nothing was found.

    Raises:
        ValueError: If ``query`` is empty/blank or ``max_per_department`` is
            negative.
        requests.RequestException: If the department list (needed only when
            ``departments`` is ``None``) cannot be fetched.
    """
    if not query or not query.strip():
        raise ValueError("Please provide a query.")
    # Fail before any network traffic; random.sample would otherwise reject a
    # negative sample size only after the first successful search.
    if max_per_department < 0:
        raise ValueError("max_per_department must not be negative.")

    # URL already ends with "/", so strip it to avoid "//" in request paths.
    base_url = URL.rstrip("/")
    rows = []

    # One session reuses the connection across the many small requests below.
    with requests.Session() as session:
        # 1) Departments
        if departments is None:
            resp = session.get(f"{base_url}/departments", timeout=timeout)
            resp.raise_for_status()
            dept_list = resp.json().get("departments") or []
            dept_ids = [d["departmentId"] for d in dept_list]
            dept_id_to_name = {d["departmentId"]: d["displayName"] for d in dept_list}
        else:
            dept_ids = list(departments)
            # Names come from the object details; this is only a fallback.
            dept_id_to_name = {d: f"Department {d}" for d in dept_ids}

        print("Searching for", query)
        print("Departments:", dept_ids)

        # 2) Per-department search -> random sample of IDs -> fetch details
        for dept_id in dept_ids:
            params = {
                "q": query,
                "hasImages": "true",
                "departmentId": dept_id,
            }
            try:
                r = session.get(f"{base_url}/search", params=params, timeout=timeout)
                r.raise_for_status()
                object_ids = (r.json() or {}).get("objectIDs") or []
            except (requests.RequestException, ValueError):
                # Best effort: a failing or malformed department search is
                # skipped. ValueError covers JSON decode errors.
                continue

            if not object_ids:
                continue

            sample_ids = random.sample(
                object_ids, k=min(max_per_department, len(object_ids))
            )

            for oid in sample_ids:
                try:
                    detail = session.get(f"{base_url}/objects/{oid}", timeout=timeout)
                    detail.raise_for_status()
                    obj = detail.json()
                except (requests.RequestException, ValueError):
                    continue

                # Skip error payloads and objects without an image; `.get`
                # avoids a KeyError when "primaryImage" is missing.
                if not isinstance(obj, dict) or not obj.get("primaryImage"):
                    continue

                rows.append({
                    "objectID": obj.get("objectID"),
                    "title": obj.get("title"),
                    "artistDisplayName": obj.get("artistDisplayName"),
                    "objectDate": obj.get("objectDate"),
                    "culture": obj.get("culture"),
                    "medium": obj.get("medium"),
                    "department": obj.get("department") or dept_id_to_name.get(dept_id),
                    "objectName": obj.get("objectName"),
                    "classification": obj.get("classification"),
                    "primaryImageSmall": obj.get("primaryImageSmall"),
                    "objectURL": obj.get("objectURL"),
                    "isPublicDomain": obj.get("isPublicDomain"),
                })

    df = pd.DataFrame(rows)
    if not df.empty:
        df = df.sort_values(["department", "title"]).reset_index(drop=True)
    return df
160
+
161
+ """
162
+ CLI when run from command line
163
+ Displays the departments to allow user to input a department
164
+ Then searches the department based on input, and displays the results with images.
165
+ """
166
if __name__ == '__main__':
    # Interactive CLI: show the departments, then repeatedly ask for a
    # department id and a search term and print the matching objects.
    # Leave with Ctrl-D / Ctrl-C.
    print("Welcome to Met Search")
    departments = list_met_departments()
    print(departments.to_string(index=False))
    # No explicit random.seed(): the random module seeds itself from OS
    # entropy (or the current time) on import.
    while True:
        try:
            dept_no = input("Choose a departmentId #: (or enter for all)").strip()
            search_term = input("Search the Met for: ")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: exit quietly instead of with a traceback.
            print()
            break

        if dept_no == '':
            dept = None
        else:
            try:
                dept = [int(dept_no)]
            except ValueError:
                print("Please enter a numeric departmentId.")
                continue

        try:
            results = search_for_images(search_term, 2, departments=dept)
        except ValueError as err:
            # Raised for an empty search term; report it and ask again.
            print(err)
            continue
        except requests.RequestException as err:
            print(f"Request failed: {err}")
            continue

        if results.empty:
            print("No results found.")
            continue
        print(results.to_string(index=False))