annotated-990 / test.py
marcmaxmeister's picture
simple python script to test all parameters available from the annotated-990 API
dab4575 verified
import os
import time
import urllib.parse

import requests as r
### This is a sample script for testing most of the annotated 990 API endpoints.
### The test loop only tries each parameter separately, but you can combine as many as you want
### to create narrower datasets. API will return EINs that match all criteria provided.
### See API reference sheet for more details
### https://docs.google.com/spreadsheets/d/1bXXKZ7kJ8UsoAVAfk8sH7yYSVB6xo8OLl1OGmSpSZ4k/edit?gid=1284351522#gid=1284351522
# Base URL of the EIN lookup endpoint; the query string is appended to it.
endpoint = 'https://givingtuesday-annotated-990.hf.space/eins/'
# API key. Read from the ANNOTATED_990_API_KEY environment variable when it is set, so the
# real key never has to be pasted into this file; otherwise the placeholder below is used.
# See your email for the api key, if you are invited to join the testing group.
key = os.environ.get('ANNOTATED_990_API_KEY', "<redacted>")
nonbinary_fields = ['taxyear', 'list_ein_zipcodes']  # TAXYEAR MUST BE 2023
# Candidate field names for the "less than" test.
# NOTE: this name is assigned a second time further down the file, before any loop
# reads it, so the later list is the one the test loops actually iterate.
less_than_fields = [
    # acs_* fields
    'acs_fraction_poverty',
    'acs_fraction_snap',
    'acs_fraction_insured',
    'acs_median_income',
    'acs_fraction_hs',
    'acs_fraction_vacant',
    'acs_dep_index',
    # mapagora_* fields
    'mapagora_n',
    'mapagora_civic_org_sum',
    'mapagora_civic_opp_sum',
    'mapagora_volunteer_sum',
    'mapagora_membership_sum',
    'mapagora_take_action_sum',
    'mapagora_events_sum',
    'mapagora_TotalPopulation',
    'mapagora_BROAD',
    'mapagora_REMNRTY',
    'mapagora_POV150',
    'mapagora_NOHSDP',
    'mapagora_SNGPNT',
    'mapagora_UNEMP',
    'mapagora_civic_org_sum_normalized',
    'mapagora_civic_opp_sum_normalized',
    'mapagora_civic_opp_index',
    'mapagora_volunteer_sum_normalized',
    'mapagora_membership_sum_normalized',
    'mapagora_take_action_sum_normalized',
    'mapagora_events_sum_normalized',
    # staff_* fields
    'staff_gender_male_fraction',
    'staff_gender_female_fraction',
]
# Field names that the test loop queries with the value 0.7.
greater_than_fields = [
    # cause_area_* fields
    'cause_area_arts_culture_humanities',
    'cause_area_education',
    'cause_area_universities',
    'cause_area_environment',
    'cause_area_animal_related',
    'cause_area_healthcare',
    'cause_area_hospitals',
    'cause_area_mental_health_crisis_intervention',
    'cause_area_voluntary_health_associations_medical_disciplines',
    'cause_area_medical_research',
    'cause_area_crime_legal_related',
    'cause_area_employment',
    'cause_area_food_agriculture_nutrition',
    'cause_area_housing_shelter',
    'cause_area_public_safety_disaster_preparedness_relief',
    'cause_area_recreation_sports',
    'cause_area_youth_development',
    'cause_area_human_services',
    'cause_area_international_foreign_affairs_national_security',
    'cause_area_civil_rights_social_action_advocacy',
    'cause_area_community_improvement_capacity_building',
    'cause_area_philanthropy_voluntarism_grantmaking_foundations',
    'cause_area_science_technology',
    'cause_area_social_science',
    'cause_area_public_societal_benefit',
    'cause_area_religion_related',
    'cause_area_mutual_membership_benefit',
    'cause_area_unknown',
    # other
    'opencorporates_match_confidence',
]
# Distinct cause-area labels, shared by the primary and secondary class filters.
# Each label appears once so the test loop does not send the same request repeatedly.
_cause_area_classes = [
    # NOTE THIS IS MISSPELLED IN THE API DATA -- zero primary matches, 369 secondary matches
    'Hopsitals',
    'Food, Agriculture & Nutrition',
    'Community Improvement & Capacity Building',
    'Mental Health & Crisis Intervention',
    'Housing & Shelter',
    'Human Services',
    'Arts, Culture & Humanities',
    'Youth Development',
    'Mutual & Membership Benefit',
    'Civil Rights, Social Action & Advocacy',
    'Health Care',
    'Animal-Related',
    'Recreation & Sports',
]

# String-valued filters: maps each query parameter to the label values the test loop tries.
test_strings = {
    # Separate copies, so editing one list never affects the other.
    'cause_area_primary_class': list(_cause_area_classes),
    'cause_area_secondary_class': list(_cause_area_classes),
    'locality': ['local state-wide', 'regional (multi-state)', 'international'],
    'locality_by_grants': ['local/regional', 'national', 'international'],
    # 'sdg1' through 'sdg16'
    'sdg': [f'sdg{number}' for number in range(1, 17)],
}
# Field names that the test loop queries without a label (the request sends `<field>=1`).
binary_fields = [
    # data-matching and website flags
    'opencorporates_matched', 'url200',
    # organization type
    'local_place_based_org', 'is_a_community_foundation',
    'org_religious_affiliation', 'org_mostly_supported_by_individuals', 'org_has_membership_dues',
    # organization_supports_* flags
    'organization_supports_children', 'organization_supports_women',
    'organization_supports_bipoc', 'organization_supports_elderly',
    'organization_supports_low_income', 'organization_supports_disability',
    'organization_supports_immigrants_refugees', 'organization_supports_lgbt',
    'organization_supports_none_of_these_groups',
    # topic flags
    'pro_democracy_org', 'social_justice_human_rights_org', 'climate_change',
    'has_revenue_model',
]
# Field names that the test loop queries with the value 0.7.
# NOTE(review): an earlier assignment to this name in this file holds the same entries;
# one of the two is redundant.
greater_than_fields = [
    'cause_area_arts_culture_humanities',
    'cause_area_education',
    'cause_area_universities',
    'cause_area_environment',
    'cause_area_animal_related',
    'cause_area_healthcare',
    'cause_area_hospitals',
    'cause_area_mental_health_crisis_intervention',
    'cause_area_voluntary_health_associations_medical_disciplines',
    'cause_area_medical_research',
    'cause_area_crime_legal_related',
    'cause_area_employment',
    'cause_area_food_agriculture_nutrition',
    'cause_area_housing_shelter',
    'cause_area_public_safety_disaster_preparedness_relief',
    'cause_area_recreation_sports',
    'cause_area_youth_development',
    'cause_area_human_services',
    'cause_area_international_foreign_affairs_national_security',
    'cause_area_civil_rights_social_action_advocacy',
    'cause_area_community_improvement_capacity_building',
    'cause_area_philanthropy_voluntarism_grantmaking_foundations',
    'cause_area_science_technology',
    'cause_area_social_science',
    'cause_area_public_societal_benefit',
    'cause_area_religion_related',
    'cause_area_mutual_membership_benefit',
    'cause_area_unknown',
    'opencorporates_match_confidence',
]
# Field names that the test loop queries with the value 0.3.
less_than_fields = [
    # acs_* fields
    'acs_fraction_poverty',
    'acs_fraction_snap',
    'acs_fraction_insured',
    'acs_median_income',
    'acs_fraction_hs',
    'acs_fraction_vacant',
    'acs_dep_index',
    # mapagora_* fields
    'mapagora_BROAD',
    'mapagora_REMNRTY',
    'mapagora_POV150',
    'mapagora_NOHSDP',
    'mapagora_SNGPNT',
    'mapagora_UNEMP',
    'mapagora_civic_org_sum_normalized',
    'mapagora_civic_opp_sum_normalized',
    'mapagora_civic_opp_index',
    'mapagora_volunteer_sum_normalized',
    'mapagora_membership_sum_normalized',
    'mapagora_take_action_sum_normalized',
    'mapagora_events_sum_normalized',
    # staff_* fields
    'staff_gender_male_fraction',
    'staff_gender_female_fraction',
    # a cause-area field, also present in greater_than_fields
    'cause_area_animal_related',
]
# Maps each field name to the integer value the test loop sends for it
# (presumably upper bounds, going by the variable name).
less_than_int_fields = {
    'mapagora_totalpopulation': 50000,
    'mapagora_n': 3,
    'mapagora_civic_org_sum': 500,
    'mapagora_civic_opp_sum': 100,
    'mapagora_volunteer_sum': 150,
    'mapagora_membership_sum': 600,
    'mapagora_take_action_sum': 50,
    'mapagora_events_sum': 175,
}
def test_one(field, label=None):
    """Query the API with a single filter and print how many EINs came back.

    Sends a GET request to ``endpoint`` carrying the API ``key``, ``taxyear=2023``
    and one ``field=value`` pair. On a successful response it prints the echoed
    parameters and the number of EINs; otherwise it prints the raw response.
    A failed request or a non-JSON body is reported and does not stop the run.

    Args:
        field: Name of the query parameter to filter on.
        label: Value for the filter. Numbers (including ``0``) are sent as
            written, non-empty strings are percent-encoded, and ``None`` or an
            empty string sends ``1``.
    """
    if isinstance(label, (int, float)):
        # Type is checked before truthiness so that 0 is sent as 0 rather than
        # being mistaken for "no label" and replaced by 1.
        value = str(label)
        has_label = True
    elif label:
        value = urllib.parse.quote(label)
        has_label = True
    else:
        value = "1"
        has_label = False

    req = f"{endpoint}?key={key}&taxyear=2023&{field}={value}"
    try:
        # The timeout keeps one stalled request from hanging the whole run.
        resp = r.get(req, timeout=30).json()
    except (r.exceptions.RequestException, ValueError) as err:
        # Only the exception type is printed: the full message can contain the
        # request URL, which includes the API key.
        print(f"{field}: request failed ({type(err).__name__})")
    else:
        if resp.get('success'):
            if has_label:
                print(f"{resp['params']}, {label} | {len(resp['eins'])} eins")
            else:
                print(f"{resp['params']}, {len(resp['eins'])} eins")
        else:
            print(resp)
    # Short pause between consecutive requests.
    time.sleep(0.1)
def main():
    """Run every single-parameter query, one group of fields after another."""
    # String filters: try every label listed for each field.
    for field, labels in test_strings.items():  # PASSED
        for label in labels:
            test_one(field, label)
    # Flag filters: no label, so test_one sends `<field>=1`.
    for field in binary_fields:  # PASSED
        test_one(field)
    # Integer filters: each field has its own value.
    for field, value in less_than_int_fields.items():  # PASSED
        test_one(field, value)
    for field in greater_than_fields:  # PASSED
        test_one(field, 0.7)
    for field in less_than_fields:  # PASSED
        test_one(field, 0.3)


# Only hit the API when executed as a script, not when the module is imported.
if __name__ == "__main__":
    main()