Mark Febrizio committed on
Select agencies (#23)
Browse files
* move function to utils
* adjust input agencies
* use acronyms instead of slugs
for selection menu
* remove acronyms
keeps improved get metadata values function; values in menu now scrollable via css
- app.py +4 -2
- modules/__init__.py +2 -0
- modules/get_rules_in_window.py +9 -3
- modules/grouping.py +2 -24
- modules/utils.py +34 -0
- www/style.css +4 -0
app.py
CHANGED
|
@@ -271,6 +271,8 @@ with ui.accordion(open=False):
|
|
| 271 |
This dashboard allows users to explore how different lookback window dates would affect the set of rules available for congressional review.
|
| 272 |
|
| 273 |
Rule data are retrieved daily from the [Federal Register API](https://www.federalregister.gov/developers/documentation/api/v1), which publishes new editions of the Federal Register each business day.
|
|
|
|
|
|
|
| 274 |
"""
|
| 275 |
)
|
| 276 |
|
|
@@ -284,7 +286,7 @@ ui.markdown(
|
|
| 284 |
|
| 285 |
|
| 286 |
@reactive.calc
|
| 287 |
-
def filtered_df():
|
| 288 |
filt_df = DF
|
| 289 |
|
| 290 |
# filter dates
|
|
@@ -295,7 +297,7 @@ def filtered_df():
|
|
| 295 |
|
| 296 |
# filter agencies
|
| 297 |
if (input.menu_agency() is not None) and ("all" not in input.menu_agency()):
|
| 298 |
-
bool_agency = [True if sum(selected in agency for selected in input.menu_agency()) > 0 else False for agency in filt_df[
|
| 299 |
filt_df = filt_df.loc[bool_agency]
|
| 300 |
|
| 301 |
# return filtered dataframe
|
|
|
|
| 271 |
This dashboard allows users to explore how different lookback window dates would affect the set of rules available for congressional review.
|
| 272 |
|
| 273 |
Rule data are retrieved daily from the [Federal Register API](https://www.federalregister.gov/developers/documentation/api/v1), which publishes new editions of the Federal Register each business day.
|
| 274 |
+
|
| 275 |
+
A list of common agency acronyms is available from the [U.S. Government Manual](https://www.govinfo.gov/content/pkg/GOVMAN-2022-12-31/pdf/GOVMAN-2022-12-31-Commonly-Used-Acronyms-105.pdf).
|
| 276 |
"""
|
| 277 |
)
|
| 278 |
|
|
|
|
| 286 |
|
| 287 |
|
| 288 |
@reactive.calc
|
| 289 |
+
def filtered_df(agency_column: str = "parent_slug"):
|
| 290 |
filt_df = DF
|
| 291 |
|
| 292 |
# filter dates
|
|
|
|
| 297 |
|
| 298 |
# filter agencies
|
| 299 |
if (input.menu_agency() is not None) and ("all" not in input.menu_agency()):
|
| 300 |
+
bool_agency = [True if sum(selected in agency for selected in input.menu_agency()) > 0 else False for agency in filt_df[agency_column]]
|
| 301 |
filt_df = filt_df.loc[bool_agency]
|
| 302 |
|
| 303 |
# return filtered dataframe
|
modules/__init__.py
CHANGED
|
@@ -3,6 +3,7 @@ from .grouping import *
|
|
| 3 |
from .plotting import *
|
| 4 |
from .search_columns import *
|
| 5 |
from .significant import *
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
# see: https://docs.python.org/3.11/tutorial/modules.html#packages
|
|
@@ -12,4 +13,5 @@ __all__ = [
|
|
| 12 |
"plotting",
|
| 13 |
"search_columns",
|
| 14 |
"significant",
|
|
|
|
| 15 |
]
|
|
|
|
| 3 |
from .plotting import *
|
| 4 |
from .search_columns import *
|
| 5 |
from .significant import *
|
| 6 |
+
from .utils import *
|
| 7 |
|
| 8 |
|
| 9 |
# see: https://docs.python.org/3.11/tutorial/modules.html#packages
|
|
|
|
| 13 |
"plotting",
|
| 14 |
"search_columns",
|
| 15 |
"significant",
|
| 16 |
+
"utils",
|
| 17 |
]
|
modules/get_rules_in_window.py
CHANGED
|
@@ -8,9 +8,11 @@ from pandas import DataFrame, to_datetime
|
|
| 8 |
try:
|
| 9 |
from search_columns import search_columns, SearchError
|
| 10 |
from significant import get_significant_info
|
|
|
|
| 11 |
except (ModuleNotFoundError, ImportError):
|
| 12 |
from .search_columns import search_columns, SearchError
|
| 13 |
from .significant import get_significant_info
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
METADATA, _ = AgencyMetadata().get_agency_metadata()
|
|
@@ -20,14 +22,16 @@ GET_SIGNIFICANT = True if date.fromisoformat(START_DATE) >= date(2023, 4, 6) els
|
|
| 20 |
|
| 21 |
|
| 22 |
class DataAvailabilityError(Exception):
|
|
|
|
| 23 |
pass
|
| 24 |
|
| 25 |
|
| 26 |
-
def get_date_range(start_date: str):
|
| 27 |
"""Define date range of documents returned by the app.
|
| 28 |
|
| 29 |
Args:
|
| 30 |
start_date (str): The start date for retrieving the documents.
|
|
|
|
| 31 |
|
| 32 |
Returns:
|
| 33 |
dict: Dictionary containing start date, end date, and transition year.
|
|
@@ -36,7 +40,7 @@ def get_date_range(start_date: str):
|
|
| 36 |
end_year = start_year + 1
|
| 37 |
date_range = {
|
| 38 |
"start": start_date,
|
| 39 |
-
"end": f"{end_year}-
|
| 40 |
"transition_year": end_year,
|
| 41 |
}
|
| 42 |
return date_range
|
|
@@ -155,13 +159,14 @@ def get_significant_rules(df, start_date):
|
|
| 155 |
return df, last_updated
|
| 156 |
|
| 157 |
|
| 158 |
-
def get_rules_in_window(start_date: str, get_significant: bool = True):
|
| 159 |
date_range = get_date_range(start_date)
|
| 160 |
transition_year = date_range.get("transition_year")
|
| 161 |
results = get_rules(date_range)
|
| 162 |
df = format_documents(results)
|
| 163 |
df, _ = filter_corrections(df)
|
| 164 |
df = filter_new_admin_rules(df, transition_year)
|
|
|
|
| 165 |
if get_significant:
|
| 166 |
df, last_updated = get_significant_rules(df, start_date)
|
| 167 |
else:
|
|
@@ -184,3 +189,4 @@ if __name__ == "__main__":
|
|
| 184 |
print(DF.columns)
|
| 185 |
print(LAST_UPDATED)
|
| 186 |
print(AGENCIES)
|
|
|
|
|
|
| 8 |
try:
|
| 9 |
from search_columns import search_columns, SearchError
|
| 10 |
from significant import get_significant_info
|
| 11 |
+
from utils import get_agency_metadata_values
|
| 12 |
except (ModuleNotFoundError, ImportError):
|
| 13 |
from .search_columns import search_columns, SearchError
|
| 14 |
from .significant import get_significant_info
|
| 15 |
+
from .utils import get_agency_metadata_values
|
| 16 |
|
| 17 |
|
| 18 |
METADATA, _ = AgencyMetadata().get_agency_metadata()
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
class DataAvailabilityError(Exception):
|
| 25 |
+
"""Raised when data is not available for the requested inputs."""
|
| 26 |
pass
|
| 27 |
|
| 28 |
|
| 29 |
+
def get_date_range(start_date: str, end_mmdd: str = "01-03"):
|
| 30 |
"""Define date range of documents returned by the app.
|
| 31 |
|
| 32 |
Args:
|
| 33 |
start_date (str): The start date for retrieving the documents.
|
| 34 |
+
end_mmdd (str, optional): The month and day for the end date in MM-DD format. Defaults to "01-03".
|
| 35 |
|
| 36 |
Returns:
|
| 37 |
dict: Dictionary containing start date, end date, and transition year.
|
|
|
|
| 40 |
end_year = start_year + 1
|
| 41 |
date_range = {
|
| 42 |
"start": start_date,
|
| 43 |
+
"end": f"{end_year}-{end_mmdd}",
|
| 44 |
"transition_year": end_year,
|
| 45 |
}
|
| 46 |
return date_range
|
|
|
|
| 159 |
return df, last_updated
|
| 160 |
|
| 161 |
|
| 162 |
+
def get_rules_in_window(start_date: str, get_significant: bool = True, metadata=METADATA):
|
| 163 |
date_range = get_date_range(start_date)
|
| 164 |
transition_year = date_range.get("transition_year")
|
| 165 |
results = get_rules(date_range)
|
| 166 |
df = format_documents(results)
|
| 167 |
df, _ = filter_corrections(df)
|
| 168 |
df = filter_new_admin_rules(df, transition_year)
|
| 169 |
+
df.loc[:, "acronym"] = get_agency_metadata_values(df, "parent_slug", metadata=METADATA, metadata_value="acronym")
|
| 170 |
if get_significant:
|
| 171 |
df, last_updated = get_significant_rules(df, start_date)
|
| 172 |
else:
|
|
|
|
| 189 |
print(DF.columns)
|
| 190 |
print(LAST_UPDATED)
|
| 191 |
print(AGENCIES)
|
| 192 |
+
print(len(METADATA.keys()))
|
modules/grouping.py
CHANGED
|
@@ -5,29 +5,7 @@ from datetime import datetime, date, timedelta
|
|
| 5 |
from dateutil.relativedelta import *
|
| 6 |
from pandas import DataFrame, Timestamp, to_datetime
|
| 7 |
|
| 8 |
-
|
| 9 |
-
def _get_agency_metadata_values(
|
| 10 |
-
df: DataFrame,
|
| 11 |
-
agency_column: str,
|
| 12 |
-
metadata: dict,
|
| 13 |
-
metadata_value: str,
|
| 14 |
-
):
|
| 15 |
-
"""Get a specific value from agency metadata (e.g., get acronym for Department of Homeland Security).
|
| 16 |
-
|
| 17 |
-
Args:
|
| 18 |
-
df (DataFrame): Input data.
|
| 19 |
-
agency_column (str): Column containing agency identifier.
|
| 20 |
-
metadata (dict): Agency metadata.
|
| 21 |
-
metadata_value (str): Value of interest from agency metadata.
|
| 22 |
-
|
| 23 |
-
Returns:
|
| 24 |
-
pd.Series: Pandas Series of new values for adding to DataFrame.
|
| 25 |
-
"""
|
| 26 |
-
if metadata_value == "acronym":
|
| 27 |
-
metadata_value = "short_name"
|
| 28 |
-
return df.loc[:, agency_column].apply(
|
| 29 |
-
lambda x: metadata.get(x, {}).get(metadata_value)
|
| 30 |
-
)
|
| 31 |
|
| 32 |
|
| 33 |
def _get_first_week_start(dates: list[date], week_start: int | str | "weekday" = MO):
|
|
@@ -229,7 +207,7 @@ def groupby_agency(
|
|
| 229 |
}, errors="ignore"
|
| 230 |
)
|
| 231 |
if metadata is not None:
|
| 232 |
-
grouped.loc[:, metadata_value] =
|
| 233 |
grouped,
|
| 234 |
agency_column="agency",
|
| 235 |
metadata=metadata,
|
|
|
|
| 5 |
from dateutil.relativedelta import *
|
| 6 |
from pandas import DataFrame, Timestamp, to_datetime
|
| 7 |
|
| 8 |
+
from .utils import get_agency_metadata_values
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
def _get_first_week_start(dates: list[date], week_start: int | str | "weekday" = MO):
|
|
|
|
| 207 |
}, errors="ignore"
|
| 208 |
)
|
| 209 |
if metadata is not None:
|
| 210 |
+
grouped.loc[:, metadata_value] = get_agency_metadata_values(
|
| 211 |
grouped,
|
| 212 |
agency_column="agency",
|
| 213 |
metadata=metadata,
|
modules/utils.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pandas import DataFrame
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def _get_nested_metadata(metadata_key: str, metadata: dict[str, dict], metadata_value: str):
    """Look up one attribute in nested agency metadata, falling back to the key itself.

    Args:
        metadata_key (str): Agency identifier used as the top-level key of `metadata`.
        metadata (dict[str, dict]): Agency metadata; each top-level value is a dict of attributes.
        metadata_value (str): Name of the attribute to retrieve for the agency.

    Returns:
        The attribute stored for the agency, or `metadata_key` unchanged when either
        the agency or the attribute is missing from `metadata`.
    """
    agency_entry = metadata.get(metadata_key, {})
    return agency_entry.get(metadata_value, metadata_key)


def get_agency_metadata_values(
    df: DataFrame,
    agency_column: str,
    metadata: dict,
    metadata_value: str,
):
    """Get a specific value from agency metadata (e.g., get acronym for Department of Homeland Security).

    Cells of `agency_column` that are lists are mapped item by item and yield a list;
    any other cell is treated as a single agency identifier. Identifiers (or attributes)
    missing from `metadata` are returned unchanged rather than dropped.

    Args:
        df (DataFrame): Input data.
        agency_column (str): Column containing agency identifier.
        metadata (dict): Agency metadata.
        metadata_value (str): Value of interest from agency metadata.

    Returns:
        pd.Series: Pandas Series of new values for adding to DataFrame.
    """
    # "acronym" is accepted as an alias for the "short_name" metadata field
    if metadata_value == "acronym":
        metadata_value = "short_name"

    def _lookup(agency):
        # a cell may hold several agencies (list) or a single identifier
        if isinstance(agency, list):
            return [
                _get_nested_metadata(item, metadata=metadata, metadata_value=metadata_value)
                for item in agency
            ]
        return _get_nested_metadata(agency, metadata=metadata, metadata_value=metadata_value)

    return df.loc[:, agency_column].apply(_lookup)
|
www/style.css
CHANGED
|
@@ -23,3 +23,7 @@
|
|
| 23 |
#frequency {
|
| 24 |
margin-bottom: 5% !important;
|
| 25 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
#frequency {
|
| 24 |
margin-bottom: 5% !important;
|
| 25 |
}
|
| 26 |
+
|
| 27 |
+
#menu_agency {
|
| 28 |
+
overflow: visible;
|
| 29 |
+
}
|