Spaces:

regulatorystudies
/

cra-window-rules

Running

Mark Febrizio committed on Jul 31, 2024

Commit

393578a

unverified ·

1 Parent(s): 4158978

Select agencies (#23)

* move function to utils

* adjust input agencies

* use acronyms instead of slugs

for selection menu

* remove acronyms

keeps improved get metadata values function; values in menu now scrollable via css

Files changed (6) hide show

app.py +4 -2
modules/__init__.py +2 -0
modules/get_rules_in_window.py +9 -3
modules/grouping.py +2 -24
modules/utils.py +34 -0
www/style.css +4 -0

app.py CHANGED Viewed

@@ -271,6 +271,8 @@ with ui.accordion(open=False):
             This dashboard allows users to explore how different lookback window dates would affect the set of rules available for congressional review.
             Rule data are retrieved daily from the [Federal Register API](https://www.federalregister.gov/developers/documentation/api/v1), which publishes new editions of the Federal Register each business day.
             """
             )
@@ -284,7 +286,7 @@ ui.markdown(
 @reactive.calc
-def filtered_df():
     filt_df = DF
     # filter dates
@@ -295,7 +297,7 @@ def filtered_df():
     # filter agencies
     if (input.menu_agency() is not None) and ("all" not in input.menu_agency()):
-        bool_agency = [True if sum(selected in agency for selected in input.menu_agency()) > 0 else False for agency in filt_df["parent_slug"]]
         filt_df = filt_df.loc[bool_agency]
     # return filtered dataframe

             This dashboard allows users to explore how different lookback window dates would affect the set of rules available for congressional review.
             Rule data are retrieved daily from the [Federal Register API](https://www.federalregister.gov/developers/documentation/api/v1), which publishes new editions of the Federal Register each business day.
+            A list of common agency acronyms is available from the [U.S. Government Manual](https://www.govinfo.gov/content/pkg/GOVMAN-2022-12-31/pdf/GOVMAN-2022-12-31-Commonly-Used-Acronyms-105.pdf).
             """
             )
 @reactive.calc
+def filtered_df(agency_column: str = "parent_slug"):
     filt_df = DF
     # filter dates
     # filter agencies
     if (input.menu_agency() is not None) and ("all" not in input.menu_agency()):
+        bool_agency = [True if sum(selected in agency for selected in input.menu_agency()) > 0 else False for agency in filt_df[agency_column]]
         filt_df = filt_df.loc[bool_agency]
     # return filtered dataframe

modules/__init__.py CHANGED Viewed

@@ -3,6 +3,7 @@ from .grouping import *
 from .plotting import *
 from .search_columns import *
 from .significant import *
 # see: https://docs.python.org/3.11/tutorial/modules.html#packages
@@ -12,4 +13,5 @@ __all__ = [
     "plotting",
     "search_columns",
     "significant",
     ]

 from .plotting import *
 from .search_columns import *
 from .significant import *
+from .utils import *
 # see: https://docs.python.org/3.11/tutorial/modules.html#packages
     "plotting",
     "search_columns",
     "significant",
+    "utils",
     ]

modules/get_rules_in_window.py CHANGED Viewed

@@ -8,9 +8,11 @@ from pandas import DataFrame, to_datetime
 try:
     from search_columns import search_columns, SearchError
     from significant import get_significant_info
 except (ModuleNotFoundError, ImportError):
     from .search_columns import search_columns, SearchError
     from .significant import get_significant_info
 METADATA, _ = AgencyMetadata().get_agency_metadata()
@@ -20,14 +22,16 @@ GET_SIGNIFICANT = True if date.fromisoformat(START_DATE) >= date(2023, 4, 6) els
 class DataAvailabilityError(Exception):
     pass
-def get_date_range(start_date: str):
     """Define date range of documents returned by the app.
     Args:
         start_date (str): The start date for retrieving the documents.
     Returns:
         dict: Dictionary containing start date, end date, and transition year.
@@ -36,7 +40,7 @@ def get_date_range(start_date: str):
     end_year = start_year + 1
     date_range = {
         "start": start_date,
-        "end": f"{end_year}-01-03",
         "transition_year": end_year,
         }
     return date_range
@@ -155,13 +159,14 @@ def get_significant_rules(df, start_date):
     return df, last_updated
-def get_rules_in_window(start_date: str, get_significant: bool = True):
     date_range = get_date_range(start_date)
     transition_year = date_range.get("transition_year")
     results = get_rules(date_range)
     df = format_documents(results)
     df, _ = filter_corrections(df)
     df = filter_new_admin_rules(df, transition_year)
     if get_significant:
         df, last_updated = get_significant_rules(df, start_date)
     else:
@@ -184,3 +189,4 @@ if __name__ == "__main__":
     print(DF.columns)
     print(LAST_UPDATED)
     print(AGENCIES)

 try:
     from search_columns import search_columns, SearchError
     from significant import get_significant_info
+    from utils import get_agency_metadata_values
 except (ModuleNotFoundError, ImportError):
     from .search_columns import search_columns, SearchError
     from .significant import get_significant_info
+    from .utils import get_agency_metadata_values
 METADATA, _ = AgencyMetadata().get_agency_metadata()
 class DataAvailabilityError(Exception):
+    """Raised when data is not available for the requested inputs."""
     pass
+def get_date_range(start_date: str, end_mmdd: str = "01-03"):
     """Define date range of documents returned by the app.
     Args:
         start_date (str): The start date for retrieving the documents.
+        end_mmdd (str, optional): The month and day for the end date in MM-DD format. Defaults to "01-03".
     Returns:
         dict: Dictionary containing start date, end date, and transition year.
     end_year = start_year + 1
     date_range = {
         "start": start_date,
+        "end": f"{end_year}-{end_mmdd}",
         "transition_year": end_year,
         }
     return date_range
     return df, last_updated
+def get_rules_in_window(start_date: str, get_significant: bool = True, metadata=METADATA):
     date_range = get_date_range(start_date)
     transition_year = date_range.get("transition_year")
     results = get_rules(date_range)
     df = format_documents(results)
     df, _ = filter_corrections(df)
     df = filter_new_admin_rules(df, transition_year)
+    df.loc[:, "acronym"] = get_agency_metadata_values(df, "parent_slug", metadata=METADATA, metadata_value="acronym")
     if get_significant:
         df, last_updated = get_significant_rules(df, start_date)
     else:
     print(DF.columns)
     print(LAST_UPDATED)
     print(AGENCIES)
+    print(len(METADATA.keys()))

modules/grouping.py CHANGED Viewed

@@ -5,29 +5,7 @@ from datetime import datetime, date, timedelta
 from dateutil.relativedelta import *
 from pandas import DataFrame, Timestamp, to_datetime
-def _get_agency_metadata_values(
-        df: DataFrame,
-        agency_column: str,
-        metadata: dict,
-        metadata_value: str,
-    ):
-    """Get a specific value from agency metadata (e.g., get acronym for Department of Homeland Security).
-    Args:
-        df (DataFrame): Input data.
-        agency_column (str): Column containing agency identifier.
-        metadata (dict): Agency metadata.
-        metadata_value (str): Value of interest from agency metadata.
-    Returns:
-        pd.Series: Pandas Series of new values for adding to DataFrame.
-    """
-    if metadata_value == "acronym":
-        metadata_value = "short_name"
-    return df.loc[:, agency_column].apply(
-        lambda x: metadata.get(x, {}).get(metadata_value)
-        )
 def _get_first_week_start(dates: list[date], week_start: int | str | "weekday" = MO):
@@ -229,7 +207,7 @@ def groupby_agency(
             }, errors="ignore"
         )
     if metadata is not None:
-        grouped.loc[:, metadata_value] = _get_agency_metadata_values(
         grouped,
         agency_column="agency",
         metadata=metadata,

 from dateutil.relativedelta import *
 from pandas import DataFrame, Timestamp, to_datetime
+from .utils import get_agency_metadata_values
 def _get_first_week_start(dates: list[date], week_start: int | str | "weekday" = MO):
             }, errors="ignore"
         )
     if metadata is not None:
+        grouped.loc[:, metadata_value] = get_agency_metadata_values(
         grouped,
         agency_column="agency",
         metadata=metadata,

modules/utils.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from pandas import DataFrame
def _get_nested_metadata(metadata_key: str, metadata: dict[str, dict], metadata_value: str):
    """Read one field from one agency's metadata record, falling back to the key.

    Args:
        metadata_key (str): Agency identifier used as the key into `metadata`.
        metadata (dict[str, dict]): Agency metadata, keyed by agency identifier.
        metadata_value (str): Name of the field to read from the agency's record.

    Returns:
        The stored field value, or `metadata_key` itself when the agency has no
        record or its record does not contain the field. A field that is present
        but stored as None is returned as None.
    """
    # `or {}` also covers an agency whose record is stored as None, which would
    # otherwise raise AttributeError on the nested lookup below.
    record = metadata.get(metadata_key) or {}
    return record.get(metadata_value, metadata_key)


def get_agency_metadata_values(
        df: DataFrame,
        agency_column: str,
        metadata: dict,
        metadata_value: str,
    ):
    """Get a specific value from agency metadata (e.g., get acronym for Department of Homeland Security).

    Each cell of `agency_column` may hold either a single agency identifier or a
    list of identifiers. A list cell is translated item by item and yields a list
    of the same length; any other cell is translated as a single identifier.
    Identifiers without a matching metadata entry are passed through unchanged.

    Args:
        df (DataFrame): Input data.
        agency_column (str): Column containing agency identifier.
        metadata (dict): Agency metadata.
        metadata_value (str): Value of interest from agency metadata.
            "acronym" is accepted as an alias for the "short_name" field.

    Returns:
        pd.Series: Pandas Series of new values for adding to DataFrame.
    """
    # The metadata records store acronyms under the "short_name" field.
    if metadata_value == "acronym":
        metadata_value = "short_name"

    def _lookup(agency):
        # List cells hold several identifiers; translate each one in order.
        if isinstance(agency, list):
            return [
                _get_nested_metadata(item, metadata=metadata, metadata_value=metadata_value)
                for item in agency
                ]
        return _get_nested_metadata(agency, metadata=metadata, metadata_value=metadata_value)

    return df.loc[:, agency_column].apply(_lookup)

www/style.css CHANGED Viewed

@@ -23,3 +23,7 @@
 #frequency {
   margin-bottom: 5% !important;
 }

 #frequency {
   margin-bottom: 5% !important;
 }
+#menu_agency {
+  overflow: visible;
+}