from pandas import DataFrame, Series


class DfUtils:
    """Static helpers that reshape and filter DataFrames.

    The extract helpers read a ``"github"`` column whose cells are expected
    to be dicts; ``filter_language`` reads a ``"languages"`` column.
    """

    @staticmethod
    def _extract_github_field(df: DataFrame, field: str) -> DataFrame:
        """Return a copy of *df* with ``github[field]`` lifted into a column.

        The new column is named *field*. Cells of the ``"github"`` column
        that are not dicts (or dicts lacking *field*) yield ``None``. The
        ``"github"`` column is dropped from the result; *df* itself is left
        untouched.

        Raises:
            KeyError: if *df* has no ``"github"`` column.
        """
        result: DataFrame = df.copy()
        result[field] = result["github"].apply(
            lambda github: github.get(field) if isinstance(github, dict) else None
        )
        return result.drop(columns=["github"], errors="ignore")

    @staticmethod
    def _contains_language(languages: object, target_fold: str) -> bool:
        """Tell whether *languages* names the already case-folded target.

        *languages* may be a single string, any iterable of names (list,
        tuple, dict keys, ...), or a missing value. Anything that cannot be
        iterated (``None``, NaN, numbers) counts as "no languages", and
        non-string entries inside an iterable are skipped.
        """
        if isinstance(languages, str):
            # A bare string is one language name, not a sequence of characters.
            return languages.casefold() == target_fold
        try:
            candidates = iter(languages)
        except TypeError:
            # None, float NaN and other scalars carry no language names.
            return False
        return any(
            isinstance(candidate, str) and candidate.casefold() == target_fold
            for candidate in candidates
        )

    @staticmethod
    def extract_languages(df: DataFrame) -> DataFrame:
        """Return a copy of *df* with a ``"languages"`` column taken from
        each row's ``github`` dict and the ``"github"`` column removed.

        Rows whose ``github`` cell is not a dict get ``None``.

        Raises:
            KeyError: if *df* has no ``"github"`` column.
        """
        return DfUtils._extract_github_field(df, "languages")

    @staticmethod
    def filter_language(df: DataFrame, target_language: str) -> DataFrame:
        """Return the rows of *df* whose ``"languages"`` cell contains
        *target_language*, compared case-insensitively via ``str.casefold``.

        Rows with missing (``None``/NaN) or otherwise non-iterable cells are
        excluded rather than raising. An empty *df* yields an empty frame
        that keeps all of its columns.

        Raises:
            KeyError: if *df* has no ``"languages"`` column.
        """
        target_fold: str = target_language.casefold()
        # Build the mask with an explicit bool dtype: an empty, non-boolean
        # mask would be interpreted by ``df[...]`` as a column selection.
        mask: Series = Series(
            [
                DfUtils._contains_language(languages, target_fold)
                for languages in df["languages"]
            ],
            index=df.index,
            dtype=bool,
        )
        return df[mask]

    @staticmethod
    def extract_tests_count(df: DataFrame) -> DataFrame:
        """Return a copy of *df* with a ``"tests_count"`` column taken from
        each row's ``github`` dict and the ``"github"`` column removed.

        Rows whose ``github`` cell is not a dict get ``None``.

        Raises:
            KeyError: if *df* has no ``"github"`` column.
        """
        return DfUtils._extract_github_field(df, "tests_count")