File size: 1,182 Bytes
5c282df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c946de7
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from pandas import DataFrame, Series


class DfUtils:
    """Static helpers for DataFrames that carry per-row GitHub metadata.

    The ``extract_*`` helpers read a ``"github"`` column and promote one key
    of its dict cells to a top-level column. ``filter_language`` works on a
    ``"languages"`` column such as the one ``extract_languages`` produces.
    """

    @staticmethod
    def _extract_github_field(df: DataFrame, field: str) -> DataFrame:
        """Return a copy of *df* with ``github[field]`` promoted to a column.

        Args:
            df: Frame containing a ``"github"`` column.
            field: Key to look up in each ``"github"`` cell; also the name of
                the column that receives the values.

        Returns:
            A new frame with the *field* column added (or overwritten) and the
            ``"github"`` column removed. *df* itself is not modified.

        Raises:
            KeyError: If *df* has no ``"github"`` column.
        """

        def pick(github: object) -> object:
            # Cells that are not dicts (None, NaN, ...) carry no metadata.
            return github.get(field) if isinstance(github, dict) else None

        extracted: Series = df["github"].apply(pick)
        # assign() operates on a copy, so the caller's frame is never mutated.
        return df.assign(**{field: extracted}).drop(columns=["github"], errors="ignore")

    @staticmethod
    def _has_language(languages: object, target_fold: str) -> bool:
        """Tell whether one ``"languages"`` cell names the case-folded target."""
        if isinstance(languages, str):
            # A bare string is one language name, not a sequence of characters.
            return languages.casefold() == target_fold
        try:
            candidates = iter(languages)
        except TypeError:
            # None, NaN and other scalars mean "no languages recorded".
            return False
        return any(
            isinstance(candidate, str) and candidate.casefold() == target_fold
            for candidate in candidates
        )

    @staticmethod
    def extract_languages(df: DataFrame) -> DataFrame:
        """Return a copy of *df* with ``github["languages"]`` as a ``"languages"`` column.

        Rows whose ``"github"`` cell is not a dict get a missing value. The
        ``"github"`` column is dropped from the result.

        Raises:
            KeyError: If *df* has no ``"github"`` column.
        """
        return DfUtils._extract_github_field(df, "languages")

    @staticmethod
    def filter_language(df: DataFrame, target_language: str) -> DataFrame:
        """Keep the rows of *df* whose ``"languages"`` cell contains *target_language*.

        The comparison is case-insensitive (``str.casefold``). A cell may be
        any iterable of names or a single string; missing cells (None, NaN)
        and non-string entries never match.

        Args:
            df: Frame containing a ``"languages"`` column.
            target_language: Language name to look for.

        Returns:
            A new frame holding only the matching rows, with all columns kept.

        Raises:
            KeyError: If *df* has no ``"languages"`` column.
        """
        target_fold: str = target_language.casefold()
        # Build the mask with an explicit bool dtype so that an empty frame
        # still produces a boolean indexer rather than an empty object Series.
        mask: Series = Series(
            [DfUtils._has_language(cell, target_fold) for cell in df["languages"]],
            index=df.index,
            dtype=bool,
        )
        return df[mask]

    @staticmethod
    def extract_tests_count(df: DataFrame) -> DataFrame:
        """Return a copy of *df* with ``github["tests_count"]`` as a ``"tests_count"`` column.

        Rows whose ``"github"`` cell is not a dict get a missing value. The
        ``"github"`` column is dropped from the result.

        Raises:
            KeyError: If *df* has no ``"github"`` column.
        """
        return DfUtils._extract_github_field(df, "tests_count")