File size: 3,091 Bytes
1a3143c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a5fbf6
 
 
 
 
1a3143c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import inspect
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.base import ClassifierMixin
from sklearn.datasets import *
import pkgutil
import importlib
import warnings
import ast
import pandas as pd

def safe_import_module(name):
    """Import the module *name*, returning None instead of raising on failure.

    Any warnings emitted during import are suppressed.

    Args:
        name: Dotted module path, e.g. "sklearn.linear_model".

    Returns:
        The imported module object, or None if the import failed for any reason.
    """
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            module = importlib.import_module(name)
    except Exception:
        # Deliberate best-effort: callers treat None as "module unavailable".
        return None
    return module

def get_sklearn_classifiers():
    """Discover classifier classes shipped with scikit-learn.

    Walks sklearn's subpackages at most two levels deep (skipping private
    modules) and collects every class that subclasses ClassifierMixin and
    is defined inside sklearn, keyed by class name. The mixin base itself
    is excluded.

    Returns:
        dict: mapping of class name -> class object.
    """
    classifiers = {}

    for _, modname, _ in pkgutil.walk_packages(sklearn.__path__, prefix="sklearn."):
        if '._' in modname:  # exclude private modules
            continue

        if modname.count('.') > 1:  # exclude modules more than two levels deep
            continue

        # Narrow try: only the import is expected to fail (optional deps,
        # deprecated submodules). A bare "except:" here would also swallow
        # KeyboardInterrupt/SystemExit, so catch Exception explicitly.
        try:
            module = importlib.import_module(modname)
        except Exception:
            continue

        for cls_name, cls in inspect.getmembers(module, inspect.isclass):
            # Skip the ClassifierMixin base itself; keep concrete classifiers.
            if 'ClassifierMixin' in cls_name:
                continue
            if issubclass(cls, ClassifierMixin) and cls.__module__.startswith("sklearn"):
                classifiers[cls_name] = cls

    return classifiers

def get_sklearn_dataloaders():
    """Return a mapping of dataset name -> sklearn dataset loader function.

    Only small, reliable datasets are included. These loaders either don't
    work or are too big and slow to load, so they are intentionally left out:
    fetch_20newsgroups, fetch_20newsgroups_vectorized, fetch_covtype,
    fetch_kddcup99.
    """
    return {'iris': load_iris}

def parse_param_string(param_str):
    """Parse a scikit-learn-style signature string into a parameter dict.

    Example input (copied from an estimator's docs):
        "penalty='l2', *, dual=False, tol=0.0001, C=1.0"

    Values are converted with ast.literal_eval when possible (numbers,
    booleans, strings, tuples, lists, ...); values that fail to parse are
    kept as their raw string. Items without '=' — including the bare '*'
    keyword-only marker — are skipped.

    Unlike a plain str.split(','), commas nested inside (), [] or {} are
    NOT treated as separators, so container defaults such as
    "hidden_layer_sizes=(100,)" parse as a single item.

    Args:
        param_str: Comma-separated "name=default" signature text.

    Returns:
        dict: parameter name -> parsed default value.
    """
    params = {}
    for item in _split_top_level(param_str):
        item = item.strip()
        if not item or '=' not in item:
            continue  # skips empty items and the bare '*' marker
        key, value = item.split('=', 1)
        key = key.strip()
        try:
            value = ast.literal_eval(value.strip())
        except Exception:
            value = value.strip()  # fallback: treat as string
        params[key] = value
    return params


def _split_top_level(s):
    """Split *s* on commas that are not nested inside (), [] or {}.

    NOTE: commas inside quoted strings are still treated as separators,
    matching the original behavior's limitation.
    """
    parts = []
    depth = 0
    start = 0
    for i, ch in enumerate(s):
        if ch in '([{':
            depth += 1
        elif ch in ')]}':
            depth = max(depth - 1, 0)  # tolerate unbalanced input
        elif ch == ',' and depth == 0:
            parts.append(s[start:i])
            start = i + 1
    parts.append(s[start:])
    return parts

def read(filename):
    """Load a tabular data file into a pandas DataFrame based on extension.

    Supported extensions: .csv, .xlsx/.xls, .parquet, .feather, .json.

    Args:
        filename: Path to the data file.

    Returns:
        pandas.DataFrame with the file's contents.

    Raises:
        ValueError: If the filename's extension is not supported.
    """
    # Dispatch table: (accepted suffixes, loader). str.endswith accepts a
    # tuple, so each entry is one check.
    readers = (
        ((".csv",), pd.read_csv),
        ((".xlsx", ".xls"), pd.read_excel),
        ((".parquet",), pd.read_parquet),
        ((".feather",), pd.read_feather),
        ((".json",), pd.read_json),
    )
    for suffixes, loader in readers:
        if filename.endswith(suffixes):
            return loader(filename)
    raise ValueError("Unsupported file format.")

if __name__ == '__main__':
    # Smoke test: parse a signature string lifted from LogisticRegression's
    # documentation and show the resulting parameter dict.
    sample = "penalty='l2', *, dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1"
    print(parse_param_string(sample))