"""Helpers for working with scikit-learn classifiers and tabular data files.

Provides discovery of scikit-learn classifier classes, a small registry of
dataset loaders, a parser for ``key=value`` parameter strings, and a
pandas-based file reader that dispatches on the file extension.
"""

import ast
import importlib
import inspect
import os
import pkgutil
import warnings

import pandas as pd
import sklearn
from sklearn.base import ClassifierMixin
# The wildcard import supplies ``load_iris`` below. It is kept as-is because
# other modules may rely on the names it re-exports from this module.
from sklearn.datasets import *  # noqa: F401,F403
from sklearn.linear_model import LogisticRegression  # noqa: F401


def safe_import_module(name: str):
    """Import module *name* with warnings silenced.

    Args:
        name: Fully qualified module name, e.g. ``"sklearn.svm"``.

    Returns:
        The imported module, or ``None`` if the import raised an exception.
    """
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return importlib.import_module(name)
    except Exception:
        return None  # or raise/log if desired


def get_sklearn_classifiers() -> dict:
    """Collect classifier classes exposed by scikit-learn's public modules.

    Walks the ``sklearn`` package, imports every module directly below it
    whose dotted name contains no ``._`` component, and gathers each class
    that subclasses ``ClassifierMixin`` and is defined inside ``sklearn``.

    Returns:
        A dict mapping class name to class. When two modules expose a class
        under the same name, the one visited last wins.
    """
    classifiers = {}
    for _, modname, _ in pkgutil.walk_packages(sklearn.__path__, prefix="sklearn."):
        if "._" in modname:  # exclude hidden modules
            continue
        if modname.count(".") > 1:  # exclude modules more than two levels deep
            continue
        try:
            module = importlib.import_module(modname)
            for cls_name, cls in inspect.getmembers(module, inspect.isclass):
                # Skip the mixin itself; only concrete users of it are wanted.
                if "ClassifierMixin" in cls_name:
                    continue
                if issubclass(cls, ClassifierMixin) and cls.__module__.startswith("sklearn"):
                    classifiers[cls_name] = cls
        except Exception:
            # Best effort: a module that cannot be imported or inspected is
            # skipped. KeyboardInterrupt/SystemExit are deliberately not
            # caught so the scan can still be interrupted.
            continue
    return classifiers


def get_sklearn_dataloaders() -> dict:
    """Return the registry of usable scikit-learn dataset loaders.

    Returns:
        A dict mapping a short dataset name to its loader callable.
    """
    # These datasets either don't work or are too big and slow to load, so
    # their loaders are intentionally left out of the registry:
    # fetch_20newsgroups, fetch_20newsgroups_vectorized, fetch_covtype,
    # fetch_kddcup99.
    return {"iris": load_iris}


def _split_top_level(text: str) -> list:
    """Split *text* on commas that sit outside brackets and quoted strings.

    If the brackets or quotes in *text* are unbalanced, the nesting cannot be
    trusted, so the function falls back to a plain ``text.split(",")``.
    """
    parts = []
    current = []
    depth = 0
    quote = None
    escaped = False
    for ch in text:
        if quote is not None:
            # Inside a string literal: copy verbatim until the closing quote.
            current.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == quote:
                quote = None
            continue
        if ch == "," and depth == 0:
            parts.append("".join(current))
            current = []
            continue
        if ch in "'\"":
            quote = ch
        elif ch in "([{":
            depth += 1
        elif ch in ")]}":
            depth -= 1
        current.append(ch)
    parts.append("".join(current))
    if quote is not None or depth != 0:
        return text.split(",")
    return parts


def parse_param_string(param_str: str) -> dict:
    """Parse a signature-like ``key=value, ...`` string into a dict.

    Items without an ``=`` (for example the bare ``*`` keyword-only marker)
    and empty items are ignored. Each value is evaluated with
    ``ast.literal_eval``; when that fails the stripped text is kept as a
    string. Commas nested inside brackets or quotes do not split an item, so
    values such as ``(10, 5)`` or ``{'a': 1, 'b': 2}`` are parsed whole.

    Args:
        param_str: Text such as ``"penalty='l2', *, dual=False, C=1.0"``.

    Returns:
        A dict mapping each parameter name to its parsed value.
    """
    params = {}
    for item in _split_top_level(param_str):
        if not item.strip() or "=" not in item:
            continue
        key, _, raw_value = item.partition("=")
        raw_value = raw_value.strip()
        try:
            value = ast.literal_eval(raw_value)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            value = raw_value  # fallback: treat as string
        params[key.strip()] = value
    return params


def read(filename):
    """Load a tabular file with the pandas reader matching its extension.

    The extension check is case-insensitive, and *filename* may be a string
    or any ``os.PathLike`` object.

    Args:
        filename: Path of the file to read.

    Returns:
        Whatever the selected pandas reader returns for the file.

    Raises:
        ValueError: If the extension is not one of ``.csv``, ``.xlsx``,
            ``.xls``, ``.parquet``, ``.feather`` or ``.json``.
    """
    lowered = os.fspath(filename).lower()
    readers = (
        ((".csv",), pd.read_csv),
        ((".xlsx", ".xls"), pd.read_excel),
        ((".parquet",), pd.read_parquet),
        ((".feather",), pd.read_feather),
        ((".json",), pd.read_json),
    )
    for suffixes, reader in readers:
        if lowered.endswith(suffixes):
            return reader(filename)
    raise ValueError("Unsupported file format.")


if __name__ == '__main__':
    s = "penalty='l2', *, dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1"
    parsed = parse_param_string(s)
    print(parsed)