Spaces:
Running
Running
| import inspect | |
| import sklearn | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.base import ClassifierMixin | |
| from sklearn.datasets import * | |
| import pkgutil | |
| import importlib | |
| import warnings | |
| import ast | |
| import pandas as pd | |
def safe_import_module(name):
    """Import the module called *name*, returning ``None`` if that fails.

    Warnings raised while the import runs are silenced. Any ``Exception``
    raised by the import is swallowed and reported to the caller as ``None``.
    """
    try:
        with warnings.catch_warnings():
            # Silence every warning category for the duration of the import.
            warnings.simplefilter("ignore")
            loaded = importlib.import_module(name)
    except Exception:
        # Best-effort import: the caller only needs to know it did not work.
        return None
    return loaded
def get_sklearn_classifiers():
    """Collect classifier classes reachable from scikit-learn's top-level modules.

    Every non-private module directly under the ``sklearn`` package is
    imported and scanned for classes.

    Returns:
        dict: Maps class name to class for every class that subclasses
        ``ClassifierMixin``, is defined in a module whose name starts with
        ``"sklearn"``, and whose own name does not contain
        ``"ClassifierMixin"``. If two modules expose the same class name,
        the one scanned last wins.
    """
    classifiers = {}
    # iter_modules lists only the direct children of the package, which are
    # exactly the "sklearn.<name>" modules this function scans. Unlike
    # walk_packages it does not import every nested sub-package first, so a
    # broken deep module can no longer abort or slow down the scan.
    for _, modname, _ in pkgutil.iter_modules(sklearn.__path__, prefix="sklearn."):
        if "._" in modname:  # skip private (underscore-prefixed) modules
            continue
        try:
            module = importlib.import_module(modname)
            members = inspect.getmembers(module, inspect.isclass)
        except Exception:
            # Best-effort scan: a module that cannot be imported or inspected
            # is skipped. Catching Exception (not a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            continue
        for cls_name, cls in members:
            if "ClassifierMixin" in cls_name:
                # Exclude the mixin itself; it is not a usable estimator.
                continue
            if issubclass(cls, ClassifierMixin) and cls.__module__.startswith("sklearn"):
                classifiers[cls_name] = cls
    return classifiers
def get_sklearn_dataloaders():
    """Return the available dataset loaders, keyed by dataset name.

    Returns:
        dict: Maps a dataset name to the callable that loads it.
    """
    # Deliberately left out because they either don't work or are too big
    # and slow to load:
    #   '20newsgroup'            -> fetch_20newsgroups
    #   '20newsgroup_vectorized' -> fetch_20newsgroups_vectorized
    #   'covtype'                -> fetch_covtype
    #   'kddcup99'               -> fetch_kddcup99
    return {'iris': load_iris}
def _split_top_level_commas(text):
    """Split *text* on commas that are outside quotes and brackets.

    If the quotes or brackets in *text* are unbalanced, the nesting cannot be
    trusted, so the function falls back to a plain ``text.split(',')``.
    """
    pieces = []
    current = []
    depth = 0        # nesting level of (), [] and {}
    quote = None     # the quote character of the string being read, if any
    escaped = False  # True right after a backslash inside a string
    for ch in text:
        if quote is not None:
            current.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == quote:
                quote = None
            continue
        if ch in "'\"":
            quote = ch
        elif ch in "([{":
            depth += 1
        elif ch in ")]}":
            if depth == 0:
                return text.split(",")  # stray closing bracket
            depth -= 1
        elif ch == "," and depth == 0:
            pieces.append("".join(current))
            current = []
            continue
        current.append(ch)
    if quote is not None or depth != 0:
        return text.split(",")  # unterminated string or bracket
    pieces.append("".join(current))
    return pieces


def parse_param_string(param_str):
    """Parse a ``key=value, key=value`` parameter string into a dict.

    Items are separated by commas; commas inside quotes, parentheses,
    brackets or braces belong to the value, so ``sizes=(100, 50)`` is a
    single item. Items without ``=`` (such as the keyword-only marker ``*``)
    and empty items are ignored.

    Args:
        param_str: Text such as ``"penalty='l2', *, dual=False, C=1.0"``.

    Returns:
        dict: Maps each stripped key to its value. A value is evaluated with
        ``ast.literal_eval``; if it is not a valid Python literal, the
        stripped text is kept as a string.
    """
    params = {}
    for item in _split_top_level_commas(param_str):
        key, sep, raw_value = item.partition("=")
        if not sep:
            # No '=' at all: blank item or a bare marker like '*'.
            continue
        raw_value = raw_value.strip()
        try:
            value = ast.literal_eval(raw_value)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # These are the errors literal_eval documents for malformed
            # input; fall back to treating the value as a plain string.
            value = raw_value
        params[key.strip()] = value
    return params
def read(filename):
    """Load a tabular file into a pandas DataFrame, chosen by file extension.

    The extension is matched case-insensitively, and *filename* may be a
    string or any object whose ``str()`` is the path (for example a
    ``pathlib.Path``).

    Args:
        filename: Path of the file to load. Supported extensions are
            ``.csv``, ``.xlsx``, ``.xls``, ``.parquet``, ``.feather`` and
            ``.json``.

    Returns:
        The object returned by the matching ``pandas.read_*`` function.

    Raises:
        ValueError: If the extension is not one of the supported ones.
    """
    # Only the lower-cased text is used for matching; the original object is
    # what gets handed to pandas.
    lowered = str(filename).lower()
    readers = (
        ((".csv",), pd.read_csv),
        ((".xlsx", ".xls"), pd.read_excel),
        ((".parquet",), pd.read_parquet),
        ((".feather",), pd.read_feather),
        ((".json",), pd.read_json),
    )
    for suffixes, reader in readers:
        if lowered.endswith(suffixes):
            return reader(filename)
    raise ValueError("Unsupported file format.")
if __name__ == '__main__':
    # Manual smoke test: parse a sample keyword-argument string and print
    # the resulting dict.
    print(
        parse_param_string(
            "penalty='l2', *, dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1"
        )
    )