fcakyon committed · verified
Commit 2e497c6 · 1 Parent(s): bf2e388

Delete src/moderators
src/moderators/__init__.py DELETED
@@ -1,2 +0,0 @@
-__all__ = ["auto_model", "cli"]
-__version__ = "0.1.0"
 
src/moderators/auto_model.py DELETED
@@ -1,117 +0,0 @@
-# python
-from __future__ import annotations
-
-import importlib
-import json
-from pathlib import Path
-from typing import Any, Dict, Optional
-
-try:
-    from huggingface_hub import ModelHubMixin  # do not import hf_hub_download here
-except Exception:
-    class ModelHubMixin:
-        @classmethod
-        def from_pretrained(cls, *args, **kwargs):
-            return cls._from_pretrained(*args, **kwargs)
-
-def _load_config(identifier: str, *, local_files_only: bool = False) -> Dict[str, Any]:
-    p = Path(identifier)
-    if p.exists():
-        cfg_path = p / "config.json"
-        if not cfg_path.exists():
-            raise FileNotFoundError(f"config.json not found in local folder: {cfg_path}")
-        return json.loads(cfg_path.read_text())
-
-    # Lazy import to avoid pulling heavy deps during module import
-    from huggingface_hub import hf_hub_download
-
-    cfg_fp = hf_hub_download(
-        repo_id=identifier,
-        filename="config.json",
-        repo_type="model",
-        local_files_only=local_files_only,
-    )
-    return json.loads(Path(cfg_fp).read_text())
-
-
-def _is_transformers_cfg(cfg: Dict[str, Any]) -> bool:
-    # `architectures` is not enough alone to identify a Transformers model
-    has_tf_sig = any(
-        k in cfg for k in ("transformers_version", "model_type", "id2label", "label2id")
-    )
-    has_arch_list = isinstance(cfg.get("architectures"), list)
-    return has_arch_list and has_tf_sig
-
-
-def _infer_task(cfg: Dict[str, Any]) -> Optional[str]:
-    # get general task from architectures or problem_type
-    archs = [str(a).lower() for a in cfg.get("architectures", [])]
-    if any("classification" in a for a in archs):
-        return "image-classification"
-    prob = str(cfg.get("problem_type", "")).lower()
-    if "classification" in prob:
-        return "image-classification"
-    return None
-
-
-class AutoModerator(ModelHubMixin):
-    def __init__(self, *args, **kwargs) -> None:
-        raise EnvironmentError(
-            "AutoModerator is a factory class and cannot be instantiated directly. "
-            "Please use the `AutoModerator.from_pretrained('model_id')` method."
-        )
-
-    @classmethod
-    def _from_pretrained(
-        cls,
-        model_id: str,
-        config: Optional[dict] = None,
-        local_files_only: bool = False,
-        **kwargs: Any,
-    ):
-        cfg = dict(config or _load_config(model_id, local_files_only=local_files_only))
-
-        architecture = cfg.get("architecture")
-        if not architecture:
-            if _is_transformers_cfg(cfg):
-                cfg["architecture"] = "TransformersModerator"
-                if not cfg.get("task"):
-                    inferred = _infer_task(cfg)
-                    if inferred:
-                        cfg["task"] = inferred
-                    else:
-                        raise ValueError(
-                            "Could not infer 'task' from the Transformers config. "
-                            "Please specify 'task' in the model's config.json "
-                            "(e.g. 'image-classification')."
-                        )
-            else:
-                raise ValueError(
-                    f"Could not determine 'architecture' from config.json for model '{model_id}'."
-                )
-
-        architecture = cfg["architecture"]
-
-        # For MVP, only TransformersModerator is implemented
-        if architecture != "TransformersModerator":
-            raise NotImplementedError(
-                f"'{architecture}' is not yet supported in this version of Moderators. "
-                "As of now, only 'TransformersModerator' is implemented."
-            )
-
-        module_name = architecture.replace("Moderator", "_moderator").lower()
-        module_path = f"moderators.integrations.{module_name}"
-
-        try:
-            module = importlib.import_module(module_path)
-            moderator_class = getattr(module, architecture)
-        except (ImportError, AttributeError) as e:
-            raise ImportError(
-                f"Could not find or import the class '{architecture}'. "
-                f"Please ensure it is defined in '{module_path}.py'. Error: {e}"
-            )
-
-        instance = moderator_class(model_id=model_id, config=cfg, **kwargs)
-        instance.load_model()
-        return instance
-
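For reference, the deleted factory resolved a model in three steps: read config.json (from a local folder or via hf_hub_download), fill in "architecture" and "task" when missing, then import the matching integration module and instantiate it. A minimal usage sketch, assuming the package were still installed; the model id and file path are placeholders:

# Hypothetical usage of the deleted AutoModerator factory.
from moderators.auto_model import AutoModerator

# Resolves config.json, infers architecture="TransformersModerator" plus a
# task such as "image-classification", then loads the integration class.
moderator = AutoModerator.from_pretrained("my-org/image-safety-model")
results = moderator("photo.jpg")  # a list of PredictionResult objects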
 
src/moderators/cli.py DELETED
@@ -1,34 +0,0 @@
-# src/moderators/cli.py
-import argparse
-import json
-from dataclasses import asdict, is_dataclass
-from moderators.auto_model import AutoModerator
-
-def _to_jsonable(obj):
-    if is_dataclass(obj):
-        return asdict(obj)
-    if isinstance(obj, (list, dict, str, int, float)) or obj is None:
-        return obj
-    return str(obj)
-
-def main():
-    parser = argparse.ArgumentParser(prog="moderators", description="Moderators CLI")
-    parser.add_argument("model", nargs="?", help="Local model folder or HF model id")
-    parser.add_argument("input", nargs="?", help="Input text or file path")
-    parser.add_argument("--local-files-only", action="store_true", dest="local_files_only",
-                        help="Use only local files")
-    args = parser.parse_args()
-
-    if not args.model:
-        parser.print_help()
-        return 0
-
-    mod = AutoModerator.from_pretrained(args.model, local_files_only=args.local_files_only)
-    if args.input:
-        out = mod(args.input)
-        print(json.dumps([_to_jsonable(x) for x in out], ensure_ascii=False, indent=2))
-    else:
-        print("Model loaded. Provide the 'input' argument to run inference.")
-
-if __name__ == "__main__":
-    main()
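The deleted CLI was a thin wrapper over the same factory: positional model and input arguments plus a --local-files-only flag, with results serialized through _to_jsonable. A sketch of driving it programmatically, with placeholder arguments:

# Hypothetical invocation of the deleted entry point via sys.argv.
import sys
from moderators.cli import main

sys.argv = ["moderators", "my-org/image-safety-model", "photo.jpg"]
main()  # prints the predictions as indented JSON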
 
src/moderators/integrations/__init__.py DELETED
@@ -1 +0,0 @@
-# Integration package init
 
 
src/moderators/integrations/base.py DELETED
@@ -1,69 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from typing import Any, Dict, List
-from abc import ABC, abstractmethod  # added
-
-from huggingface_hub import ModelHubMixin
-
-
-@dataclass
-class Box:
-    # xyxy: [x1, y1, x2, y2]
-    xyxy: List[float]
-    label: str
-    score: float
-
-
-@dataclass
-class PredictionResult:
-    # Context about the source (file path, URL, etc.)
-    source_path: str = ""
-    # Probability map for classification
-    classifications: Dict[str, float] = field(default_factory=dict)
-    # Detection results
-    detections: List[Box] = field(default_factory=list)
-    # Raw output specific to models/integrations
-    raw_output: Any = None
-
-
-class BaseModerator(ABC, ModelHubMixin):
-    def __init__(self, config: Dict[str, Any], model_id: str, **kwargs: Any) -> None:
-        self.config: Dict[str, Any] = dict(config or {})
-        self.model_id: str = model_id
-
-    @abstractmethod
-    def load_model(self) -> None:
-        """Load model/pipeline and any processors if present."""
-        pass
-
-    # Inference flow
-    def __call__(self, source: Any, **kwargs: Any):
-        # self.run_callbacks("on_predict_start")
-        processed_inputs = self._preprocess(source)
-        model_outputs = self._predict(processed_inputs)
-        results = self._postprocess(model_outputs)
-        # self.run_callbacks("on_predict_end")
-        return results
-
-    @abstractmethod
-    def _preprocess(self, inputs: Any) -> Any:
-        """Convert inputs to model-ready format."""
-        pass
-
-    @abstractmethod
-    def _predict(self, processed_inputs: Any) -> Any:
-        """Run model inference."""
-        pass
-
-    @abstractmethod
-    def _postprocess(self, model_outputs: Any) -> Any:
-        """Convert outputs to PredictionResult format."""
-        pass
-
-    @abstractmethod
-    def save_pretrained(self, save_directory: str, **kwargs: Any) -> str:
-        """
-        Save model and any processors to the given directory.
-        """
-        raise NotImplementedError
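BaseModerator fixed the inference flow (__call__ chains _preprocess, _predict, _postprocess) and left the four hooks abstract. A minimal sketch of a conforming subclass; KeywordModerator and its "blocklist" config key are illustrative, not part of the deleted package:

from typing import Any, List

from moderators.integrations.base import BaseModerator, PredictionResult


class KeywordModerator(BaseModerator):
    def load_model(self) -> None:
        # The "model" is just a set of blocked words taken from the config.
        self._blocklist = set(self.config.get("blocklist", []))

    def _preprocess(self, inputs: Any) -> List[str]:
        return str(inputs).lower().split()

    def _predict(self, tokens: List[str]) -> float:
        hits = sum(tok in self._blocklist for tok in tokens)
        return hits / max(len(tokens), 1)

    def _postprocess(self, score: float) -> List[PredictionResult]:
        return [PredictionResult(classifications={"flagged": score}, raw_output=score)]

    def save_pretrained(self, save_directory: str, **kwargs: Any) -> str:
        return save_directory  # nothing to persist in this toy example


mod = KeywordModerator(config={"blocklist": ["spam"]}, model_id="toy")
mod.load_model()
print(mod("buy spam now")[0].classifications)  # {'flagged': 0.333...}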
 
src/moderators/integrations/transformers_moderator.py DELETED
@@ -1,110 +0,0 @@
-from __future__ import annotations
-
-import json
-from pathlib import Path
-from typing import Any, Dict, List
-
-from .base import BaseModerator, PredictionResult
-from moderators.utils import (
-    auto_install,
-    ensure_transformers,
-    ensure_dl_framework,
-    ensure_pillow_for_task,
-    preprocess_image_input,
-)
-
-
-class TransformersModerator(BaseModerator):
-    def load_model(self) -> None:
-        task = self.config.get("task")
-        if not task:
-            raise ValueError("TransformersModerator requires 'task' in config.json")
-
-        # Ensure transformers is available
-        try:
-            _transformers = ensure_transformers(auto_install)
-        except Exception as e:
-            raise ImportError(
-                "TransformersModerator requires the 'transformers' package. "
-                "Install with: uv pip install -e '.[transformers]' or: uv pip install transformers"
-            ) from e
-        pipeline = _transformers.pipeline
-
-        # Ensure a DL framework (pt/tf/flax)
-        framework = ensure_dl_framework(auto_install)
-
-        # Ensure Pillow for image tasks
-        ensure_pillow_for_task(task, auto_install)
-
-        # Build pipeline
-        self._pipe = pipeline(task, model=self.model_id, framework=framework)
-
-    def _preprocess(self, inputs: Any) -> Any:
-        task = str(self.config.get("task", "")).lower()
-        if "image" in task:
-            return preprocess_image_input(inputs)
-        return inputs
-
-    def _predict(self, processed_inputs: Any) -> Any:
-        return self._pipe(processed_inputs)
-
-    def _postprocess(self, model_outputs: Any) -> List[PredictionResult]:
-        # Pipelines typically return dict or list[dict]
-        outputs = model_outputs
-        if isinstance(outputs, dict):
-            outputs = [outputs]
-
-        results: List[PredictionResult] = []
-        for out in outputs:
-            classifications: Dict[str, float] = {}
-            label = out.get("label")
-            score = out.get("score")
-            if label is not None and score is not None:
-                classifications[str(label)] = float(score)
-
-            results.append(
-                PredictionResult(
-                    source_path=str(self.config.get("source", "")),
-                    classifications=classifications,
-                    detections=[],
-                    raw_output=out,
-                )
-            )
-        return results
-
-    def save_pretrained(self, save_directory: str, **kwargs: Any) -> str:
-        """
-        - Saves model, tokenizer, processor (if any) to `save_directory`.
-        - Also saves/updates `config.json` with architecture and task info.
-        Returns the `save_directory` path.
-        """
-        out_dir = Path(save_directory)
-        out_dir.mkdir(parents=True, exist_ok=True)
-
-        pipe = getattr(self, "_pipe", None)
-        model = getattr(pipe, "model", None) if pipe is not None else None
-        tokenizer = getattr(pipe, "tokenizer", None) if pipe is not None else None
-        processor = (getattr(pipe, "processor", None) or getattr(pipe, "feature_extractor", None)) if pipe is not None else None
-
-        if model and hasattr(model, "save_pretrained"):
-            model.save_pretrained(out_dir)
-        if tokenizer and hasattr(tokenizer, "save_pretrained"):
-            tokenizer.save_pretrained(out_dir)
-        if processor and hasattr(processor, "save_pretrained"):
-            processor.save_pretrained(out_dir)
-
-        # Ensure config.json exists and add the custom fields
-        cfg_path = out_dir / "config.json"
-        cfg = {}
-        if cfg_path.exists():
-            try:
-                cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
-            except Exception:
-                cfg = {}
-
-        cfg["architecture"] = "TransformersModerator"
-        if self.config.get("task"):
-            cfg["task"] = self.config["task"]
-        cfg_path.write_text(json.dumps(cfg, ensure_ascii=False, indent=2), encoding="utf-8")
-        return str(out_dir)
-
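TransformersModerator wrapped a transformers pipeline: load_model builds the pipeline for the configured task (auto-installing dependencies as needed), _postprocess maps label/score dicts into PredictionResult, and save_pretrained exports the pipeline components plus an augmented config.json. A hypothetical round trip, with a placeholder model id and paths:

from moderators.auto_model import AutoModerator

mod = AutoModerator.from_pretrained("my-org/nsfw-image-classifier")
preds = mod("photo.jpg")
print(preds[0].classifications)  # e.g. {"safe": 0.98}

# Re-export: the written config.json gains "architecture" and "task" keys,
# so the folder round-trips through AutoModerator.from_pretrained(...).
mod.save_pretrained("./exported-moderator")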
 
src/moderators/utils/__init__.py DELETED
@@ -1,11 +0,0 @@
-# filepath init for utils package
-from .deps import auto_install, ensure_transformers, ensure_dl_framework, ensure_pillow_for_task
-from .image import preprocess_image_input
-
-__all__ = [
-    "auto_install",
-    "ensure_transformers",
-    "ensure_dl_framework",
-    "ensure_pillow_for_task",
-    "preprocess_image_input",
-]
 
src/moderators/utils/deps.py DELETED
@@ -1,81 +0,0 @@
-from __future__ import annotations
-
-import os
-import shutil
-import subprocess
-import sys
-from typing import Callable, List
-
-
-def auto_install(packages: List[str]) -> bool:
-    """
-    Try to auto-install required packages using 'uv' if available, otherwise fall back to 'pip'.
-    Controlled by env var: MODERATORS_DISABLE_AUTO_INSTALL=1 to disable.
-    """
-    if str(os.environ.get("MODERATORS_DISABLE_AUTO_INSTALL", "")).lower() in ("1", "true", "yes"):
-        return False
-
-    uv = shutil.which("uv")
-    cmd = [uv, "pip", "install", *packages] if uv else [sys.executable, "-m", "pip", "install", *packages]
-
-    try:
-        subprocess.check_call(cmd)
-        return True
-    except Exception:
-        return False
-
-
-def ensure_transformers(install_fn: Callable[[List[str]], bool]):
-    """Ensure 'transformers' is importable; optionally auto-install and retry."""
-    try:
-        import transformers as _transformers  # noqa: F401
-        return _transformers
-    except Exception:
-        if not install_fn(["transformers"]):
-            raise
-        import transformers as _transformers  # type: ignore
-        return _transformers
-
-
-def ensure_dl_framework(install_fn: Callable[[List[str]], bool]) -> str:
-    """
-    Ensure at least one DL framework is available.
-    Preference: PyTorch ('pt'), TensorFlow ('tf'), JAX/Flax ('flax').
-    Tries to auto-install torch first.
-    """
-    try:
-        import torch  # noqa: F401
-        return "pt"
-    except Exception:
-        if install_fn(["torch"]):
-            try:
-                import torch  # noqa: F401
-                return "pt"
-            except Exception:
-                pass
-    try:
-        import tensorflow  # noqa: F401
-        return "tf"
-    except Exception:
-        pass
-    try:
-        import jax  # noqa: F401
-        return "flax"
-    except Exception:
-        pass
-    raise ImportError(
-        "A deep learning framework is required for transformers pipelines. "
-        "Install PyTorch with: uv pip install torch"
-    )
-
-
-def ensure_pillow_for_task(task: str, install_fn: Callable[[List[str]], bool]) -> None:
-    """For image tasks, ensure Pillow is available; auto-install if missing."""
-    if "image" not in str(task).lower():
-        return
-    try:
-        import PIL  # noqa: F401
-    except Exception:
-        if not install_fn(["Pillow"]):
-            raise ImportError("This image task requires Pillow. Install with: uv pip install Pillow")
-        import PIL  # noqa: F401
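The helpers in deps.py compose through the installer callback: each ensure_* function takes install_fn, so auto-installation is injectable and can be switched off with the MODERATORS_DISABLE_AUTO_INSTALL environment variable (names are from the deleted source). A short sketch of how the loader used them:

import os

# Opt out of implicit installs; the ensure_* helpers then raise instead.
os.environ["MODERATORS_DISABLE_AUTO_INSTALL"] = "1"

from moderators.utils.deps import (
    auto_install,
    ensure_dl_framework,
    ensure_pillow_for_task,
    ensure_transformers,
)

transformers = ensure_transformers(auto_install)  # raises if not installed
framework = ensure_dl_framework(auto_install)     # "pt", "tf", or "flax"
ensure_pillow_for_task("image-classification", auto_install)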
 
src/moderators/utils/image.py DELETED
@@ -1,41 +0,0 @@
-from __future__ import annotations
-
-from pathlib import Path
-from typing import Any
-
-
-def preprocess_image_input(inputs: Any, min_side: int = 16) -> Any:
-    """
-    Open path-like inputs with PIL, convert to RGB, ensure a minimal spatial size,
-    and return a PIL.Image.Image. If PIL is unavailable or input is unsupported, return original input.
-    """
-    try:
-        from PIL import Image
-    except Exception:
-        return inputs
-
-    img = None
-    if isinstance(inputs, (str, Path)):
-        try:
-            img = Image.open(str(inputs))
-        except Exception:
-            return inputs
-    elif hasattr(inputs, "mode") and hasattr(inputs, "convert"):
-        img = inputs
-    else:
-        return inputs
-
-    try:
-        if getattr(img, "mode", "") != "RGB":
-            img = img.convert("RGB")
-    except Exception:
-        return inputs
-
-    try:
-        w, h = img.size
-        if w < min_side or h < min_side:
-            img = img.resize((max(min_side, w), max(min_side, h)), Image.BILINEAR)
-    except Exception:
-        pass
-
-    return img
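preprocess_image_input is deliberately forgiving: anything it cannot open or convert is returned unchanged, so callers always get either a normalized RGB image of at least min_side pixels per side or their original input. A short illustration; the file path is a placeholder:

from moderators.utils.image import preprocess_image_input

img = preprocess_image_input("photo.jpg")  # PIL RGB image, each side >= 16 px
passthrough = preprocess_image_input(42)   # unsupported input returned as-is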