Spaces: Running
sukrukirman committed on
Commit · 53a775c · 1 Parent(s): 24ca01d
init
Browse files
- app.py +204 -0
- pyproject.toml +28 -0
- requirements.txt +3 -0
- src/moderators/__init__.py +2 -0
- src/moderators/auto_model.py +117 -0
- src/moderators/cli.py +34 -0
- src/moderators/integrations/__init__.py +1 -0
- src/moderators/integrations/base.py +69 -0
- src/moderators/integrations/transformers_moderator.py +110 -0
- src/moderators/utils/__init__.py +11 -0
- src/moderators/utils/deps.py +81 -0
- src/moderators/utils/image.py +41 -0
app.py
ADDED
@@ -0,0 +1,204 @@
import gradio as gr
import os
import json
from typing import Any, Dict, Generator
from dotenv import load_dotenv
import gradio.themes as gr_themes
import io
from contextlib import redirect_stdout, redirect_stderr

# Load environment variables from a .env file for local development
load_dotenv()

# --- Secure Token Management ---
VIDDEXA_TOKEN = os.getenv("HF_TOKEN")

# A simple cache to store loaded model instances
_MODEL_CACHE: Dict[str, Any] = {}


def _load_model(model_id: str, user_hf_token: str | None = None):
    """
    Loads a model, caches it, and handles token management.
    All print outputs from this function will be captured.
    """
    if model_id in _MODEL_CACHE:
        print(f"Model '{model_id}' found in cache.")
        return _MODEL_CACHE[model_id]

    print(f"Loading model '{model_id}'...")
    active_token = None
    if model_id == "viddexa/mobilenet_v2_1.0_224":
        if not VIDDEXA_TOKEN:
            raise gr.Error(
                "The featured model 'viddexa/mobilenet_v2_1.0_224' requires an 'HF_TOKEN' to be set in the Space Secrets or a local .env file."
            )
        active_token = VIDDEXA_TOKEN
    elif user_hf_token:
        active_token = user_hf_token

    try:
        from moderators.auto_model import AutoModerator
        model = AutoModerator.from_pretrained(model_id, token=active_token, use_fast=True)
        _MODEL_CACHE[model_id] = model
        print("Model loaded successfully.")
        return model
    except Exception as e:
        error_msg = f"Failed to load model: {model_id}. Error: {e}"
        if "401" in str(e):
            error_msg += "\n\nThis model may be private. Please ensure you have provided a valid Hugging Face token if required."
        raise gr.Error(error_msg)


def _to_jsonable(results: Any) -> Any:
    """Helper function to make model outputs JSON-serializable."""
    try:
        return [getattr(r, "classifications", r) for r in results]
    except Exception:
        return results


# --- NEW: Rewritten 'infer' function as a generator to stream logs ---
def infer(image_path: str, model_choice: str, custom_model_id: str, user_hf_token: str) -> Generator[tuple[str, Any], None, None]:
    """
    The main inference function that now yields updates to stream logs to the UI.
    """
    # 1. Clear previous outputs and show an initial message
    yield "Starting analysis...", None

    if not image_path:
        raise gr.Error("Please upload an image first.")

    log_stream = io.StringIO()
    try:
        # 2. Capture all printed output from the loading and inference process
        with redirect_stdout(log_stream), redirect_stderr(log_stream):
            if model_choice == "Custom Model":
                model_id = (custom_model_id or "").strip()
                if not model_id:
                    raise gr.Error("Please enter the Hugging Face ID for your custom model.")
            else:
                model_id = model_choice
                user_hf_token = ""

            # Load model and yield logs generated during loading
            model = _load_model(model_id, user_hf_token)
            yield log_stream.getvalue(), None

            # Run inference and yield any new logs
            print("\nRunning inference on the image...")
            results = model(image_path)
            print("Inference complete.")
            yield log_stream.getvalue(), None

        # 3. Process the final result and yield it with the complete log
        final_json = json.loads(json.dumps(_to_jsonable(results), ensure_ascii=False, indent=2))
        yield log_stream.getvalue(), final_json

    except gr.Error as e:
        # If a Gradio error happens, show it in the logs
        yield str(e), None
    except Exception as e:
        # For other exceptions, capture the error message and show it
        yield f"An unexpected error occurred:\n{e}", None


def on_model_choice_change(choice: str):
    """Shows or hides the custom model input fields based on the dropdown selection."""
    return gr.update(visible=(choice == "Custom Model"))


# --- Enhanced Gradio Interface with a Log Viewer ---
with gr.Blocks(
    theme=gr_themes.Default(
        primary_hue="blue",
        secondary_hue="neutral",
        font=gr_themes.GoogleFont("Inter")
    ),
    title="Moderators - Visual Content Moderation"
) as demo:
    gr.Markdown("# 🖼️ Moderators: Visual Content Moderation")
    gr.Markdown(
        "Analyze an image using the featured `viddexa/mobilenet_v2_1.0_224` model, "
        "or select another model from the list. You can also use your own private or public model from the Hub."
    )

    with gr.Row(variant="panel"):
        # Column 1: Controls and Inputs
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Controls")
            model_choice = gr.Dropdown(
                choices=[
                    "viddexa/mobilenet_v2_1.0_224",
                    "Falconsai/nsfw_image_detection",
                    "Custom Model",
                ],
                value="viddexa/mobilenet_v2_1.0_224",
                label="Select Model",
                info="Choose a model for the analysis.",
            )

            with gr.Group(visible=False) as custom_model_group:
                custom_model_id = gr.Textbox(
                    label="Custom Hugging Face Model ID",
                    placeholder="username/model-name",
                    info="Enter the ID of the model you want to use."
                )
                user_hf_token = gr.Textbox(
                    label='HF Token (if your model is private)',
                    type="password",
                    placeholder="hf_...",
                    info="An access token is required for private models."
                )

            gr.Markdown("### 🖼️ Upload Image")
            image_input = gr.Image(type="filepath", label="Image to analyze")

            run_btn = gr.Button("Analyze", variant="primary")

            gr.Examples(
                examples=[
                    ["examples/safe_image.png", "viddexa/mobilenet_v2_1.0_224"],
                    ["examples/hentai.jpg", "Falconsai/nsfw_image_detection"],
                    ["examples/porn.jpg", "viddexa/mobilenet_v2_1.0_224"],
                ],
                inputs=[image_input, model_choice],
                label="Click an example to run",
            )

        # Column 2: Outputs
        with gr.Column(scale=2):
            gr.Markdown("### 📊 Results")
            # --- NEW: Status Log Textbox ---
            status_log = gr.Textbox(
                label="Status Logs",
                info="Shows model loading progress and other technical details.",
                interactive=False,
                lines=8,  # Give it some height
            )
            output_json = gr.JSON(label="Model Output (JSON)")

    # Define the interactive events
    run_btn.click(
        fn=infer,
        inputs=[image_input, model_choice, custom_model_id, user_hf_token],
        # --- NEW: The click event now updates both the log and the JSON output ---
        outputs=[status_log, output_json],
    )

    model_choice.change(
        fn=on_model_choice_change,
        inputs=model_choice,
        outputs=custom_model_group,
    )

if __name__ == "__main__":
    if not os.path.exists("examples"):
        os.makedirs("examples")
        print("Created 'examples' directory.")
        print("Please add some images like 'safe_image.jpg' to it for the examples to work.")

    demo.launch()
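The key mechanism in app.py is that `infer` is a generator: each `yield` pushes a partial (logs, result) pair to the two output components while the job is still running, and `redirect_stdout` funnels library prints into the log textbox. Below is a minimal standalone sketch of the same pattern, assuming nothing beyond `gradio` itself; all names (`stream_job`, the components) are illustrative, not part of the app above.

# Minimal sketch of the log-streaming pattern: a generator yields
# (logs, result) tuples, and Gradio pushes each yield to the two outputs.
import io
import time
from contextlib import redirect_stdout

import gradio as gr

def stream_job():
    log = io.StringIO()
    yield "Starting...", None                  # clear previous outputs
    with redirect_stdout(log):
        print("step 1 done")                   # captured, not shown in console
    yield log.getvalue(), None                 # stream intermediate logs
    time.sleep(0.5)                            # stand-in for real work
    yield log.getvalue() + "\nfinished", {"ok": True}  # final logs + result

with gr.Blocks() as sketch:
    logs = gr.Textbox(label="Logs")
    result = gr.JSON(label="Result")
    gr.Button("Run").click(fn=stream_job, outputs=[logs, result])

if __name__ == "__main__":
    sketch.launch()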
pyproject.toml
ADDED
@@ -0,0 +1,28 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "moderators"
version = "0.1.0"
description = "Moderators: ModelHubMixin-based factory and core skeleton (Phase 1)."
readme = "README.md"
requires-python = ">=3.8"
authors = [{ name = "Moderators Team" }]
dependencies = [
    "huggingface-hub>=0.22"
]

[project.optional-dependencies]
transformers = ["transformers>=4.36"]
dev = [
    "pytest>=7.0",
    "Pillow>=9.0"
]

[project.scripts]
moderators = "moderators.cli:main"

[tool.setuptools.packages.find]
where = ["src"]
requirements.txt
ADDED
@@ -0,0 +1,3 @@
gradio
python-dotenv
.
src/moderators/__init__.py
ADDED
@@ -0,0 +1,2 @@
__all__ = ["auto_model", "cli"]
__version__ = "0.1.0"
src/moderators/auto_model.py
ADDED
@@ -0,0 +1,117 @@
# python
from __future__ import annotations

import importlib
import json
from pathlib import Path
from typing import Any, Dict, Optional

try:
    from huggingface_hub import ModelHubMixin  # do not import hf_hub_download here
except Exception:
    class ModelHubMixin:
        @classmethod
        def from_pretrained(cls, *args, **kwargs):
            return cls._from_pretrained(*args, **kwargs)


def _load_config(identifier: str, *, local_files_only: bool = False) -> Dict[str, Any]:
    p = Path(identifier)
    if p.exists():
        cfg_path = p / "config.json"
        if not cfg_path.exists():
            raise FileNotFoundError(f"config.json not found in local folder: {cfg_path}")
        return json.loads(cfg_path.read_text())

    # Lazy import to avoid pulling heavy deps during module import
    from huggingface_hub import hf_hub_download

    cfg_fp = hf_hub_download(
        repo_id=identifier,
        filename="config.json",
        repo_type="model",
        local_files_only=local_files_only,
    )
    return json.loads(Path(cfg_fp).read_text())


def _is_transformers_cfg(cfg: Dict[str, Any]) -> bool:
    # `architectures` is not enough alone to identify a Transformers model
    has_tf_sig = any(
        k in cfg for k in ("transformers_version", "model_type", "id2label", "label2id")
    )
    has_arch_list = isinstance(cfg.get("architectures"), list)
    return has_arch_list and has_tf_sig


def _infer_task(cfg: Dict[str, Any]) -> Optional[str]:
    # Get the general task from architectures or problem_type
    archs = [str(a).lower() for a in cfg.get("architectures", [])]
    if any("classification" in a for a in archs):
        return "image-classification"
    prob = str(cfg.get("problem_type", "")).lower()
    if "classification" in prob:
        return "image-classification"
    return None


class AutoModerator(ModelHubMixin):
    def __init__(self, *args, **kwargs) -> None:
        raise EnvironmentError(
            "AutoModerator is a factory class and cannot be instantiated directly. "
            "Please use the `AutoModerator.from_pretrained('model_id')` method."
        )

    @classmethod
    def _from_pretrained(
        cls,
        model_id: str,
        config: Optional[dict] = None,
        local_files_only: bool = False,
        **kwargs: Any,
    ):
        cfg = dict(config or _load_config(model_id, local_files_only=local_files_only))

        architecture = cfg.get("architecture")
        if not architecture:
            if _is_transformers_cfg(cfg):
                cfg["architecture"] = "TransformersModerator"
                if not cfg.get("task"):
                    inferred = _infer_task(cfg)
                    if inferred:
                        cfg["task"] = inferred
                    else:
                        raise ValueError(
                            "Could not infer 'task' from the Transformers config. "
                            "Please specify 'task' in the model's config.json "
                            "(e.g. 'image-classification')."
                        )
            else:
                raise ValueError(
                    f"Could not determine 'architecture' from config.json for model '{model_id}'."
                )

        architecture = cfg["architecture"]

        # For MVP, only TransformersModerator is implemented
        if architecture != "TransformersModerator":
            raise NotImplementedError(
                f"'{architecture}' is not yet supported in this version of Moderators. "
                "As of now, only 'TransformersModerator' is implemented."
            )

        module_name = architecture.replace("Moderator", "_moderator").lower()
        module_path = f"moderators.integrations.{module_name}"

        try:
            module = importlib.import_module(module_path)
            moderator_class = getattr(module, architecture)
        except (ImportError, AttributeError) as e:
            raise ImportError(
                f"Could not find or import the class '{architecture}'. "
                f"Please ensure it is defined in '{module_path}.py'. Error: {e}"
            )

        instance = moderator_class(model_id=model_id, config=cfg, **kwargs)
        instance.load_model()
        return instance
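A hedged usage sketch of the factory above: `username/model` and `photo.jpg` are placeholders, and the flow assumes a Hub repo whose config.json passes the `_is_transformers_cfg` heuristics so that dispatch lands on TransformersModerator.

from moderators.auto_model import AutoModerator

# config.json is fetched, 'architecture'/'task' are resolved, the matching
# integration class is imported, and load_model() is called on the instance.
mod = AutoModerator.from_pretrained("username/model")  # placeholder repo id
results = mod("photo.jpg")  # placeholder input; runs BaseModerator.__call__
for r in results:
    print(r.classifications)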
src/moderators/cli.py
ADDED
@@ -0,0 +1,34 @@
# src/moderators/cli.py
import argparse
import json
from dataclasses import asdict, is_dataclass
from moderators.auto_model import AutoModerator


def _to_jsonable(obj):
    if is_dataclass(obj):
        return asdict(obj)
    if isinstance(obj, (list, dict, str, int, float)) or obj is None:
        return obj
    return str(obj)


def main():
    parser = argparse.ArgumentParser(prog="moderators", description="Moderators CLI")
    parser.add_argument("model", nargs="?", help="Local model folder or HF model id")
    parser.add_argument("input", nargs="?", help="Input text or file path")
    parser.add_argument("--local-files-only", action="store_true", dest="local_files_only",
                        help="Use only local files")
    args = parser.parse_args()

    if not args.model:
        parser.print_help()
        return 0

    mod = AutoModerator.from_pretrained(args.model, local_files_only=args.local_files_only)
    if args.input:
        out = mod(args.input)
        print(json.dumps([_to_jsonable(x) for x in out], ensure_ascii=False, indent=2))
    else:
        print("Model loaded. Provide the 'input' argument to run inference.")


if __name__ == "__main__":
    main()
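For illustration, the `moderators` console script declared in pyproject.toml maps to `main()` above, so a script can drive the same code path by patching `sys.argv`; this is only a sketch, and the model id and file name are placeholders.

import sys
from moderators.cli import main

# Equivalent to running: moderators Falconsai/nsfw_image_detection photo.jpg
sys.argv = ["moderators", "Falconsai/nsfw_image_detection", "photo.jpg"]
main()  # prints a JSON list built via _to_jsonable/asdict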
src/moderators/integrations/__init__.py
ADDED
@@ -0,0 +1 @@
# Integration package init
src/moderators/integrations/base.py
ADDED
@@ -0,0 +1,69 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List
from abc import ABC, abstractmethod

from huggingface_hub import ModelHubMixin


@dataclass
class Box:
    # xyxy: [x1, y1, x2, y2]
    xyxy: List[float]
    label: str
    score: float


@dataclass
class PredictionResult:
    # Context about the source (file path, URL, etc.)
    source_path: str = ""
    # Probability map for classification
    classifications: Dict[str, float] = field(default_factory=dict)
    # Detection results
    detections: List[Box] = field(default_factory=list)
    # Raw output specific to models/integrations
    raw_output: Any = None


class BaseModerator(ABC, ModelHubMixin):
    def __init__(self, config: Dict[str, Any], model_id: str, **kwargs: Any) -> None:
        self.config: Dict[str, Any] = dict(config or {})
        self.model_id: str = model_id

    @abstractmethod
    def load_model(self) -> None:
        """Load model/pipeline and any processors if present."""
        pass

    # Inference flow
    def __call__(self, source: Any, **kwargs: Any):
        # self.run_callbacks("on_predict_start")
        processed_inputs = self._preprocess(source)
        model_outputs = self._predict(processed_inputs)
        results = self._postprocess(model_outputs)
        # self.run_callbacks("on_predict_end")
        return results

    @abstractmethod
    def _preprocess(self, inputs: Any) -> Any:
        """Convert inputs to model-ready format."""
        pass

    @abstractmethod
    def _predict(self, processed_inputs: Any) -> Any:
        """Run model inference."""
        pass

    @abstractmethod
    def _postprocess(self, model_outputs: Any) -> Any:
        """Convert outputs to PredictionResult format."""
        pass

    @abstractmethod
    def save_pretrained(self, save_directory: str, **kwargs: Any) -> str:
        """
        Save model and any processors to the given directory.
        """
        raise NotImplementedError
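A minimal sketch of a concrete subclass, assuming only the abstract contract above; `EchoModerator` is hypothetical and exists purely to show how `__call__` threads `_preprocess` → `_predict` → `_postprocess`.

from moderators.integrations.base import BaseModerator, PredictionResult

class EchoModerator(BaseModerator):
    def load_model(self) -> None:
        # A stand-in "model" that labels everything as safe
        self._model = lambda x: {"label": "safe", "score": 1.0}

    def _preprocess(self, inputs):
        return inputs

    def _predict(self, processed_inputs):
        return self._model(processed_inputs)

    def _postprocess(self, model_outputs):
        return [PredictionResult(
            classifications={model_outputs["label"]: model_outputs["score"]},
            raw_output=model_outputs,
        )]

    def save_pretrained(self, save_directory: str, **kwargs) -> str:
        raise NotImplementedError

m = EchoModerator(config={}, model_id="echo")
m.load_model()  # the AutoModerator factory normally calls this for you
print(m("anything")[0].classifications)  # -> {'safe': 1.0}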
src/moderators/integrations/transformers_moderator.py
ADDED
@@ -0,0 +1,110 @@
from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Dict, List

from .base import BaseModerator, PredictionResult
from moderators.utils import (
    auto_install,
    ensure_transformers,
    ensure_dl_framework,
    ensure_pillow_for_task,
    preprocess_image_input,
)


class TransformersModerator(BaseModerator):
    def load_model(self) -> None:
        task = self.config.get("task")
        if not task:
            raise ValueError("TransformersModerator requires 'task' in config.json")

        # Ensure transformers is available
        try:
            _transformers = ensure_transformers(auto_install)
        except Exception as e:
            raise ImportError(
                "TransformersModerator requires the 'transformers' package. "
                "Install with: uv pip install -e '.[transformers]' or: uv pip install transformers"
            ) from e
        pipeline = _transformers.pipeline

        # Ensure a DL framework (pt/tf/flax)
        framework = ensure_dl_framework(auto_install)

        # Ensure Pillow for image tasks
        ensure_pillow_for_task(task, auto_install)

        # Build pipeline
        self._pipe = pipeline(task, model=self.model_id, framework=framework)

    def _preprocess(self, inputs: Any) -> Any:
        task = str(self.config.get("task", "")).lower()
        if "image" in task:
            return preprocess_image_input(inputs)
        return inputs

    def _predict(self, processed_inputs: Any) -> Any:
        return self._pipe(processed_inputs)

    def _postprocess(self, model_outputs: Any) -> List[PredictionResult]:
        # Pipelines typically return dict or list[dict]
        outputs = model_outputs
        if isinstance(outputs, dict):
            outputs = [outputs]

        results: List[PredictionResult] = []
        for out in outputs:
            classifications: Dict[str, float] = {}
            label = out.get("label")
            score = out.get("score")
            if label is not None and score is not None:
                classifications[str(label)] = float(score)

            results.append(
                PredictionResult(
                    source_path=str(self.config.get("source", "")),
                    classifications=classifications,
                    detections=[],
                    raw_output=out,
                )
            )
        return results

    def save_pretrained(self, save_directory: str, **kwargs: Any) -> str:
        """
        - Saves model, tokenizer, processor (if any) to `save_directory`.
        - Also saves/updates `config.json` with architecture and task info.
        Returns the `save_directory` path.
        """
        out_dir = Path(save_directory)
        out_dir.mkdir(parents=True, exist_ok=True)

        pipe = getattr(self, "_pipe", None)
        model = getattr(pipe, "model", None) if pipe is not None else None
        tokenizer = getattr(pipe, "tokenizer", None) if pipe is not None else None
        # Fall back to the feature extractor when the pipeline has no processor
        processor = None
        if pipe is not None:
            processor = getattr(pipe, "processor", None) or getattr(pipe, "feature_extractor", None)

        if model and hasattr(model, "save_pretrained"):
            model.save_pretrained(out_dir)
        if tokenizer and hasattr(tokenizer, "save_pretrained"):
            tokenizer.save_pretrained(out_dir)
        if processor and hasattr(processor, "save_pretrained"):
            processor.save_pretrained(out_dir)

        # Make sure config.json exists and add the custom fields
        cfg_path = out_dir / "config.json"
        cfg = {}
        if cfg_path.exists():
            try:
                cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
            except Exception:
                cfg = {}

        cfg["architecture"] = "TransformersModerator"
        if self.config.get("task"):
            cfg["task"] = self.config["task"]
        cfg_path.write_text(json.dumps(cfg, ensure_ascii=False, indent=2), encoding="utf-8")
        return str(out_dir)
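A hypothetical round-trip sketch built on `save_pretrained` above: save a loaded moderator to a folder, then reload it offline through the factory. The repo id, folder, and image path are placeholders, and the offline reload assumes the patched config.json written by `save_pretrained` is picked up by `_load_config`'s local-folder branch.

from moderators.auto_model import AutoModerator

mod = AutoModerator.from_pretrained("Falconsai/nsfw_image_detection")
out_dir = mod.save_pretrained("./my_moderator")  # weights plus patched config.json
reloaded = AutoModerator.from_pretrained(out_dir, local_files_only=True)
print(reloaded("photo.jpg")[0].classifications)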
src/moderators/utils/__init__.py
ADDED
@@ -0,0 +1,11 @@
# Utils package init
from .deps import auto_install, ensure_transformers, ensure_dl_framework, ensure_pillow_for_task
from .image import preprocess_image_input

__all__ = [
    "auto_install",
    "ensure_transformers",
    "ensure_dl_framework",
    "ensure_pillow_for_task",
    "preprocess_image_input",
]
src/moderators/utils/deps.py
ADDED
@@ -0,0 +1,81 @@
from __future__ import annotations

import os
import shutil
import subprocess
import sys
from typing import Callable, List


def auto_install(packages: List[str]) -> bool:
    """
    Try to auto-install required packages using 'uv' if available, otherwise fall back to 'pip'.
    Controlled by env var: MODERATORS_DISABLE_AUTO_INSTALL=1 to disable.
    """
    if str(os.environ.get("MODERATORS_DISABLE_AUTO_INSTALL", "")).lower() in ("1", "true", "yes"):
        return False

    uv = shutil.which("uv")
    cmd = [uv, "pip", "install", *packages] if uv else [sys.executable, "-m", "pip", "install", *packages]

    try:
        subprocess.check_call(cmd)
        return True
    except Exception:
        return False


def ensure_transformers(install_fn: Callable[[List[str]], bool]):
    """Ensure 'transformers' is importable; optionally auto-install and retry."""
    try:
        import transformers as _transformers  # noqa: F401
        return _transformers
    except Exception:
        if not install_fn(["transformers"]):
            raise
        import transformers as _transformers  # type: ignore
        return _transformers


def ensure_dl_framework(install_fn: Callable[[List[str]], bool]) -> str:
    """
    Ensure at least one DL framework is available.
    Preference: PyTorch ('pt'), TensorFlow ('tf'), JAX/Flax ('flax').
    Tries to auto-install torch first.
    """
    try:
        import torch  # noqa: F401
        return "pt"
    except Exception:
        if install_fn(["torch"]):
            try:
                import torch  # noqa: F401
                return "pt"
            except Exception:
                pass
    try:
        import tensorflow  # noqa: F401
        return "tf"
    except Exception:
        pass
    try:
        import jax  # noqa: F401
        return "flax"
    except Exception:
        pass
    raise ImportError(
        "A deep learning framework is required for transformers pipelines. "
        "Install PyTorch with: uv pip install torch"
    )


def ensure_pillow_for_task(task: str, install_fn: Callable[[List[str]], bool]) -> None:
    """For image tasks, ensure Pillow is available; auto-install if missing."""
    if "image" not in str(task).lower():
        return
    try:
        import PIL  # noqa: F401
    except Exception:
        if not install_fn(["Pillow"]):
            raise ImportError("This image task requires Pillow. Install with: uv pip install Pillow")
        import PIL  # noqa: F401
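A short sketch of the opt-out path above: with the env var set, `auto_install` returns False immediately, so a missing package makes `ensure_transformers` re-raise the original import error instead of shelling out to uv/pip; when the package is already present it is simply imported and returned.

import os
os.environ["MODERATORS_DISABLE_AUTO_INSTALL"] = "1"  # set before loading any model

from moderators.utils import auto_install, ensure_transformers

transformers = ensure_transformers(auto_install)  # raises ImportError if absent
print(transformers.__version__)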
src/moderators/utils/image.py
ADDED
@@ -0,0 +1,41 @@
from __future__ import annotations

from pathlib import Path
from typing import Any


def preprocess_image_input(inputs: Any, min_side: int = 16) -> Any:
    """
    Open path-like inputs with PIL, convert to RGB, ensure a minimal spatial size,
    and return a PIL.Image.Image. If PIL is unavailable or input is unsupported, return original input.
    """
    try:
        from PIL import Image
    except Exception:
        return inputs

    img = None
    if isinstance(inputs, (str, Path)):
        try:
            img = Image.open(str(inputs))
        except Exception:
            return inputs
    elif hasattr(inputs, "mode") and hasattr(inputs, "convert"):
        img = inputs
    else:
        return inputs

    try:
        if getattr(img, "mode", "") != "RGB":
            img = img.convert("RGB")
    except Exception:
        return inputs

    try:
        w, h = img.size
        if w < min_side or h < min_side:
            img = img.resize((max(min_side, w), max(min_side, h)), Image.BILINEAR)
    except Exception:
        pass

    return img
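A small sketch of the helper on an in-memory image, assuming Pillow is installed; it exercises both normalizations (RGB conversion and the 16-pixel minimum side).

from PIL import Image
from moderators.utils.image import preprocess_image_input

tiny = Image.new("L", (8, 8))      # grayscale and below the minimum side
img = preprocess_image_input(tiny)
print(img.mode, img.size)          # -> RGB (16, 16)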