Spaces: Running
sukrukirman committed on
Commit · 53a775c · 1 Parent(s): 24ca01d
init
Browse files
- app.py +204 -0
- pyproject.toml +28 -0
- requirements.txt +3 -0
- src/moderators/__init__.py +2 -0
- src/moderators/auto_model.py +117 -0
- src/moderators/cli.py +34 -0
- src/moderators/integrations/__init__.py +1 -0
- src/moderators/integrations/base.py +69 -0
- src/moderators/integrations/transformers_moderator.py +110 -0
- src/moderators/utils/__init__.py +11 -0
- src/moderators/utils/deps.py +81 -0
- src/moderators/utils/image.py +41 -0
app.py
ADDED
@@ -0,0 +1,204 @@
import gradio as gr
import os
import json
from typing import Any, Dict, Generator
from dotenv import load_dotenv
import gradio.themes as gr_themes
import io
from contextlib import redirect_stdout, redirect_stderr

# Load environment variables from a .env file for local development
load_dotenv()

# --- Secure Token Management ---
VIDDEXA_TOKEN = os.getenv("HF_TOKEN")

# A simple cache to store loaded model instances
_MODEL_CACHE: Dict[str, Any] = {}


def _load_model(model_id: str, user_hf_token: str | None = None):
    """
    Loads a model, caches it, and handles token management.
    All print outputs from this function will be captured.
    """
    if model_id in _MODEL_CACHE:
        print(f"Model '{model_id}' found in cache.")
        return _MODEL_CACHE[model_id]

    print(f"Loading model '{model_id}'...")
    active_token = None
    if model_id == "viddexa/mobilenet_v2_1.0_224":
        if not VIDDEXA_TOKEN:
            raise gr.Error(
                "The featured model 'viddexa/mobilenet_v2_1.0_224' requires an 'HF_TOKEN' to be set in the Space Secrets or a local .env file."
            )
        active_token = VIDDEXA_TOKEN
    elif user_hf_token:
        active_token = user_hf_token

    try:
        from moderators.auto_model import AutoModerator
        model = AutoModerator.from_pretrained(model_id, token=active_token, use_fast=True)
        _MODEL_CACHE[model_id] = model
        print("Model loaded successfully.")
        return model
    except Exception as e:
        error_msg = f"Failed to load model: {model_id}. Error: {e}"
        if "401" in str(e):
            error_msg += "\n\nThis model may be private. Please ensure you have provided a valid Hugging Face token if required."
        raise gr.Error(error_msg)


def _to_jsonable(results: Any) -> Any:
    """Helper function to make model outputs JSON-serializable."""
    try:
        return [getattr(r, "classifications", r) for r in results]
    except Exception:
        return results


# --- NEW: Rewritten 'infer' function as a generator to stream logs ---
def infer(image_path: str, model_choice: str, custom_model_id: str, user_hf_token: str) -> Generator[tuple[str, Any], None, None]:
    """
    The main inference function that now yields updates to stream logs to the UI.
    """
    # 1. Clear previous outputs and show an initial message
    yield "Starting analysis...", None

    if not image_path:
        raise gr.Error("Please upload an image first.")

    log_stream = io.StringIO()
    try:
        # 2. Capture all printed output from the loading and inference process
        with redirect_stdout(log_stream), redirect_stderr(log_stream):
            if model_choice == "Custom Model":
                model_id = (custom_model_id or "").strip()
                if not model_id:
                    raise gr.Error("Please enter the Hugging Face ID for your custom model.")
            else:
                model_id = model_choice
                user_hf_token = ""

            # Load model and yield logs generated during loading
            model = _load_model(model_id, user_hf_token)
            yield log_stream.getvalue(), None

            # Run inference and yield any new logs
            print("\nRunning inference on the image...")
            results = model(image_path)
            print("Inference complete.")
            yield log_stream.getvalue(), None

        # 3. Process the final result and yield it with the complete log
        final_json = json.loads(json.dumps(_to_jsonable(results), ensure_ascii=False, indent=2))
        yield log_stream.getvalue(), final_json

    except gr.Error as e:
        # If a Gradio error happens, show it in the logs
        yield str(e), None
    except Exception as e:
        # For other exceptions, capture the error message and show it
        yield f"An unexpected error occurred:\n{e}", None


def on_model_choice_change(choice: str):
    """Shows or hides the custom model input fields based on the dropdown selection."""
    return gr.update(visible=(choice == "Custom Model"))


# --- Enhanced Gradio Interface with a Log Viewer ---
with gr.Blocks(
    theme=gr_themes.Default(
        primary_hue="blue",
        secondary_hue="neutral",
        font=gr_themes.GoogleFont("Inter")
    ),
    title="Moderators - Visual Content Moderation"
) as demo:
    gr.Markdown("# 🖼️ Moderators: Visual Content Moderation")
    gr.Markdown(
        "Analyze an image using the featured `viddexa/mobilenet_v2_1.0_224` model, "
        "or select another model from the list. You can also use your own private or public model from the Hub."
    )

    with gr.Row(variant="panel"):
        # Column 1: Controls and Inputs
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Controls")
            model_choice = gr.Dropdown(
                choices=[
                    "viddexa/mobilenet_v2_1.0_224",
                    "Falconsai/nsfw_image_detection",
                    "Custom Model",
                ],
                value="viddexa/mobilenet_v2_1.0_224",
                label="Select Model",
                info="Choose a model for the analysis.",
            )

            with gr.Group(visible=False) as custom_model_group:
                custom_model_id = gr.Textbox(
                    label="Custom Hugging Face Model ID",
                    placeholder="username/model-name",
                    info="Enter the ID of the model you want to use."
                )
                user_hf_token = gr.Textbox(
                    label='HF Token (if your model is private)',
                    type="password",
                    placeholder="hf_...",
                    info="An access token is required for private models."
                )

            gr.Markdown("### 🖼️ Upload Image")
            image_input = gr.Image(type="filepath", label="Image to analyze")

            run_btn = gr.Button("Analyze", variant="primary")

            gr.Examples(
                examples=[
                    ["examples/safe_image.png", "viddexa/mobilenet_v2_1.0_224"],
                    ["examples/hentai.jpg", "Falconsai/nsfw_image_detection"],
                    ["examples/porn.jpg", "viddexa/mobilenet_v2_1.0_224"],
                ],
                inputs=[image_input, model_choice],
                label="Click an example to run",
            )

        # Column 2: Outputs
        with gr.Column(scale=2):
            gr.Markdown("### 📊 Results")
            # --- NEW: Status Log Textbox ---
            status_log = gr.Textbox(
                label="Status Logs",
                info="Shows model loading progress and other technical details.",
                interactive=False,
                lines=8,  # Give it some height
            )
            output_json = gr.JSON(label="Model Output (JSON)")

    # Define the interactive events
    run_btn.click(
        fn=infer,
        inputs=[image_input, model_choice, custom_model_id, user_hf_token],
        # --- NEW: The click event now updates both the log and the JSON output ---
        outputs=[status_log, output_json],
    )

    model_choice.change(
        fn=on_model_choice_change,
        inputs=model_choice,
        outputs=custom_model_group,
    )

if __name__ == "__main__":
    if not os.path.exists("examples"):
        os.makedirs("examples")
        print("Created 'examples' directory.")
        print("Please add some images like 'safe_image.jpg' to it for the examples to work.")

    demo.launch()
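The key mechanism in app.py is that `infer` is a generator: each `yield` pushes a partial (logs, result) pair to the two output components while the job is still running, and `redirect_stdout` funnels library prints into the log textbox. Below is a minimal standalone sketch of the same pattern, assuming nothing beyond `gradio` itself; all names (`stream_job`, the components) are illustrative, not part of the app above.

# Minimal sketch of the log-streaming pattern: a generator yields
# (logs, result) tuples, and Gradio pushes each yield to the two outputs.
import io
import time
from contextlib import redirect_stdout

import gradio as gr

def stream_job():
    log = io.StringIO()
    yield "Starting...", None                  # clear previous outputs
    with redirect_stdout(log):
        print("step 1 done")                   # captured, not shown in console
    yield log.getvalue(), None                 # stream intermediate logs
    time.sleep(0.5)                            # stand-in for real work
    yield log.getvalue() + "\nfinished", {"ok": True}  # final logs + result

with gr.Blocks() as sketch:
    logs = gr.Textbox(label="Logs")
    result = gr.JSON(label="Result")
    gr.Button("Run").click(fn=stream_job, outputs=[logs, result])

if __name__ == "__main__":
    sketch.launch()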
pyproject.toml
ADDED
@@ -0,0 +1,28 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "moderators"
version = "0.1.0"
description = "Moderators: ModelHubMixin-based factory and core skeleton (Phase 1)."
readme = "README.md"
requires-python = ">=3.8"
authors = [{ name = "Moderators Team" }]
dependencies = [
    "huggingface-hub>=0.22"
]

[project.optional-dependencies]
transformers = ["transformers>=4.36"]
dev = [
    "pytest>=7.0",
    "Pillow>=9.0"
]

[project.scripts]
moderators = "moderators.cli:main"

[tool.setuptools.packages.find]
where = ["src"]
requirements.txt
ADDED
@@ -0,0 +1,3 @@
gradio
python-dotenv
.
src/moderators/__init__.py
ADDED
@@ -0,0 +1,2 @@
__all__ = ["auto_model", "cli"]
__version__ = "0.1.0"
src/moderators/auto_model.py
ADDED
@@ -0,0 +1,117 @@
# python
from __future__ import annotations

import importlib
import json
from pathlib import Path
from typing import Any, Dict, Optional

try:
    from huggingface_hub import ModelHubMixin  # do not import hf_hub_download here
except Exception:
    class ModelHubMixin:
        @classmethod
        def from_pretrained(cls, *args, **kwargs):
            return cls._from_pretrained(*args, **kwargs)


def _load_config(identifier: str, *, local_files_only: bool = False) -> Dict[str, Any]:
    p = Path(identifier)
    if p.exists():
        cfg_path = p / "config.json"
        if not cfg_path.exists():
            raise FileNotFoundError(f"config.json not found in local folder: {cfg_path}")
        return json.loads(cfg_path.read_text())

    # Lazy import to avoid pulling heavy deps during module import
    from huggingface_hub import hf_hub_download

    cfg_fp = hf_hub_download(
        repo_id=identifier,
        filename="config.json",
        repo_type="model",
        local_files_only=local_files_only,
    )
    return json.loads(Path(cfg_fp).read_text())


def _is_transformers_cfg(cfg: Dict[str, Any]) -> bool:
    # `architectures` is not enough alone to identify a Transformers model
    has_tf_sig = any(
        k in cfg for k in ("transformers_version", "model_type", "id2label", "label2id")
    )
    has_arch_list = isinstance(cfg.get("architectures"), list)
    return has_arch_list and has_tf_sig


def _infer_task(cfg: Dict[str, Any]) -> Optional[str]:
    # Get the general task from architectures or problem_type
    archs = [str(a).lower() for a in cfg.get("architectures", [])]
    if any("classification" in a for a in archs):
        return "image-classification"
    prob = str(cfg.get("problem_type", "")).lower()
    if "classification" in prob:
        return "image-classification"
    return None


class AutoModerator(ModelHubMixin):
    def __init__(self, *args, **kwargs) -> None:
        raise EnvironmentError(
            "AutoModerator is a factory class and cannot be instantiated directly. "
            "Please use the `AutoModerator.from_pretrained('model_id')` method."
        )

    @classmethod
    def _from_pretrained(
        cls,
        model_id: str,
        config: Optional[dict] = None,
        local_files_only: bool = False,
        **kwargs: Any,
    ):
        cfg = dict(config or _load_config(model_id, local_files_only=local_files_only))

        architecture = cfg.get("architecture")
        if not architecture:
            if _is_transformers_cfg(cfg):
                cfg["architecture"] = "TransformersModerator"
                if not cfg.get("task"):
                    inferred = _infer_task(cfg)
                    if inferred:
                        cfg["task"] = inferred
                    else:
                        raise ValueError(
                            "Could not infer 'task' from the Transformers config. "
                            "Please specify 'task' in the model's config.json "
                            "(e.g. 'image-classification')."
                        )
            else:
                raise ValueError(
                    f"Could not determine 'architecture' from config.json for model '{model_id}'."
                )

        architecture = cfg["architecture"]

        # For MVP, only TransformersModerator is implemented
        if architecture != "TransformersModerator":
            raise NotImplementedError(
                f"'{architecture}' is not yet supported in this version of Moderators. "
                "As of now, only 'TransformersModerator' is implemented."
            )

        module_name = architecture.replace("Moderator", "_moderator").lower()
        module_path = f"moderators.integrations.{module_name}"

        try:
            module = importlib.import_module(module_path)
            moderator_class = getattr(module, architecture)
        except (ImportError, AttributeError) as e:
            raise ImportError(
                f"Could not find or import the class '{architecture}'. "
                f"Please ensure it is defined in '{module_path}.py'. Error: {e}"
            )

        instance = moderator_class(model_id=model_id, config=cfg, **kwargs)
        instance.load_model()
        return instance
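A hedged usage sketch of the factory above: `username/model` and `photo.jpg` are placeholders, and the flow assumes a Hub repo whose config.json passes the `_is_transformers_cfg` heuristics so that dispatch lands on TransformersModerator.

from moderators.auto_model import AutoModerator

# config.json is fetched, 'architecture'/'task' are resolved, the matching
# integration class is imported, and load_model() is called on the instance.
mod = AutoModerator.from_pretrained("username/model")  # placeholder repo id
results = mod("photo.jpg")  # placeholder input; runs BaseModerator.__call__
for r in results:
    print(r.classifications)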
src/moderators/cli.py
ADDED
@@ -0,0 +1,34 @@
# src/moderators/cli.py
import argparse
import json
from dataclasses import asdict, is_dataclass
from moderators.auto_model import AutoModerator


def _to_jsonable(obj):
    if is_dataclass(obj):
        return asdict(obj)
    if isinstance(obj, (list, dict, str, int, float)) or obj is None:
        return obj
    return str(obj)


def main():
    parser = argparse.ArgumentParser(prog="moderators", description="Moderators CLI")
    parser.add_argument("model", nargs="?", help="Local model folder or HF model id")
    parser.add_argument("input", nargs="?", help="Input text or file path")
    parser.add_argument("--local-files-only", action="store_true", dest="local_files_only",
                        help="Use only local files")
    args = parser.parse_args()

    if not args.model:
        parser.print_help()
        return 0

    mod = AutoModerator.from_pretrained(args.model, local_files_only=args.local_files_only)
    if args.input:
        out = mod(args.input)
        print(json.dumps([_to_jsonable(x) for x in out], ensure_ascii=False, indent=2))
    else:
        print("Model loaded. Provide the 'input' argument to run inference.")


if __name__ == "__main__":
    main()
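For illustration, the `moderators` console script declared in pyproject.toml maps to `main()` above, so a script can drive the same code path by patching `sys.argv`; this is only a sketch, and the model id and file name are placeholders.

import sys
from moderators.cli import main

# Equivalent to running: moderators Falconsai/nsfw_image_detection photo.jpg
sys.argv = ["moderators", "Falconsai/nsfw_image_detection", "photo.jpg"]
main()  # prints a JSON list built via _to_jsonable/asdict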
src/moderators/integrations/__init__.py
ADDED
@@ -0,0 +1 @@
# Integration package init
src/moderators/integrations/base.py
ADDED
@@ -0,0 +1,69 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List
from abc import ABC, abstractmethod

from huggingface_hub import ModelHubMixin


@dataclass
class Box:
    # xyxy: [x1, y1, x2, y2]
    xyxy: List[float]
    label: str
    score: float


@dataclass
class PredictionResult:
    # Context about the source (file path, URL, etc.)
    source_path: str = ""
    # Probability map for classification
    classifications: Dict[str, float] = field(default_factory=dict)
    # Detection results
    detections: List[Box] = field(default_factory=list)
    # Raw output specific to models/integrations
    raw_output: Any = None


class BaseModerator(ABC, ModelHubMixin):
    def __init__(self, config: Dict[str, Any], model_id: str, **kwargs: Any) -> None:
        self.config: Dict[str, Any] = dict(config or {})
        self.model_id: str = model_id

    @abstractmethod
    def load_model(self) -> None:
        """Load model/pipeline and any processors if present."""
        pass

    # Inference flow
    def __call__(self, source: Any, **kwargs: Any):
        # self.run_callbacks("on_predict_start")
        processed_inputs = self._preprocess(source)
        model_outputs = self._predict(processed_inputs)
        results = self._postprocess(model_outputs)
        # self.run_callbacks("on_predict_end")
        return results

    @abstractmethod
    def _preprocess(self, inputs: Any) -> Any:
        """Convert inputs to model-ready format."""
        pass

    @abstractmethod
    def _predict(self, processed_inputs: Any) -> Any:
        """Run model inference."""
        pass

    @abstractmethod
    def _postprocess(self, model_outputs: Any) -> Any:
        """Convert outputs to PredictionResult format."""
        pass

    @abstractmethod
    def save_pretrained(self, save_directory: str, **kwargs: Any) -> str:
        """
        Save model and any processors to the given directory.
        """
        raise NotImplementedError
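A minimal sketch of a concrete subclass, assuming only the abstract contract above; `EchoModerator` is hypothetical and exists purely to show how `__call__` threads `_preprocess` → `_predict` → `_postprocess`.

from moderators.integrations.base import BaseModerator, PredictionResult

class EchoModerator(BaseModerator):
    def load_model(self) -> None:
        # A stand-in "model" that labels everything as safe
        self._model = lambda x: {"label": "safe", "score": 1.0}

    def _preprocess(self, inputs):
        return inputs

    def _predict(self, processed_inputs):
        return self._model(processed_inputs)

    def _postprocess(self, model_outputs):
        return [PredictionResult(
            classifications={model_outputs["label"]: model_outputs["score"]},
            raw_output=model_outputs,
        )]

    def save_pretrained(self, save_directory: str, **kwargs) -> str:
        raise NotImplementedError

m = EchoModerator(config={}, model_id="echo")
m.load_model()  # the AutoModerator factory normally calls this for you
print(m("anything")[0].classifications)  # -> {'safe': 1.0}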
src/moderators/integrations/transformers_moderator.py
ADDED
@@ -0,0 +1,110 @@
from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Dict, List

from .base import BaseModerator, PredictionResult
from moderators.utils import (
    auto_install,
    ensure_transformers,
    ensure_dl_framework,
    ensure_pillow_for_task,
    preprocess_image_input,
)


class TransformersModerator(BaseModerator):
    def load_model(self) -> None:
        task = self.config.get("task")
        if not task:
            raise ValueError("TransformersModerator requires 'task' in config.json")

        # Ensure transformers is available
        try:
            _transformers = ensure_transformers(auto_install)
        except Exception as e:
            raise ImportError(
                "TransformersModerator requires the 'transformers' package. "
                "Install with: uv pip install -e '.[transformers]' or: uv pip install transformers"
            ) from e
        pipeline = _transformers.pipeline

        # Ensure a DL framework (pt/tf/flax)
        framework = ensure_dl_framework(auto_install)

        # Ensure Pillow for image tasks
        ensure_pillow_for_task(task, auto_install)

        # Build pipeline
        self._pipe = pipeline(task, model=self.model_id, framework=framework)

    def _preprocess(self, inputs: Any) -> Any:
        task = str(self.config.get("task", "")).lower()
        if "image" in task:
            return preprocess_image_input(inputs)
        return inputs

    def _predict(self, processed_inputs: Any) -> Any:
        return self._pipe(processed_inputs)

    def _postprocess(self, model_outputs: Any) -> List[PredictionResult]:
        # Pipelines typically return dict or list[dict]
        outputs = model_outputs
        if isinstance(outputs, dict):
            outputs = [outputs]

        results: List[PredictionResult] = []
        for out in outputs:
            classifications: Dict[str, float] = {}
            label = out.get("label")
            score = out.get("score")
            if label is not None and score is not None:
                classifications[str(label)] = float(score)

            results.append(
                PredictionResult(
                    source_path=str(self.config.get("source", "")),
                    classifications=classifications,
                    detections=[],
                    raw_output=out,
                )
            )
        return results

    def save_pretrained(self, save_directory: str, **kwargs: Any) -> str:
        """
        - Saves model, tokenizer, processor (if any) to `save_directory`.
        - Also saves/updates `config.json` with architecture and task info.
        Returns the `save_directory` path.
        """
        out_dir = Path(save_directory)
        out_dir.mkdir(parents=True, exist_ok=True)

        pipe = getattr(self, "_pipe", None)
        model = getattr(pipe, "model", None) if pipe is not None else None
        tokenizer = getattr(pipe, "tokenizer", None) if pipe is not None else None
        # Fall back to the feature extractor when the pipeline has no processor
        processor = None
        if pipe is not None:
            processor = getattr(pipe, "processor", None) or getattr(pipe, "feature_extractor", None)

        if model and hasattr(model, "save_pretrained"):
            model.save_pretrained(out_dir)
        if tokenizer and hasattr(tokenizer, "save_pretrained"):
            tokenizer.save_pretrained(out_dir)
        if processor and hasattr(processor, "save_pretrained"):
            processor.save_pretrained(out_dir)

        # Make sure config.json exists and add the custom fields
        cfg_path = out_dir / "config.json"
        cfg = {}
        if cfg_path.exists():
            try:
                cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
            except Exception:
                cfg = {}

        cfg["architecture"] = "TransformersModerator"
        if self.config.get("task"):
            cfg["task"] = self.config["task"]
        cfg_path.write_text(json.dumps(cfg, ensure_ascii=False, indent=2), encoding="utf-8")
        return str(out_dir)
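A hypothetical round-trip sketch built on `save_pretrained` above: save a loaded moderator to a folder, then reload it offline through the factory. The repo id, folder, and image path are placeholders, and the offline reload assumes the patched config.json written by `save_pretrained` is picked up by `_load_config`'s local-folder branch.

from moderators.auto_model import AutoModerator

mod = AutoModerator.from_pretrained("Falconsai/nsfw_image_detection")
out_dir = mod.save_pretrained("./my_moderator")  # weights plus patched config.json
reloaded = AutoModerator.from_pretrained(out_dir, local_files_only=True)
print(reloaded("photo.jpg")[0].classifications)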
src/moderators/utils/__init__.py
ADDED
@@ -0,0 +1,11 @@
# Utils package init
from .deps import auto_install, ensure_transformers, ensure_dl_framework, ensure_pillow_for_task
from .image import preprocess_image_input

__all__ = [
    "auto_install",
    "ensure_transformers",
    "ensure_dl_framework",
    "ensure_pillow_for_task",
    "preprocess_image_input",
]
src/moderators/utils/deps.py
ADDED
@@ -0,0 +1,81 @@
from __future__ import annotations

import os
import shutil
import subprocess
import sys
from typing import Callable, List


def auto_install(packages: List[str]) -> bool:
    """
    Try to auto-install required packages using 'uv' if available, otherwise fall back to 'pip'.
    Controlled by env var: MODERATORS_DISABLE_AUTO_INSTALL=1 to disable.
    """
    if str(os.environ.get("MODERATORS_DISABLE_AUTO_INSTALL", "")).lower() in ("1", "true", "yes"):
        return False

    uv = shutil.which("uv")
    cmd = [uv, "pip", "install", *packages] if uv else [sys.executable, "-m", "pip", "install", *packages]

    try:
        subprocess.check_call(cmd)
        return True
    except Exception:
        return False


def ensure_transformers(install_fn: Callable[[List[str]], bool]):
    """Ensure 'transformers' is importable; optionally auto-install and retry."""
    try:
        import transformers as _transformers  # noqa: F401
        return _transformers
    except Exception:
        if not install_fn(["transformers"]):
            raise
        import transformers as _transformers  # type: ignore
        return _transformers


def ensure_dl_framework(install_fn: Callable[[List[str]], bool]) -> str:
    """
    Ensure at least one DL framework is available.
    Preference: PyTorch ('pt'), TensorFlow ('tf'), JAX/Flax ('flax').
    Tries to auto-install torch first.
    """
    try:
        import torch  # noqa: F401
        return "pt"
    except Exception:
        if install_fn(["torch"]):
            try:
                import torch  # noqa: F401
                return "pt"
            except Exception:
                pass
    try:
        import tensorflow  # noqa: F401
        return "tf"
    except Exception:
        pass
    try:
        import jax  # noqa: F401
        return "flax"
    except Exception:
        pass
    raise ImportError(
        "A deep learning framework is required for transformers pipelines. "
        "Install PyTorch with: uv pip install torch"
    )


def ensure_pillow_for_task(task: str, install_fn: Callable[[List[str]], bool]) -> None:
    """For image tasks, ensure Pillow is available; auto-install if missing."""
    if "image" not in str(task).lower():
        return
    try:
        import PIL  # noqa: F401
    except Exception:
        if not install_fn(["Pillow"]):
            raise ImportError("This image task requires Pillow. Install with: uv pip install Pillow")
        import PIL  # noqa: F401
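A short sketch of the opt-out path above: with the env var set, `auto_install` returns False immediately, so a missing package makes `ensure_transformers` re-raise the original import error instead of shelling out to uv/pip; when the package is already present it is simply imported and returned.

import os
os.environ["MODERATORS_DISABLE_AUTO_INSTALL"] = "1"  # set before loading any model

from moderators.utils import auto_install, ensure_transformers

transformers = ensure_transformers(auto_install)  # raises ImportError if absent
print(transformers.__version__)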
src/moderators/utils/image.py
ADDED
@@ -0,0 +1,41 @@
from __future__ import annotations

from pathlib import Path
from typing import Any


def preprocess_image_input(inputs: Any, min_side: int = 16) -> Any:
    """
    Open path-like inputs with PIL, convert to RGB, ensure a minimal spatial size,
    and return a PIL.Image.Image. If PIL is unavailable or input is unsupported, return original input.
    """
    try:
        from PIL import Image
    except Exception:
        return inputs

    img = None
    if isinstance(inputs, (str, Path)):
        try:
            img = Image.open(str(inputs))
        except Exception:
            return inputs
    elif hasattr(inputs, "mode") and hasattr(inputs, "convert"):
        img = inputs
    else:
        return inputs

    try:
        if getattr(img, "mode", "") != "RGB":
            img = img.convert("RGB")
    except Exception:
        return inputs

    try:
        w, h = img.size
        if w < min_side or h < min_side:
            img = img.resize((max(min_side, w), max(min_side, h)), Image.BILINEAR)
    except Exception:
        pass

    return img
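A small sketch of the helper on an in-memory image, assuming Pillow is installed; it exercises both normalizations (RGB conversion and the 16-pixel minimum side).

from PIL import Image
from moderators.utils.image import preprocess_image_input

tiny = Image.new("L", (8, 8))      # grayscale and below the minimum side
img = preprocess_image_input(tiny)
print(img.mode, img.size)          # -> RGB (16, 16)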