File size: 6,478 Bytes
7591256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/env python3
"""Shared utilities for 3DReflecNet HF release apps."""
from __future__ import annotations

import logging
from typing import Any

import pandas as pd

logger = logging.getLogger("hf_release")

FILTER_ALL = "ALL"
BOOL_FILTER_CHOICES = [FILTER_ALL, "True", "False"]


def setup_logging(level: int = logging.INFO) -> None:
    """Initialise root logging for hf_release modules via ``basicConfig``."""
    log_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    date_format = "%Y-%m-%d %H:%M:%S"
    logging.basicConfig(level=level, format=log_format, datefmt=date_format)


def require_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Raise ``KeyError`` naming every column of *columns* absent from *df*."""
    present = set(df.columns)
    missing = [name for name in columns if name not in present]
    if missing:
        joined = ", ".join(missing)
        raise KeyError(f"Missing required column(s) in {context}: {joined}")


def require_bool_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Validate that *columns* exist in *df* and hold strictly boolean data.

    Raises:
        KeyError: if any column is missing (via ``require_columns``).
        TypeError: if a column is not of a boolean dtype.
        ValueError: if a boolean column contains null values (possible with
            pandas' nullable "boolean" extension dtype).
    """
    require_columns(df, columns, context)
    for column in columns:
        # Check dtype before nulls: a non-boolean column with NaN values
        # previously raised a misleading "Boolean column ... null values"
        # ValueError instead of the accurate TypeError below.
        if not pd.api.types.is_bool_dtype(df[column]):
            raise TypeError(f"Expected boolean dtype for column {column!r} in {context}, got {df[column].dtype}.")
        if df[column].isna().any():
            raise ValueError(f"Boolean column {column!r} contains null values in {context}.")


def require_text_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Validate that *columns* exist, are null-free, and contain only ``str`` values."""
    require_columns(df, columns, context)
    for column in columns:
        series = df[column]
        if series.isna().any():
            raise ValueError(f"Text column {column!r} contains null values in {context}.")
        non_str_mask = series.map(lambda value: not isinstance(value, str))
        if non_str_mask.any():
            # Report the concrete offending type to make the failure actionable.
            offender = series.loc[non_str_mask].iloc[0]
            bad_type = type(offender).__name__
            raise TypeError(f"Expected string values for column {column!r} in {context}, got {bad_type}.")


def parse_bool_filter_value(selected_value: str) -> bool:
    """Translate a ``"True"``/``"False"`` dropdown string into a Python bool."""
    mapping = {"True": True, "False": False}
    if selected_value in mapping:
        return mapping[selected_value]
    raise ValueError(f"Unsupported boolean filter value: {selected_value!r}")


def apply_bool_filter(df: pd.DataFrame, column: str, selected_value: str) -> pd.DataFrame:
    """Filter *df* on a boolean *column* using a tri-state selection.

    ``FILTER_ALL`` returns the frame untouched; "True"/"False" keep only the
    rows whose column value matches.
    """
    if selected_value == FILTER_ALL:
        return df
    if column not in df.columns:
        raise KeyError(f"Missing required boolean filter column: {column}")
    series = df[column]
    if not pd.api.types.is_bool_dtype(series):
        raise TypeError(f"Expected boolean dtype for column {column!r}, got {series.dtype}.")
    wanted = parse_bool_filter_value(selected_value)
    return df[series == wanted]


def get_distinct_text_choices(df: pd.DataFrame, column: str, all_label: str = FILTER_ALL) -> list[str]:
    """Build dropdown choices: the all-label followed by sorted distinct texts."""
    if column not in df.columns:
        raise KeyError(f"Missing required text choice column: {column}")
    distinct: set[str] = set()
    for raw in df[column].dropna():
        text = str(raw).strip()
        if text:
            distinct.add(text)
    if not distinct:
        raise ValueError(f"Column {column!r} has no non-empty values.")
    return [all_label, *sorted(distinct)]


def _apply_text_equals(df: pd.DataFrame, column: str, selected_value: str, all_label: str = FILTER_ALL) -> pd.DataFrame:
    """Keep rows whose stripped string value in *column* equals the selection.

    An empty/None selection or the all-label leaves the frame unfiltered.
    """
    if column not in df.columns:
        raise KeyError(f"Missing required text filter column: {column}")
    wanted = (selected_value or "").strip()
    if wanted in ("", all_label):
        return df
    normalized = df[column].astype(str).str.strip()
    return df[normalized == wanted]


def filter_dataframe_advanced(
    df: pd.DataFrame,
    model_name: str = FILTER_ALL,
    material_name: str = FILTER_ALL,
    env_name: str = FILTER_ALL,
    has_glass: str = FILTER_ALL,
    is_generated: str = FILTER_ALL,
    transparent: str = FILTER_ALL,
    near_light: str = FILTER_ALL,
) -> pd.DataFrame:
    """Filter by model/material/environment exact selection and four tri-state bool fields."""
    text_filters = [
        ("model_name", model_name),
        ("material_name", material_name),
        ("env_name", env_name),
    ]
    bool_filters = [
        ("hasGlass", has_glass),
        ("isGenerated", is_generated),
        ("transparent", transparent),
        ("near_light", near_light),
    ]
    result = df
    for column, value in text_filters:
        result = _apply_text_equals(result, column, value)
    for column, value in bool_filters:
        result = apply_bool_filter(result, column, value)
    # Fresh 0..n-1 index so downstream choice formatting can use positions.
    return result.reset_index(drop=True)


def aggregate_by_model(
    df: pd.DataFrame,
    extra_columns: list[str] | None = None,
) -> pd.DataFrame:
    """Aggregate instance rows per ``model_name``.

    Each output row carries the group's first-seen main/sub category, the
    count of distinct instance IDs, the IDs joined with newlines, and — for
    every requested extra column — the first non-empty value in the group.
    """
    extras = list(extra_columns) if extra_columns else []
    columns_out = ["model_name", "main_category", "sub_category", "instance_count", "instance_ids"] + extras

    if df.empty:
        return pd.DataFrame(columns=columns_out)

    require_columns(df, ["model_name", "main_category", "sub_category", "instance_id"] + extras, "model aggregation")

    records: list[dict[str, Any]] = []
    for name, group in df.groupby("model_name", dropna=False, sort=True):
        ids = sorted({str(v) for v in group["instance_id"].dropna() if str(v).strip()})
        record: dict[str, Any] = {
            "model_name": str(name),
            "main_category": str(group["main_category"].iloc[0]),
            "sub_category": str(group["sub_category"].iloc[0]),
            "instance_count": len(ids),
            "instance_ids": "\n".join(ids),
        }
        for extra in extras:
            non_empty = [str(v) for v in group[extra].dropna() if str(v).strip()]
            record[extra] = non_empty[0] if non_empty else ""
        records.append(record)
    return pd.DataFrame(records)


def format_model_choice(index: int, row: dict[str, Any]) -> str:
    """Render a model dropdown label: zero-padded index, name, instance count."""
    name = row["model_name"]
    count = row["instance_count"]
    return f"{index:04d} | {name} | instances {count}"


def format_instance_choice(index: int, row: dict[str, Any]) -> str:
    """Render an instance dropdown label: zero-padded index, instance ID, model name."""
    instance_id = row["instance_id"]
    model = row["model_name"]
    return f"{index:04d} | {instance_id} | {model}"


def parse_choice_index(choice: str, length: int) -> int | None:
    """Extract the numeric index from a formatted choice string.

    Args:
        choice: A string like ``"0003 | name | ..."`` as produced by the
            format_* helpers. ``None`` or empty input is tolerated (UI
            callbacks can deliver an empty selection).
        length: Exclusive upper bound for a valid index.

    Returns:
        The parsed index when it lies in ``[0, length)``, otherwise ``None``.
    """
    # Guard against None/empty selections instead of raising AttributeError
    # on .split when a dropdown has no value yet.
    if not choice:
        return None
    index_str = choice.split("|", 1)[0].strip()
    try:
        idx = int(index_str)
    except ValueError:
        return None
    return idx if 0 <= idx < length else None