Asnly's picture
Deploy Space app files
7591256 verified
#!/usr/bin/env python3
"""Shared utilities for 3DReflecNet HF release apps."""
from __future__ import annotations
import logging
from typing import Any
import pandas as pd
# Module-level logger registered under the name "hf_release".
logger = logging.getLogger("hf_release")
# Sentinel choice meaning "do not filter on this field"; the filter helpers
# below return the frame untouched when they receive it.
FILTER_ALL = "ALL"
# Choices for a tri-state boolean filter: no filtering, only True, only False.
BOOL_FILTER_CHOICES = [FILTER_ALL, "True", "False"]
def setup_logging(level: int = logging.INFO) -> None:
    """Set up logging output for the hf_release modules.

    Delegates to ``logging.basicConfig`` with a fixed record layout
    (timestamp, level, logger name, message) and a second-resolution
    timestamp format.

    Args:
        level: Logging threshold handed to ``logging.basicConfig``.
    """
    record_layout = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    timestamp_layout = "%Y-%m-%d %H:%M:%S"
    logging.basicConfig(level=level, format=record_layout, datefmt=timestamp_layout)
def require_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Ensure every name in ``columns`` is a column of ``df``.

    Args:
        df: Frame to inspect.
        columns: Column names that must be present.
        context: Short label embedded in the error message.

    Raises:
        KeyError: If one or more names are absent. The message lists the
            absent names in the order they appear in ``columns``.
    """
    absent = []
    for name in columns:
        if name not in df.columns:
            absent.append(name)
    if not absent:
        return
    raise KeyError(f"Missing required column(s) in {context}: {', '.join(absent)}")
def require_bool_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Validate that each listed column exists, has no nulls, and is boolean.

    Args:
        df: Frame to inspect.
        columns: Column names expected to hold booleans.
        context: Short label embedded in error messages.

    Raises:
        KeyError: If any listed column is missing (via ``require_columns``).
        ValueError: If a column contains null values.
        TypeError: If a column's dtype is not boolean.
    """
    require_columns(df, columns, context)
    for name in columns:
        series = df[name]
        # Nulls are reported before the dtype check.
        has_nulls = series.isna().any()
        if has_nulls:
            raise ValueError(f"Boolean column {name!r} contains null values in {context}.")
        if pd.api.types.is_bool_dtype(series):
            continue
        raise TypeError(f"Expected boolean dtype for column {name!r} in {context}, got {series.dtype}.")
def require_text_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Validate that each listed column exists and holds only non-null ``str`` values.

    Args:
        df: Frame to inspect.
        columns: Column names expected to hold strings.
        context: Short label embedded in error messages.

    Raises:
        KeyError: If any listed column is missing (via ``require_columns``).
        ValueError: If a column contains null values.
        TypeError: If a column contains a non-string value; the message names
            the type of the first offending value.
    """
    require_columns(df, columns, context)

    def _not_text(value: object) -> bool:
        return not isinstance(value, str)

    for name in columns:
        series = df[name]
        if series.isna().any():
            raise ValueError(f"Text column {name!r} contains null values in {context}.")
        non_text = series.map(_not_text)
        if not non_text.any():
            continue
        # Report the type of the first value that failed the check.
        first_bad = df.loc[non_text, name].iloc[0]
        raise TypeError(
            f"Expected string values for column {name!r} in {context}, got {type(first_bad).__name__}."
        )
def parse_bool_filter_value(selected_value: str) -> bool:
    """Translate the filter labels ``"True"`` / ``"False"`` into a ``bool``.

    Args:
        selected_value: Label chosen in a boolean filter control.

    Returns:
        ``True`` for ``"True"`` and ``False`` for ``"False"``.

    Raises:
        ValueError: For any other value.
    """
    for label, flag in (("True", True), ("False", False)):
        if selected_value == label:
            return flag
    raise ValueError(f"Unsupported boolean filter value: {selected_value!r}")
def apply_bool_filter(df: pd.DataFrame, column: str, selected_value: str) -> pd.DataFrame:
    """Keep the rows whose boolean ``column`` matches a tri-state selection.

    Args:
        df: Frame to filter.
        column: Name of the boolean column to test.
        selected_value: ``FILTER_ALL``, ``"True"`` or ``"False"``.

    Returns:
        ``df`` itself when ``selected_value`` is ``FILTER_ALL``; otherwise the
        subset of rows where ``column`` equals the parsed boolean.

    Raises:
        KeyError: If ``column`` is not in ``df``.
        TypeError: If ``column`` does not have a boolean dtype.
        ValueError: If ``selected_value`` is not a recognised label.
    """
    no_filter_requested = selected_value == FILTER_ALL
    if no_filter_requested:
        # The column is intentionally not validated when nothing is filtered.
        return df
    if column not in df.columns:
        raise KeyError(f"Missing required boolean filter column: {column}")
    series = df[column]
    if not pd.api.types.is_bool_dtype(series):
        raise TypeError(f"Expected boolean dtype for column {column!r}, got {series.dtype}.")
    wanted = parse_bool_filter_value(selected_value)
    keep = series == wanted
    return df[keep]
def get_distinct_text_choices(df: pd.DataFrame, column: str, all_label: str = FILTER_ALL) -> list[str]:
    """Return dropdown choices: ``all_label`` followed by the sorted distinct values.

    Null entries are dropped; remaining values are converted with ``str`` and
    stripped, and values that are empty after stripping are ignored.

    Args:
        df: Frame providing the values.
        column: Column to read the values from.
        all_label: Label placed first in the returned list.

    Raises:
        KeyError: If ``column`` is not in ``df``.
        ValueError: If the column yields no non-empty value.
    """
    if column not in df.columns:
        raise KeyError(f"Missing required text choice column: {column}")
    distinct = set()
    for raw in df[column].dropna().tolist():
        cleaned = str(raw).strip()
        if cleaned:
            distinct.add(cleaned)
    if not distinct:
        raise ValueError(f"Column {column!r} has no non-empty values.")
    return [all_label, *sorted(distinct)]
def _apply_text_equals(df: pd.DataFrame, column: str, selected_value: str, all_label: str = FILTER_ALL) -> pd.DataFrame:
    """Keep the rows whose ``column`` text equals the selection exactly.

    Both the selection and the column values are stripped before comparison;
    column values are converted with ``astype(str)`` first.

    Args:
        df: Frame to filter.
        column: Text column to compare against.
        selected_value: Requested value; a falsy, blank or ``all_label``
            selection disables the filter.
        all_label: Label that means "no filtering".

    Returns:
        ``df`` itself when the filter is disabled, otherwise the matching rows.

    Raises:
        KeyError: If ``column`` is not in ``df`` (checked even when the filter
            is disabled).
    """
    if column not in df.columns:
        raise KeyError(f"Missing required text filter column: {column}")
    wanted = selected_value.strip() if selected_value else ""
    if wanted in ("", all_label):
        return df
    normalized = df[column].astype(str).str.strip()
    return df[normalized == wanted]
def filter_dataframe_advanced(
    df: pd.DataFrame,
    model_name: str = FILTER_ALL,
    material_name: str = FILTER_ALL,
    env_name: str = FILTER_ALL,
    has_glass: str = FILTER_ALL,
    is_generated: str = FILTER_ALL,
    transparent: str = FILTER_ALL,
    near_light: str = FILTER_ALL,
) -> pd.DataFrame:
    """Apply three exact-match text filters and four tri-state boolean filters.

    Text filters run first (``model_name``, ``material_name``, ``env_name``),
    followed by the boolean filters on ``hasGlass``, ``isGenerated``,
    ``transparent`` and ``near_light``. Each filter narrows the result of the
    previous one.

    Returns:
        The filtered rows with a fresh 0-based index.
    """
    text_filters = (
        ("model_name", model_name),
        ("material_name", material_name),
        ("env_name", env_name),
    )
    bool_filters = (
        ("hasGlass", has_glass),
        ("isGenerated", is_generated),
        ("transparent", transparent),
        ("near_light", near_light),
    )
    result = df
    for column, choice in text_filters:
        result = _apply_text_equals(result, column, choice)
    for column, choice in bool_filters:
        result = apply_bool_filter(result, column, choice)
    return result.reset_index(drop=True)
def aggregate_by_model(
df: pd.DataFrame,
extra_columns: list[str] | None = None,
) -> pd.DataFrame:
"""Group instances by model_name, counting instances and collecting IDs."""
base_cols = ["model_name", "main_category", "sub_category", "instance_count", "instance_ids"]
extra = extra_columns or []
all_cols = base_cols + extra
if df.empty:
return pd.DataFrame(columns=all_cols)
require_columns(df, ["model_name", "main_category", "sub_category", "instance_id"] + extra, "model aggregation")
rows: list[dict[str, Any]] = []
for model_name, group in df.groupby("model_name", dropna=False, sort=True):
instance_ids = sorted({
str(v) for v in group["instance_id"].dropna().tolist() if str(v).strip()
})
row: dict[str, Any] = {
"model_name": str(model_name),
"main_category": str(group["main_category"].iloc[0]),
"sub_category": str(group["sub_category"].iloc[0]),
"instance_count": len(instance_ids),
"instance_ids": "\n".join(instance_ids),
}
for col in extra:
candidates = [str(v) for v in group[col].dropna().tolist() if str(v).strip()]
row[col] = candidates[0] if candidates else ""
rows.append(row)
return pd.DataFrame(rows)
def format_model_choice(index: int, row: dict[str, Any]) -> str:
    """Build the label for a model entry.

    The label is the index zero-padded to four digits, the row's
    ``model_name`` and its ``instance_count``, separated by `` | ``.
    """
    return "{:04d} | {} | instances {}".format(index, row["model_name"], row["instance_count"])
def format_instance_choice(index: int, row: dict[str, Any]) -> str:
    """Build the label for an instance entry.

    The label is the index zero-padded to four digits, the row's
    ``instance_id`` and its ``model_name``, separated by `` | ``.
    """
    return "{:04d} | {} | {}".format(index, row["instance_id"], row["model_name"])
def parse_choice_index(choice: str, length: int) -> int | None:
"""Extract the numeric index from a formatted choice string."""
index_str = choice.split("|", 1)[0].strip()
try:
idx = int(index_str)
except ValueError:
return None
if idx < 0 or idx >= length:
return None
return idx