Spaces:

3DReflecNet
/

3DReflecNet_Explorer

Running

App Files Files Community

3DReflecNet_Explorer / utils.py

Asnly

Deploy Space app files

7591256 verified 19 days ago

raw

history blame contribute delete

6.48 kB

	#!/usr/bin/env python3
	"""Shared utilities for 3DReflecNet HF release apps."""
	from __future__ import annotations

	import logging
	from typing import Any

	import pandas as pd

	# Module-level logger registered under the "hf_release" name.
	logger = logging.getLogger("hf_release")

	# Sentinel dropdown value meaning "do not filter"; the filter helpers in this
	# module return the frame unchanged when they receive it.
	FILTER_ALL = "ALL"
	# Choices for a tri-state boolean filter: the sentinel plus the two strings
	# that parse_bool_filter_value() accepts.
	BOOL_FILTER_CHOICES = [FILTER_ALL, "True", "False"]


	def setup_logging(level: int = logging.INFO) -> None:
	    """Set up logging with the timestamped format used by hf_release modules.

	    Args:
	        level: Threshold handed to ``logging.basicConfig``; defaults to
	            ``logging.INFO``.
	    """
	    record_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
	    timestamp_format = "%Y-%m-%d %H:%M:%S"
	    logging.basicConfig(level=level, format=record_format, datefmt=timestamp_format)


	def require_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
	    """Ensure every name in ``columns`` is a column of ``df``.

	    Args:
	        df: Frame to inspect.
	        columns: Column names that must be present.
	        context: Label embedded in the error message to identify the caller.

	    Raises:
	        KeyError: If one or more names are absent; the message lists them in
	            the order they were requested.
	    """
	    available = df.columns
	    absent = [name for name in columns if name not in available]
	    if not absent:
	        return
	    raise KeyError(f"Missing required column(s) in {context}: {', '.join(absent)}")


	def require_bool_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
	    """Validate that each listed column exists, has no nulls, and is boolean.

	    Args:
	        df: Frame to inspect.
	        columns: Column names expected to hold booleans.
	        context: Label embedded in error messages to identify the caller.

	    Raises:
	        KeyError: If any column is missing (raised by ``require_columns``).
	        ValueError: If a column contains null values.
	        TypeError: If a column's dtype is not boolean.
	    """
	    require_columns(df, columns, context)
	    for name in columns:
	        series = df[name]
	        # Nulls are reported before the dtype check, so they surface as ValueError.
	        if series.isna().any():
	            raise ValueError(f"Boolean column {name!r} contains null values in {context}.")
	        if pd.api.types.is_bool_dtype(series):
	            continue
	        raise TypeError(f"Expected boolean dtype for column {name!r} in {context}, got {series.dtype}.")


	def require_text_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
	    """Validate that each listed column exists, has no nulls, and holds only ``str``.

	    Args:
	        df: Frame to inspect.
	        columns: Column names expected to hold text.
	        context: Label embedded in error messages to identify the caller.

	    Raises:
	        KeyError: If any column is missing (raised by ``require_columns``).
	        ValueError: If a column contains null values.
	        TypeError: If a column holds a non-string value; the message names the
	            type of the first offending value.
	    """
	    require_columns(df, columns, context)
	    for name in columns:
	        series = df[name]
	        if series.isna().any():
	            raise ValueError(f"Text column {name!r} contains null values in {context}.")
	        not_text = series.map(lambda item: not isinstance(item, str))
	        if not not_text.any():
	            continue
	        first_offender = df.loc[not_text, name].iloc[0]
	        bad_type = type(first_offender).__name__
	        raise TypeError(f"Expected string values for column {name!r} in {context}, got {bad_type}.")


	def parse_bool_filter_value(selected_value: str) -> bool:
	    """Translate the dropdown strings ``"True"`` / ``"False"`` into a ``bool``.

	    Args:
	        selected_value: The selected choice; must be exactly ``"True"`` or
	            ``"False"``.

	    Returns:
	        The corresponding boolean.

	    Raises:
	        ValueError: For any other value.
	    """
	    for label, flag in (("True", True), ("False", False)):
	        if selected_value == label:
	            return flag
	    raise ValueError(f"Unsupported boolean filter value: {selected_value!r}")


	def apply_bool_filter(df: pd.DataFrame, column: str, selected_value: str) -> pd.DataFrame:
	    """Restrict ``df`` to rows whose boolean ``column`` matches a tri-state choice.

	    Args:
	        df: Frame to filter.
	        column: Name of the boolean column to compare against.
	        selected_value: ``FILTER_ALL``, ``"True"`` or ``"False"``.

	    Returns:
	        ``df`` itself when ``selected_value`` is ``FILTER_ALL``; otherwise the
	        rows whose ``column`` equals the parsed boolean.

	    Raises:
	        KeyError: If ``column`` is not in ``df``.
	        TypeError: If ``column`` does not have a boolean dtype.
	        ValueError: If ``selected_value`` is not a supported choice.
	    """
	    # The "ALL" sentinel short-circuits before any column validation.
	    if selected_value == FILTER_ALL:
	        return df
	    if column not in df.columns:
	        raise KeyError(f"Missing required boolean filter column: {column}")
	    series = df[column]
	    if not pd.api.types.is_bool_dtype(series):
	        raise TypeError(f"Expected boolean dtype for column {column!r}, got {series.dtype}.")
	    wanted = parse_bool_filter_value(selected_value)
	    return df[series == wanted]


	def get_distinct_text_choices(df: pd.DataFrame, column: str, all_label: str = FILTER_ALL) -> list[str]:
	    """Collect the distinct, non-blank values of ``column`` as dropdown choices.

	    Values are stringified and stripped of surrounding whitespace; nulls and
	    values that are blank after stripping are ignored.

	    Args:
	        df: Frame providing the values.
	        column: Column to read.
	        all_label: Entry placed first in the returned list.

	    Returns:
	        ``all_label`` followed by the distinct values in sorted order.

	    Raises:
	        KeyError: If ``column`` is not in ``df``.
	        ValueError: If no non-blank value remains.
	    """
	    if column not in df.columns:
	        raise KeyError(f"Missing required text choice column: {column}")
	    distinct: set[str] = set()
	    for raw in df[column].dropna().tolist():
	        cleaned = str(raw).strip()
	        if cleaned:
	            distinct.add(cleaned)
	    if not distinct:
	        raise ValueError(f"Column {column!r} has no non-empty values.")
	    return [all_label, *sorted(distinct)]


	def _apply_text_equals(df: pd.DataFrame, column: str, selected_value: str, all_label: str = FILTER_ALL) -> pd.DataFrame:
	    """Keep rows whose ``column``, stringified and stripped, equals the selection.

	    Args:
	        df: Frame to filter.
	        column: Text column to compare.
	        selected_value: Chosen value; a falsy value, a blank string, or
	            ``all_label`` disables the filter.
	        all_label: Sentinel meaning "no filtering".

	    Returns:
	        ``df`` itself when the filter is disabled, otherwise the matching rows.

	    Raises:
	        KeyError: If ``column`` is not in ``df`` (checked even when the filter
	            is disabled).
	    """
	    if column not in df.columns:
	        raise KeyError(f"Missing required text filter column: {column}")
	    wanted = (selected_value or "").strip()
	    if wanted and wanted != all_label:
	        normalized = df[column].astype(str).str.strip()
	        return df[normalized == wanted]
	    return df


	def filter_dataframe_advanced(
	    df: pd.DataFrame,
	    model_name: str = FILTER_ALL,
	    material_name: str = FILTER_ALL,
	    env_name: str = FILTER_ALL,
	    has_glass: str = FILTER_ALL,
	    is_generated: str = FILTER_ALL,
	    transparent: str = FILTER_ALL,
	    near_light: str = FILTER_ALL,
	) -> pd.DataFrame:
	    """Apply three exact-match text filters, then four tri-state boolean filters.

	    Text filters target the ``model_name``, ``material_name`` and ``env_name``
	    columns; boolean filters target ``hasGlass``, ``isGenerated``,
	    ``transparent`` and ``near_light``. Filters run in that order, each on the
	    output of the previous one.

	    Returns:
	        The surviving rows with a fresh 0-based index.
	    """
	    text_filters = (
	        ("model_name", model_name),
	        ("material_name", material_name),
	        ("env_name", env_name),
	    )
	    bool_filters = (
	        ("hasGlass", has_glass),
	        ("isGenerated", is_generated),
	        ("transparent", transparent),
	        ("near_light", near_light),
	    )
	    result = df
	    for column, choice in text_filters:
	        result = _apply_text_equals(result, column, choice)
	    for column, choice in bool_filters:
	        result = apply_bool_filter(result, column, choice)
	    return result.reset_index(drop=True)


	def aggregate_by_model(
	    df: pd.DataFrame,
	    extra_columns: list[str] | None = None,
	) -> pd.DataFrame:
	    """Group instances by model_name, counting instances and collecting IDs.

	    Args:
	        df: Instance-level frame. When non-empty it must contain
	            ``model_name``, ``main_category``, ``sub_category``,
	            ``instance_id`` and every name in ``extra_columns``.
	        extra_columns: Additional columns to carry into the result; each takes
	            the first non-null, non-blank value (stringified) found in the
	            group, or ``""`` when there is none.

	    Returns:
	        One row per ``model_name`` group (null names form their own group) with
	        ``model_name``, ``main_category``, ``sub_category`` (the latter two
	        taken from the group's first row), ``instance_count`` (number of
	        distinct non-blank instance IDs), ``instance_ids`` (those IDs sorted
	        and newline-joined) and the extra columns. An empty ``df`` yields an
	        empty frame with those column names.

	    Raises:
	        KeyError: If ``df`` is non-empty and a required column is missing.
	    """
	    base_cols = ["model_name", "main_category", "sub_category", "instance_count", "instance_ids"]
	    extra = extra_columns or []
	    all_cols = base_cols + extra

	    # An empty frame is returned before column validation, so it never raises.
	    if df.empty:
	        return pd.DataFrame(columns=all_cols)

	    require_columns(df, ["model_name", "main_category", "sub_category", "instance_id"] + extra, "model aggregation")

	    rows: list[dict[str, Any]] = []
	    for model_name, group in df.groupby("model_name", dropna=False, sort=True):
	        # Deduplicate on the stringified ID, skipping nulls and blank strings.
	        instance_ids = sorted({
	            str(v) for v in group["instance_id"].dropna().tolist() if str(v).strip()
	        })
	        row: dict[str, Any] = {
	            "model_name": str(model_name),
	            "main_category": str(group["main_category"].iloc[0]),
	            "sub_category": str(group["sub_category"].iloc[0]),
	            "instance_count": len(instance_ids),
	            "instance_ids": "\n".join(instance_ids),
	        }
	        for col in extra:
	            candidates = [str(v) for v in group[col].dropna().tolist() if str(v).strip()]
	            row[col] = candidates[0] if candidates else ""
	        rows.append(row)
	    return pd.DataFrame(rows)


	def format_model_choice(index: int, row: dict[str, Any]) -> str:
	    """Render a model row as ``"<index> | <model_name> | instances <count>"``.

	    Args:
	        index: Position of the row; zero-padded to four digits.
	        row: Mapping providing ``model_name`` and ``instance_count``.
	    """
	    return f"{index:04d} | {row['model_name']} | instances {row['instance_count']}"


	def format_instance_choice(index: int, row: dict[str, Any]) -> str:
	    """Render an instance row as ``"<index> | <instance_id> | <model_name>"``.

	    Args:
	        index: Position of the row; zero-padded to four digits.
	        row: Mapping providing ``instance_id`` and ``model_name``.
	    """
	    return f"{index:04d} | {row['instance_id']} | {row['model_name']}"


	def parse_choice_index(choice: str, length: int) -> int | None:
	    """Extract the numeric index from a formatted choice string.

	    Args:
	        choice: A string shaped like the output of ``format_model_choice`` or
	            ``format_instance_choice``; the text before the first ``|`` is
	            parsed as the index.
	        length: Number of valid positions; the index must lie in
	            ``[0, length)``.

	    Returns:
	        The parsed index, or ``None`` when ``choice`` is not a string, the
	        prefix is not an integer, or the index is out of range.
	    """
	    # A non-string selection (e.g. None) is treated as "nothing selected"
	    # instead of failing on the missing .split attribute.
	    if not isinstance(choice, str):
	        return None
	    index_str = choice.split("|", 1)[0].strip()
	    try:
	        idx = int(index_str)
	    except ValueError:
	        return None
	    if idx < 0 or idx >= length:
	        return None
	    return idx