Spaces:

DevikaJ2005
/

fraudshield-1

Sleeping

App Files Files Community

fraudshield-1 / utils.py

DevikaJ2005

Add training-first RL architecture with tracking

ce9edc2 15 days ago

raw

history blame contribute delete

2.54 kB

	"""Shared utilities for FraudShield training and evaluation."""

	from __future__ import annotations

	import json
	import os
	import random
	from pathlib import Path
	from typing import Any, Iterable, Sequence

	import numpy as np


	def seed_everything(seed: int) -> None:
	    """Seed Python, NumPy, and torch (when importable) random generators.

	    Args:
	        seed: Value handed to each library's seeding function.

	    Note:
	        ``PYTHONHASHSEED`` is read by the interpreter at start-up, so
	        exporting it here influences child processes that inherit the
	        environment rather than hash randomization in the running process.
	    """
	    random.seed(seed)
	    np.random.seed(seed)
	    os.environ["PYTHONHASHSEED"] = str(seed)
	    try:  # pragma: no cover - torch is optional at runtime
	        import torch

	        torch.manual_seed(seed)
	        if torch.cuda.is_available():
	            torch.cuda.manual_seed_all(seed)
	    except Exception:
	        # Best effort by design: a missing or broken torch install must not
	        # prevent the Python/NumPy seeding above from taking effect.
	        pass


	def ensure_dir(path: str \| Path) -> Path:
	"""Create a directory if needed and return it as a ``Path``."""

	resolved = Path(path)
	resolved.mkdir(parents=True, exist_ok=True)
	return resolved


	def save_json(payload: Any, path: str \| Path) -> None:
	"""Write JSON with stable indentation."""

	Path(path).write_text(json.dumps(payload, indent=2), encoding="utf-8")


	def load_json(path: str \| Path) -> Any:
	"""Load JSON from disk."""

	return json.loads(Path(path).read_text(encoding="utf-8"))


	def extract_json_object(text: str) -> dict[str, Any]:
	    """Extract the first JSON object embedded in model output.

	    Decoding starts at the first ``{`` and stops where that object ends, so
	    trailing prose, a second object, or a stray ``}`` later in ``text`` does
	    not break parsing.

	    Args:
	        text: Raw model output that may surround the JSON with other text.

	    Returns:
	        The decoded object that begins at the first ``{``.

	    Raises:
	        ValueError: If ``text`` has no ``{`` followed by a ``}``.
	        json.JSONDecodeError: If the text starting at the first ``{`` is not
	            a valid JSON object (a subclass of ``ValueError``).
	    """
	    start = text.find("{")
	    # A closing brace must exist somewhere after the opening one.
	    if start == -1 or text.rfind("}") < start:
	        raise ValueError("Model output did not contain a JSON object.")
	    # raw_decode parses exactly one JSON value and ignores whatever follows.
	    parsed, _ = json.JSONDecoder().raw_decode(text, start)
	    return parsed


	def moving_average(values: Sequence[float], window: int = 10) -> list[float]:
	    """Compute a trailing simple moving average.

	    Each output element is the mean of the current value and up to
	    ``window - 1`` values before it, so the first elements average over a
	    shorter prefix instead of being dropped.

	    Args:
	        values: Input series.
	        window: Maximum number of values per average; anything below 1 is
	            treated as 1.

	    Returns:
	        A list the same length as ``values`` (empty when ``values`` is empty).
	    """
	    if not values:
	        return []
	    window = max(1, int(window))
	    averaged: list[float] = []
	    for stop in range(1, len(values) + 1):
	        # Clamp the window start at 0 for the first few positions.
	        chunk = values[max(0, stop - window) : stop]
	        averaged.append(sum(chunk) / len(chunk))
	    return averaged


	def approximate_token_count(text: str) -> int:
	    """Estimate a token count without a tokenizer.

	    The estimate is the whitespace-separated word count scaled by 1.3 and
	    truncated to an integer.

	    Args:
	        text: Text to measure.

	    Returns:
	        0 for empty or whitespace-only text, otherwise at least 1.
	    """
	    words = text.split()
	    if not words:
	        return 0
	    return max(1, int(len(words) * 1.3))


	def flatten_dict_items(mapping: dict[str, Any], prefix: str = "") -> Iterable[tuple[str, Any]]:
	    """Yield ``(dotted_key, value)`` pairs from a nested dictionary.

	    Nested dictionaries are walked depth-first in insertion order, and their
	    keys are joined to the parent key with ``.``. An empty nested dictionary
	    yields nothing.

	    Args:
	        mapping: Dictionary to flatten; values may themselves be dictionaries.
	        prefix: Key path prepended to every emitted key (used by the recursion).

	    Yields:
	        One ``(key_path, value)`` tuple per non-dict value.
	    """
	    for key, value in mapping.items():
	        full_key = f"{prefix}.{key}" if prefix else key
	        if isinstance(value, dict):
	            yield from flatten_dict_items(value, prefix=full_key)
	        else:
	            yield full_key, value