Usman / src /data_loader.py
Waqasjan123's picture
Update src/data_loader.py
feb68d9 verified
"""
Data Loader - Handles loading data from local storage or HuggingFace.
Automatically switches based on DEV_MODE configuration.
Includes load, save, and delete functions for box profiles.
"""
import json
import uuid
import io
from pathlib import Path
from typing import Tuple, List, Optional
from datetime import datetime
from config import (
DEV_MODE,
HF_REPO_ID,
HF_REPO_TYPE,
HF_TOKEN,
PAPER_DB_FILENAME,
FACTORY_SETTINGS_FILENAME,
BOX_PROFILES_FILENAME
)
from models import FluteProfile, PaperGrade, FactoryConfig
# Directory containing this module. The local "data" folder used by the
# loaders and the box-profile save/delete functions is resolved relative to it.
BASE_DIR = Path(__file__).parent
# ============================================================================
# CORE DATA LOADING
# ============================================================================
def _load_from_local() -> Tuple[List[PaperGrade], "FactoryConfig", List[FluteProfile]]:
    """Load paper grades and factory settings from the local data/ folder.

    Reads ``PAPER_DB_FILENAME`` and ``FACTORY_SETTINGS_FILENAME`` from the
    ``data`` directory located next to this module.

    Returns:
        Tuple of (paper_db, factory_config, flute_profiles).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If either file does not contain valid JSON.
    """
    print("πŸ“ Loading data from LOCAL storage...")
    data_dir = BASE_DIR / "data"
    paper_db_path = data_dir / PAPER_DB_FILENAME
    factory_settings_path = data_dir / FACTORY_SETTINGS_FILENAME

    # JSON is UTF-8 by specification; be explicit so decoding does not depend
    # on the platform's locale encoding (e.g. cp1252 on Windows).
    with open(paper_db_path, "r", encoding="utf-8") as f:
        paper_db = [PaperGrade(**p) for p in json.load(f)]
    with open(factory_settings_path, "r", encoding="utf-8") as f:
        fs_data = json.load(f)

    flutes, factory_config = _parse_factory_settings(fs_data)
    print(f"βœ… Loaded {len(paper_db)} paper grades, {len(flutes)} flute profiles")
    return paper_db, factory_config, flutes
def _load_from_huggingface() -> Tuple[List[PaperGrade], "FactoryConfig", List[FluteProfile]]:
    """Load paper grades and factory settings from the HuggingFace repository.

    Downloads ``PAPER_DB_FILENAME`` and ``FACTORY_SETTINGS_FILENAME`` from
    ``HF_REPO_ID`` via ``hf_hub_download`` and parses them.

    Returns:
        Tuple of (paper_db, factory_config, flute_profiles).

    Raises:
        ImportError: If ``huggingface_hub`` is not installed.
    """
    print(f"☁️ Loading data from HuggingFace: {HF_REPO_ID}...")
    try:
        from huggingface_hub import hf_hub_download
    except ImportError as err:
        # Chain the original error so the underlying import failure stays
        # visible in the traceback.
        raise ImportError(
            "huggingface_hub is required for production mode. "
            "Install with: pip install huggingface_hub"
        ) from err

    # Download files from HuggingFace (cached automatically).
    # Pass token for private repository access.
    paper_db_path = hf_hub_download(
        repo_id=HF_REPO_ID,
        filename=PAPER_DB_FILENAME,
        repo_type=HF_REPO_TYPE,
        token=HF_TOKEN,
    )
    factory_settings_path = hf_hub_download(
        repo_id=HF_REPO_ID,
        filename=FACTORY_SETTINGS_FILENAME,
        repo_type=HF_REPO_TYPE,
        token=HF_TOKEN,
    )

    # JSON is UTF-8 by specification; be explicit so decoding does not depend
    # on the platform's locale encoding.
    with open(paper_db_path, "r", encoding="utf-8") as f:
        paper_db = [PaperGrade(**p) for p in json.load(f)]
    with open(factory_settings_path, "r", encoding="utf-8") as f:
        fs_data = json.load(f)

    flutes, factory_config = _parse_factory_settings(fs_data)
    print(f"βœ… Loaded {len(paper_db)} paper grades, {len(flutes)} flute profiles from HuggingFace")
    return paper_db, factory_config, flutes
def _parse_factory_settings(fs_data: dict) -> Tuple[List[FluteProfile], "FactoryConfig"]:
    """Convert the raw factory-settings JSON dict into typed objects.

    Args:
        fs_data: Parsed JSON with 'flutes', 'wastage', 'costs' and 'reels'
            sections.

    Returns:
        Tuple of (flute_profiles, factory_config).
    """
    flute_profiles = [FluteProfile(**entry) for entry in fs_data['flutes']]
    wastage_cfg, cost_cfg, reel_sizes = (
        fs_data['wastage'],
        fs_data['costs'],
        fs_data['reels'],
    )

    # Keyword arguments are assembled in one mapping; required keys use
    # indexing, optional ones fall back to a default via .get().
    settings = {
        'wastage_process_pct': wastage_cfg['process_pct'],
        'cost_conversion_per_kg': cost_cfg['conversion_per_kg'],
        'cost_fixed_setup': cost_cfg['fixed_setup'],
        # Value-add costs (optional processes)
        'cost_printing_per_1000': cost_cfg.get('printing_per_1000', 0.0),
        'cost_printing_plate': cost_cfg.get('printing_plate', 0.0),
        'cost_uv_per_1000': cost_cfg.get('uv_per_1000', 0.0),
        'cost_lamination_per_1000': cost_cfg.get('lamination_per_1000', 0.0),
        'cost_die_cutting_per_1000': cost_cfg.get('die_cutting_per_1000', 0.0),
        'cost_die_frame': cost_cfg.get('die_frame', 0.0),
        'margin_pct': cost_cfg['margin_pct'],
        'process_efficiency_pct': cost_cfg.get('process_efficiency_pct', 85.0),
        'ect_conversion_factor': cost_cfg.get('ect_conversion_factor', 0.85),
        'currency': cost_cfg['currency'],
        'available_reel_sizes': reel_sizes,
    }
    return flute_profiles, FactoryConfig(**settings)
def load_all_data() -> Tuple[List[PaperGrade], "FactoryConfig", List[FluteProfile]]:
    """
    Public entry point for loading all reference data.

    The source is selected by DEV_MODE: local files when it is truthy,
    the HuggingFace repository otherwise.

    Returns:
        Tuple of (paper_db, factory_config, flute_profiles)
    """
    print(f"πŸ”§ DEV_MODE = {DEV_MODE}")
    loader = _load_from_local if DEV_MODE else _load_from_huggingface
    return loader()
# ============================================================================
# BOX PROFILES - LOAD / SAVE / DELETE
# ============================================================================
def load_box_profiles() -> List[dict]:
    """Load saved box profiles from local storage or HuggingFace.

    In DEV_MODE the profiles are read from ``data/BOX_PROFILES_FILENAME``
    next to this module. Otherwise the file is downloaded from the
    HuggingFace repository with ``force_download=True`` so that data written
    by a recent save is not masked by the download cache.

    This function never raises. A missing profiles file yields an empty list
    silently; any other failure is reported on stdout and also yields an
    empty list.

    Returns:
        The parsed contents of the profiles file, or ``[]`` if it does not
        exist or could not be loaded.
    """
    try:
        if DEV_MODE:
            profiles_path = BASE_DIR / "data" / BOX_PROFILES_FILENAME
            if not profiles_path.exists():
                return []
            with open(profiles_path, "r", encoding="utf-8") as f:
                return json.load(f)

        from huggingface_hub import hf_hub_download
        from huggingface_hub.utils import EntryNotFoundError

        try:
            # force_download=True bypasses cache to get fresh data after saves
            profiles_path = hf_hub_download(
                repo_id=HF_REPO_ID,
                filename=BOX_PROFILES_FILENAME,
                repo_type=HF_REPO_TYPE,
                token=HF_TOKEN,
                force_download=True,  # Always fetch fresh data
            )
        except EntryNotFoundError:
            # File doesn't exist yet. Only this case is treated as "no
            # profiles"; other failures (bad token, missing repo, corrupt
            # JSON, ...) fall through to the outer handler so they are at
            # least reported instead of being silently swallowed.
            return []

        with open(profiles_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        print(f"⚠️ Error loading box profiles: {e}")
        return []
def _upload_to_huggingface(filename: str, data: object) -> bool:
    """Upload JSON-serializable data to the HuggingFace repository.

    Args:
        filename: Destination path of the file inside the repository.
        data: Any value accepted by ``json.dumps``.

    Returns:
        True if the upload succeeded. False if anything failed (missing
        ``huggingface_hub``, serialization error, upload error); the failure
        is reported on stdout rather than raised.
    """
    try:
        from huggingface_hub import HfApi

        api = HfApi()
        # Serialize once to bytes and upload from memory, so no local file
        # has to be written.
        json_bytes = json.dumps(data, indent=2).encode('utf-8')
        api.upload_file(
            path_or_fileobj=io.BytesIO(json_bytes),
            path_in_repo=filename,
            repo_id=HF_REPO_ID,
            repo_type=HF_REPO_TYPE,
            token=HF_TOKEN,
        )
        print(f"βœ… Uploaded {filename} to HuggingFace")
        return True
    except Exception as e:
        print(f"❌ Failed to upload to HuggingFace: {e}")
        return False
def save_box_profile(profile_data: dict) -> Tuple[bool, str]:
    """
    Save a box profile by appending it to the stored profiles.

    In DEV_MODE the updated list is written to the local data/ folder;
    otherwise it is uploaded to HuggingFace.

    On a valid input, ``profile_data`` is modified in place: an 8-character
    'id' and an ISO-format 'created_at' timestamp are added to it.

    Args:
        profile_data: Dict with name, ply_type, dimensions, layers, processes.
            The 'name' key is required.

    Returns:
        Tuple of (success: bool, message: str). This function does not raise;
        failures are reported through the returned tuple.
    """
    try:
        # Validate before touching storage. The success message needs the
        # name, and looking it up only after persisting would report a
        # failure for a profile that had in fact already been saved.
        if 'name' not in profile_data:
            return False, "Error saving profile: missing required field 'name'"

        # Generate unique ID and timestamp
        profile_data['id'] = str(uuid.uuid4())[:8]
        profile_data['created_at'] = datetime.now().isoformat()

        # NOTE(review): load_box_profiles() returns [] when loading fails, so
        # a failed load followed by a successful write would replace the
        # stored list with just this profile. Confirm whether that is
        # acceptable for production use.
        profiles = load_box_profiles()
        profiles.append(profile_data)

        if DEV_MODE:
            # Save locally in development mode
            local_path = BASE_DIR / "data" / BOX_PROFILES_FILENAME
            with open(local_path, "w", encoding="utf-8") as f:
                json.dump(profiles, f, indent=2)
        else:
            # In production (HF Spaces), upload directly to HuggingFace.
            # HF Spaces filesystem is read-only, so we can't save locally.
            success = _upload_to_huggingface(BOX_PROFILES_FILENAME, profiles)
            if not success:
                return False, "Failed to save to cloud storage"

        return True, f"Profile '{profile_data['name']}' saved successfully!"
    except Exception as e:
        return False, f"Error saving profile: {e}"
def delete_box_profile(profile_id: str) -> Tuple[bool, str]:
    """
    Remove the box profile whose 'id' equals the given ID.

    Args:
        profile_id: The unique ID of the profile to delete

    Returns:
        Tuple of (success: bool, message: str)
    """
    try:
        existing = load_box_profiles()
        remaining = [entry for entry in existing if entry.get('id') != profile_id]

        # Nothing was filtered out, so no stored profile carries this ID.
        if len(remaining) == len(existing):
            return False, "Profile not found"

        if not DEV_MODE:
            # Production (HF Spaces): push the reduced list to HuggingFace.
            uploaded = _upload_to_huggingface(BOX_PROFILES_FILENAME, remaining)
            if not uploaded:
                return False, "Failed to delete from cloud storage"
        else:
            # Development: rewrite the local profiles file.
            target = BASE_DIR / "data" / BOX_PROFILES_FILENAME
            with open(target, "w") as out:
                json.dump(remaining, out, indent=2)

        return True, "Profile deleted successfully!"
    except Exception as exc:
        return False, f"Error deleting profile: {exc}"