Waqasjan123 committed on
Commit
42556a6
·
verified ·
1 Parent(s): 67d5c0e

Update src/data_loader.py

Browse files
Files changed (1) hide show
  1. src/data_loader.py +123 -119
src/data_loader.py CHANGED
@@ -1,119 +1,123 @@
1
- """
2
- Data Loader - Handles loading data from local storage or HuggingFace.
3
- Automatically switches based on DEV_MODE configuration.
4
- """
5
- import json
6
- from pathlib import Path
7
- from typing import Tuple, List
8
-
9
- from config import (
10
- DEV_MODE,
11
- HF_REPO_ID,
12
- HF_REPO_TYPE,
13
- PAPER_DB_FILENAME,
14
- FACTORY_SETTINGS_FILENAME
15
- )
16
- from models import FluteProfile, PaperGrade, FactoryConfig
17
-
18
- # Get the directory where this file is located
19
- BASE_DIR = Path(__file__).parent
20
-
21
-
22
- def _load_from_local() -> Tuple[List[PaperGrade], "FactoryConfig", List[FluteProfile]]:
23
- """Load data from local data/ folder."""
24
- print("📁 Loading data from LOCAL storage...")
25
-
26
- paper_db_path = BASE_DIR / "data" / PAPER_DB_FILENAME
27
- factory_settings_path = BASE_DIR / "data" / FACTORY_SETTINGS_FILENAME
28
-
29
- with open(paper_db_path, "r") as f:
30
- paper_db = [PaperGrade(**p) for p in json.load(f)]
31
-
32
- with open(factory_settings_path, "r") as f:
33
- fs_data = json.load(f)
34
-
35
- flutes, factory_config = _parse_factory_settings(fs_data)
36
-
37
- print(f"✅ Loaded {len(paper_db)} paper grades, {len(flutes)} flute profiles")
38
- return paper_db, factory_config, flutes
39
-
40
-
41
- def _load_from_huggingface() -> Tuple[List[PaperGrade], "FactoryConfig", List[FluteProfile]]:
42
- """Load data from HuggingFace dataset repository."""
43
- print(f"☁️ Loading data from HuggingFace: {HF_REPO_ID}...")
44
-
45
- try:
46
- from huggingface_hub import hf_hub_download
47
- except ImportError:
48
- raise ImportError(
49
- "huggingface_hub is required for production mode. "
50
- "Install with: pip install huggingface_hub"
51
- )
52
-
53
- # Download files from HuggingFace (cached automatically)
54
- paper_db_path = hf_hub_download(
55
- repo_id=HF_REPO_ID,
56
- filename=PAPER_DB_FILENAME,
57
- repo_type=HF_REPO_TYPE
58
- )
59
-
60
- factory_settings_path = hf_hub_download(
61
- repo_id=HF_REPO_ID,
62
- filename=FACTORY_SETTINGS_FILENAME,
63
- repo_type=HF_REPO_TYPE
64
- )
65
-
66
- with open(paper_db_path, "r") as f:
67
- paper_db = [PaperGrade(**p) for p in json.load(f)]
68
-
69
- with open(factory_settings_path, "r") as f:
70
- fs_data = json.load(f)
71
-
72
- flutes, factory_config = _parse_factory_settings(fs_data)
73
-
74
- print(f"✅ Loaded {len(paper_db)} paper grades, {len(flutes)} flute profiles from HuggingFace")
75
- return paper_db, factory_config, flutes
76
-
77
-
78
- def _parse_factory_settings(fs_data: dict) -> Tuple[List[FluteProfile], "FactoryConfig"]:
79
- """Parse factory settings JSON into typed objects."""
80
- flutes = [FluteProfile(**fp) for fp in fs_data['flutes']]
81
- wastage = fs_data['wastage']
82
- costs = fs_data['costs']
83
- reels = fs_data['reels']
84
-
85
- factory_config = FactoryConfig(
86
- wastage_process_pct=wastage['process_pct'],
87
- cost_conversion_per_kg=costs['conversion_per_kg'],
88
- cost_fixed_setup=costs['fixed_setup'],
89
- # Value-Add Costs (optional processes)
90
- cost_printing_per_1000=costs.get('printing_per_1000', 0.0),
91
- cost_printing_plate=costs.get('printing_plate', 0.0),
92
- cost_uv_per_1000=costs.get('uv_per_1000', 0.0),
93
- cost_lamination_per_1000=costs.get('lamination_per_1000', 0.0),
94
- cost_die_cutting_per_1000=costs.get('die_cutting_per_1000', 0.0),
95
- cost_die_frame=costs.get('die_frame', 0.0),
96
- margin_pct=costs['margin_pct'],
97
- process_efficiency_pct=costs.get('process_efficiency_pct', 85.0),
98
- ect_conversion_factor=costs.get('ect_conversion_factor', 0.85),
99
- currency=costs['currency'],
100
- available_reel_sizes=reels
101
- )
102
-
103
- return flutes, factory_config
104
-
105
-
106
- def load_all_data() -> Tuple[List[PaperGrade], "FactoryConfig", List[FluteProfile]]:
107
- """
108
- Main entry point for loading data.
109
- Automatically chooses local or HuggingFace based on DEV_MODE.
110
-
111
- Returns:
112
- Tuple of (paper_db, factory_config, flute_profiles)
113
- """
114
- print(f"🔧 DEV_MODE = {DEV_MODE}")
115
-
116
- if DEV_MODE:
117
- return _load_from_local()
118
- else:
119
- return _load_from_huggingface()
 
 
 
 
 
1
+ """
2
+ Data Loader - Handles loading data from local storage or HuggingFace.
3
+ Automatically switches based on DEV_MODE configuration.
4
+ """
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Tuple, List
8
+
9
+ from config import (
10
+ DEV_MODE,
11
+ HF_REPO_ID,
12
+ HF_REPO_TYPE,
13
+ HF_TOKEN,
14
+ PAPER_DB_FILENAME,
15
+ FACTORY_SETTINGS_FILENAME
16
+ )
17
+ from models import FluteProfile, PaperGrade, FactoryConfig
18
+
19
+ # Get the directory where this file is located
20
+ BASE_DIR = Path(__file__).parent
21
+
22
+
23
+ def _load_from_local() -> Tuple[List[PaperGrade], "FactoryConfig", List[FluteProfile]]:
24
+ """Load data from local data/ folder."""
25
+ print("📁 Loading data from LOCAL storage...")
26
+
27
+ paper_db_path = BASE_DIR / "data" / PAPER_DB_FILENAME
28
+ factory_settings_path = BASE_DIR / "data" / FACTORY_SETTINGS_FILENAME
29
+
30
+ with open(paper_db_path, "r") as f:
31
+ paper_db = [PaperGrade(**p) for p in json.load(f)]
32
+
33
+ with open(factory_settings_path, "r") as f:
34
+ fs_data = json.load(f)
35
+
36
+ flutes, factory_config = _parse_factory_settings(fs_data)
37
+
38
+ print(f"✅ Loaded {len(paper_db)} paper grades, {len(flutes)} flute profiles")
39
+ return paper_db, factory_config, flutes
40
+
41
+
42
+ def _load_from_huggingface() -> Tuple[List[PaperGrade], "FactoryConfig", List[FluteProfile]]:
43
+ """Load data from HuggingFace dataset repository."""
44
+ print(f"☁️ Loading data from HuggingFace: {HF_REPO_ID}...")
45
+
46
+ try:
47
+ from huggingface_hub import hf_hub_download
48
+ except ImportError:
49
+ raise ImportError(
50
+ "huggingface_hub is required for production mode. "
51
+ "Install with: pip install huggingface_hub"
52
+ )
53
+
54
+ # Download files from HuggingFace (cached automatically)
55
+ # Pass token for private repository access
56
+ paper_db_path = hf_hub_download(
57
+ repo_id=HF_REPO_ID,
58
+ filename=PAPER_DB_FILENAME,
59
+ repo_type=HF_REPO_TYPE,
60
+ token=HF_TOKEN
61
+ )
62
+
63
+ factory_settings_path = hf_hub_download(
64
+ repo_id=HF_REPO_ID,
65
+ filename=FACTORY_SETTINGS_FILENAME,
66
+ repo_type=HF_REPO_TYPE,
67
+ token=HF_TOKEN
68
+ )
69
+
70
+ with open(paper_db_path, "r") as f:
71
+ paper_db = [PaperGrade(**p) for p in json.load(f)]
72
+
73
+ with open(factory_settings_path, "r") as f:
74
+ fs_data = json.load(f)
75
+
76
+ flutes, factory_config = _parse_factory_settings(fs_data)
77
+
78
+ print(f"✅ Loaded {len(paper_db)} paper grades, {len(flutes)} flute profiles from HuggingFace")
79
+ return paper_db, factory_config, flutes
80
+
81
+
82
+ def _parse_factory_settings(fs_data: dict) -> Tuple[List[FluteProfile], "FactoryConfig"]:
83
+ """Parse factory settings JSON into typed objects."""
84
+ flutes = [FluteProfile(**fp) for fp in fs_data['flutes']]
85
+ wastage = fs_data['wastage']
86
+ costs = fs_data['costs']
87
+ reels = fs_data['reels']
88
+
89
+ factory_config = FactoryConfig(
90
+ wastage_process_pct=wastage['process_pct'],
91
+ cost_conversion_per_kg=costs['conversion_per_kg'],
92
+ cost_fixed_setup=costs['fixed_setup'],
93
+ # Value-Add Costs (optional processes)
94
+ cost_printing_per_1000=costs.get('printing_per_1000', 0.0),
95
+ cost_printing_plate=costs.get('printing_plate', 0.0),
96
+ cost_uv_per_1000=costs.get('uv_per_1000', 0.0),
97
+ cost_lamination_per_1000=costs.get('lamination_per_1000', 0.0),
98
+ cost_die_cutting_per_1000=costs.get('die_cutting_per_1000', 0.0),
99
+ cost_die_frame=costs.get('die_frame', 0.0),
100
+ margin_pct=costs['margin_pct'],
101
+ process_efficiency_pct=costs.get('process_efficiency_pct', 85.0),
102
+ ect_conversion_factor=costs.get('ect_conversion_factor', 0.85),
103
+ currency=costs['currency'],
104
+ available_reel_sizes=reels
105
+ )
106
+
107
+ return flutes, factory_config
108
+
109
+
110
+ def load_all_data() -> Tuple[List[PaperGrade], "FactoryConfig", List[FluteProfile]]:
111
+ """
112
+ Main entry point for loading data.
113
+ Automatically chooses local or HuggingFace based on DEV_MODE.
114
+
115
+ Returns:
116
+ Tuple of (paper_db, factory_config, flute_profiles)
117
+ """
118
+ print(f"🔧 DEV_MODE = {DEV_MODE}")
119
+
120
+ if DEV_MODE:
121
+ return _load_from_local()
122
+ else:
123
+ return _load_from_huggingface()