haileyhalimj@gmail.com
Rename src/etl to src/preprocess for better code organization
1131bea
raw
history blame
4.93 kB
#!/usr/bin/env python3
"""
Kit Hierarchy Parser - Converts CSV hierarchy data to optimized formats
This module provides functions to:
1. Parse Kit_Composition_and_relation.csv
2. Generate JSON hierarchy structure
3. Create production order CSV
4. Build DAG for optimization constraints
"""
import pandas as pd
import json
from typing import Dict, List, Tuple, Set
from collections import defaultdict, deque
class KitHierarchyParser:
    """
    Parses kit composition data and creates hierarchy structures
    for production order optimization.

    The source CSV is expected to provide the columns ``Master Kit``,
    ``Master Kit Description``, ``Sub kit``, ``Sub kit description``,
    ``Prepack`` and ``Prepack Description``.

    Attributes set by ``__init__``:
        csv_path: Path of the composition CSV to read.
        df: Loaded DataFrame, or ``None`` until ``load_data`` has run.
        hierarchy_json: Result of the most recent ``parse_hierarchy`` call.
        production_order_csv: Initialised to an empty list; no method in
            this class populates it.
        dependency_graph: Initialised to empty ``nodes``/``edges`` sets; no
            method in this class populates it.
    """

    def __init__(self, csv_path: str = "data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv") -> None:
        self.csv_path = csv_path
        self.df = None
        self.hierarchy_json = {}
        self.production_order_csv = []
        self.dependency_graph = {'nodes': set(), 'edges': set()}

    def load_data(self) -> None:
        """Read the CSV at ``self.csv_path`` into ``self.df`` and report the row count."""
        self.df = pd.read_csv(self.csv_path)
        print(f"Loaded {len(self.df)} rows from {self.csv_path}")

    @staticmethod
    def _append_unique(items: list, value) -> None:
        """Append ``value`` to ``items`` unless it is already there (first-seen order is kept)."""
        if value not in items:
            items.append(value)

    def parse_hierarchy(self) -> Dict:
        """
        Parse the hierarchy from CSV into JSON structure.

        Loads the CSV first if ``load_data`` has not been called yet. Rows
        without a master kit id are ignored. A row with a master and a
        prepack but no subkit is recorded under the master's
        ``direct_prepacks`` list, which only exists for masters that have
        at least one such row.

        Returns: Nested dictionary representing the hierarchy. Each master
        entry has ``name``, ``type``, ``subkits`` and ``dependencies``; each
        subkit entry has ``name``, ``type``, ``prepacks`` and
        ``dependencies``. The result is also stored in
        ``self.hierarchy_json``.
        """
        if self.df is None:
            self.load_data()

        # Get unique relationships
        relationships = self.df[['Master Kit', 'Master Kit Description',
                                 'Sub kit', 'Sub kit description',
                                 'Prepack', 'Prepack Description']].drop_duplicates()

        hierarchy = defaultdict(lambda: {
            'name': '',
            'type': 'master',
            'subkits': defaultdict(lambda: {
                'name': '',
                'type': 'subkit',
                'prepacks': [],
                'dependencies': []
            }),
            'dependencies': []
        })

        add_unique = self._append_unique

        for _, row in relationships.iterrows():
            master_id = row['Master Kit']
            if pd.isna(master_id):
                continue

            master_desc = row['Master Kit Description']
            subkit_id = row['Sub kit']
            subkit_desc = row['Sub kit description']
            prepack_id = row['Prepack']

            # Looking the master up registers it even when the row carries
            # nothing else.
            master = hierarchy[master_id]

            # Only assign a name when the row actually has one, so a later
            # row with a blank description cannot erase a name seen earlier.
            if pd.notna(master_desc):
                master['name'] = master_desc

            if pd.notna(subkit_id):
                subkit = master['subkits'][subkit_id]
                if pd.notna(subkit_desc):
                    subkit['name'] = subkit_desc

                # The master depends on the subkit.
                add_unique(master['dependencies'], subkit_id)

                if pd.notna(prepack_id):
                    # The subkit contains, and depends on, the prepack.
                    add_unique(subkit['prepacks'], prepack_id)
                    add_unique(subkit['dependencies'], prepack_id)
            elif pd.notna(prepack_id):
                # Direct master-prepack relationship (no subkit): the list is
                # created lazily the first time such a row is seen.
                add_unique(master.setdefault('direct_prepacks', []), prepack_id)
                add_unique(master['dependencies'], prepack_id)

        # Round-trip through JSON to get plain dicts/lists; this also turns
        # any non-string dictionary keys into strings, as JSON requires.
        self.hierarchy_json = json.loads(json.dumps(hierarchy, default=dict))
        return self.hierarchy_json
def main():
    """Demo the hierarchy parser.

    Parses the default composition CSV and writes the resulting hierarchy
    to ``data/hierarchy_exports/kit_hierarchy.json``, creating the export
    directory when it does not exist yet.
    """
    from pathlib import Path

    parser = KitHierarchyParser()
    print("🔄 Parsing kit hierarchy...")
    hierarchy = parser.parse_hierarchy()

    # Export to JSON. The target directory is created first so a fresh
    # checkout does not fail with FileNotFoundError.
    export_path = Path('data/hierarchy_exports/kit_hierarchy.json')
    export_path.parent.mkdir(parents=True, exist_ok=True)
    with export_path.open('w', encoding='utf-8') as f:
        json.dump(hierarchy, f, indent=4)

    print(f"📊 Found {len(hierarchy)} master kits")


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()