HaLim
committed on
Commit
·
02fd3ca
1
Parent(s):
179d6f0
Started working on database
Browse files- .env.example +0 -0
- docker-compose.yaml +36 -0
- docker/init/001_init.sql +16 -0
- pyproject.toml +0 -7
- requirements.txt +7 -0
- src/config/optimization_config.py +13 -4
- src/etl/__init__.py +0 -0
- src/etl/db.py +17 -0
- src/etl/extract.py +58 -0
- src/etl/models.py +1 -0
- src/etl/transform.py +20 -0
- src/project +1 -0
.env.example
ADDED
|
File without changes
|
docker-compose.yaml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Local development stack: a PostgreSQL database plus a pgAdmin web UI.
# Credentials are taken from the environment (or a .env file next to this
# file) instead of being committed; passwords have no default on purpose.
version: '3.8'
services:
  db:
    image: postgres:16-alpine
    container_name: sd_postgres
    restart: unless-stopped
    ports:
      - "${DB_PORT:-5432}:5432"
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-hjun}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD in .env}
      POSTGRES_DB: ${POSTGRES_DB:-sd_roster_real}
    volumes:
      - db_data:/var/lib/postgresql/data
      - ./docker/init:/docker-entrypoint-initdb.d:ro  # initial schema / permission scripts
    healthcheck:
      # "$$" escapes Compose interpolation so the variables are expanded by the
      # shell inside the container, from the service environment set above.
      test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"]
      interval: 5s
      timeout: 3s
      retries: 20

  pgadmin:
    image: dpage/pgadmin4
    container_name: sd_pgadmin
    restart: unless-stopped
    environment:
      PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-hjun@unicef.org}
      PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:?set PGADMIN_DEFAULT_PASSWORD in .env}
    ports:
      - "${PGADMIN_PORT:-5050}:80"
    depends_on:
      # Start pgAdmin only once the db healthcheck reports healthy.
      db:
        condition: service_healthy

volumes:
  db_data:
|
docker/init/001_init.sql
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Schemas used by the ETL pipeline. The names suggest staging, dimension,
-- fact, rejected-row and metadata layers -- TODO confirm intended usage.
CREATE SCHEMA IF NOT EXISTS stg;
CREATE SCHEMA IF NOT EXISTS dim;
CREATE SCHEMA IF NOT EXISTS fact;
CREATE SCHEMA IF NOT EXISTS rej;
CREATE SCHEMA IF NOT EXISTS meta;

-- Log table with one row per batch: the source file, row counters and
-- start/finish timestamps. started_at defaults to the insert time.
CREATE TABLE IF NOT EXISTS meta.batch_log (
    batch_id       BIGSERIAL    PRIMARY KEY,
    source_file    TEXT         NOT NULL,
    source_hash    TEXT,
    rows_read      INT,
    rows_loaded    INT,
    rows_rejected  INT,
    started_at     TIMESTAMPTZ  DEFAULT NOW(),
    finished_at    TIMESTAMPTZ
);
|
pyproject.toml
CHANGED
|
@@ -11,13 +11,6 @@ requires-python = ">=3.10,<3.11"
|
|
| 11 |
dependencies = [
|
| 12 |
"pandas>=2.1.3",
|
| 13 |
"or-tools>=10.0.0",
|
| 14 |
-
"numpy>=1.26.4",
|
| 15 |
-
"matplotlib>=3.8.0",
|
| 16 |
-
"seaborn>=0.13.2",
|
| 17 |
-
"scipy>=1.13.0",
|
| 18 |
-
"scikit-learn>=1.3.2",
|
| 19 |
-
"statsmodels>=0.14.4",
|
| 20 |
-
"plotly>=5.19.0",
|
| 21 |
]
|
| 22 |
|
| 23 |
|
|
|
|
| 11 |
dependencies = [
|
| 12 |
"pandas>=2.1.3",
|
| 13 |
"or-tools>=10.0.0",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
]
|
| 15 |
|
| 16 |
|
requirements.txt
CHANGED
|
@@ -254,3 +254,10 @@ xyzservices==2025.1.0
|
|
| 254 |
yarl==1.20.0
|
| 255 |
zipp==3.21.0
|
| 256 |
zstandard==0.23.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
yarl==1.20.0
|
| 255 |
zipp==3.21.0
|
| 256 |
zstandard==0.23.0
|
| 257 |
+
|
| 258 |
+
# Database and ETL dependencies
|
| 259 |
+
sqlalchemy==2.0.36
|
| 260 |
+
psycopg2-binary==2.9.9
|
| 261 |
+
python-dotenv==1.0.0
|
| 262 |
+
pydantic==2.10.6
|
| 263 |
+
alembic==1.14.0
|
src/config/optimization_config.py
CHANGED
|
@@ -1,8 +1,17 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
PRODUCT_LIST = ["a", "b", "c"] # COOIS_Planned_and_Released.csv
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
LINE_LIST_PER_TYPE = {
|
| 7 |
"long": 2,
|
| 8 |
"short": 3,
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import etl.transform as transformed_data
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
DATE_SPAN = list(range(1, 8)) # Default 7 days - other than this - user input
|
| 6 |
PRODUCT_LIST = ["a", "b", "c"] # COOIS_Planned_and_Released.csv
|
| 7 |
+
PRODUCT_LIST = transformed_data.get_product_list()
|
| 8 |
+
EMPLOYEE_TYPE_LIST = ["Fixed", "Humanizer"] # WH_Workforce_Hourly_payment_scale.csv -> This is default setting. User should be able to manipulate
|
| 9 |
+
EMPLOYEE_TYPE_LIST = transformed_data.get_employee_type_list()
|
| 10 |
+
|
| 11 |
+
# WH_Workforce_Hourly_payment_scale.csv -> This is default setting. User should be able to manipulate
|
| 12 |
+
SHIFT_LIST = [1, 2, 3] # WH_Workforce_Hourly_Pay_Scale.csv -> This is default setting. User should be able to manipulate
|
| 13 |
+
LINE_LIST = ["long", "short"] # WH_Workforce_Hourly_Pay_Scale.csv -> This is default setting. User should be able to manipulate
|
| 14 |
+
|
| 15 |
LINE_LIST_PER_TYPE = {
|
| 16 |
"long": 2,
|
| 17 |
"short": 3,
|
src/etl/__init__.py
ADDED
|
File without changes
|
src/etl/db.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Database engine for the ETL layer.

Connection settings are read from environment variables (a local ``.env``
file is loaded first) and used to build the module-level SQLAlchemy
``engine``. Running this module as a script prints the server version as a
connectivity check.
"""
import os

from dotenv import load_dotenv
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

load_dotenv()

# Each setting falls back to a placeholder default when the variable is unset.
USER = os.getenv("POSTGRES_USER", "myuser")
PWD = os.getenv("POSTGRES_PASSWORD", "mypass")
DB = os.getenv("POSTGRES_DB", "mydb")
PORT = os.getenv("DB_PORT", "5432")
HOST = os.getenv("DB_HOST", "localhost")

# URL.create() escapes special characters (e.g. "@" or "/") in the user name
# and password, which a hand-built f-string URL would not.
engine = create_engine(
    URL.create(
        "postgresql+psycopg2",
        username=USER,
        password=PWD,
        host=HOST,
        port=int(PORT),
        database=DB,
    ),
    future=True,
)

if __name__ == "__main__":
    with engine.begin() as conn:
        # SQLAlchemy 2.0 no longer executes plain strings; wrap SQL in text().
        print(conn.execute(text("select version();")).scalar())
|
src/etl/extract.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import datetime
|
| 3 |
+
from datetime import date, timedelta
|
| 4 |
+
|
| 5 |
+
START_DATE = pd.Timestamp(2025, 7, 7)
|
| 6 |
+
END_DATE = pd.Timestamp(2025, 7, 11)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def read_excel(path: str) -> pd.DataFrame:
    """Load the Excel file at ``path``, reading an ``id`` column as ``Int64``."""
    column_types = {"id": "Int64"}
    return pd.read_excel(path, dtype=column_types)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def read_demand_data(
    path="data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv",
    start_date=None,
    end_date=None,
) -> pd.DataFrame:
    """Load the demand rows that fall entirely inside a date window.

    Args:
        path: CSV file to read. It must contain the ``Basic start date`` and
            ``Basic finish date`` columns.
        start_date: Inclusive lower bound applied to ``Basic start date``.
            Defaults to the module-level ``START_DATE``.
        end_date: Inclusive upper bound applied to ``Basic finish date``.
            Defaults to the module-level ``END_DATE``.

    Returns:
        The matching rows, with both date columns parsed to datetimes.
    """
    window_start = START_DATE if start_date is None else start_date
    window_end = END_DATE if end_date is None else end_date

    df = pd.read_csv(path)
    df["Basic start date"] = pd.to_datetime(df["Basic start date"])
    df["Basic finish date"] = pd.to_datetime(df["Basic finish date"])

    in_window = (df["Basic start date"] >= window_start) & (
        df["Basic finish date"] <= window_end
    )
    # Return an independent copy so callers can modify the result without
    # triggering pandas' SettingWithCopyWarning.
    return df.loc[in_window].copy()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def read_employee_data(
    path="data/real_data_excel/converted_csv/WH_Workforce_Hourly_Pay_Scale.csv",
) -> pd.DataFrame:
    """Read the CSV at ``path`` (default: the workforce hourly pay scale export)."""
    employee_frame = pd.read_csv(path)
    return employee_frame
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def read_shift_data(
    path="data/real_data_excel/converted_csv/Work_Shifts_timing.csv",
) -> pd.DataFrame:
    """Read the CSV at ``path`` (default: the work shift timing export)."""
    shift_frame = pd.read_csv(path)
    return shift_frame
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def read_kit_data(
    path="data/real_data_excel/converted_csv/Kits__Calculation.csv",
) -> pd.DataFrame:
    """Read the CSV at ``path`` (default: the kits calculation export)."""
    kit_frame = pd.read_csv(path)
    return kit_frame
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def read_work_center_capacity(
    path="data/real_data_excel/converted_csv/Work_Centre_Capacity.csv",
) -> pd.DataFrame:
    """Read the CSV at ``path`` (default: the work centre capacity export)."""
    capacity_frame = pd.read_csv(path)
    return capacity_frame
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def read_material_master(
    path="data/real_data_excel/converted_csv/Material_Master_WMS.csv",
) -> pd.DataFrame:
    """Read the CSV at ``path`` (default: the material master WMS export)."""
    material_frame = pd.read_csv(path)
    return material_frame
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
if __name__ == "__main__":
    # Manual smoke check: print the first demand rows for the default path.
    print(read_demand_data().head())
|
src/etl/models.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
src/etl/transform.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Derive configuration lists from the extracted source data."""
import pandas as pd

try:
    # Normal case: imported as part of the ``etl`` package.
    from . import extract as ex
except ImportError:
    # Run directly as a script (no parent package): use the sibling module.
    import extract as ex


def get_product_list():
    """Return the unique ``Material Number`` values from the demand data."""
    demand = ex.read_demand_data()
    return demand["Material Number"].unique()


def get_employee_list():
    """Return the unique employee types from the employee data.

    The values are read from the ``Employee_Type`` column when the file has
    one, otherwise from ``Description``.

    NOTE(review): the original code selected ``Description`` and then indexed
    the resulting Series with ``"Employee_Type"``, which raises ``KeyError``.
    Confirm which column actually holds the employee type.
    """
    employee = ex.read_employee_data()
    if "Employee_Type" in employee.columns:
        type_column = "Employee_Type"
    else:
        type_column = "Description"
    return employee[type_column].unique()


# src/config/optimization_config.py calls this name; keep both available.
get_employee_type_list = get_employee_list


if __name__ == "__main__":
    print(get_product_list())
    # get_employee_list()
|
src/project
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Subproject commit 2e1b97c9d8196552a23dd5a4c536f25e53c033dc
|