HaLim commited on
Commit
02fd3ca
·
1 Parent(s): 179d6f0

Started working on database

Browse files
.env.example ADDED
File without changes
docker-compose.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
version: '3.8'
services:
  db:
    image: postgres:16-alpine
    container_name: sd_postgres
    restart: unless-stopped
    ports:
      - "${DB_PORT:-5432}:5432"
    environment:
      # Credentials are read from .env (see .env.example); the defaults keep
      # backward compatibility with the previously hard-coded values.
      POSTGRES_USER: ${POSTGRES_USER:-hjun}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-alsdfjwpoejfkd}
      POSTGRES_DB: ${POSTGRES_DB:-sd_roster_real}
    volumes:
      - db_data:/var/lib/postgresql/data
      - ./docker/init:/docker-entrypoint-initdb.d:ro  # initial schema/permission scripts
    healthcheck:
      # BUG FIX: a single "$" is interpolated by docker compose from the HOST
      # environment (usually unset here, expanding to an empty -U/-d argument).
      # "$$" defers expansion to the container, where POSTGRES_USER and
      # POSTGRES_DB are actually defined.
      test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER -d $$POSTGRES_DB"]
      interval: 5s
      timeout: 3s
      retries: 20

  pgadmin:
    image: dpage/pgadmin4
    container_name: sd_pgadmin
    restart: unless-stopped
    environment:
      PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-hjun@unicef.org}
      PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:-alsdfjwpoejfkd}
    ports:
      - "${PGADMIN_PORT:-5050}:80"
    depends_on:
      db:
        condition: service_healthy

volumes:
  db_data:
docker/init/001_init.sql ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
-- Create the logical schemas used by the ETL pipeline.
-- NOTE(review): names suggest stg = staging, dim = dimensions, fact = facts,
-- rej = rejected rows, meta = pipeline metadata — confirm against the ETL code.
CREATE SCHEMA IF NOT EXISTS stg;
CREATE SCHEMA IF NOT EXISTS dim;
CREATE SCHEMA IF NOT EXISTS fact;
CREATE SCHEMA IF NOT EXISTS rej;
CREATE SCHEMA IF NOT EXISTS meta;

-- One row per load batch: which source file was processed, how many rows were
-- read/loaded/rejected, and when the batch started and finished.
CREATE TABLE IF NOT EXISTS meta.batch_log (
    batch_id BIGSERIAL PRIMARY KEY,
    source_file TEXT NOT NULL,   -- path or name of the ingested file
    source_hash TEXT,            -- presumably a content hash of the file — confirm
    rows_read INT,
    rows_loaded INT,
    rows_rejected INT,
    started_at TIMESTAMPTZ DEFAULT NOW(),  -- defaults to insertion time
    finished_at TIMESTAMPTZ                -- NULL until the batch completes
);
pyproject.toml CHANGED
@@ -11,13 +11,6 @@ requires-python = ">=3.10,<3.11"
11
  dependencies = [
12
  "pandas>=2.1.3",
13
  "or-tools>=10.0.0",
14
- "numpy>=1.26.4",
15
- "matplotlib>=3.8.0",
16
- "seaborn>=0.13.2",
17
- "scipy>=1.13.0",
18
- "scikit-learn>=1.3.2",
19
- "statsmodels>=0.14.4",
20
- "plotly>=5.19.0",
21
  ]
22
 
23
 
 
11
  dependencies = [
12
  "pandas>=2.1.3",
13
  "or-tools>=10.0.0",
 
 
 
 
 
 
 
14
  ]
15
 
16
 
requirements.txt CHANGED
@@ -254,3 +254,10 @@ xyzservices==2025.1.0
254
  yarl==1.20.0
255
  zipp==3.21.0
256
  zstandard==0.23.0
 
 
 
 
 
 
 
 
254
  yarl==1.20.0
255
  zipp==3.21.0
256
  zstandard==0.23.0
257
+
258
+ # Database and ETL dependencies
259
+ sqlalchemy==2.0.36
260
+ psycopg2-binary==2.9.9
261
+ python-dotenv==1.0.0
262
+ pydantic==2.10.6
263
+ alembic==1.14.0
src/config/optimization_config.py CHANGED
@@ -1,8 +1,17 @@
1
- DATE_SPAN = list(range(1, 8))
 
 
 
 
2
  PRODUCT_LIST = ["a", "b", "c"] # COOIS_Planned_and_Released.csv
3
- EMPLOYEE_TYPE_LIST = ["Fixed", "Humanizer"] # WH_Workforce_Hourly_payment_scale.csv
4
- SHIFT_LIST = [1, 2, 3] # WH_Workforce_Hourly_Pay_Scale.csv
5
- LINE_LIST = ["long", "short"] # WH_Workforce_Hourly_Pay_Scale.csv
 
 
 
 
 
6
  LINE_LIST_PER_TYPE = {
7
  "long": 2,
8
  "short": 3,
 
1
import pandas as pd
import etl.transform as transformed_data


# Default 7-day planning horizon; anything else comes from user input.
DATE_SPAN = list(range(1, 8))

# Hard-coded defaults (from COOIS_Planned_and_Released.csv and
# WH_Workforce_Hourly_payment_scale.csv). These are the fallback values; when
# the transformed source data is readable, the lists are derived from it.
PRODUCT_LIST = ["a", "b", "c"]
EMPLOYEE_TYPE_LIST = ["Fixed", "Humanizer"]
try:
    PRODUCT_LIST = transformed_data.get_product_list()
    # BUG FIX: this previously called transformed_data.get_employee_type_list(),
    # which does not exist in etl.transform (the function is get_employee_list),
    # so importing this module raised AttributeError.
    EMPLOYEE_TYPE_LIST = transformed_data.get_employee_list()
except (FileNotFoundError, KeyError):
    # Source CSVs missing or lacking the expected columns: keep the
    # defaults above instead of failing at import time.
    pass

# WH_Workforce_Hourly_Pay_Scale.csv -> default settings; user-configurable.
SHIFT_LIST = [1, 2, 3]
LINE_LIST = ["long", "short"]

15
  LINE_LIST_PER_TYPE = {
16
  "long": 2,
17
  "short": 3,
src/etl/__init__.py ADDED
File without changes
src/etl/db.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from dotenv import load_dotenv
from sqlalchemy import create_engine, text

# Pull connection settings from .env (see .env.example); the fallbacks are
# local development defaults.
load_dotenv()
USER = os.getenv("POSTGRES_USER", "myuser")
PWD = os.getenv("POSTGRES_PASSWORD", "mypass")
DB = os.getenv("POSTGRES_DB", "mydb")
PORT = os.getenv("DB_PORT", "5432")
# Generalized: host is now overridable via DB_HOST; default preserves the
# previous hard-coded "localhost".
HOST = os.getenv("DB_HOST", "localhost")

# Shared SQLAlchemy engine for the ETL package (connections are created
# lazily, so building the engine here does not touch the database).
engine = create_engine(
    f"postgresql+psycopg2://{USER}:{PWD}@{HOST}:{PORT}/{DB}", future=True
)

if __name__ == "__main__":
    # Smoke test: print the server version.
    # BUG FIX: future=True (2.0-style) connections reject raw SQL strings
    # (ObjectNotExecutableError); the statement must be wrapped in text().
    with engine.begin() as conn:
        print(conn.execute(text("select version();")).scalar())
src/etl/extract.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd

# Default planning window used to filter demand orders.
START_DATE = pd.Timestamp(2025, 7, 7)
END_DATE = pd.Timestamp(2025, 7, 11)


def read_excel(path: str) -> pd.DataFrame:
    """Read an Excel file, keeping the ``id`` column as nullable integers."""
    return pd.read_excel(path, dtype={"id": "Int64"})


def read_demand_data(
    path: str = "data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv",
    start_date: pd.Timestamp = START_DATE,
    end_date: pd.Timestamp = END_DATE,
) -> pd.DataFrame:
    """Read demand orders and keep those whose start/finish dates fall
    entirely within ``[start_date, end_date]``.

    The window parameters default to the module-level constants, so existing
    callers are unaffected; new callers may pass their own window.
    """
    df = pd.read_csv(path)
    df["Basic start date"] = pd.to_datetime(df["Basic start date"])
    df["Basic finish date"] = pd.to_datetime(df["Basic finish date"])
    in_window = (df["Basic start date"] >= start_date) & (
        df["Basic finish date"] <= end_date
    )
    return df[in_window]


def read_employee_data(
    path="data/real_data_excel/converted_csv/WH_Workforce_Hourly_Pay_Scale.csv",
) -> pd.DataFrame:
    """Read the workforce hourly-pay-scale CSV."""
    return pd.read_csv(path)


def read_shift_data(
    path="data/real_data_excel/converted_csv/Work_Shifts_timing.csv",
) -> pd.DataFrame:
    """Read the work-shift timing CSV."""
    return pd.read_csv(path)


def read_kit_data(
    path="data/real_data_excel/converted_csv/Kits__Calculation.csv",
) -> pd.DataFrame:
    """Read the kits calculation CSV."""
    return pd.read_csv(path)


def read_work_center_capacity(
    path="data/real_data_excel/converted_csv/Work_Centre_Capacity.csv",
) -> pd.DataFrame:
    """Read the work-centre capacity CSV."""
    return pd.read_csv(path)


def read_material_master(
    path="data/real_data_excel/converted_csv/Material_Master_WMS.csv",
) -> pd.DataFrame:
    """Read the material master (WMS) CSV."""
    return pd.read_csv(path)


if __name__ == "__main__":
    demand_data = read_demand_data()
    print(demand_data.head())
src/etl/models.py ADDED
@@ -0,0 +1 @@
 
 
1
+
src/etl/transform.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd

# BUG FIX: a bare "import extract" only works when this file is run as a
# script from src/etl; it fails when the module is imported as etl.transform
# (which src/config/optimization_config.py does). Try the package-relative
# import first and fall back to the script-style import.
try:
    from . import extract as ex
except ImportError:
    import extract as ex


def get_product_list():
    """Return the unique material numbers found in the demand data."""
    demand = ex.read_demand_data()
    # (Removed the debug print: this runs at config-import time.)
    return demand["Material Number"].unique()


def get_employee_list():
    """Return the unique employee types found in the workforce data."""
    employee = ex.read_employee_data()
    # BUG FIX: the old code reduced the frame to the "Description" Series and
    # then indexed that Series with "Employee_Type", which cannot work.
    # Index the frame's column directly instead.
    # NOTE(review): assumes the CSV has an "Employee_Type" column — confirm.
    return employee["Employee_Type"].unique()


# Backward-compatible alias: optimization_config refers to this name.
get_employee_type_list = get_employee_list


if __name__ == "__main__":
    print(get_product_list())
    # print(get_employee_list())
src/project ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 2e1b97c9d8196552a23dd5a4c536f25e53c033dc