asteroidddd committed on
Commit
03d252b
·
1 Parent(s): 7eb54e6

Add trained pipelines for orders 1~5 (개별 파라미터) + training script

Browse files
models_by_order/order1/pipeline.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:041e9b5169234d467c3498975c5ba3448ade260c439ce76d32ac516c78b4a677
3
- size 180342
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a0c44665887973e36da591e20679119108a153146c7acc32821f73791905546
3
+ size 181491
models_by_order/order2/pipeline.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0c69aac43a89ac4f3fae7ef4dcde3828073fdca21ff27483d937f23b23cf6d6
3
- size 274597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fc6025c9adcf3666cb2b4dd1629494bd661e958df64a193c33d768e5f83cbdd
3
+ size 279511
models_by_order/order3/pipeline.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85bc231dd68e1371c39185517de6eb65d13e42fc9e72636b9b541f0ee9d8b8be
3
- size 382023
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b01379db433360e5fb2739bca9406abde2ee05f8c8d2a704f28d12dc8576cc23
3
+ size 457372
models_by_order/order4/pipeline.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df27cff94b39d0a360c32b1ba14ced6fc12ac833f7655db3195341b9d8659ead
3
- size 609739
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:469691b3d9bd1912462663813b49444ed7bb4853a780ca7aa8f5528e82785a17
3
+ size 808159
models_by_order/order5/pipeline.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c97e664735d7c448609cd943837432879bae5a1afcb9ee977416b023e9a77c88
3
- size 1316200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd7c6a80ff2eac40411c76f8333f1d855859f1297cc2a23b8f3bb46deff40650
3
+ size 1301920
onbid-map-etcp-train.py CHANGED
@@ -1,4 +1,4 @@
1
- # onbid_map_round_train.py
2
 
3
  import os
4
  import shutil
@@ -10,6 +10,7 @@ from sklearn.compose import ColumnTransformer
10
  from sklearn.pipeline import Pipeline
11
  from xgboost import XGBRegressor
12
  from huggingface_hub import HfApi, Repository
 
13
 
14
  # 차수별 하이퍼파라미터 설정
15
  ORDER_PARAMS = {
@@ -75,24 +76,35 @@ def rm_readonly(func, path, exc_info):
75
 
76
  def main():
77
 
 
 
 
 
 
 
 
 
 
78
  # 데이터 불러오기
79
- df = pd.read_pickle(r'C:\Users\hwang\Desktop\OSSP\data.pkl')
 
 
 
 
 
80
 
81
  # '자동차' 대분류 행 제거
82
  if "대분류" in df.columns:
83
  df = df[~(df["대분류"] == "자동차")].reset_index(drop=True)
84
 
85
  # '낙찰차수' 컬럼을 정수형으로 변환하고, 5 이상은 5로 통일
86
- df["낙찰차수"] = df["낙찰차수"].astype(int).apply(lambda x: x if x < 5 else 5)
87
 
88
  # 차수별 모델 학습 & 저장
89
  for order in [1, 2, 3, 4, 5]:
90
 
91
  # 해당 차수 데이터만 필터링
92
- subset = df[df["낙찰차수"] == order].copy().reset_index(drop=True)
93
- if subset.empty:
94
- print(f"차수 {order} 데이터가 없습니다. 건너뜁니다.")
95
- continue
96
 
97
  # 날짜 컬럼(datetime) 파생변수 생성
98
  if "최초입찰시기" in subset.columns:
 
1
+ # onbid-map-etcp-train.py
2
 
3
  import os
4
  import shutil
 
10
  from sklearn.pipeline import Pipeline
11
  from xgboost import XGBRegressor
12
  from huggingface_hub import HfApi, Repository
13
+ import snowflake.connector
14
 
15
  # 차수별 하이퍼파라미터 설정
16
  ORDER_PARAMS = {
 
76
 
77
  def main():
78
 
79
+ conn = snowflake.connector.connect(
80
+ user='EKRHKD',
81
+ password='Ehdrnreorhdth5wh',
82
+ account='iwhmypb-tg22545',
83
+ warehouse='COMPUTE_WH',
84
+ database='ONVID_DB',
85
+ schema='ANALYSIS'
86
+ )
87
+
88
  # 데이터 불러오기
89
+ query = "SELECT * FROM ONBID_RESULTS"
90
+ cur = conn.cursor()
91
+ cur.execute(query)
92
+ df = cur.fetch_pandas_all()
93
+ cur.close()
94
+ conn.close()
95
 
96
  # '자동차' 대분류 행 제거
97
  if "대분류" in df.columns:
98
  df = df[~(df["대분류"] == "자동차")].reset_index(drop=True)
99
 
100
  # Convert the '낙찰차수' column to integer (values of 5 or more are no longer capped at 5)
101
+ df["낙찰차수"] = df["낙찰차수"].astype(int)
102
 
103
  # 차수별 모델 학습 & 저장
104
  for order in [1, 2, 3, 4, 5]:
105
 
106
  # Keep rows whose '낙찰차수' is greater than or equal to this order (cumulative filter, not an exact match)
107
+ subset = df[df["낙찰차수"] >= order].copy().reset_index(drop=True)
 
 
 
108
 
109
  # 날짜 컬럼(datetime) 파생변수 생성
110
  if "최초입찰시기" in subset.columns: