Add trained pipelines for orders 1~5 (개별 파라미터) + training script

Files changed (6) hide show

models_by_order/order1/pipeline.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:041e9b5169234d467c3498975c5ba3448ade260c439ce76d32ac516c78b4a677
-size 180342

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a0c44665887973e36da591e20679119108a153146c7acc32821f73791905546
+size 181491

models_by_order/order2/pipeline.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0c69aac43a89ac4f3fae7ef4dcde3828073fdca21ff27483d937f23b23cf6d6
-size 274597

 version https://git-lfs.github.com/spec/v1
+oid sha256:1fc6025c9adcf3666cb2b4dd1629494bd661e958df64a193c33d768e5f83cbdd
+size 279511

models_by_order/order3/pipeline.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:85bc231dd68e1371c39185517de6eb65d13e42fc9e72636b9b541f0ee9d8b8be
-size 382023

 version https://git-lfs.github.com/spec/v1
+oid sha256:b01379db433360e5fb2739bca9406abde2ee05f8c8d2a704f28d12dc8576cc23
+size 457372

models_by_order/order4/pipeline.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df27cff94b39d0a360c32b1ba14ced6fc12ac833f7655db3195341b9d8659ead
-size 609739

 version https://git-lfs.github.com/spec/v1
+oid sha256:469691b3d9bd1912462663813b49444ed7bb4853a780ca7aa8f5528e82785a17
+size 808159

models_by_order/order5/pipeline.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c97e664735d7c448609cd943837432879bae5a1afcb9ee977416b023e9a77c88
-size 1316200

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd7c6a80ff2eac40411c76f8333f1d855859f1297cc2a23b8f3bb46deff40650
+size 1301920

onbid-map-etcp-train.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# onbid_map_round_train.py
 import os
 import shutil
@@ -10,6 +10,7 @@ from sklearn.compose import ColumnTransformer
 from sklearn.pipeline import Pipeline
 from xgboost import XGBRegressor
 from huggingface_hub import HfApi, Repository
 # 차수별 하이퍼파라미터 설정
 ORDER_PARAMS = {
@@ -75,24 +76,35 @@ def rm_readonly(func, path, exc_info):
 def main():
     # 데이터 불러오기
-    df = pd.read_pickle(r'C:\Users\hwang\Desktop\OSSP\data.pkl')
     # '자동차' 대분류 행 제거
     if "대분류" in df.columns:
         df = df[~(df["대분류"] == "자동차")].reset_index(drop=True)
     # '낙찰차수' 컬럼을 정수형으로 변환하고, 5 이상은 5로 통일
-    df["낙찰차수"] = df["낙찰차수"].astype(int).apply(lambda x: x if x < 5 else 5)
     # 차수별 모델 학습 & 저장
     for order in [1, 2, 3, 4, 5]:
         # 해당 차수 데이터만 필터링
-        subset = df[df["낙찰차수"] == order].copy().reset_index(drop=True)
-        if subset.empty:
-            print(f"차수 {order} 데이터가 없습니다. 건너뜁니다.")
-            continue
         # 날짜 컬럼(datetime) 파생변수 생성
         if "최초입찰시기" in subset.columns:

+# onbid-map-etcp-train.py
 import os
 import shutil
 from sklearn.pipeline import Pipeline
 from xgboost import XGBRegressor
 from huggingface_hub import HfApi, Repository
+import snowflake.connector
 # 차수별 하이퍼파라미터 설정
 ORDER_PARAMS = {
 def main():
+    conn = snowflake.connector.connect(
+        user='EKRHKD',
+        password='Ehdrnreorhdth5wh',
+        account='iwhmypb-tg22545',
+        warehouse='COMPUTE_WH',
+        database='ONVID_DB',
+        schema='ANALYSIS'
+    )
     # 데이터 불러오기
+    query = "SELECT * FROM ONBID_RESULTS"
+    cur = conn.cursor()
+    cur.execute(query)
+    df = cur.fetch_pandas_all()
+    cur.close()
+    conn.close()
     # '자동차' 대분류 행 제거
     if "대분류" in df.columns:
         df = df[~(df["대분류"] == "자동차")].reset_index(drop=True)
     # Cast the '낙찰차수' column to int (values of 5 and above are no longer capped at 5)
+    df["낙찰차수"] = df["낙찰차수"].astype(int)
     # 차수별 모델 학습 & 저장
     for order in [1, 2, 3, 4, 5]:
         # Keep every row whose '낙찰차수' is at least this order (>= order), not only exact matches
+        subset = df[df["낙찰차수"] >= order].copy().reset_index(drop=True)
         # 날짜 컬럼(datetime) 파생변수 생성
         if "최초입찰시기" in subset.columns: