G2BB_dashbord / preprocess.py
hariqueen's picture
Upload 4 files
192a401 verified
raw
history blame contribute delete
891 Bytes
import pandas as pd
def preprocess_bid_data(input_csv: str) -> pd.DataFrame:
    """Load a bid CSV and derive the period and date columns from it.

    NOTE(review): the column labels used below are reproduced exactly as
    they appear in this file. They look like mis-encoded text, so confirm
    them against the real CSV header before relying on them.

    Args:
        input_csv: Location of the CSV; passed straight to
            ``pandas.read_csv``.

    Returns:
        A new, independent DataFrame restricted to the twelve selected
        columns. The year and month columns are integers, the year-month
        column is a ``YYYYMM`` string, missing durations are replaced by
        0, the start date is the first day of the year/month, and the
        expected date is the start date plus ``duration * 30`` days.

    Raises:
        ValueError: If a year cell contains no four-digit number, or if
            the month or duration column cannot be converted to ``int``.
        KeyError: If a required column is missing from the CSV.
    """
    year_col = "λ…„"
    month_col = "μ›”"
    period_col = "λ…„μ›”"
    duration_col = "μš©μ—­κΈ°κ°„(κ°œμ›”)"
    start_col = "곡고_μ‹œμž‘μΌ"
    expected_col = "μ˜ˆμƒ_μž…μ°°μΌ"

    df = pd.read_csv(input_csv)

    # expand=False yields a Series, so the assignment below does not depend
    # on pandas unwrapping a one-column DataFrame.
    years = df[year_col].astype(str).str.extract(r"(\d{4})", expand=False)
    if years.isna().any():
        # Fail with a clear message instead of an opaque NaN-to-int error.
        raise ValueError(
            f"column {year_col!r} has values without a four-digit year"
        )
    df[year_col] = years.astype(int)
    df[month_col] = df[month_col].astype(int)

    # Zero-padded YYYYMM key, e.g. 2023 and 1 -> "202301".
    df[period_col] = (
        df[year_col].astype(str) + df[month_col].astype(str).str.zfill(2)
    )

    # Missing durations count as 0; astype(int) drops any fractional part.
    df[duration_col] = df[duration_col].fillna(0).astype(int)

    # Parse with an explicit format rather than relying on format inference
    # over unpadded "Y-M-01" strings. The result is the first of the month.
    df[start_col] = pd.to_datetime(df[period_col], format="%Y%m")

    # A month is approximated as 30 days.
    df[expected_col] = df[start_col] + pd.to_timedelta(
        df[duration_col] * 30, unit="D"
    )

    output_columns = [
        year_col,
        month_col,
        period_col,
        "μ‹€μˆ˜μš”κΈ°κ΄€",
        "곡고λͺ…",
        "λ¬Όλ™λŸ‰ 평균",
        duration_col,
        "계약 κΈ°κ°„ λ‚΄",
        "μž…μ°°κ²°κ³Ό_1μˆœμœ„",
        "μž…μ°°κΈˆμ•‘_1μˆœμœ„",
        start_col,
        expected_col,
    ]
    # Return a copy so callers can add or modify columns without
    # SettingWithCopyWarning.
    return df[output_columns].copy()