Spaces:
Sleeping
Sleeping
Add
Browse files- Screenshot 2026-04-04 at 7.21.42 PM.png +0 -3
- app.py +0 -0
- config.py +19 -0
- credit_app.py +307 -0
- dataset/report.html +0 -0
- evaluation/check.ipynb +0 -830
- evaluation/model_quantative_eval.ipynb +0 -142
- llm/chabot_chain.py +0 -58
- llm/prompt.py +25 -0
- llm/vectorization.py +14 -16
- evaluation/confusion_matrix.png → models/preprocessor.pkl +2 -2
- evaluation/permutation_importance.png → models/telecom_cb_model.keras +2 -2
- predictors/cb_predictors.ipynb +0 -101
- predictors/score_predict.ipynb +167 -0
- predictors/score_prediction.py +60 -0
Screenshot 2026-04-04 at 7.21.42 PM.png
DELETED
Git LFS Details
|
app.py
DELETED
|
File without changes
|
config.py
CHANGED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================
# Project-wide settings — change model names here and nowhere else.
# ============================================================

# Local LLM served through Ollama.
LLM_MODEL = "gemma2:2b"

# Embedding model. It must be the same model the FAISS index was built with.
# After changing it, rebuild the FAISS index:
#   python -c "from llm.vectorization import make_vectorization; make_vectorization()"
EMBEDDING_MODEL = "intfloat/multilingual-e5-small"

# Location of the FAISS index.
FAISS_PATH = "./faiss"

# Source PDF that the index is built from.
PDF_SOURCE = "./sources/KCB_Credit_Evaluation_System.pdf"

# Number of documents returned per FAISS search. Larger values give the LLM
# more context but lengthen the prompt and slow generation down.
RETRIEVER_K = 3
|
credit_app.py
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import os
|
| 5 |
+
import json
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 9 |
+
from langchain_community.vectorstores import FAISS
|
| 10 |
+
from langchain_ollama import OllamaLLM
|
| 11 |
+
|
| 12 |
+
# 모듈화된 파일들 불러오기
|
| 13 |
+
from config import LLM_MODEL, EMBEDDING_MODEL, FAISS_PATH, RETRIEVER_K
|
| 14 |
+
from llm.prompt import QA_PROMPT
|
| 15 |
+
from predictors.score_prediction import predictor
|
| 16 |
+
|
| 17 |
+
# ==========================================
|
| 18 |
+
# 1. 피처 정의
|
| 19 |
+
# ==========================================
|
| 20 |
+
# Feature codes in the order used throughout this module. The UI builds its
# input widgets by walking this list.
FEATURE_ORDER = [
    'C1Z001386', 'C1M210000', 'C18210000', 'C1L120001', 'C1L120004',
    'L10210000', 'L90210100', 'L90210200', 'L10210B00', 'L10216000',
    'L10217000', 'D10110000', 'D10133000', 'PERF1',
]

# Feature code -> human-readable label. The labels are runtime strings: they
# are shown in the UI and embedded in the prompt sent to the LLM.
FEATURES = {
    'C1Z001386': '1년내 카드 총 이용금액 (만원)',
    'C1M210000': '신용카드 건수',
    'C18210000': '체크카드 건수',
    'C1L120001': '카드 총 한도금액 (만원)',
    'C1L120004': '신용카드 개설 후 경과일수',
    'L10210000': '대출 건수',
    'L90210100': '은행업종 대출 건수',
    'L90210200': '카드업종 대출 건수',
    'L10210B00': '보험업종 대출 건수',
    'L10216000': '신용 대출 건수',
    'L10217000': '담보 대출 건수',
    'D10110000': '연체 건수',
    'D10133000': '총 연체 상환 금액 (만원)',
    'PERF1': '1년내 90일 이상 연체 여부',
}
|
| 42 |
+
|
| 43 |
+
# ==========================================
|
| 44 |
+
# 2. RAG & Chatbot 로직
|
| 45 |
+
# ==========================================
|
| 46 |
+
class CreditRAGConsultant:
    """Credit-consultation chatbot: FAISS retrieval followed by an Ollama LLM.

    If the FAISS index is missing or cannot be loaded, ``retriever`` is left
    as ``None`` and ``generate_response`` answers with a warning message
    instead of raising.
    """

    def __init__(self):
        self.embedding_model = HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL
        )
        self.vectorstore = None
        self.retriever = None
        if os.path.exists(FAISS_PATH):
            try:
                # NOTE(security): allow_dangerous_deserialization=True makes
                # load_local unpickle the stored index. Only point FAISS_PATH
                # at an index this project built itself.
                self.vectorstore = FAISS.load_local(
                    FAISS_PATH, self.embedding_model, allow_dangerous_deserialization=True
                )
                self.retriever = self.vectorstore.as_retriever(search_kwargs={"k": RETRIEVER_K})
            except Exception:
                # A broken index should degrade to "no retriever" (which
                # generate_response already reports) rather than crash startup.
                import traceback
                traceback.print_exc()
                self.vectorstore = None
                self.retriever = None

        # streaming=True: return tokens as soon as they are generated.
        self.llm = OllamaLLM(model=LLM_MODEL, streaming=True)

    def _build_prompt(self, context, user_message, analysis_report):
        """Merge retrieved context, the optional analysis report and the question."""
        if analysis_report:
            score_val = int(analysis_report["score"])
            # Render the feature values as readable "- label: value" lines.
            features_text = "\n".join(
                [f"- {FEATURES.get(k, k)}: {v}" for k, v in analysis_report['features'].items()]
            )
            report_section = (
                f"■ 현재 분석 결과\n"
                f"- 예측 신용 점수: {score_val}점\n"
                f"{features_text}\n"
            )
            query_text = (
                f"{report_section}\n"
                f"■ 질문: {user_message}"
            )
        else:
            # No analysis yet: plain consultation mode, handled as a normal case.
            query_text = f"■ 질문: {user_message} (현재 분석된 지표 데이터 없음)"

        return QA_PROMPT.format(context=context, query=query_text)

    def generate_response(self, history, user_message, analysis_report):
        """Stream a chat answer, reporting the R -> A -> G stages as it goes.

        Args:
            history: Chat history as a list of ``{"role", "content"}`` dicts
                (``None`` is treated as an empty history).
            user_message: The user's question. A falsy value ends the
                generator after yielding ``history`` unchanged.
            analysis_report: ``None`` or a dict with ``"score"`` and
                ``"features"`` keys; when present it is included in the prompt.

        Yields:
            ``(history, "")`` tuples. The last history entry is the assistant
            slot, rewritten in place with progress text, then the streamed
            answer, then the final answer plus a timing summary. Failures in
            any stage are written into that slot instead of being raised.
        """
        if not user_message:
            yield history, ""
            return

        history = history if history else []

        # Drop the "thinking..." placeholder appended by the UI callback.
        if history and isinstance(history[-1], dict) and history[-1].get("content") == "생각 중...":
            history = history[:-1]

        if not self.retriever:
            history = history + [{
                "role": "assistant",
                "content": "⚠️ 시스템: FAISS 인덱스를 로드할 수 없어 문서 검색이 불가합니다."
            }]
            yield history, ""
            return

        # Append the assistant slot that every later stage overwrites.
        history = history + [{"role": "assistant", "content": ""}]
        slot = history[-1]

        # perf_counter is monotonic, so elapsed times cannot go negative when
        # the wall clock is adjusted.
        t_start = time.perf_counter()

        # ── R (Retrieval): look up related documents ──────────────────
        slot["content"] = "📂 **[R] 관련 문서 검색 중...**"
        yield history, ""

        try:
            docs = self.retriever.invoke(user_message)
        except Exception as e:
            slot["content"] = f"⚠️ 문서 검색 실패: {e}"
            yield history, ""
            return

        t_retrieve = time.perf_counter() - t_start
        context = "\n\n---\n\n".join(
            [f"[문서 {i+1}]\n{doc.page_content}" for i, doc in enumerate(docs)]
        )
        retrieval_line = f"📂 **[R] 문서 검색 완료** `{t_retrieve:.1f}s | {len(docs)}개 문서`\n\n"

        # ── A (Augmentation): build the prompt ────────────────────────
        slot["content"] = (
            f"{retrieval_line}"
            f"🔗 **[A] 프롬프트 구성 중...**"
        )
        yield history, ""

        try:
            prompt_text = self._build_prompt(context, user_message, analysis_report)
        except Exception as e:
            # A malformed report or template error must not escape the
            # generator and leave the chat frozen on the progress message.
            import traceback
            traceback.print_exc()
            slot["content"] = f"상담 중 오류가 발생했습니다: {e}"
            yield history, ""
            return

        # Time spent between the end of retrieval and the finished prompt.
        t_augment = time.perf_counter() - t_start - t_retrieve

        # ── G (Generation): stream the LLM answer ─────────────────────
        progress_header = (
            f"{retrieval_line}"
            f"🔗 **[A] 프롬프트 구성 완료** `{t_augment:.2f}s`\n\n"
            f"💬 **[G] 응답 생성 중...**\n\n"
        )
        slot["content"] = progress_header
        yield history, ""

        t_gen_start = time.perf_counter()
        answer_buffer = ""

        try:
            for chunk in self.llm.stream(prompt_text):
                answer_buffer += chunk
                slot["content"] = f"{progress_header}{answer_buffer}"
                yield history, ""

            t_gen = time.perf_counter() - t_gen_start
            t_total = time.perf_counter() - t_start

            # Final message: drop the progress header, keep answer + timings.
            slot["content"] = (
                f"{answer_buffer}\n\n"
                f"---\n"
                f"⏱️ 검색 `{t_retrieve:.1f}s` → 구성 `{t_augment:.2f}s` "
                f"→ 생성 `{t_gen:.1f}s` | **총 `{t_total:.1f}s`**"
            )
            yield history, ""

        except Exception as e:
            import traceback
            traceback.print_exc()
            slot["content"] = f"상담 중 오류가 발생했습니다: {e}"
            yield history, ""
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
consultant = CreditRAGConsultant()

# ==========================================
# 3. Gradio Interface
# ==========================================
# Features that get an input widget. 'L10210000' is left out because
# handle_predict derives it as the sum of three per-sector loan counts.
UI_INPUT_KEYS = [k for k in FEATURE_ORDER if k in FEATURES and k != 'L10210000']

with gr.Blocks() as demo:
    analysis_report = gr.State(None)
    # Keeps the original message available after the textbox has been cleared.
    pending_msg = gr.State("")

    gr.Markdown("# 🛡️ KCB AI 신용 점수 분석 시스템")
    gr.Markdown(
        f"> 🤖 **LLM**: `{LLM_MODEL}` (Ollama 로컬) | "
        f"📚 **임베딩**: `{EMBEDDING_MODEL}` | **RAG**: FAISS 검색"
    )
    gr.Markdown("금융 지표를 입력하여 AI 예측 점수를 확인하고, 맞춤 상담을 받아보세요. *(분석 없이도 일반 신용 상담 가능)*")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📊 지표 입력")
            input_list = []
            for key in UI_INPUT_KEYS:
                label = FEATURES[key]
                if key == 'PERF1':
                    input_list.append(gr.Checkbox(label=f"{label} (90일 이상 연체)", value=False))
                else:
                    input_list.append(gr.Textbox(label=label, placeholder="0"))

            predict_btn = gr.Button("📈 점수 분석하기", variant="primary", size="lg")

        with gr.Column(scale=2):
            with gr.Group():
                gr.Markdown("### 📈 분석 결과 리포트")
                result_display = gr.Label(label="예측 신용 점수")
                result_info = gr.Markdown("버튼을 눌러 분석을 시작하세요.")

            gr.Markdown("### 💬 실시간 신용 상담 챗봇")
            chatbot = gr.Chatbot(
                label="상담 챗봇",
                height=500,
                layout="bubble",
                render_markdown=True,
            )

            with gr.Row():
                msg = gr.Textbox(
                    placeholder="신용 관리에 대해 궁금한 점을 질문해보세요... (예: 점수를 올리려면?)",
                    show_label=False,
                    scale=8,
                    container=False
                )
                submit_btn = gr.Button("전송 📤", variant="primary", scale=1)

            with gr.Row():
                clear_btn = gr.Button("🗑️ 대화 내역 초기화", variant="secondary", size="sm")

    def handle_predict(*args):
        """Validate the form values, run the predictor and build the report.

        Args:
            *args: Widget values in the same order as ``UI_INPUT_KEYS``
                (text for numeric fields, a boolean for ``PERF1``).

        Returns:
            ``(report, score, message)``. On success ``report`` is
            ``{"features": ..., "score": ...}`` and ``score`` is clamped to
            0..1000. On any failure the tuple is ``(None, 0.0, <error text>)``.
        """
        try:
            features_dict = {}
            for key, raw in zip(UI_INPUT_KEYS, args):
                if key == 'PERF1':
                    features_dict[key] = int(bool(raw))
                    continue

                val_str = str(raw).strip() if raw is not None else ""
                if not val_str:
                    val = 0.0
                else:
                    try:
                        val = float(val_str.replace(',', ''))
                    except ValueError:
                        return None, 0.0, f"⚠️ 오류: '{FEATURES[key]}' 항목에 올바른 숫자만 입력해주세요."
                    # float() also parses "nan" and "inf"; neither is a
                    # usable amount and nan slips past the `< 0` check below.
                    if not np.isfinite(val):
                        return None, 0.0, f"⚠️ 오류: '{FEATURES[key]}' 항목에 올바른 숫자만 입력해주세요."

                if val < 0:
                    return None, 0.0, f"⚠️ 오류: '{FEATURES[key]}' 항목에 0 이상의 숫자를 적어주세요."
                features_dict[key] = val

            # Total loan count is not asked for; it is the sum of the bank,
            # card and insurance sector loan counts.
            features_dict['L10210000'] = (
                features_dict.get('L90210100', 0.0) +
                features_dict.get('L90210200', 0.0) +
                features_dict.get('L10210B00', 0.0)
            )

            score = predictor.predict(features_dict)

            # isfinite is False for nan as well as +/-inf.
            if not np.isfinite(score):
                return None, 0.0, "⚠️ 모델 추론 중 데이터 오류가 발생했습니다."

            display_score = int(min(max(round(score), 0), 1000))
            report = {"features": features_dict, "score": float(display_score)}
            return report, display_score, f"✅ 분석 완료! 당신의 신용 점수는 **{display_score}점**입니다."
        except Exception as e:
            return None, 0.0, f"⚠️ 분석 에러: {str(e)}"

    def user_msg(user_message, history):
        """Append the user's message to the chat and clear the textbox.

        Returns ``(chatbot, msg, pending_msg)``: the updated history, an empty
        string to clear the textbox, and the original message so that
        ``generate_response`` can still read it. Empty or whitespace-only
        input leaves the history untouched and stores an empty pending
        message, which makes ``generate_response`` return immediately.
        """
        if not user_message or not user_message.strip():
            return history, "", ""
        return (history or []) + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": "생각 중..."}
        ], "", user_message

    predict_btn.click(
        handle_predict,
        inputs=input_list,
        outputs=[analysis_report, result_display, result_info]
    )

    # Enter in the textbox and the send button trigger the same two-step
    # chain: record the message first, then stream the answer, which reads
    # the original text back from pending_msg.
    for trigger in (msg.submit, submit_btn.click):
        trigger(
            user_msg, [msg, chatbot], [chatbot, msg, pending_msg], queue=False
        ).then(
            consultant.generate_response, [chatbot, pending_msg, analysis_report], [chatbot, msg]
        )

    clear_btn.click(lambda: [], None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()
|
dataset/report.html
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
evaluation/check.ipynb
DELETED
|
@@ -1,830 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": null,
|
| 6 |
-
"id": "e00459fc",
|
| 7 |
-
"metadata": {},
|
| 8 |
-
"outputs": [
|
| 9 |
-
{
|
| 10 |
-
"name": "stdout",
|
| 11 |
-
"output_type": "stream",
|
| 12 |
-
"text": [
|
| 13 |
-
"STDT int64\n",
|
| 14 |
-
"ID object\n",
|
| 15 |
-
"GENDER int64\n",
|
| 16 |
-
"AGE_BAND int64\n",
|
| 17 |
-
"C1Z001373 int64\n",
|
| 18 |
-
" ... \n",
|
| 19 |
-
"SCORE_6M int64\n",
|
| 20 |
-
"PERF1 int64\n",
|
| 21 |
-
"PERF2 int64\n",
|
| 22 |
-
"PERF3 int64\n",
|
| 23 |
-
"PERF4 int64\n",
|
| 24 |
-
"Length: 157, dtype: object\n",
|
| 25 |
-
"AL0C00005 87.861277\n",
|
| 26 |
-
"U81302010 0.007542\n",
|
| 27 |
-
"dtype: float64\n",
|
| 28 |
-
" STDT GENDER AGE_BAND C1Z001373 C1M2B4W03 \\\n",
|
| 29 |
-
"count 3129036.0 3.129036e+06 3.129036e+06 3.129036e+06 3.129036e+06 \n",
|
| 30 |
-
"mean 202212.0 1.502075e+00 4.270428e+00 7.055230e+03 4.249793e+03 \n",
|
| 31 |
-
"std 0.0 4.999958e-01 1.600465e+00 1.083864e+04 7.584188e+03 \n",
|
| 32 |
-
"min 202212.0 1.000000e+00 1.000000e+00 -7.665200e+04 -4.726000e+04 \n",
|
| 33 |
-
"25% 202212.0 1.000000e+00 3.000000e+00 1.177500e+02 0.000000e+00 \n",
|
| 34 |
-
"50% 202212.0 2.000000e+00 4.000000e+00 4.559000e+03 1.778000e+03 \n",
|
| 35 |
-
"75% 202212.0 2.000000e+00 5.000000e+00 1.043000e+04 6.421000e+03 \n",
|
| 36 |
-
"max 202212.0 2.000000e+00 9.000000e+00 2.006550e+05 1.669330e+05 \n",
|
| 37 |
-
"\n",
|
| 38 |
-
" C1M2B5W03 C1Z001386 C1M210000 C1M210001 C1M210003 \\\n",
|
| 39 |
-
"count 3.129036e+06 3.129036e+06 3.129036e+06 3.129036e+06 3.129036e+06 \n",
|
| 40 |
-
"mean 1.174364e+03 2.723652e+04 1.789889e+00 2.077317e-05 1.050164e-02 \n",
|
| 41 |
-
"std 2.006592e+03 4.488836e+04 1.687745e+00 4.695859e-03 1.128589e-01 \n",
|
| 42 |
-
"min -4.990000e+03 -1.741950e+05 0.000000e+00 0.000000e+00 0.000000e+00 \n",
|
| 43 |
-
"25% 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
|
| 44 |
-
"50% 0.000000e+00 1.696900e+04 1.000000e+00 0.000000e+00 0.000000e+00 \n",
|
| 45 |
-
"75% 2.169000e+03 4.126900e+04 3.000000e+00 0.000000e+00 0.000000e+00 \n",
|
| 46 |
-
"max 2.372700e+04 8.274750e+05 2.000000e+01 2.000000e+00 8.000000e+00 \n",
|
| 47 |
-
"\n",
|
| 48 |
-
" ... AL0C00005 AP0910001 AP0910002 AS120G001 SCORE \\\n",
|
| 49 |
-
"count ... 3.129036e+06 3129036.0 3129036.0 3.129036e+06 3.129036e+06 \n",
|
| 50 |
-
"mean ... -8.786115e+07 9.0 9.0 -3.904906e+00 8.176420e+02 \n",
|
| 51 |
-
"std ... 3.265802e+07 0.0 0.0 5.852996e+00 1.892052e+02 \n",
|
| 52 |
-
"min ... -1.000000e+08 9.0 9.0 -9.000000e+00 0.000000e+00 \n",
|
| 53 |
-
"25% ... -1.000000e+08 9.0 9.0 -9.000000e+00 7.250000e+02 \n",
|
| 54 |
-
"50% ... -1.000000e+08 9.0 9.0 -9.000000e+00 8.820000e+02 \n",
|
| 55 |
-
"75% ... -1.000000e+08 9.0 9.0 3.000000e+00 9.510000e+02 \n",
|
| 56 |
-
"max ... 5.570000e+03 9.0 9.0 5.000000e+00 1.000000e+03 \n",
|
| 57 |
-
"\n",
|
| 58 |
-
" SCORE_6M PERF1 PERF2 PERF3 PERF4 \n",
|
| 59 |
-
"count 3.129036e+06 3.129036e+06 3.129036e+06 3.129036e+06 3.129036e+06 \n",
|
| 60 |
-
"mean 8.175420e+02 3.623480e-03 4.139614e-03 3.796057e-03 3.486697e-03 \n",
|
| 61 |
-
"std 1.892047e+02 6.008620e-02 6.420653e-02 6.149511e-02 5.894524e-02 \n",
|
| 62 |
-
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
|
| 63 |
-
"25% 7.240000e+02 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
|
| 64 |
-
"50% 8.830000e+02 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
|
| 65 |
-
"75% 9.520000e+02 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
|
| 66 |
-
"max 1.000000e+03 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 \n",
|
| 67 |
-
"\n",
|
| 68 |
-
"[8 rows x 152 columns]\n"
|
| 69 |
-
]
|
| 70 |
-
}
|
| 71 |
-
],
|
| 72 |
-
"source": [
|
| 73 |
-
"\n",
|
| 74 |
-
"import pandas as pd\n",
|
| 75 |
-
"\n",
|
| 76 |
-
"df = pd.read_csv(\"../dataset/personal_cv.csv\")\n",
|
| 77 |
-
"\n",
|
| 78 |
-
"# 데이터 타입 확인\n",
|
| 79 |
-
"print(df.dtypes)\n",
|
| 80 |
-
"\n",
|
| 81 |
-
"# 결측값(-99999999) 비율 확인\n",
|
| 82 |
-
"missing = (df == -99999999).sum() / len(df) * 100\n",
|
| 83 |
-
"print(missing[missing > 0].sort_values(ascending=False))\n",
|
| 84 |
-
"\n",
|
| 85 |
-
"# 기본 통계\n",
|
| 86 |
-
"print(df.describe())"
|
| 87 |
-
]
|
| 88 |
-
},
|
| 89 |
-
{
|
| 90 |
-
"cell_type": "code",
|
| 91 |
-
"execution_count": 8,
|
| 92 |
-
"id": "66e6c43d",
|
| 93 |
-
"metadata": {},
|
| 94 |
-
"outputs": [
|
| 95 |
-
{
|
| 96 |
-
"name": "stdout",
|
| 97 |
-
"output_type": "stream",
|
| 98 |
-
"text": [
|
| 99 |
-
"0점 제거 후: 321,186행\n",
|
| 100 |
-
"\n",
|
| 101 |
-
"신용점수 구간 분포:\n",
|
| 102 |
-
"SCORE_BIN\n",
|
| 103 |
-
"고신용(900~1000) 173351\n",
|
| 104 |
-
"중고신용(700~900) 98714\n",
|
| 105 |
-
"중저신용(500~700) 39676\n",
|
| 106 |
-
"저신용(0~500) 9445\n",
|
| 107 |
-
"Name: count, dtype: int64\n"
|
| 108 |
-
]
|
| 109 |
-
},
|
| 110 |
-
{
|
| 111 |
-
"name": "stderr",
|
| 112 |
-
"output_type": "stream",
|
| 113 |
-
"text": [
|
| 114 |
-
"/var/folders/9d/6nccn4756nq7c7030tvhwnj00000gn/T/ipykernel_85303/903746687.py:37: FutureWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
|
| 115 |
-
" df_sampled = df_clean.groupby('SCORE_BIN', observed=True).apply(\n"
|
| 116 |
-
]
|
| 117 |
-
},
|
| 118 |
-
{
|
| 119 |
-
"name": "stdout",
|
| 120 |
-
"output_type": "stream",
|
| 121 |
-
"text": [
|
| 122 |
-
"\n",
|
| 123 |
-
"샘플링 후: 149,121행\n",
|
| 124 |
-
"SCORE_BIN\n",
|
| 125 |
-
"중고신용(700~900) 50000\n",
|
| 126 |
-
"고신용(900~1000) 50000\n",
|
| 127 |
-
"중저신용(500~700) 39676\n",
|
| 128 |
-
"저신용(0~500) 9445\n",
|
| 129 |
-
"Name: count, dtype: int64\n",
|
| 130 |
-
"\n",
|
| 131 |
-
"피처 수: 437개\n",
|
| 132 |
-
"타겟 분포:\n",
|
| 133 |
-
"count 149121.000000\n",
|
| 134 |
-
"mean 788.597917\n",
|
| 135 |
-
"std 170.132903\n",
|
| 136 |
-
"min 356.000000\n",
|
| 137 |
-
"25% 656.000000\n",
|
| 138 |
-
"50% 814.000000\n",
|
| 139 |
-
"75% 945.000000\n",
|
| 140 |
-
"max 1000.000000\n",
|
| 141 |
-
"Name: PYE_SC0000000, dtype: float64\n",
|
| 142 |
-
"\n",
|
| 143 |
-
"train: 119,296개 | test: 29,825개\n",
|
| 144 |
-
"\n",
|
| 145 |
-
"========================================\n",
|
| 146 |
-
"Random Forest\n",
|
| 147 |
-
"RMSE: 64.83 | MAE: 46.64 | R2: 0.8544\n",
|
| 148 |
-
"\n",
|
| 149 |
-
"========================================\n",
|
| 150 |
-
"XGBoost\n",
|
| 151 |
-
"RMSE: 61.69 | MAE: 45.22 | R2: 0.8681\n",
|
| 152 |
-
"[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.079039 seconds.\n",
|
| 153 |
-
"You can set `force_row_wise=true` to remove the overhead.\n",
|
| 154 |
-
"And if memory is not enough, you can set `force_col_wise=true`.\n",
|
| 155 |
-
"[LightGBM] [Info] Total Bins 35224\n",
|
| 156 |
-
"[LightGBM] [Info] Number of data points in the train set: 119296, number of used features: 367\n",
|
| 157 |
-
"[LightGBM] [Info] Start training from score 788.477032\n",
|
| 158 |
-
"\n",
|
| 159 |
-
"========================================\n",
|
| 160 |
-
"LightGBM\n",
|
| 161 |
-
"RMSE: 64.32 | MAE: 46.98 | R2: 0.8566\n",
|
| 162 |
-
"\n",
|
| 163 |
-
"최종 선정 모델: XGBoost (R2: 0.8681)\n",
|
| 164 |
-
"사용자 입력 가능 피처: 14개\n",
|
| 165 |
-
"['AGE', 'SEX', 'JB_TP', 'PYE_ICM', 'PYE_ICM_RT', 'TOT_ASST', 'HOUS_LN_BAL', 'CRDT_LN_BAL', 'CD_USE_AMT', 'OWN_HOUS_CNT', 'OWN_LIV_YN', 'PYE_FAM_CNT', 'PYE_CAR_OWN', 'DAR']\n",
|
| 166 |
-
"[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004249 seconds.\n",
|
| 167 |
-
"You can set `force_row_wise=true` to remove the overhead.\n",
|
| 168 |
-
"And if memory is not enough, you can set `force_col_wise=true`.\n",
|
| 169 |
-
"[LightGBM] [Info] Total Bins 1668\n",
|
| 170 |
-
"[LightGBM] [Info] Number of data points in the train set: 119296, number of used features: 14\n",
|
| 171 |
-
"[LightGBM] [Info] Start training from score 788.477032\n",
|
| 172 |
-
"\n",
|
| 173 |
-
"사용자 입력 피처만으로 재학습 결과\n",
|
| 174 |
-
"RMSE: 108.66 | MAE: 82.22 | R2: 0.5908\n"
|
| 175 |
-
]
|
| 176 |
-
}
|
| 177 |
-
],
|
| 178 |
-
"source": [
|
| 179 |
-
"import pandas as pd\n",
|
| 180 |
-
"import numpy as np\n",
|
| 181 |
-
"from sklearn.model_selection import train_test_split\n",
|
| 182 |
-
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n",
|
| 183 |
-
"from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error\n",
|
| 184 |
-
"import xgboost as xgb\n",
|
| 185 |
-
"import lightgbm as lgb\n",
|
| 186 |
-
"import matplotlib.pyplot as plt\n",
|
| 187 |
-
"\n",
|
| 188 |
-
"# ========================\n",
|
| 189 |
-
"# 1단계: 데이터 로드 & 결측치 50% 이상 컬럼 제거\n",
|
| 190 |
-
"# ========================\n",
|
| 191 |
-
"df = pd.read_csv('../dataset/telecom_cb.csv')\n",
|
| 192 |
-
"threshold = 0.5\n",
|
| 193 |
-
"df_clean = df[df.columns[df.isna().mean() < threshold]]\n",
|
| 194 |
-
"\n",
|
| 195 |
-
"# ========================\n",
|
| 196 |
-
"# 2단계: 타겟 이상값 제거 (0점 제거)\n",
|
| 197 |
-
"# ========================\n",
|
| 198 |
-
"df_clean = df_clean[df_clean['PYE_SC0000000'] > 0].reset_index(drop=True)\n",
|
| 199 |
-
"print(f\"0점 제거 후: {len(df_clean):,}행\")\n",
|
| 200 |
-
"\n",
|
| 201 |
-
"# ========================\n",
|
| 202 |
-
"# 3단계: 신용점수 구간별 층화 샘플링\n",
|
| 203 |
-
"# ========================\n",
|
| 204 |
-
"# 신용점수 구간 생성\n",
|
| 205 |
-
"df_clean['SCORE_BIN'] = pd.cut(\n",
|
| 206 |
-
" df_clean['PYE_SC0000000'],\n",
|
| 207 |
-
" bins=[0, 500, 700, 900, 1000],\n",
|
| 208 |
-
" labels=['저신용(0~500)', '중저신용(500~700)', '중고신용(700~900)', '고신용(900~1000)']\n",
|
| 209 |
-
")\n",
|
| 210 |
-
"\n",
|
| 211 |
-
"print(f\"\\n신용점수 구간 분포:\")\n",
|
| 212 |
-
"print(df_clean['SCORE_BIN'].value_counts())\n",
|
| 213 |
-
"\n",
|
| 214 |
-
"# 구간별 균등 샘플링 (각 구간 최대 3000개)\n",
|
| 215 |
-
"df_sampled = df_clean.groupby('SCORE_BIN', observed=True).apply(\n",
|
| 216 |
-
" lambda x: x.sample(n=min(len(x), 50000), random_state=42)\n",
|
| 217 |
-
").reset_index(drop=True)\n",
|
| 218 |
-
"\n",
|
| 219 |
-
"print(f\"\\n샘플링 후: {len(df_sampled):,}행\")\n",
|
| 220 |
-
"print(df_sampled['SCORE_BIN'].value_counts())\n",
|
| 221 |
-
"\n",
|
| 222 |
-
"# ========================\n",
|
| 223 |
-
"# 4단계: 피처 & 타겟 분리\n",
|
| 224 |
-
"# ========================\n",
|
| 225 |
-
"drop_cols = ['PYE_SC0000000', 'SCORE_BIN', 'PYE_MAX_DLQ_DAY',\n",
|
| 226 |
-
" 'CUST_ID', 'BASE_YM']\n",
|
| 227 |
-
"drop_cols = [c for c in drop_cols if c in df_sampled.columns]\n",
|
| 228 |
-
"\n",
|
| 229 |
-
"X = df_sampled.drop(columns=drop_cols)\n",
|
| 230 |
-
"y = df_sampled['PYE_SC0000000']\n",
|
| 231 |
-
"\n",
|
| 232 |
-
"X = X.select_dtypes(include=[np.number])\n",
|
| 233 |
-
"X = X.fillna(X.median())\n",
|
| 234 |
-
"\n",
|
| 235 |
-
"print(f\"\\n피처 수: {X.shape[1]}개\")\n",
|
| 236 |
-
"print(f\"타겟 분포:\\n{y.describe()}\")\n",
|
| 237 |
-
"\n",
|
| 238 |
-
"# ========================\n",
|
| 239 |
-
"# 5단계: train/test 분리\n",
|
| 240 |
-
"# ========================\n",
|
| 241 |
-
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
| 242 |
-
" X, y, test_size=0.2, random_state=42\n",
|
| 243 |
-
")\n",
|
| 244 |
-
"\n",
|
| 245 |
-
"print(f\"\\ntrain: {len(X_train):,}개 | test: {len(X_test):,}개\")\n",
|
| 246 |
-
"\n",
|
| 247 |
-
"# ========================\n",
|
| 248 |
-
"# 6단계: 모델 비교\n",
|
| 249 |
-
"# ========================\n",
|
| 250 |
-
"models = {\n",
|
| 251 |
-
" 'Random Forest' : RandomForestRegressor(n_estimators=100, random_state=42),\n",
|
| 252 |
-
" 'XGBoost' : xgb.XGBRegressor(random_state=42),\n",
|
| 253 |
-
" 'LightGBM' : lgb.LGBMRegressor(random_state=42)\n",
|
| 254 |
-
"}\n",
|
| 255 |
-
"\n",
|
| 256 |
-
"results = {}\n",
|
| 257 |
-
"\n",
|
| 258 |
-
"for name, model in models.items():\n",
|
| 259 |
-
" model.fit(X_train, y_train)\n",
|
| 260 |
-
" y_pred = model.predict(X_test)\n",
|
| 261 |
-
"\n",
|
| 262 |
-
" rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n",
|
| 263 |
-
" mae = mean_absolute_error(y_test, y_pred)\n",
|
| 264 |
-
" r2 = r2_score(y_test, y_pred)\n",
|
| 265 |
-
"\n",
|
| 266 |
-
" results[name] = {'model': model, 'rmse': rmse, 'mae': mae, 'r2': r2}\n",
|
| 267 |
-
"\n",
|
| 268 |
-
" print(f\"\\n{'='*40}\")\n",
|
| 269 |
-
" print(f\"{name}\")\n",
|
| 270 |
-
" print(f\"RMSE: {rmse:.2f} | MAE: {mae:.2f} | R2: {r2:.4f}\")\n",
|
| 271 |
-
"\n",
|
| 272 |
-
"# ========================\n",
|
| 273 |
-
"# 7단계: 최종 모델 선정\n",
|
| 274 |
-
"# ========================\n",
|
| 275 |
-
"best_name = max(results, key=lambda x: results[x]['r2'])\n",
|
| 276 |
-
"best_model = results[best_name]['model']\n",
|
| 277 |
-
"\n",
|
| 278 |
-
"print(f\"\\n최종 선정 모델: {best_name} (R2: {results[best_name]['r2']:.4f})\")\n",
|
| 279 |
-
"\n",
|
| 280 |
-
"# 사용자 입력 가능한 피처만 선택\n",
|
| 281 |
-
"user_input_cols = [\n",
|
| 282 |
-
" 'AGE', # 나이\n",
|
| 283 |
-
" 'SEX', # 성별\n",
|
| 284 |
-
" 'JB_TP', # 직업군\n",
|
| 285 |
-
" 'PYE_ICM', # 연소득\n",
|
| 286 |
-
" 'PYE_ICM_RT', # 연소득 백분위\n",
|
| 287 |
-
" 'TOT_ASST', # 총자산\n",
|
| 288 |
-
" 'HOUS_LN_BAL', # 주택담보대출잔액\n",
|
| 289 |
-
" 'CRDT_LN_BAL', # 신용대출잔액\n",
|
| 290 |
-
" 'CD_USE_AMT', # 카드소비금액\n",
|
| 291 |
-
" 'OWN_HOUS_CNT', # 주택보유건수\n",
|
| 292 |
-
" 'OWN_LIV_YN', # 자가거주여부\n",
|
| 293 |
-
" 'PYE_FAM_CNT', # 가구원수\n",
|
| 294 |
-
" 'PYE_CAR_OWN', # 차량보유여부\n",
|
| 295 |
-
" 'DAR', # 자산대비 부채비중\n",
|
| 296 |
-
"]\n",
|
| 297 |
-
"\n",
|
| 298 |
-
"# 실제 데이터에 있는 것만 필터\n",
|
| 299 |
-
"user_input_cols = [c for c in user_input_cols if c in df_sampled.columns]\n",
|
| 300 |
-
"\n",
|
| 301 |
-
"X_user = df_sampled[user_input_cols].copy()\n",
|
| 302 |
-
"X_user = X_user.select_dtypes(include=[np.number])\n",
|
| 303 |
-
"X_user = X_user.fillna(X_user.median())\n",
|
| 304 |
-
"y_user = df_sampled['PYE_SC0000000']\n",
|
| 305 |
-
"\n",
|
| 306 |
-
"print(f\"사용자 입력 가능 피처: {X_user.shape[1]}개\")\n",
|
| 307 |
-
"print(X_user.columns.tolist())\n",
|
| 308 |
-
"\n",
|
| 309 |
-
"# train/test 분리\n",
|
| 310 |
-
"X_tr, X_te, y_tr, y_te = train_test_split(\n",
|
| 311 |
-
" X_user, y_user, test_size=0.2, random_state=42\n",
|
| 312 |
-
")\n",
|
| 313 |
-
"\n",
|
| 314 |
-
"# LightGBM 재학습\n",
|
| 315 |
-
"model_user = lgb.LGBMRegressor(random_state=42)\n",
|
| 316 |
-
"model_user.fit(X_tr, y_tr)\n",
|
| 317 |
-
"y_pred = model_user.predict(X_te)\n",
|
| 318 |
-
"\n",
|
| 319 |
-
"rmse = np.sqrt(mean_squared_error(y_te, y_pred))\n",
|
| 320 |
-
"mae = mean_absolute_error(y_te, y_pred)\n",
|
| 321 |
-
"r2 = r2_score(y_te, y_pred)\n",
|
| 322 |
-
"\n",
|
| 323 |
-
"print(f\"\\n사용자 입력 피처만으로 재학습 결과\")\n",
|
| 324 |
-
"print(f\"RMSE: {rmse:.2f} | MAE: {mae:.2f} | R2: {r2:.4f}\")"
|
| 325 |
-
]
|
| 326 |
-
},
|
| 327 |
-
{
|
| 328 |
-
"cell_type": "code",
|
| 329 |
-
"execution_count": 3,
|
| 330 |
-
"id": "6fa48db3",
|
| 331 |
-
"metadata": {},
|
| 332 |
-
"outputs": [
|
| 333 |
-
{
|
| 334 |
-
"name": "stdout",
|
| 335 |
-
"output_type": "stream",
|
| 336 |
-
"text": [
|
| 337 |
-
"결측치 50% 이상 제거 후: 446개 컬럼\n",
|
| 338 |
-
"0점 제거 후: 321,186행\n"
|
| 339 |
-
]
|
| 340 |
-
},
|
| 341 |
-
{
|
| 342 |
-
"name": "stderr",
|
| 343 |
-
"output_type": "stream",
|
| 344 |
-
"text": [
|
| 345 |
-
"/var/folders/9d/6nccn4756nq7c7030tvhwnj00000gn/T/ipykernel_85303/1799362720.py:31: FutureWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
|
| 346 |
-
" df_sampled = df_clean.groupby('SCORE_BIN', observed=True).apply(\n"
|
| 347 |
-
]
|
| 348 |
-
},
|
| 349 |
-
{
|
| 350 |
-
"name": "stdout",
|
| 351 |
-
"output_type": "stream",
|
| 352 |
-
"text": [
|
| 353 |
-
"\n",
|
| 354 |
-
"샘플링 후: 12,000행\n",
|
| 355 |
-
"SCORE_BIN\n",
|
| 356 |
-
"저신용(0~500) 3000\n",
|
| 357 |
-
"중저신용(500~700) 3000\n",
|
| 358 |
-
"중고신용(700~900) 3000\n",
|
| 359 |
-
"고신용(900~1000) 3000\n",
|
| 360 |
-
"Name: count, dtype: int64\n",
|
| 361 |
-
"\n",
|
| 362 |
-
"전부 0인 컬럼: 58개 제거\n",
|
| 363 |
-
"분산 0인 컬럼: 2개 제거\n",
|
| 364 |
-
"최종 피처 수: 377개\n",
|
| 365 |
-
"\n",
|
| 366 |
-
"사용자 입력 가능 피처: 12개\n",
|
| 367 |
-
"['AGE', 'PYE_ICM', 'PYE_ICM_RT', 'TOT_ASST', 'HOUS_LN_BAL', 'CRDT_LN_BAL', 'CD_USE_AMT', 'OWN_HOUS_CNT', 'PYE_FAM_CNT', 'DAR', 'PYE_CAR_LN_AMT', 'CRDT_LN_BAL_NEW']\n",
|
| 368 |
-
"\n",
|
| 369 |
-
"=== 피처별 값 범위 ===\n",
|
| 370 |
-
"AGE: 고유값 11개 | 범위 20.0 ~ 70.0\n",
|
| 371 |
-
"PYE_ICM: 고유값 10570개 | 범위 11657.0 ~ 132303.0\n",
|
| 372 |
-
"PYE_ICM_RT: 고유값 98개 | 범위 1.0 ~ 98.0\n",
|
| 373 |
-
"TOT_ASST: 고유값 11821개 | 범위 0.0 ~ 2676924.0\n",
|
| 374 |
-
"HOUS_LN_BAL: 고유값 1504개 | 범위 0.0 ~ 216887.0\n",
|
| 375 |
-
"CRDT_LN_BAL: 고유값 4905개 | 범위 0.0 ~ 215771.0\n",
|
| 376 |
-
"CD_USE_AMT: 고유값 7283개 | 범위 0.0 ~ 73952.0\n",
|
| 377 |
-
"OWN_HOUS_CNT: 고유값 3개 | 범위 0.0 ~ 2.0\n",
|
| 378 |
-
"PYE_FAM_CNT: 고유값 7개 | 범위 0.0 ~ 6.0\n",
|
| 379 |
-
"DAR: 고유값 6028개 | 범위 0.0 ~ 5.0\n",
|
| 380 |
-
"PYE_CAR_LN_AMT: 고유값 1395개 | 범위 0.0 ~ 30825.0\n",
|
| 381 |
-
"CRDT_LN_BAL_NEW: 고유값 2개 | 범위 0.0 ~ 1.0\n",
|
| 382 |
-
"[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001097 seconds.\n",
|
| 383 |
-
"You can set `force_row_wise=true` to remove the overhead.\n",
|
| 384 |
-
"And if memory is not enough, you can set `force_col_wise=true`.\n",
|
| 385 |
-
"[LightGBM] [Info] Total Bins 1908\n",
|
| 386 |
-
"[LightGBM] [Info] Number of data points in the train set: 9600, number of used features: 12\n",
|
| 387 |
-
"[LightGBM] [Info] Start training from score 710.157187\n",
|
| 388 |
-
"\n",
|
| 389 |
-
"=== 최종 모델 성능 ===\n",
|
| 390 |
-
"RMSE: 126.39 | MAE: 96.84 | R2: 0.6189\n"
|
| 391 |
-
]
|
| 392 |
-
}
|
| 393 |
-
],
|
| 394 |
-
"source": [
|
| 395 |
-
"import pandas as pd\n",
|
| 396 |
-
"import numpy as np\n",
|
| 397 |
-
"from sklearn.model_selection import train_test_split\n",
|
| 398 |
-
"from sklearn.feature_selection import VarianceThreshold\n",
|
| 399 |
-
"import lightgbm as lgb\n",
|
| 400 |
-
"from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error\n",
|
| 401 |
-
"\n",
|
| 402 |
-
"# ========================\n",
|
| 403 |
-
"# 1단계: 데이터 로드 & 결측치 50% 이상 컬럼 제거\n",
|
| 404 |
-
"# ========================\n",
|
| 405 |
-
"df = pd.read_csv('../dataset/telecom_cb.csv')\n",
|
| 406 |
-
"threshold = 0.5\n",
|
| 407 |
-
"df_clean = df[df.columns[df.isna().mean() < threshold]]\n",
|
| 408 |
-
"print(f\"결측치 50% 이상 제거 후: {df_clean.shape[1]}개 컬럼\")\n",
|
| 409 |
-
"\n",
|
| 410 |
-
"# ========================\n",
|
| 411 |
-
"# 2단계: 타겟 이상값 제거 (0점 제거)\n",
|
| 412 |
-
"# ========================\n",
|
| 413 |
-
"df_clean = df_clean[df_clean['PYE_SC0000000'] > 0].reset_index(drop=True)\n",
|
| 414 |
-
"print(f\"0점 제거 후: {len(df_clean):,}행\")\n",
|
| 415 |
-
"\n",
|
| 416 |
-
"# ========================\n",
|
| 417 |
-
"# 3단계: 신용점수 구간별 층화 샘플링\n",
|
| 418 |
-
"# ========================\n",
|
| 419 |
-
"df_clean['SCORE_BIN'] = pd.cut(\n",
|
| 420 |
-
" df_clean['PYE_SC0000000'],\n",
|
| 421 |
-
" bins=[0, 500, 700, 900, 1000],\n",
|
| 422 |
-
" labels=['저신용(0~500)', '중저신용(500~700)', '중고신용(700~900)', '고신용(900~1000)']\n",
|
| 423 |
-
")\n",
|
| 424 |
-
"\n",
|
| 425 |
-
"df_sampled = df_clean.groupby('SCORE_BIN', observed=True).apply(\n",
|
| 426 |
-
" lambda x: x.sample(n=min(len(x), 3000), random_state=42)\n",
|
| 427 |
-
").reset_index(drop=True)\n",
|
| 428 |
-
"\n",
|
| 429 |
-
"print(f\"\\n샘플링 후: {len(df_sampled):,}행\")\n",
|
| 430 |
-
"print(df_sampled['SCORE_BIN'].value_counts())\n",
|
| 431 |
-
"\n",
|
| 432 |
-
"# ========================\n",
|
| 433 |
-
"# 4단계: 전체 컬럼 대상 분산 0 & 전부 0 제거\n",
|
| 434 |
-
"# ========================\n",
|
| 435 |
-
"drop_cols = ['PYE_SC0000000', 'SCORE_BIN', 'PYE_MAX_DLQ_DAY',\n",
|
| 436 |
-
" 'CUST_ID', 'BASE_YM']\n",
|
| 437 |
-
"drop_cols = [c for c in drop_cols if c in df_sampled.columns]\n",
|
| 438 |
-
"\n",
|
| 439 |
-
"# 수치형만 선택\n",
|
| 440 |
-
"X_all = df_sampled.drop(columns=drop_cols).select_dtypes(include=[np.number])\n",
|
| 441 |
-
"X_all = X_all.fillna(X_all.median())\n",
|
| 442 |
-
"\n",
|
| 443 |
-
"# 전부 0인 컬럼 제거\n",
|
| 444 |
-
"zero_cols = [col for col in X_all.columns if (X_all[col] == 0).all()]\n",
|
| 445 |
-
"print(f\"\\n전부 0인 컬럼: {len(zero_cols)}개 제거\")\n",
|
| 446 |
-
"X_all = X_all.drop(columns=zero_cols)\n",
|
| 447 |
-
"\n",
|
| 448 |
-
"# 분산 0인 컬럼 제거\n",
|
| 449 |
-
"selector = VarianceThreshold(threshold=0)\n",
|
| 450 |
-
"selector.fit(X_all)\n",
|
| 451 |
-
"low_var_cols = X_all.columns[~selector.get_support()].tolist()\n",
|
| 452 |
-
"print(f\"분산 0인 컬럼: {len(low_var_cols)}개 제거\")\n",
|
| 453 |
-
"X_all = X_all.drop(columns=low_var_cols)\n",
|
| 454 |
-
"\n",
|
| 455 |
-
"print(f\"최종 피처 수: {X_all.shape[1]}개\")\n",
|
| 456 |
-
"\n",
|
| 457 |
-
"# ========================\n",
|
| 458 |
-
"# 5단계: 사용자 입력 가능 피처만 선택\n",
|
| 459 |
-
"# ========================\n",
|
| 460 |
-
"user_input_cols = [\n",
|
| 461 |
-
" 'AGE', 'PYE_ICM', 'PYE_ICM_RT', 'TOT_ASST',\n",
|
| 462 |
-
" 'HOUS_LN_BAL', 'CRDT_LN_BAL', 'CD_USE_AMT',\n",
|
| 463 |
-
" 'OWN_HOUS_CNT', 'PYE_FAM_CNT', 'DAR',\n",
|
| 464 |
-
" 'PYE_CAR_LN_AMT', 'HOUS_LN_BAL_NEW', 'CRDT_LN_BAL_NEW'\n",
|
| 465 |
-
"]\n",
|
| 466 |
-
"\n",
|
| 467 |
-
"# 실제 X_all에 있는 것만 필터\n",
|
| 468 |
-
"user_input_cols = [c for c in user_input_cols if c in X_all.columns]\n",
|
| 469 |
-
"X_user = X_all[user_input_cols].copy()\n",
|
| 470 |
-
"\n",
|
| 471 |
-
"print(f\"\\n사용자 입력 가능 피처: {X_user.shape[1]}개\")\n",
|
| 472 |
-
"print(X_user.columns.tolist())\n",
|
| 473 |
-
"\n",
|
| 474 |
-
"# 피처별 값 범위 확인\n",
|
| 475 |
-
"print(\"\\n=== 피처별 값 범위 ===\")\n",
|
| 476 |
-
"for col in X_user.columns:\n",
|
| 477 |
-
" unique_count = X_user[col].nunique()\n",
|
| 478 |
-
" col_min = X_user[col].min()\n",
|
| 479 |
-
" col_max = X_user[col].max()\n",
|
| 480 |
-
" print(f\"{col}: 고유값 {unique_count}개 | 범위 {col_min:.1f} ~ {col_max:.1f}\")\n",
|
| 481 |
-
"\n",
|
| 482 |
-
"# ========================\n",
|
| 483 |
-
"# 6단계: train/test 분리 & 모델 학습\n",
|
| 484 |
-
"# ========================\n",
|
| 485 |
-
"y = df_sampled['PYE_SC0000000']\n",
|
| 486 |
-
"\n",
|
| 487 |
-
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
| 488 |
-
" X_user, y, test_size=0.2, random_state=42\n",
|
| 489 |
-
")\n",
|
| 490 |
-
"\n",
|
| 491 |
-
"model = lgb.LGBMRegressor(random_state=42)\n",
|
| 492 |
-
"model.fit(X_train, y_train)\n",
|
| 493 |
-
"y_pred = model.predict(X_test)\n",
|
| 494 |
-
"\n",
|
| 495 |
-
"rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n",
|
| 496 |
-
"mae = mean_absolute_error(y_test, y_pred)\n",
|
| 497 |
-
"r2 = r2_score(y_test, y_pred)\n",
|
| 498 |
-
"\n",
|
| 499 |
-
"print(f\"\\n=== 최종 모델 성능 ===\")\n",
|
| 500 |
-
"print(f\"RMSE: {rmse:.2f} | MAE: {mae:.2f} | R2: {r2:.4f}\")"
|
| 501 |
-
]
|
| 502 |
-
},
|
| 503 |
-
{
|
| 504 |
-
"cell_type": "code",
|
| 505 |
-
"execution_count": 4,
|
| 506 |
-
"id": "b803062f",
|
| 507 |
-
"metadata": {},
|
| 508 |
-
"outputs": [
|
| 509 |
-
{
|
| 510 |
-
"name": "stdout",
|
| 511 |
-
"output_type": "stream",
|
| 512 |
-
"text": [
|
| 513 |
-
"=== Permutation Importance ===\n",
|
| 514 |
-
" feature importance std\n",
|
| 515 |
-
"1 PYE_ICM 1.237040 0.042766\n",
|
| 516 |
-
"2 PYE_ICM_RT 0.726069 0.028536\n",
|
| 517 |
-
"5 CRDT_LN_BAL 0.272620 0.012101\n",
|
| 518 |
-
"6 CD_USE_AMT 0.088819 0.008274\n",
|
| 519 |
-
"0 AGE 0.051002 0.003838\n",
|
| 520 |
-
"7 OWN_HOUS_CNT 0.050263 0.004501\n",
|
| 521 |
-
"9 DAR 0.014202 0.001999\n",
|
| 522 |
-
"10 PYE_CAR_LN_AMT 0.011054 0.001366\n",
|
| 523 |
-
"3 TOT_ASST 0.009042 0.001913\n",
|
| 524 |
-
"8 PYE_FAM_CNT 0.005112 0.001194\n",
|
| 525 |
-
"4 HOUS_LN_BAL 0.001540 0.000333\n",
|
| 526 |
-
"11 CRDT_LN_BAL_NEW 0.001468 0.000416\n"
|
| 527 |
-
]
|
| 528 |
-
},
|
| 529 |
-
{
|
| 530 |
-
"name": "stderr",
|
| 531 |
-
"output_type": "stream",
|
| 532 |
-
"text": [
|
| 533 |
-
"/var/folders/9d/6nccn4756nq7c7030tvhwnj00000gn/T/ipykernel_85303/3641905308.py:33: UserWarning: Glyph 44048 (\\N{HANGUL SYLLABLE GAM}) missing from font(s) DejaVu Sans.\n",
|
| 534 |
-
" plt.tight_layout()\n",
|
| 535 |
-
"/var/folders/9d/6nccn4756nq7c7030tvhwnj00000gn/T/ipykernel_85303/3641905308.py:33: UserWarning: Glyph 49548 (\\N{HANGUL SYLLABLE SO}) missing from font(s) DejaVu Sans.\n",
|
| 536 |
-
" plt.tight_layout()\n",
|
| 537 |
-
"/var/folders/9d/6nccn4756nq7c7030tvhwnj00000gn/T/ipykernel_85303/3641905308.py:33: UserWarning: Glyph 47049 (\\N{HANGUL SYLLABLE RYANG}) missing from font(s) DejaVu Sans.\n",
|
| 538 |
-
" plt.tight_layout()\n",
|
| 539 |
-
"/var/folders/9d/6nccn4756nq7c7030tvhwnj00000gn/T/ipykernel_85303/3641905308.py:34: UserWarning: Glyph 44048 (\\N{HANGUL SYLLABLE GAM}) missing from font(s) DejaVu Sans.\n",
|
| 540 |
-
" plt.savefig('permutation_importance.png', dpi=150)\n",
|
| 541 |
-
"/var/folders/9d/6nccn4756nq7c7030tvhwnj00000gn/T/ipykernel_85303/3641905308.py:34: UserWarning: Glyph 49548 (\\N{HANGUL SYLLABLE SO}) missing from font(s) DejaVu Sans.\n",
|
| 542 |
-
" plt.savefig('permutation_importance.png', dpi=150)\n",
|
| 543 |
-
"/var/folders/9d/6nccn4756nq7c7030tvhwnj00000gn/T/ipykernel_85303/3641905308.py:34: UserWarning: Glyph 47049 (\\N{HANGUL SYLLABLE RYANG}) missing from font(s) DejaVu Sans.\n",
|
| 544 |
-
" plt.savefig('permutation_importance.png', dpi=150)\n",
|
| 545 |
-
"/Users/yuje/aiProjects/llm_prj/.venv/lib/python3.10/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 44048 (\\N{HANGUL SYLLABLE GAM}) missing from font(s) DejaVu Sans.\n",
|
| 546 |
-
" fig.canvas.print_figure(bytes_io, **kw)\n",
|
| 547 |
-
"/Users/yuje/aiProjects/llm_prj/.venv/lib/python3.10/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 49548 (\\N{HANGUL SYLLABLE SO}) missing from font(s) DejaVu Sans.\n",
|
| 548 |
-
" fig.canvas.print_figure(bytes_io, **kw)\n",
|
| 549 |
-
"/Users/yuje/aiProjects/llm_prj/.venv/lib/python3.10/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 47049 (\\N{HANGUL SYLLABLE RYANG}) missing from font(s) DejaVu Sans.\n",
|
| 550 |
-
" fig.canvas.print_figure(bytes_io, **kw)\n"
|
| 551 |
-
]
|
| 552 |
-
},
|
| 553 |
-
{
|
| 554 |
-
"data": {
|
| 555 |
-
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAJOCAYAAACqS2TfAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAaBBJREFUeJzt3Qd4FOX6//87EEIIEHqX3qUJqAiIQACDBhS+dARBEKWDoDRBelWKIiAqRTQ0ERARiVRpIiK9o0hRQem9s//rfn7/3bObbCo7G7J5v65rTrKzs7NTcs7hM/dT/Gw2m00AAAAAAIDHpfD8LgEAAAAAAKEbAAAAAAALUekGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAJAojh8/Ln5+fjJ79mzuAADAZxG6AQDwEA2PGiLtS2BgoBQrVky6du0q//77b5K8zgcOHJAhQ4aYgJxQc+fOlUmTJsmjpG3btpIuXTpJqm7cuGHuy/r16xP7UAAAsfCPbQMAABA/w4YNk4IFC8qtW7dk06ZNMm3aNFmxYoXs27dPgoKCklzoHjp0qNSoUUMKFCiQ4NCt596zZ0+X9fnz55ebN29KqlSpPHS0yYeGbr0vSu8NAODRRegGAMDDXnjhBXnyySfN76+//rpkyZJFJkyYIN9++620aNHiocNWUgvu0bG3BkDcPXjwQO7cucMlA4AkhOblAABYLCQkxPz8888/Heu++uorqVixoqRJk0YyZ84szZs3l1OnTrl8TiuYpUuXlt9++02ee+45E7YHDBjg6Av9wQcfyJQpU6RQoULmveeff97sw2azyfDhw+Wxxx4z+3/55ZflwoULLvvWz2vz5Mi0mq1Nr+3N5Zs0aWJ+r1mzpqPZvL1Jsz5ECAsLk9y5c0vq1KmlcOHC5nvv37/vcg7ff/+9nDhxwvF5e8U8uj7da9eulWrVqknatGklY8aM5vgPHjzoso0eu372999/N8er22XIkEFee+0182AiIfS46tWrZ85PH5rotStTpozjfBcvXmxe64MCvXc7d+5022T92LFjEhoaao5fr422fNB74uz69evSu3dvyZs3r7l2xYsXN/cz8nZ6jto9ITw8XEqVKmW2/eSTTyRbtmzmfa1226+r/X7u2bPHHIv+Xeix5syZU9q1ayfnz59/qGuof7NPP/20+VvLlCmT+Zv88ccfXbb54YcfHPcuffr05u9j//79CbofAOArqHQDAGCxP/74w/zUircaOXKkDBo0SJo2bWoq4WfPnpXJkyebEKNBTsOPnQYlrZxrKG/VqpXkyJHD8Z4GMa16duvWzYTqcePGmX1qyNeg2LdvXxOodN9vv/22zJw5M17HrcfTvXt3+eijj0zYL1mypFlv/6lhWUNmr169zE8Ny++9955cuXJF3n//fbPNu+++K5cvX5a//vpLJk6caNbF1Jd69erV5nw1MGoo1ObnevxVq1aVHTt2RGniruerTflHjx5t3v/8888le/bsMnbsWEkIvV4tW7aUN99801xvDcL169c3QVevQefOnc12+n363YcPH5YUKf5Xw9AHDnXr1pVnnnnG3I+VK1fK4MGD5d69eyZ8Kw3WL730kqxbt07at28vTzzxhERERMg777wjf//9t+M62el1XbhwoQnfWbNmlXLlypkuC506dZKGDRvK//3f/5ntypYta36uWrXKBH8Nzxq4NfR++umn5ufWrVtN0I7vNdRwr/ejSpUq5jwCAgLkl19+McemD3vUl19+KW3atDEPHPSzGtz1OJ999lnzd53Q7gkAkOTZAACAR8yaNUvLlLbVq1fbzp49azt16pRt/vz5tixZstjSpElj++uvv2zHjx+3pUyZ0jZy5EiXz+7du9fm7+/vsr569epmf5988onLtn/++adZny1bNtulS5cc6/v372/WlytXznb37l3H+hYtWt
gCAgJst27dcqzT7QYPHhzlHPLnz29r06aN4/XXX39ttl23bl2UbW/cuBFl3ZtvvmkLCgpy+a6wsDCz38js56HXze6JJ56wZc+e3Xb+/HnHut27d9tSpEhhe/XVVx3r9Nj1s+3atXPZZ8OGDc31jo2eY9q0aaOcu+5zy5YtjnURERFmnd6/EydOONZPnz49ynXRfeq6bt26OdY9ePDAnL9ef/2bUEuXLjXbjRgxwuX7GzdubPPz87P9/vvvjnW6nZ77/v37XbbVfUV3D93dl3nz5pntN2zYEO9rePToUXMMuv7+/fsu2+r5qatXr9oyZsxo69Chg8v7Z86csWXIkCHKegBITmheDgCAh9WuXds0/9Wmw1qh1srukiVLJE+ePKaJsvbL1eriuXPnHItWJIsWLWqqn860ObFWLN3Rpt/aHNiuUqVK5qdWaP39/V3Wa0Vcq6iepM2v7a5evWrOQ5sWa4Xz0KFD8d7f6dOnZdeuXaapsza5t9MKbp06dcxgdJF17NjR5bV+v7YO0Gp7Qjz++ONSuXLlKNdUWw/ky5cvynqtKEemFenIzcP1+msVX+l5pEyZ0rQicKbNzTVnaxNtZ9WrVzfHlZD7ooP56X3RyrvSSnZ8r+HSpUvN36y2YnCu6tvPz15dv3TpkhmzwPnvWs9Tr1Xkv2sASE5oXg4AgIdpP2udKkyDrzYH1/669rBy9OhRE6w0YLsTeSRvDeralNcd5xCo7AFcw7679RcvXhRP0ubKAwcONE2MI4dcbVIeX9rvW+n1ikybtGsTbO0Lrf2Fo7sG2tfYfq7BwcHxPoaHvaZ6n7VpvDP9W1D2adf0PLWvt/Z5jnyO9vedadPv+NCuBtocfP78+fLff//Fel9iu4baPULPK6bgr3/XzuMXRJaQewEAvoLQDQCAh+lgU/bRyyPTiqFWB7WaqVXAyCL3d3auWkbm7vMxrY88SJc7zoOgxUSrmlqB1TClfXx1EDUdtEsrqdqXXM/TGx7mXOOzP09/T3zE9Dfgjrai2LJli+kjrv3F9W9K74f2NXd3Xzxxbvb9ar9ubbURmXPLCwBIbvhfQAAAvEjDqYYZrV7aK6CJQauZGpydaRNobeLtLPKgW3Y6UJs2Qdbm8jrgmp3zCO2x7SMynbdb6eBkkWlzdR1EzLnK/SjS8KlNzp3v7ZEjR8xP+0Biep7a1Fyb5DtXu+1N8u3XISbRXVOtTq9Zs8ZUurU5eORKdEL/ZvW8dM52DfHRbaN0ADbtXgEA+B/6dAMA4EU60rRWFjUURa4k6uvI0zpZRUPShg0bXNbpCNeRK932kBs5oNuro87noKF96tSpUb5L9xGX5ua5cuUyoe6LL75w+b59+/aZqalefPFFSQo+/vhjx+96ffS1dhuoVauWWafnodfZeTulo5ZrmNbR22Njn6s9LvdFTZo0KcHn06BBA9O8XFs0RK6U279HRyzXVg+jRo2Su3fvRtmHjtAPAMkVlW4AALxIw+6IESOkf//+po+vBhqtdmqFWAdbe+ONN8z0XlbTqcp0AK1GjRqZQcp2795t+kxrNdmZhmANcjoFlAZnHdhN++3q1FFaLdcponRAMA2L2rTYXZNkndN6wYIFZmqxp556yjR31mm43NGpxjR06mBmOp2Wfcow7UPtbl7xR402sddpwvS66ABi2o1A5ynX6cbsc2vrueu85zqdmv4N6BRg+lBB5z3v2bOno2ocW5Nz7WOt11Wr6jrwnM7prou2PNDpyjT86pgAum93LRDiqkiRIuZYdQ52HWRNHxzp38Gvv/5q+qbrVGMauHV6sNatW0uFChXMAIJ6vidPnjTnr1O+RX7IAADJBZVuAAC8rF+/fvLNN9+Y6qFWvDVkL1u2zMx3rPM3e0OHDh1M32utduuo2RrKdATqyM23tX+uzlGtA3JpCNbRqbWZsc45vnz5clOd1sHUdD5rDe8a9iLTua117utZs2aZnzqveHS0abKGVt2/No/W/erI25s3b473gG
KJQR9Q6PGfOXPG9KnWYKrzdGtgtdP7rvdbA7ZeQ/2p11QfOEyYMCHO36XzaWuofuutt8x9WbRokVk/d+5cU3nWAf304Y5W2SOPiB5fWuXWed71IYgGcL03OuCbvXqv9N5q03Y9Jj2XHj16mMHc9MFNdCPwA0By4KfzhiX2QQAAACR1OtWZBt9r164l9qEAAB4hVLoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAh9ugEAAAAAsAiVbgAAAAAALELoBgAAAADAIv5W7Rhw58GDB/LPP/9I+vTpxc/Pj4sEAAAAIEnS2bevXr0quXPnlhQpoq9nE7rhVRq48+bNy1UHAAAA4BNOnToljz32WLTvE7rhVVrhtv9hBgcHc/UBAAAAJElXrlwxBUV7xokOoRteZW9SroGb0A0AAAAgqYut2ywDqQEAAAAAYBFCNwAAAAAAFiF0AwAAAABgEUI3AAAAAAAWIXQDAAAAAGARQjcAAAAAABYhdAMAAAAAYBFCNwAAAAAAFiF0AwAAAABgEUI3AAAAAAAWIXQDAAAAAGARQjcAAAAAABYhdAMAAAAAYBFCNwAAAAAAFiF0AwAAAABgEUI3AAAAAAAWIXQDAAAAAGARQjcAAAAAABYhdAMAAAAAYBFCNwAAAAAAFvG3asdATBqOjRD/wCAuEgAAAJBERQwKS+xDSBKodAMAAAAAYBFCNwAAAAAAFiF0AwAAAABgEUI3AAAAAAAWIXQDAAAAAGARQjcAAAAAABYhdAMAAAAAYBFCNwAAAAAAFiF0AwAAAABgEUI3AAAAAAAWIXR7Udu2bcXPz88sAQEBUqRIERk2bJi0b99eypQpI3fu3HHZfsWKFWa7HTt2yPHjxx2fjbxs3bo11u+ePXu2ZMyY0WWdft+4ceOkXLlyEhQUJFmzZpWqVavKrFmz5O7duy7H3LFjxyj77NKli3lPtwEAAAAAREXo9rK6devK6dOn5ejRo9K7d28ZMmSI5MuXT65evSqDBw92bHfp0iXp0KGDDBo0SCpUqOBYv3r1avN556VixYrxPg4N3KGhoTJmzBh54403ZMuWLbJt2zYTpCdPniz79+93bJs3b16ZP3++3Lx507Hu1q1bMnfuXHPsAAAAAAD3/KNZD4ukTp1acubMaX7v1KmTLFmyRFauXGmqyxqCGzRoIJUqVZKePXtKnjx5pH///i6fz5Ili+PzD2PSpEmyYcMG2b59u5QvX96xvlChQtKkSROXqruG/j/++EMWL14sr7zyilmnv2vgLliw4EMfCwAAAAD4KkJ3IkuTJo2cP39eatasKZ07d5Y2bdrI8OHDZeHChaZZub+/NbcoPDxcateu7RK47VKlSmUWZ+3atTMPBuyhe+bMmfLaa6/J+vXrLTk+AAAAAPF3784tr12269eve+V70qZNK0kZoTuR2Gw2WbNmjUREREi3bt3MutGjR5uqd/PmzWX8+PFSokSJKJ+rUqWKpEjh2ivg2rVr8f5+bd5eo0aNOG/fqlUrU3U/ceKEeb1582bT5Dy20H379m2z2F25ciXexwoAAAAgbtaOaOy1S5VuhPeyU1JG6Pay5cuXS7p06cxAZQ8ePJCWLVuaft32qvfbb78tb731lvTo0cPt5xcsWCAlS5b0+h9utmzZJCwszAzIpp/V33Xgtdjog4ShQ4c+xJECAAAAQNJF6PYybUY+bdo0Myp57ty5ozQf19cpU6Y0o4K7o4Oa6ajnD6tYsWJy6NCheH1Gm5h37drV/D5lypQ4fUar47169XKpdOs5AAAAAPC8kIGLvHZZl/Wr67XvSsoI3V6m/RE8EZofllbYBwwYIDt37ozSr1ur8DqQWuS+Ezryuq7XBwI66FtcB47TBQAAAID1/AMCvXaZk3pfa29hyrAkRgddO3PmjMui03fFl46OrnNy16pVy1Std+/eLceOHTMDuD3zzDOmz3dkWoE/ePCgHDhwwPwOAAAAAIgZle4kRkccj2zevHlm8LX40Orzql
WrZOLEiTJ9+nTTlzwoKMj0F+/evbuULl3a7eeCg4MTfOwAAAAAkNz42ZL6UHBIUrRPd4YMGSRkwELxDwxK7MMBAAAAkEARg8KS9bW78v9nm8uXL8dYnKR5OQAAAAAAFiF0+4hSpUqZqcjcLeHh4Yl9eAAAAACQLNGn20esWLHCjDruTo4cObx+PAAAAAAAQrfPyJ8/f2IfAgAAAAAgEpqXAwAAAABgEUI3AAAAAAAWIXQDAAAAAGARQjcAAAAAABYhdAMAAAAAYBGmDEOiWNI3VIKDg7n6AAAAAHwalW4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACzCPN1IFA3HRoh/YBBXHwDg0yIGhSX2IQAAEhmVbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4nbdu2FT8/P7MEBARIkSJFZNiwYdK+fXspU6aM3Llzx+XirVixwmy3Y8cOOX78uOOzkZetW7fGeiNmz54tGTNmdFmn3zdu3DgpV66cBAUFSdasWaVq1aoya9YsuXv3rssxd+zYMco+u3TpYt7TbeJ7/qlSpZKCBQtKnz595NatW+b4ojs/+6LXAAAAAADwP4TuSOrWrSunT5+Wo0ePSu/evWXIkCGSL18+uXr1qgwePNix3aVLl6RDhw4yaNAgqVChgmP96tWrzeedl4oVK0p8aeAODQ2VMWPGyBtvvCFbtmyRbdu2mSA9efJk2b9/v2PbvHnzyvz58+XmzZuOdRqU586da449Ied/7NgxmThxokyfPt2cd7NmzVzOqXLlyub8ndfpcQAAAAAA/sff6XeISOrUqSVnzpzmWnTq1EmWLFkiK1euNNVlDcENGjSQSpUqSc+ePSVPnjzSv39/l+uWJUsWx+cfxqRJk2TDhg2yfft2KV++vGN9oUKFpEmTJi5Vdw39f/zxhyxevFheeeUVs05/18Ct1eqEnr+G6Nq1a8uqVatk7NixkiZNGsd2WuHX6rsnzhUAAAAAfBWhOxYaNM+fPy81a9aUzp07S5s2bWT48OGycOFC06zc39+aSxgeHm4Cr3PgttOm37o4a9eunXkwYA/dM2fOlNdee03Wr1+f4GPYt2+fqbDnz58/wfsAACApunfnlkf2c/369YfeR9q0aT1yLACAxEHojobNZpM1a9ZIRESEdOvWzawbPXq0qXo3b95cxo8fLyVKlIjyuSpVqkiKFK6t9q9duxbvG6PN22vUqBHn7Vu1amWq7idOnDCvN2/ebJqcxzd0L1++XNKlSyf37t2T27dvm3P5+OOPJaF0H7rYXblyJcH7AgDAW9aOaOyR/aQb4Zl/kwAAki5CdzShUwcqe/DggbRs2dL067ZXvd9++2156623pEePHm4v6IIFC6RkyZJe/z/YbNmySVhYmBnwTD+rv+vAa/GlFf1p06aZJ/Pap1sr+Y0aNZKE0gcVQ4cOTfDnAQAAACApI3RHEzq1z3Lu3LmjNB/X1ylTpjSjdbuj/aB11POHVaxYMTl06FC8PqNNzLt27Wp+nzJlSoKbsNmPX5uo68jpM2bMMCO4J4RW33v16uVS6WbANQDAoy5k4CKP7GdZv7oe2Q8AIOkidMcQOhOTVtgHDBggO3fujNKvW6vwOpBa5D5eOvK4rtcHAjro28PSpuV6DBqa9XicB1KLz8BsugAAkJT4BwR6ZD/0xwYAMGWYh+mga2fOnHFZdPqu+NLR0XVO7lq1apmq9e7du800XjqA2zPPPGP6fEemFfiDBw/KgQMHzO+eoCOl674SWjkHAAAAgOSM0O1hOuJ4rly5XJalS5fGez9aHdapuvr06WPmytag/dRTT8lHH30k3bt3l9KlS7v9XHBwsFk8RZvTa5P1cePGeWQEVgAAAABITvxsDI
kJL9I+3RkyZJCQAQvFPzCIaw8A8GkRg8IS+xAAABZnm8uXL8dY+KTSDQAAAACARQjdXlKqVCkzFZm7JTw83PLvP3nyZLTfr4u+DwAAAADwLEYv95IVK1aYUcfdyZEjh+Xfr9Of7dq1K8b3AQAAAACeRej2kvz580ti0gHRHoWp0AAAAAAgOaF5OQAAAAAAFiF0AwAAAABgEUI3AAAAAAAWIXQDAAAAAGARQjcAAAAAABZh9HIkiiV9QyU4OJirDwAAAMCnUekGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAizNONRNFwbIT4BwZx9fFIiRgUltiHAAAAAB9DpRsAAAAAAIsQugEAAAAAsAihGwAAAAAAixC6AQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAihGwAAAAAAixC6AQAAAACwiE+G7jNnzki3bt2kUKFCkjp1asmbN6/Ur19f1qxZY94vUKCA+Pn5mSUoKEjKlCkjn3/+ucs+1q9f79gmRYoUkiFDBilfvrz06dNHTp8+7djOeV/ulrZt28Z6vLrd0qVL3b5nP45SpUrJ/fv3Xd7LmDGjzJ49O07XxPk4U6ZMKblz55b27dvLxYsX3W5fokQJc+30WkZWo0YN6dmzZ5y+FwAAAACSM58L3cePH5eKFSvK2rVr5f3335e9e/fKypUrpWbNmtKlSxfHdsOGDTPhed++fdKqVSvp0KGD/PDDD1H2d/jwYfnnn3/k119/lb59+8rq1auldOnSZr9K1+t+dPnmm28cn7Gv+/DDDz1yXseOHZM5c+Y81D7s53zy5EkJDw+XDRs2SPfu3aNst2nTJrl586Y0btxYvvjii4f6TgAAAABIznwudHfu3NlUc7dt2yaNGjWSYsWKmSpxr169ZOvWrY7t0qdPLzlz5jTVcA3TmTNnllWrVkXZX/bs2c12up/mzZvL5s2bJVu2bNKpUyfzvv6u7+ui+3D+jC5aIfcErdwPHjxYbt++neB92M85T5485iFEmzZtZMeOHVG2mzFjhrRs2VJat24tM2fOfMgjBwAAAIDky6dC94ULF0xVWyvaadOmjfK+NseO7MGDB6ZCrc2sAwICYv2ONGnSSMeOHU34/u+//8RbtDn3vXv3ZPLkyR7Z399//y3fffedVKpUyWX91atX5euvvzbV/zp16sjly5dl48aNHvlO4FFw786taJfr16/HuAAAAADx5S8+5PfffxebzWb6I8dGq9sDBw40lWMNs1qlfv311+P0Pfb9a1N2rWp7g/Y910r3gAEDTFP4hFTQ7eesfcNv3bplAveECRNctpk/f74ULVrUtA5QWt3Xyne1atUSdNx6fZ2r81euXEnQfgBPWTuicbTvpRsR82f1f18AAACA+PCpSnd8/kH8zjvvyK5du0zfbw2fEydOlCJFisTre7QZuzfpwGdZsmSRsWPHJujz9nPes2ePY1C5sLAwlwHatDm5Vrnt9HetfGsFPCFGjx5tHhDYFx3UDgAAAACSC5+qdGuFVoPwoUOHYt02a9asJmTroqFSRzB/8skn5fHHH4/1swcPHnSMCO5N/v7+MnLkSDMieteuXeP9efs526/VpEmTpHLlyrJu3TqpXbu2HDhwwPR71/7wWhW301CuFXCtsMdX//79TX9650o3wRuJKWTgomjfW9avrlePBQAAAL7Ppyrd2kQ8NDRUpkyZ4rb/5aVLl9x+TkNgs2bNTECMjY7q/emnn8pzzz1nBlHztiZNmpim30OHDn3ofenUYfZzUtqMXM9r9+7dpiJuXzQ063sJodOOBQcHuyxAYvIPCIx20bEgYloAAACAZF3pVhq4q1atKk8//bSZIqts2bKmz7aOTD5t2jRHlTqyHj16mKnAtm/fbiredjpYmvZ/1ubVv/32m4wbN07OnTsnixcv9uhx//nnnybgOtNqtDtjxo
wxDxfiS89B593W5vGnTp0yc47rg4MqVarI3bt35csvvzTXTK+DM+3rrn2/9+/f7+jrffbs2SjHmytXLsmRI0e8jwsAAAAAfJVPVbqVTgGm02DplFi9e/c2AVJH4dY+zBq6o6PNyp9//nl57733XNYXL15ccufObeb+1rCrzbB1bu+4NEOPD60mly9f3mXZuXOn221DQkLMog8T4kPPTYOxnk+9evVM5e7HH380/cSXLVsm58+fl4YNG0b5XMmSJc3iXO2eO3dulOP97LPPEnDmAAAAAOC7/GwMxwsv0j7dOqBayICF4h8YxLXHIyViUFhiHwIAAACSWLbRaZZj6kbrc5VuAAAAAAAeFYRui40aNUrSpUvndnnhhRc88h3h4eHRfoe9DzYAAAAAwPt8biC1R03Hjh2ladOmbt9LkyaNR77jpZdeMnONu5MqVSqPfAcAAAAAIP4I3V6YxkwXK6VPn94sAAAAAIBHC83LAQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAijlyNRLOkbKsHBwVx9AAAAAD6NSjcAAAAAABYhdAMAAAAAYBFCNwAAAAAAFiF0AwAAAABgEUI3AAAAAAAWIXQDAAAAAGARQjcAAAAAABZhnm4kioZjI8Q/MMjnrn7EoLDEPgQAAAAAjxAq3QAAAAAAWITQDQAAAACARQjdAAAAAABYhNANAAAAAIBFCN0AAAAAAFiE0A0AAAAAgEUI3QAAAAAAWITQDQAAAACARQjdAAAAAABYhNANAAAAAIBFkk3oPnPmjHTr1k0KFSokqVOnlrx580r9+vVlzZo15v0CBQqIn5+fWdKkSWNeN23aVNauXRvn71i/fr35/KVLl6K8p/ubNGmS4/VPP/0kISEhkjlzZgkKCpKiRYtKmzZt5M6dOy77crfoucTVX3/9JQEBAVK6dGm379v3uXXrVpf1t2/flixZspj39Fhmz54d7fHYl+PHj8f5uAAAAAAgOUgWoVvDYMWKFU2Afv/992Xv3r2ycuVKqVmzpnTp0sWx3bBhw+T06dNy+PBhmTNnjmTMmFFq164tI0eO9OjxHDhwQOrWrStPPvmkbNiwwRzP5MmTTTi+f/++y7Z6LHpMzkv27Nnj/F0alvXhwZUrV+SXX35xu40+gJg1a5bLuiVLlki6dOkcr5s1a+ZyDJUrV5YOHTq4rNP9AAAAAAD+x1+Sgc6dO5tK7LZt2yRt2rSO9aVKlZJ27do5XqdPn15y5sxpfs+XL58899xzkitXLnnvvfekcePGUrx4cY8cz48//mi+Z9y4cY51hQsXNkE8Mg3YGv4TwmazmTA9depUeeyxx2TGjBlSqVKlKNtphf2jjz4ylXit8quZM2ea9cOHDzevdb39PaUPCLRCb79eydG9O7eirLt+/XqUdc5/cwAAAACSF5+vdF+4cMFUtbWi7S78xBZoe/ToYcLrt99+67Fj0qCqlWGtcltp3bp1cuPGDVOtb9WqlcyfP99tKNRWANr8/ZtvvjGvT548aY6tdevWD30M2kxdq+zOi69YO6JxlEVbB0ReAAAAACRfPh+6f//9dxOaS5QokaDPa59rrTZ7sr9ykyZNpEWLFlK9enVTSW/YsKF8/PHHbgOpVqidA5xW5+NKK9vNmzeXlClTmj7d2p/966+/drutVvy1um1vkv7iiy9KtmzZ5GGNHj1aMmTI4Fhogg4AAAAgOfH55uUauD2xD22e7ikagrXZ94gRI0w/c+1rPWrUKBk7dqxpAq9B3G7jxo2m2btdqlSp4vQdOpjb4sWLZdOmTY51Wu3WIN62bdso2+t7/fr1k2PHjpnQrc3NPaF///7Sq1cvx2t9sOArwTtk4KIo65b1i9pFAAAAAEDy5fOhW0cF18B86NChBH3+/PnzcvbsWSlYsGCs2wYHB5ufly9fjtJsXUOwVnqd5cmTxzTh1kX7ThcrVkw++eQTGTp0qGMb/d6E9OmeO3eu3Lp1y6
UPtz48ePDggRw5csR8lzMdqbxevXrSvn1787kXXnhBrl69Kg9LR4rXxRf5BwRGWUf/bQAAAADJqnm5Ng8PDQ2VKVOmuO3P7G56L2cffvihpEiRQho0aBCngK/b/vbbby7rtXqsQTxy0HWWKVMmU+F2d4wJoRXt3r17y65duxzL7t27pVq1ao5m5O6amOv0YK+++qqpxgMAAAAAHo7PV7qVBu6qVavK008/baYFK1u2rNy7d09WrVol06ZNk4MHD5rttLKrc2DfvXtX/vzzT/nqq6/k888/N/2SixQpEuv3aDPw119/3YRdf39/KVOmjJw6dUr69u0rzzzzjFSpUsVsN336dBOCtS+3jlqulWWdomz//v1m6jBn//33n3k/clU6pmbmuu8dO3ZIeHh4lL7s2pdcr4E2bddjdKajp2tV316xBwAAAAA8HJ+vdCsdQExDqM7LrYFYBxWrU6eOrFmzxoRuO50aTKvNGrC1ybdWp3UbDc1xpZVxnWpLP6ODnmn/aQ353333naNfuIb/a9euSceOHc02OqDa1q1bZenSpeZ3ZzpNmR6T8xK5ku6uyv3444+7HTxOg74G+RUrVkR5T48va9asZjowAAAAAMDD87N5YqQxII50IDXt2x4yYKH4Bwb53HWLGBSW2IcAAAAAwIvZRou1MbUWThaVbgAAAAAAEgOhOx60ObjznNnOi77nTdEdhy46zRgAAAAAIPEli4HUPEUHIHv77bfdvuftwcd0sLTo6FRkAAAAAIDER+iOh+zZs5vlURCX0dQBAAAAAImL5uUAAAAAAFiE0A0AAAAAgEUI3QAAAAAAWITQDQAAAACARQjdAAAAAABYhNHLkSiW9A31+jRrAAAAAOBtVLoBAAAAALAIoRsAAAAAAIsQugEAAAAAIHQDAAAAAJC0UOkGAAAAAMAihG4AAAAAACxC6AYAAAAAwCLM041E0XBshPgHBnn9eyMGhXn9OwEAAAAkX1S6AQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAihGwAAAAAAixC6AQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQupOJn3/+WVKmTClhYWFR3rtz5468//77UqFCBUmbNq1kyJBBypUrJwMHDpR//vnHsV3btm3Fz88vylK3bl0vnw0AAAAAJA2E7mRixowZ0q1bN9mwYYNLkL59+7bUqVNHRo0aZUK1vr9371756KOP5Ny5czJ58mSX/WjAPn36tMsyb968RDgjAAAAAHj0+Sf2AcB6165dkwULFsj27dvlzJkzMnv2bBkwYIB5b+LEibJp0ybzXvny5R2fyZcvn1SvXl1sNpvLvlKnTi05c+ZMcrft3p1b5uf169cd67SqDwAAAABWotKdDCxcuFBKlCghxYsXl1atWsnMmTMdYVqr1Frpdg7czrT5+MPQSvqVK1dclsSwdkRjs6RLl86xAAAAAIDVCN3JpGm5hm178/DLly/LTz/9ZF4fOXLEhHFnDRs2dATTKlWquLy3fPlyl+CqizZNj87o0aNNH3H7kjdvXkvOEQAAAAAeRTQv93GHDx+Wbdu2yZIlS8xrf39/adasmQniNWrUcPuZqVOnmmbY2q9b+3g7q1mzpkybNs1lXebMmaP9/v79+0uvXr0cr7XSnRjBO2TgIvNzWT8GfQMAAADgPYRuH6fh+t69e5I7d27HOm1arn2zP/74YylatKgJ5s5y5coVbZjWftBFihSJ8/fr9+iS2PwDAs1P+nEDAAAA8Caal/swDdtz5syR8ePHy65duxzL7t27TQjX/twtWrSQVatWyc6dOxP7cAEAAADA51Dp9mHa//rixYvSvn1705/aWaNGjUwVfOPGjfL9999LrVq1ZPDgwVKtWjXJlCmT6ev9ww8/mLm9Iw+MpiOgO9Mm61mzZvXKOQEAAABAUkLo9mEaqmvXrh0lcNtD97hx40y4XrNmjUyaNElmzZpl+m
A/ePBAChYsKC+88IK89dZbLp9buXKlo/m5nQ7EdujQIcvPBwAAAACSGj9b5ImYAQvpQGr6ECBkwELxDwzy+rWOGBTm9e8EAAAA4LvZRmeHCg4OjnY7+nQDAAAAAGARQjcAAAAAABYhdAMAAAAAYBFCNwAAAAAAFiF0AwAAAABgEUI3AAAAAAAWIXQDAAAAAGARQjcAAAAAABYhdAMAAAAAYBF/q3YMxGRJ31AJDg7mIgEAAADwaVS6AQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAihGwAAAAAAixC6AQAAAACwCPN0I1E0HBsh/oFBXv/eiEFhXv9OAAAAAMkXlW4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAV0P3qVOnpF27dpI7d24JCAiQ/PnzS48ePeT8+fPm/X79+kmJEiVcPnPo0CHx8/OTtm3buqyfPXu2pE6dWm7evGle6zaBgYFy4sQJl+0aNGgQ5bPR0e10+8jWr19v9n/p0iXHuvv378vEiROlTJky5nszZcokL7zwgmzevNnls0OGDJEnnngiyj6PHz9u9rlr1y7Hus8++0zKlSsn6dKlk4wZM0r58uVl9OjREldXrlyRd99911xDPaacOXNK7dq1ZfHixWKz2cw2NWrUMN87f/58l89OmjRJChQo4LJNdIu+DwAAAAB4hEL3sWPH5Mknn5SjR4/KvHnz5Pfff5dPPvlE1qxZI5UrV5YLFy5IzZo15fDhw3LmzBnH59atWyd58+Y1wdeZrn/mmWckTZo0jnUaCN977z3Lz0UDbPPmzWXYsGHmocHBgwfN8elxaiBdunRpvPc5c+ZM6dmzp3Tv3t0EcQ3vffr0kWvXrsXp8/pAoEqVKjJnzhzp37+/7NixQzZs2CDNmjUz+7l8+bJjWw3kAwcOlLt377rdl4b006dPm2Xbtm1m3erVqx3r9H0AAAAAgCt/SURdunQx1e0ff/zREZTz5ctnqrmFCxc2FdoPPvhAUqVKZQKshlqlv+tnR44caarD9mqsrn/ttddcvqNr164yYcIEeeedd6R06dKWncvChQtl0aJFsmzZMqlfv75j/aeffmqq9q+//rrUqVNH0qZNG+d96r6aNm0q7du3d6wrVapUnD8/YMAAc32OHDliWhLYFStWTFq0aGGCtp2+1u/Tynrnzp2j7Ctz5syO32/dumV+ZsmSxVTOH3X37vy/41XXr193/B6fewEAAAAASarSrVXsiIgIE/CcK9NKg9wrr7wiCxYskKCgIHnqqadMFdtOw3WtWrWkatWqjvVaNT958qSpjDvTberVq2eaqVtp7ty5Jsw6B2673r17m+C9atWqeO1Tr8PWrVujNI+PiwcPHpjm4nodnQO3nTZX9/f/3zOX4OBg85BDK/XOwfRh3b592zRxd168be2Ixo5Fz9u+AAAAAIDPhm5tUq5NskuWLOn2fV1/8eJFOXv2rAnS9qbkBw4cMJVWrYY/99xzjvX6Uyu32rw8Mu0DvXLlStm4cWOCjnX58uUuYU0X7avtTKvJMZ2LfZv4GDx4sOnHrZX84sWLm/7lWlHXQB2bc+fOmesXuT98TPQBiF5DbRngKXrtM2TI4Fi0uT0AAAAAJBeJPpCafTCvmGifaA2s2ndYw/Wzzz4rKVOmlOrVq7uEbu2/rAOpRfb444/Lq6++muBqt4Z+7VPtvHz++ecJOpf4yJUrl/z888+yd+9e00/83r170qZNG6lbt26swTshx6LXTivd2qRfQ7snaF9y7TtuX3TgPG8LGbjIsWh/ePsCAAAAAD4buosUKWIGOdMBx9zR9Tr6d7Zs2UwTce37rU3JddGwrbTZuYZDbVquoTskJCTa7xs6dKgZSCwhA5pp3189XuclT548Ltto0/KYzsW+jb0pt/MgZn
b2kdC1IuxM+6JrFfqrr74yTdR1+emnn2I8Zr1uWiXXkd7jo1WrVmYE+REjRognaJDX83VevM0/INCx6L20LwAAAADgs6FbB+HSgcWmTp3qmOLLTkcqDw8PN6NsazDXPt+VKlUywVrDpn16Kh1gTZuTz5gxw1RQI/fndqbNmnVQNR1cTKf28jQd5E2bzH/33XdR3hs/frzjfJU2Ff/rr7/k33//ddlOHwpo824dTC46WrVXsfW7TpEihTkmvY7//PNPlPe10quVc3ef0ybh06ZNM4OwAQAAAACSaPPyjz/+2Ay0FRoaaqay0uCsfa81nGolWUcnt9NArQODaX/uChUqONZr1Xvy5MmmcqmV79iaOmsA1amuPE0DbsOGDU3zb30IoIF1z5498uabb5pRwbU5ur26querwVtHDN+yZYup1OvI5zpllzYj16bzqlOnTjJ8+HAzVZgOpqaDqmkzea1i65RqsdHrpw8b9IGFThum/eH1wYBORaZ94qNrYh0WFmY+M336dA9fJQAAAABIXhI1dBctWlS2b98uhQoVMlNj6TRhb7zxhgnY2pfZeZoqXXf16lXT1Nx51G0N3bpe+3lr5Tsmur++ffs6przyJK3I6yBnWkmfOHGiCdXVqlUzYVkr9A0aNHBsq8ev06RpRVuDtzYf10HTNHBryLarXbu2CdpNmjQxTdMbNWpkKuE6j7lWzmOj56uf1ybj2lxcg7Yek86J/v7770dpxu5s7NixllwnAAAAAEhO/GyeHv0LiIFOGaZhP2TAQvEPDPL6tYoYFOb17wQAAADgu9lGx+uKaeyqRB+9HAAAAAAAX5WsQ/fJkyejzL/tvOj7j7KYjj2hc5IDAAAAADznf52jk6HcuXObObdjev9RFtOxR57SDAAAAADgfck6dOuAZjrndlKVlI8dAAAAAJKDZN28HAAAAAAAKxG6AQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsk69HLkXiW9A2V4OBgbgEAAAAAn0alGwAAAAAAixC6AQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAhThiFRNBwbIf6BQdG+HzEozKvHAwAAAABWoNINAAAAAIBFCN0AAAAAAFiE0A0AAAAAgEUI3QAAAAAAWITQDQAAAACARQjdAAAAAABYhNANAAAAAIBFCN0AAAAAAFiE0A0AAAAAgEUI3QAAAAAAWITQ7cPatm0rfn5+ZkmVKpXkyJFD6tSpIzNnzpQHDx5E2T40NFRSpkwpv/76a6z7KliwoPTp00du3brlpbMBAAAAgKSH0O3j6tatK6dPn5bjx4/LDz/8IDVr1pQePXpIvXr15N69e47tTp48KVu2bJGuXbuaUB7Tvo4dOyYTJ06U6dOny+DBg714NgAAAACQtBC6fVzq1KklZ86ckidPHqlQoYIMGDBAvv32WxPAZ8+e7dhu1qxZJoh36tRJ5s2bJzdv3ox2X3nz5pUGDRpI7dq1ZdWqVV4+IwAAAABIOgjdyVBISIiUK1dOFi9ebF7bbDYTulu1aiUlSpSQIkWKyKJFi2Lcx759+0xlPCAgwOPHd+/OLbl+/brH9wsAAAAA3kboTqY0XGuTc7V69Wq5ceOG6dOtNHzPmDEjymeWL18u6dKlk8DAQClTpoz8999/8s4778T4Pbdv35YrV664LLFZO6Kx+R4AAAAASOoI3cmUVrd1UDSlfbibNWsm/v7+5nWLFi1k8+bN8scff7h8RvuD79q1S3755Rdp06aNvPbaa9KoUaMYv2f06NGSIUMGx6JN0wEAAAAguSB0J1MHDx40I5BfuHBBlixZIlOnTjWhWxft/62DrEUeUC1t2rSm6bk2Tdf3NHy7q4g769+/v1y+fNmxnDp1KtZjCxm4SK5du/bQ5wgAAAAAiY3QnQytXbtW9u7da6rU4eHh8thjj8nu3btNFdu+jB8/3gy0dv/+fbf7SJEihRmUbeDAgW4HXX
MefC04ONhliY1/QKAJ+AAAAACQ1BG6fZz2qT5z5oz8/fffsmPHDhk1apS8/PLLZqTyV1991VSqGzduLKVLl3ZZ2rdvL+fOnZOVK1dGu+8mTZqYeb2nTJni1XMCAAAAgKSC0O3jNDTnypVLChQoYObZXrdunXz00Udm2jCtaGuF212/bO1/XatWrRibj2tTdJ3Xe9y4cYw2DgAAAABu+Nl0RC3AS3T0cg30IQMWin9gULTbRQwK454AAAAAeOSzjY5dFVM3WirdAAAAAABYhNANAAAAAIBFCN0AAAAAAFiE0A0AAAAAgEUI3QAAAAAAWITQDQAAAACARQjdAAAAAABYhNANAAAAAIBFCN0AAAAAAFiE0A0AAAAAgEX8rdoxEJMlfUMlODiYiwQAAADAp1HpBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAAAAALMKUYUgUDcdGiH9gUJT1EYPCEuV4AAAAAMAKVLoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAihGwAAAAAAixC6AQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAihGwAAAACApB6627ZtK35+fmYJCAiQIkWKyLBhw6R9+/ZSpkwZuXPnjsv2K1asMNvt2LFDjh8/7vhs5GXr1q1x+n7d/7hx46RcuXISFBQkWbNmlapVq8qsWbPk7t27Ltv+/PPPkjJlSgkLC4uyn8jHkjlzZqlevbps3LgxztdiyJAh8sQTT0T7fo0aNcy+58+f77J+0qRJUqBAAYmPmzdvmmPU8719+3aU93V/7r5LlSpVyrw3e/ZsWb9+fbT3wL7oNgAAAACARKp0161bV06fPi1Hjx6V3r17m/CZL18+uXr1qgwePNix3aVLl6RDhw4yaNAgqVChgmP96tWrzeedl4oVK8YpcIeGhsqYMWPkjTfekC1btsi2bdukS5cuMnnyZNm/f7/L9jNmzJBu3brJhg0b5J9//nG7T/ux6Da5c+eWevXqyb///iueEhgYKAMHDozyQCC+vvnmGxOeS5QoIUuXLnW7Td68ec3DB2f6MOPMmTOSNm1a87pKlSou171p06aO+2lfdBsAAAAAQCKF7tSpU0vOnDklf/780qlTJ6ldu7asXLnSBL7x48fLL7/8Yrbr2bOn5MmTR/r37+/y+SxZspjPOy+pUqWK9Xu1QqzheM2aNSZoa5W5UKFC0rJlS/OdRYsWdWx77do1WbBggTk+rXRrldcd+7GULl1aBgwYIFeuXHEcvye0aNHCPHz47LPPHmo/+gChVatWZtHf3XnllVfkp59+klOnTjnWzZw506z39/c3r7XVgfN1T5MmjeN+2hfdBgAAAADwiPTp1uCmVeiaNWtK586dpU2bNvL111/LwoULZc6cOY7A97DCw8NNwC9fvnyU9zS026u5Sr9bq8LFixc3QVXDp81mi7H5th6r8mToDA4Olnfffdc0wb9+/XqC9vHHH3+YpvJaldZFm8CfOHEiynY5cuQwLQG++OIL8/rGjRvmwUO7du3Em+7duZXgcwUAAACAR1GihG4Nsdo8OyIiQkJCQsy60aNHm5/NmzeXUaNGmeAbmTZfTpcuncsSF9qc3d3+YqoMK20+ffnyZVMFju5YNLB/8MEHppl7rVq1xJP0QYQ2M58wYUKCPq8PDF544QXJlCmT6detwTpyM3I7Ddha1dd7s2jRIilcuHCM/c7jSvuRaysA5yU6a0c0jvM9BQAAAICkwKuhe/ny5SZUaZDUMNisWTPTr9te9X777bfNIGc9evRw+3mtvu7atctliYuYKtXODh8+bPp6a9NupZV2PUZ3zbL1WHbu3Gn6TOugcBpY49LUPT60+bZWujXUnzt3Ll6fvX//vqlc2x8gKP1dj/PBgwdRttem9Nq0Xpvha1j3VJVbH6ZkyJDBsWj/cQAAAABILjzTfjuOtBn5tGnTTDNsHXwscv
Nxfa2jhutI2O5oYNOAG1/FihWTQ4cOxbqdhut79+6ZY3MO7Bp+P/74YxManY9F+4Lrop9p2LCh7Nu3z2zrSRqUNXSPGDEiXiOXayuCv//+2zw0iBzGtW97nTp1olz71q1bmwHttG/6kiVLPHL82i+/V69ejtda6Y4ueIcMXCTL+tX1yPcCAAAAQLKrdGtTbA3NOmK5p/prx4UOmKbN2bUyHZmODq79iDU4a99sHdDNuZK+e/duE8LnzZsX7f4bN25szmfq1KkeP/YUKVKYarE+rNDpyuJKHyBoU/3ILQN0XXQDqml1W5vSv/zyy6ZJuifoQwjtn+68RMc/INClfz0AAAAAJHVerXQ/rPPnz5tprJxlzJjRNFePiY6G/v3335s+18OHD5dnn31W0qdPL9u3b5exY8eaEKqB9uLFi2becOeKtmrUqJHZpmPHjm73r5X57t27m6byb775pmkiHxsdgC1y83g9Ju1L7a7pd6VKlWT69Olm0LPYnD17Vr777jtZtmyZGV3d2auvvmqq8hcuXDD9vJ2VLFnSNGOPy/EDAAAAAB7x0cvjS0cgz5Url8sS3dzTkautq1atkj59+pjg+swzz8hTTz0lH330kQnLGkw1VOv+Iwdue+jWgL5nz55ov0NHXtequTZDj4sjR46Y0dSdFw3s0dGHA7du3YrTvrVirxVjdwO76TrtP//VV19FOxWavg8AAAAAeHh+triOMgZ4gPbp1gcbIQMWin9g1Ip6xKAwrjMAAACAJJNtdMarmLrRJqlKNwAAAAAASYlPhO5SpUpFmb/bvoSHh3v9eKI7Fl02btzok+cMAAAAAEjiA6lFZ8WKFaY/tTtxGXjM02KaPzxPnjw+ec4AAAAAAB8N3fnz55dHSULmEk/q5wwAAAAA8NHm5QAAAAAAPIoI3QAAAAAAWITQDQAAAACARQjdAAAAAABYhNANAAAAAIBFfGL0ciQ9S/qGSnBwcGIfBgAAAABYiko3AAAAAAAWIXQDAAAAAGARQjcAAAAAABYhdAMAAAAAYBFCNwAAAAAAFiF0AwAAAABgEUI3AAAAAAAWYZ5uJIqGYyPEPzDIZV3EoDDuBgAAAACfQqUbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAihGwAAAAAAixC6AQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAihOwH8/PxiXIYMGeLY9osvvpCnnnpKgoKCJH369FK9enVZvny54/22bdvGuK8CBQrE+bjmzZsnKVOmlC5durh9/7PPPpNy5cpJunTpJGPGjFK+fHkZPXq04/0bN25I//79pXDhwhIYGCjZsmUzx/vtt9/K8ePHYz3v2bNnJ+RyAgAAAIDP8k/sA0iKTp8+7fh9wYIF8t5778nhw4cd6zTUqrfffls+/vhjGTFihDRo0EDu3r0rX331lbz88svy4YcfSteuXc3PMWPGOD6bK1cumTVrltStW9e81hAdVzNmzJA+ffrI9OnTZfz48SY4282cOVN69uwpH330kQnSt2/flj179si+ffsc23Ts2FF++eUXmTx5sjz++ONy/vx52bJli/mZN29el/P+4IMPZOXKlbJ69WrHugwZMsTzSgIAAACAbyN0J0DOnDldgqZWeZ3Xqa1bt5rgqyG3W7dujvUjR46UW7duSa9evUz41jAbOaxqFTry/mLz559/moD8zTffyLp162Tx4sXSsmVLx/vLli2Tpk2bSvv27R3rSpUq5bIP3UYfArz44ovmtVbZK1as6Pa89cGCv79/vI8TAAAAAJITmpdbRJt6azB98803o7zXu3dvU/XWgOwpWh0PCwszAb5Vq1am6u1Mw7E+CDhx4kS0+9BtVqxYIVevXhVvu3fnlly/ft3r3wsAAAAAViJ0W+TIkSOmb3RAQECU93Lnzi3BwcFmG0948OCB6U+tYVs1b95cNm3aZKrfdoMHDzYVdK1eFy9e3PQlX7
hwofms3aeffmqq5VmyZDH90N966y3ZvHnzQx2bNmO/cuWKy+LO2hGNHc3yAQAAAMBXELotZLPZxBtWrVplqsT2ZuFZs2aVOnXqmH7czn3Ff/75Z9m7d6/06NFD7t27J23atDF9x+3B+7nnnpNjx47JmjVrpHHjxrJ//36pVq2aDB8+PMHHpgO1afXdvmhzegAAAABILgjdFilWrJgJsHfu3Iny3j///GMqvrqNJ2hT8gsXLkiaNGlMP2tdtJm4jpzuXMlWpUuXls6dO5sB3TSs6/LTTz853k+VKpUJ2n379pUff/xRhg0bZkK3u/OICx0N/fLly47l1KlTbrcLGbhIrl27lqDvAAAAAIBHFaHbItrEW0OkjiQemY78reG2UaNGD/09OrK4Tuk1f/582bVrl2PZuXOnXLx40QTn6OgI5SqmvtS6jVbFdfC3hEidOrVpSu+8uOMfEChp06ZN0HcAAAAAwKOK0cstUrlyZdOM+5133jFVYucpw3SE8EmTJnmkqfWXX35p+mDryOQ6irozbW6uVXBtQt6pUyfTlzwkJEQee+wxM/2XTmWmc3HrsaoaNWpIixYt5MknnzT7PHDggAwYMEBq1qwZbVgGAAAAAESP0G0hDdZly5aVqVOnysCBA82c2xUqVJClS5dK/fr1PfId2m+7YcOGUQK30kp669at5dy5c1K7dm2z7bRp00x1XPt9a9jW/tsasFVoaKhpkq5B+8aNGyak16tXz8xDDgAAAACIPz+bt0b7AkRMX3YdUC1kwELxDwxyuSYRg8K4RgAAAACSVLbRsatiahlMn24AAAAAACxC6H7Ebdy40cxfHd0CAAAAAHh00af7EaeDmulo5AAAAACApIfQ/YjTubeLFCmS2IcBAAAAAEgAmpcDAAAAAGARQjcAAAAAABYhdAMAAAAAYBFCNwAAAAAAFiF0AwAAAABgEUYvR6JY0jdUgoODufoAAAAAfBqVbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoRqJoODZCQod/z9UHAAAA4NMI3QAAAAAAWITQDQAAAACARQjdAAAAAABYhNANAAAAAIBFCN0AAAAAAFiE0A0AAAAAgEUI3QAAAAAAWITQDQAAAACARQjdAAAAAABYhNANAAAAAIBFfDJ0t23bVvz8/MwSEBAgRYoUkWHDhkn79u2lTJkycufOHZftV6xYYbbbsWOHHD9+3PHZyMvWrVtj/e7Zs2e7/eznn3/u2Obnn3+WlClTSlhYWJTP279f3//7779d3jt9+rT4+/ub93W7uPrmm2+kRo0akiFDBkmXLp2ULVvWXI8LFy64HHPdunVdPnfp0iWzfv369dGel/MSn2MCAAAAgOTAJ0O30gCpIfXo0aPSu3dvGTJkiOTLl0+uXr0qgwcPdgmWHTp0kEGDBkmFChUc61evXm0+77xUrFgxTt8dHBwc5bOvvPKK4/0ZM2ZIt27dZMOGDfLPP/+43UeePHlkzpw5Luu++OILsz4+3n33XWnWrJk89dRT8sMPP8i+fftk/Pjxsnv3bvnyyy8d22mY13Net26d2/3oPpzPp3Llyua6Oa/LmzdvvI4NAAAAAHydv/io1KlTS86cOc3vnTp1kiVLlsjKlStl1qxZEhoaKg0aNJBKlSpJz549TZDt37+/y+ezZMni+Hx8adU3us9eu3ZNFixYINu3b5czZ86YCvKAAQOibNemTRtzrM7Hpa91/fDhw+N0HNu2bZNRo0bJpEmTpEePHo71BQoUkDp16pgHDnZp06aVpk2bSr9+/eSXX36Jsq80adKYxU5bBgQFBSX4GgEAAABAcuCzle7INDBqs/KaNWtK586dTXj9+uuvZeHChaairJVeb9DvK1GihBQvXlxatWolM2fOFJvNFmW7l156SS5evCibNm0yr/Wnvq5fv36cvys8PNw0J9fzdSdjxowur7U1wN69e2
XRokXiLdevX/fadwEAAACAt/l86NZAq82mIyIiJCQkxKwbPXq0+dm8eXNTCdYQHFmVKlVMYHVe4ury5csun3OuBmvTcg3b9ibwuu1PP/0UZR+pUqVyhHKlP/W1ro8rbVpfqFChOH8md+7cpiKuTdLv3bsnnnD79m25cuWKy+IsPtcVAAAAAJIanw3dy5cvN4EuMDBQXnjhBdMnWSu59qr322+/bZpHOze7dqZNwHft2uWyxFX69OldPrdlyxaz/vDhw6bJd4sWLcxrra7rcWkQd6ddu3amGq/N0PWnvo4PdxX02PTt21fOnj3rCPsPSx9w6ABu9oV+3wAAAACSE5/t063NyKdNm2b6HmsFN3LzcX2tI4Rr/2t3NBzqqOcJkSJFCref1XCtFWQ9HudgrP3PP/74YxNKnelI61qF15BesmRJKV26dLzCf7FixUyz9Lt378a52q1NzrUf+dChQ6VevXrysHRfvXr1crzWSrdz8NY+7gAAAADgq3y20q0Dg2nw1RHLvdVfOyYatrXvuI4c7lwF11HENYTPmzfP7ee0uq1TdsW3yq1atmxpQu3UqVPdvu88kJozHVldHxx8+OGH8rD0gYKO5u68RL5PAAAAAOCrEj+NPqLOnz9vmnVHrgJrc/WENnfXgdB0rvDIFe1GjRqZKnjHjh2jfE6n5WrSpEmUQc/iQkdn79Onj5kyTef8btiwoQn4v//+u3zyySfy7LPPum1er+eole4uXbrE+zsBAAAAAMmg0v2wateuLbly5XJZli5dmuD9aajWfUYO3PbQrVOI7dmzJ8p7WqXPmjVrgqv1Y8eOlblz55ppwHSqtFKlSpnm3mXLljUjuEdH39NB2AAAAAAACednS8hoW0ACaZ9uffAQMmCh+AcGScSgMK4lAAAAgCSbbXRGqsjdaJ1R6QYAAAAAwCKE7njS5tmR5++2L+Hh4eJN2gc8umNx1z8cAAAAAOBdDKQWTytWrDBTcLmTI0cO8aZhw4aZ+cbdial5AwAAAADAOwjd8ZQ/f355VGTPnt0sAAAAAIBHE83LAQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAijlyNRLOkbyrRmAAAAAHwelW4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAiPhO627ZtKw0aNIiyfv369eLn5yeXLl0yr+/fvy8TJ06UMmXKSGBgoGTKlEleeOEF2bx5s8vnhgwZIk888USU/R0/ftzsb9euXY51n332mZQrV07SpUsnGTNmlPLly8vo0aPjdNzRfY9djRo1zPfNnz/fZf2kSZOkQIECcfqO2bNnm33YFz3OihUryuLFi91uP2/ePEmZMqV06dIl1usJAAAAAEgGoTsubDabNG/eXIYNGyY9evSQgwcPmhCZN29eE26XLl0a733OnDlTevbsKd27dzdBXMN7nz595Nq1ax47bn04MHDgQLl7926C9xEcHCynT582y86dOyU0NFSaNm0qhw8fjrLtjBkzzDlo+L5169ZDHj0AAAAAJF/JKnQvXLhQFi1aJHPmzJHXX39dChYsaCrUn376qbz00ktm3fXr1+O1z2XLlpnw2r59eylSpIiUKlVKWrRoISNHjvTYcev+tLKsFfWE0up0zpw5zVK0aFEZMWKEpEiRQvbs2eOy3Z9//ilbtmyRfv36SbFixaKthgMAAAAAYpesQvfcuXNNkKxfv36U93r37i3nz5+XVatWxWufGmK3bt0qJ06cEKtolfrdd981Ffr4PhRwR5vYf/HFF+b3ChUquLw3a9YsCQsLkwwZMkirVq1M1fth3L59W65cue
KyAAAAAEBy4VOhe/ny5aa/svOi/bXtjhw5IiVLlnT7Wft63SY+Bg8ebPpxa//q4sWLm77lWlF/8OCBeFLnzp1NM/MJEyYk6POXL192XJOAgADp1KmTqfAXLlzYsY0es/b/1rCttCn+pk2bTPU7obRvuwZ4+6JN+QEAAAAgufCp0F2zZk3Tr9p5+fzzz6P06/akXLlyyc8//yx79+41/cTv3bsnbdq0kbp163o0eKdOndpUuj/44AM5d+5cvD+fPn16xzXRPt2jRo2Sjh07ynfffefYRqv8Wkl/8cUXzeusWbNKnTp1TL/1hOrfv78J/Pbl1KlTCd4XAAAAACQ1/uJD0qZNa/pVO/vrr78cv2vTch08zR37et3G3qRbQ2Jk9lG7tWrrrHTp0mbRirSG2WrVqslPP/1kHgR4ilagNXRrf+y4jlxup/23na9N2bJl5ccff5SxY8c6mttrU/ILFy5ImjRpHNvpgwPt9z106FCzj4Q8LNAFAAAAAJIjn6p0x0abSx89etSlums3fvx4yZIli6nsKm0qroH933//ddlux44dppl3vnz5ov2exx9/3Pz0RP9rZxp6tbn2tGnTzNRlD0unBbt586b5Xfuzf/vtt2ZqMueWAloVv3jxognoAAAAAIBkXOmOS+j++uuvTfPv999/X2rVqmUG9poyZYoZhVzf02q50im1NHjryOFaWdYB0zRw69Rd2oxcA6vSvtG5c+eWkJAQeeyxx8yUXLp9tmzZpHLlynE6Lg2+zvN+25uDO/e3ttNBzipVqiTTp0+XHDlyxPnctVn9mTNnHN+nTckjIiLkvffeM+u+/PJL89BBR2LXkc6daXNzrYJrk3k7bU6vx2inn9GR4AEAAAAAyTR0azDUQc4mTZokEydOdAxOpuFY5+uuWrWqY1t/f39T3R0wYIAJ3mfPnjVTjGng7tWrl2O72rVrmz7PWn3WarH2g9b9rVmzxoTYuNDB28qXL++yTh8IrF692u322iS8SpUq8Tp3fbig/c+VNvfOnz+/6SPet29fs07PoWHDhlECt2rUqJG0bt3apS/5c88957KNPoTQ/uwAAAAAgP/xs3l6ZDEglvCv/eG1v7z2mwcAAAAAX842yapPNwAAAAAA3kTotljkecOdl40bN3rkO0qVKhXtd4SHh3vkOwAAAAAA8Zes+nQnhsgDpDnLkyePR75jxYoVcvfuXbfvxWewNQAAAACAZxG6LRZ53nAr6KBoAAAAAIBHD83LAQAAAACwCKEbAAAAAABCNwAAAAAASQuVbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAepdB95swZ6datmxQqVEhSp04tefPmlfr168uaNWvM+wUKFBA/Pz+zBAUFSZkyZeTzzz932cf69esd26RIkUIyZMgg5cuXlz59+sjp06cd2znvy93Stm3bWI9Xt1u6dKnb9+zHUapUKbl//77LexkzZpTZs2fH6Zo4H2fKlCkld+7c0r59e7l48aLb7UuUKGGunV7LyGrUqCE9e/aUhNDvDwwMlBMnTrisb9Cggcu10t/dXc+6deua95s3b+743W7lypVmmyFDhris19f58uVL0PECAAAAgC+Ld+g+fvy4VKxYUdauXSvvv/++7N2714SxmjVrSpcuXRzbDRs2zITnffv2SatWraRDhw7yww8/RNnf4cOH5Z9//pFff/1V+vbtK6tXr5bSpUub/Spdr/vR5ZtvvnF8xr7uww8/FE84duyYzJkz56H2YT/nkydPSnh4uGzYsEG6d+8eZbtNmzbJzZs3pXHjxvLFF1
+Ip2kwfu+992LdTkO1/Tral3nz5pn39H5u3rxZ7t2759h+3bp15gGLPqhwput1ewAAAADAQ4buzp07m1C3bds2adSokRQrVsxUiXv16iVbt251bJc+fXrJmTOnqYZrmM6cObOsWrUqyv6yZ89uttP9aHVVg162bNmkU6dO5n39Xd/XRffh/BldtELuCVq5Hzx4sNy+fTvB+7Cfc548eUwIbdOmjezYsSPKdjNmzJCWLVtK69atZebMmeJpXbt2la+++so88IiJVtrt19G+ZMqUybynx3/t2jXZvn27Y3sN2/369ZNffvlFbt26ZdbpT31N6AYAAACAhwzdFy5cMFVtrWinTZs2yvvaHDuyBw8emAq1NrMOCAiI9TvSpEkjHTt2NOH7v//+E2/R5txa1Z08ebJH9vf333/Ld999J5UqVXJZf/XqVfn6669N9b9OnTpy+fJl2bhxo3hS1apVpV69eiYgJ5Q+BNEm8lrFth+3PkBo0qSJaUr/888/m/VbtmwxDyqiC9363pUrV1wWAAAAAEgu4hW6f//9d7HZbKY/cmy0up0uXTpTTdVm1FpBff311+P0Pfb9a1N2b9G+51rpHj16tAnCCWE/Z31w8Nhjj5kWARMmTHDZZv78+VK0aFHTOkD7fmt1XyvfnqbnoQ9IYgr0y5cvN8frvIwaNcrxvgZpe1Ny3Y8GcW158NxzzznW68+CBQtK/vz5oz0ObY1gX7R5OgAAAAAkF/EK3Rq44+qdd96RXbt2mb7fWu2dOHGiFClSJF7fo6HVm3TgsyxZssjYsWMT9Hn7Oe/Zs8cxqFxYWJjLAG3anFyr3Hb6u1a+tZLsSY8//ri8+uqrMVa7NVTr8Tov2srAeUA3bXFw9+5dE671tapevbpL6I6paXn//v3NQwz7curUKY+eJwAAAAD4TOjWCq0G4UOHDsW6bdasWU3IrlatmgmVOqDYgQMH4vQ9Bw8eND+1GbM3+fv7y8iRI83gbDq4W3zZz1mvU0hIiEyaNMk0v7Y30dbz137vOkK7fpcuzzzzjNy4ccNUwD1t6NChpkl4dCO3axcBPV7nxd5vXmmYvn79uhnMTs9Bw7bSn9qPW7sb6E891+hoS4fg4GCXBQAAAACSi3iFbg1koaGhMmXKFBPGIrt06ZLbz2mT4mbNmpmqZ2x0VO9PP/3UNGHWpszepn2Wtem3BtaHpc3H7eektBm5ntfu3btdqss6CJ0VTcz1uuugagMGDIgyHVpcFC5c2Oxj2bJl5jjtoVsHitNl/PjxcufOHQZRAwAAAIBo+Es8aeDWgbqefvppM0VW2bJlzQBkOjL5tGnTHFXqyHr06GGmAtPRsJ988knHeh0sTUfA1ubVv/32m4wbN07OnTsnixcvFk/6888/TXB0phVpd8aMGWMeLsSXnoPOu63N47UZtVa09cFBlSpVTBPtL7/80lwzvQ7OtK+79v3ev3+/Cfzq7NmzUY43V65ckiNHjngdkz7o+Oyzz8z564OPyIOcRZ4nXKvvWrF3rnZPnTrVVMGdv1sDuA46Zx9wDQAAAADggSnDdAowbbKsYax3794mQOoo3NqHWUN3TH2Mn3/++SjzRxcvXtyENp37W8Nu7dq1zVRXur0naTW5fPnyLsvOnTvdbqvNpXVxnqM6LvTcNBjr+ejo4dp8+8cffzT9xLVafP78eWnYsGGUz5UsWdIsztXuuXPnRjleDc/xpa0TdIA3+xRfznSgNT1e5+XZZ5912Ubvsz5MsPfndg7dup6pwgAAAAAgen62+IyOBjwknTJMRzHXQdXo3w0AAADA17NNvCvdAAAAAAAgbpJ86NZ5pSPPNW1fXnjhBY98R3h4eLTfYe+D7SvnCgAAAADwnCTfvFynrdLFnTRp0phRth+W9l3+999/3b6XKlUqyZ8/v/jKuVqN5uUAAAAAfEFcs028Ry9/1OhAYc5zS1shffr0ZkkO5woAAAAA8Jwk37wcAAAAAIBHFaEbAAAAAACLELoBAA
AAALAIoRsAAAAAAIsQugEAAAAAsAihGwAAAAAAixC6AQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAihGwAAAAAAixC6AQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAihGwAAAAAAixC6AQAAAACwCKEbAAAAAACLELoBAAAAACB0AwAAAACQtFDpBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACxC6AYAAAAAwCL+Vu0YcMdms5mfV65c4QIBAAAASLLsmcaecaJD6IZXnT9/3vzMmzcvVx4AAABAknf16lXJkCFDtO8TuuFVmTNnNj9PnjwZ4x8mfOPJnz5cOXXqlAQHByf24cAi3Ofkg3udPHCfkw/udfLAfbaWVrg1cOfOnTvG7Qjd8KoUKf7fMAIauAliyYPeZ+617+M+Jx/c6+SB+5x8cK+TB+6zdeJSSGQgNQAAAAAALELoBgAAAADAIoRueFXq1Kll8ODB5id8G/c6eeA+Jx/c6+SB+5x8cK+TB+7zo8HPFtv45gAAAAAAIEGodAMAAAAAYBFCNwAAAAAAFiF0AwAAAABgEUI3PG7KlClSoEABCQwMlEqVKsm2bdti3P7rr7+WEiVKmO3LlCkjK1as4K744L3+7LPPpFq1apIpUyaz1K5dO9a/DSTN/07bzZ8/X/z8/KRBgwaWHyO8f58vXbokXbp0kVy5cpmBeooVK8b/fvvovZ40aZIUL15c0qRJI3nz5pW33npLbt265bXjRfxt2LBB6tevL7lz5zb/O7x06dJYP7N+/XqpUKGC+e9zkSJFZPbs2Vx6H7zXixcvljp16ki2bNnM3N2VK1eWiIgIrx1vckXohkctWLBAevXqZUYo37Fjh5QrV05CQ0Plv//+c7v9li1bpEWLFtK+fXvZuXOn+ce5Lvv27ePO+Ni91v8z13u9bt06+fnnn80/3J5//nn5+++/vX7ssO4+2x0/flzefvtt86AFvnef79y5Y/7Rpvd50aJFcvjwYfNgLU+ePF4/dlh7r+fOnSv9+vUz2x88eFBmzJhh9jFgwAAu/SPs+vXr5t7qA5a4+PPPPyUsLExq1qwpu3btkp49e8rrr79OGPPBe60hXf/3W4tcv/32m7nnGtr13+GwkI5eDnjK008/bevSpYvj9f379225c+e2jR492u32TZs2tYWFhbmsq1Spku3NN9/kpvjYvY7s3r17tvTp09u++OILC48SiXGf9d5WqVLF9vnnn9vatGlje/nll7kRPnafp02bZitUqJDtzp07XjxKJMa91m1DQkJc1vXq1ctWtWpVbkgSof/cX7JkSYzb9OnTx1aqVCmXdc2aNbOFhoZafHTw9r125/HHH7cNHTqUm2EhKt3wGK186BMzbTZslyJFCvNaK5vu6Hrn7ZU+cY9ueyTdex3ZjRs35O7du5I5c2YLjxSJcZ+HDRsm2bNnNy1Y4Jv3edmyZaZJojYvz5Ejh5QuXVpGjRol9+/f9+KRwxv3ukqVKuYz9ibox44dMxWyF198kRvgQ/j3WPL14MEDuXr1Kv8es5i/1V+A5OPcuXPmH1z6DzBn+vrQoUNuP3PmzBm32+t6+Na9jqxv376m/1Hkhy5I2vd506ZNpvmpNk+E795nDV5r166VV155xQSw33//XTp37mwepGkzZPjOvW7ZsqX53LPPPqutI+XevXvSsWNHmpf7mOj+PXblyhW5efOm6c8P3/TBBx/ItWvXpGnTpol9KD6NSjcArxszZowZZGvJkiVmIB/4Bn1S3rp1a9O3N2vWrIl9OLC4MqKtGT799FOpWLGiNGvWTN5991355JNPuO4+Rsfj0FYMU6dONX3AdRCm77//XoYPH57YhwbgIemYDUOHDpWFCxea/02Hdah0w2P0H9kpU6aUf//912W9vs6ZM6fbz+j6+GyPpHuvnZ+oauhevXq1lC1b1uIjhTfv8x9//G
EG1tIBWZzDmfL39zeDbRUuXJib4gP/fdYRy1OlSmU+Z1eyZElTLdMmzAEBAZYfN7xzrwcNGmQepumgWkpnGdGBm9544w3zoEWbpyPpi+7fYzq6NVVu36TFD/3vtc4iRKtD6/G/lPAY/UeWVjzWrFnj8g9ufa19/9zR9c7bq1WrVkW7PZLuvVbjxo0z1ZGVK1fKk08+6aWjhbfus079t3fvXtO03L689NJLjtFwdcR6+MZ/n6tWrWqalNsfqqgjR46YME7g9q17reNvRA7W9oct/2/cJvgC/j2WvMybN09ee+0181NHrYcXWDlKG5Kf+fPn21KnTm2bPXu27cCBA7Y33njDljFjRtuZM2fM+61bt7b169fPsf3mzZtt/v7+tg8++MB28OBB2+DBg22pUqWy7d27NxHPAlbc6zFjxtgCAgJsixYtsp0+fdqxXL16lQvuQ/c5MkYv9837fPLkSTP7QNeuXW2HDx+2LV++3JY9e3bbiBEjEvEsYMW91v9f1ns9b94827Fjx2w//vijrXDhwmb2ETy69P9bd+7caRb95/6ECRPM7ydOnDDv6z3We22n9zYoKMj2zjvvmH+PTZkyxZYyZUrbypUrE/EsYMW9Dg8PN//21nvs/O+xS5cuccEtROiGx02ePNmWL18+E7B0apKtW7c63qtevbr5R7izhQsX2ooVK2a21+kqvv/+e+6KD97r/Pnzm/8ziLzoP+jgW/+ddkbo9t37vGXLFjPFowY4nT5s5MiRZro4+Na9vnv3rm3IkCEmaAcGBtry5s1r69y5s+3ixYuJdPSIi3Xr1rn9/1z7vdWfeq8jf+aJJ54wfxf63+lZs2ZxsX3wXuvvMW0Pa/jpf3ijog4AAAAAQHJDn24AAAAAACxC6AYAAAAAwCKEbgAAAAAALELoBgAAAADAIoRuAAAAAAAsQugGAAAAAMAihG4AAAAAACxC6AYAAAAAwCKEbgAAACfnz5+X7Nmzy/Hjx33iuqxcuVKeeOIJefDgQWIfCgAkS342m82W2AcBAAASpm3btnLp0iVZunTpI3kJNbgWLFhQdu7caYJfUtCrVy+5evWqfPbZZy7nYJcpUyYpU6aMjBgxQqpVq+ZYr9vPmTNH9u3bZ15XrFhRRo0aJU8//bTb77l9+7aUKlVK0qVLF+W91KlTyy+//CLdunWTn376SVKkcK2T3Lp1S6ZPn25+f/PNNyUwMNDlfQ3Y1atXl8mTJ5vXTz31lHTv3l1at279EFcGAJAQ/gn6FAAAQCzu3LmT5K7RjRs3ZMaMGRIRERHlvdWrV5uQfO7cORk5cqTUq1dPjhw5Ijly5DDvr1+/Xlq0aCFVqlQxIXjs2LHy/PPPy/79+yVPnjxR9qd1j8cee8x8LrJnnnnG/Dx79qwsW7ZMChQo4PL+kCFD5ObNm+b35s2bm9fO9EFBv379XB7OfPTRR4RuAEgENC8HAMCH1KhRw1RHe/bsaSqyGgi1Anv9+nV57bXXJH369FKkSBH54YcfHJ/R0Ofn5yfff/+9lC1b1gRGDX32iq3dN998Y0KnVmE1BI4fP97lfV03fPhwefXVVyU4OFjeeOMNR4W4fPny5jv0+NSvv/4qderUkaxZs0qGDBlMVXbHjh0u+9PtP//8c2nYsKEEBQVJ0aJFTQB1poFWw69+n56bVp7/+OMPx/v6+ZIlS5pzKlGihEydOjXG67dixQpzfvbQ6yxLliySM2dOKV26tAwYMECuXLliqtF24eHh0rlzZ1PR1+/S79aK85o1aySx1a9fX7Zv3+5ybQAA3kHoBgDAx3zxxRcmzG7bts0E8E6dOkmTJk1MBVaDrVZftZmxVnWdvfPOOyZIayDOli2bCWp379417/3222/StGlTU1Xdu3evqawOGjRIZs+e7bKPDz74QMqVK2eak+v7egz2KvHp06dl8eLF5rU2327Tpo1s2rRJtm7dagL1iy++aNY7Gzp0qPnePXv2mPdfeeUVuXDhgnnv77//lu
eee86E5LVr15pjbNeundy7d88Rgt977z1TlT548KBp6q3HpNcnOhs3bjTNwmOiFWZtRq4CAgKi3U6vr16/zJkzS2LLly+feQCj5wcA8C6alwMA4GM09A4cOND83r9/fxkzZowJ4R06dDDrNIhOmzbNBFnniu7gwYNN9VlpMNWmz0uWLDGhd8KECVKrVi0TWlWxYsXkwIED8v7775umy3YhISHSu3dvx+uUKVO6VImdt3P26aefSsaMGU3/Za1c2+m+tcm20tCsTaQ1yNetW1emTJliquTz58+XVKlSOY7L+Xz0IcL//d//mddadddj1r7QGvjdOXHihOTOndvte/rQQvtWa5jWpuEazvWaRKdv375mX7Vr15ZHgR6Lnh8AwLuodAMA4GO0ibhz6NXAqwN/2dn7IP/3338un6tcubLjd63OFi9e3FSIlf6sWrWqy/b6+ujRo3L//n3HuieffDJOx/jvv/+ahwBa4dbgrM3Dr127JidPnoz2XNKmTWu2sx/3rl27THNye+B2ps3ptSl1+/btzUBl9kUHP4upibVWsSMPSma3YMECU8HXZvbaRF+r/O6+W+mDDn0YoA8totuft6VJkyZK6wYAgPWodAMA4GMiB0HtG+28Tl8rK6aQ0mAcF1pp1qm5PvzwQ8mfP79pIq6hP/Lga+7OxX7cGiKjowFeaX/2SpUqubxnr767oy0CLl686Pa9vHnzmocEumgTdu1rrv3e9dgjN7HX0K1N6p0fGiQ2bZav3QYAAN5FpRsAABjat9pOg6eOzK2DkCn9uXnzZpcrpa+1OXdMIdbe59m5Gm7/rE5hpf207YOz6ajg8aGBVvso2/udO9NqvjanPnbsmKlKOy/O039FpgO+aRP02DRu3Fj8/f2jDMw2btw4M5iczo0d16q/N+gUY1rh1/MDAHgXoRsAABjDhg0zI21r9Vb7UmvVt0GDBuY97aet72mg1DCufb4//vhjefvtt2O8etmzZzcVaQ2h2qT88uXLZr1Wi7/88kvTbF1HANcB0mKqXLvTtWtXM4K4Du6mI3NrU3fd5+HDhx2DsI0ePdr0A9dj1gHgZs2aZfqnRyc0NNSMiB5dtdu54q4PDbSibW+yrVOEaZ/3mTNnmpHcz5w5YxZ71T2xH6jYWxMAALyL0A0AAAwNkD169DADhGlY/O677xyV6goVKsjChQtNP2WdMksHY9OQ7jyImjtaDdbQq4OXaeX55ZdfNut1LmwNtrpfHUldA6wG9PjQvuo6armGWp1yTI9bm5Pbm6S//vrrZtouDdrap1230X7YMVW6dTv7ucalibxW2fXhg9LB6bR5vFbBc+XK5Vi0uXlimzdvnnmwoVOvAQC8y8+mw28CAIBkS+fprlmzpgnBOoJ4cqfzlev0aVrx19HKrWzyraOw6/WPTEeV1+q0VvH1YYhWzp3plG32ked1O33t7Pjx49KvXz/zkESb7eugeNoaIKYHDgAAazCQGgAAgJOwsDDTVF3nAdfB05I6DeDa95zADQCJg9ANAAAQSc+ePS2/JlpF16bx7gZc0/70qnDhwqa5enT9z9Xy5cvNEt37uv9HaVA3AEhuaF4OAAAAAIBFGEgNAAAAAACLELoBAAAAALAIoRsAAAAAAIsQugEAAAAAsAihGwAAAAAAixC6AQAAAACwCKEbAAAAAACLELoBAAAAALAIoRsAAAAAALHG/weSklPofKSPowAAAABJRU5ErkJggg==",
|
| 556 |
-
"text/plain": [
|
| 557 |
-
"<Figure size 1000x600 with 1 Axes>"
|
| 558 |
-
]
|
| 559 |
-
},
|
| 560 |
-
"metadata": {},
|
| 561 |
-
"output_type": "display_data"
|
| 562 |
-
}
|
| 563 |
-
],
|
| 564 |
-
"source": [
|
| 565 |
-
"from sklearn.inspection import permutation_importance\n",
|
| 566 |
-
"import matplotlib.pyplot as plt\n",
|
| 567 |
-
"import matplotlib.font_manager as fm\n",
|
| 568 |
-
"\n",
|
| 569 |
-
"# Permutation Importance 계산\n",
|
| 570 |
-
"perm_importance = permutation_importance(\n",
|
| 571 |
-
" model, X_test, y_test,\n",
|
| 572 |
-
" n_repeats=10,\n",
|
| 573 |
-
" random_state=42,\n",
|
| 574 |
-
" scoring='r2'\n",
|
| 575 |
-
")\n",
|
| 576 |
-
"\n",
|
| 577 |
-
"# 결과 정리\n",
|
| 578 |
-
"perm_df = pd.DataFrame({\n",
|
| 579 |
-
" 'feature' : X_user.columns,\n",
|
| 580 |
-
" 'importance' : perm_importance.importances_mean,\n",
|
| 581 |
-
" 'std' : perm_importance.importances_std\n",
|
| 582 |
-
"}).sort_values('importance', ascending=False)\n",
|
| 583 |
-
"\n",
|
| 584 |
-
"print(\"=== Permutation Importance ===\")\n",
|
| 585 |
-
"print(perm_df)\n",
|
| 586 |
-
"\n",
|
| 587 |
-
"# 시각화\n",
|
| 588 |
-
"plt.figure(figsize=(10, 6))\n",
|
| 589 |
-
"plt.barh(\n",
|
| 590 |
-
" perm_df['feature'][::-1],\n",
|
| 591 |
-
" perm_df['importance'][::-1],\n",
|
| 592 |
-
" xerr=perm_df['std'][::-1],\n",
|
| 593 |
-
" color='steelblue'\n",
|
| 594 |
-
")\n",
|
| 595 |
-
"plt.xlabel('Importance (R2 감소량)')\n",
|
| 596 |
-
"plt.title('Permutation Importance')\n",
|
| 597 |
-
"plt.tight_layout()\n",
|
| 598 |
-
"plt.savefig('permutation_importance.png', dpi=150)\n",
|
| 599 |
-
"plt.show()"
|
| 600 |
-
]
|
| 601 |
-
},
|
| 602 |
-
{
|
| 603 |
-
"cell_type": "code",
|
| 604 |
-
"execution_count": 5,
|
| 605 |
-
"id": "e04186ff",
|
| 606 |
-
"metadata": {},
|
| 607 |
-
"outputs": [
|
| 608 |
-
{
|
| 609 |
-
"name": "stdout",
|
| 610 |
-
"output_type": "stream",
|
| 611 |
-
"text": [
|
| 612 |
-
"Linear Regression | RMSE: 179.69 | MAE: 152.58 | R2: 0.2299\n",
|
| 613 |
-
"Ridge | RMSE: 179.69 | MAE: 152.58 | R2: 0.2299\n",
|
| 614 |
-
"Lasso | RMSE: 179.82 | MAE: 152.76 | R2: 0.2287\n",
|
| 615 |
-
"ElasticNet | RMSE: 181.58 | MAE: 154.96 | R2: 0.2135\n",
|
| 616 |
-
"Random Forest | RMSE: 126.58 | MAE: 96.14 | R2: 0.6178\n",
|
| 617 |
-
"Gradient Boosting | RMSE: 140.61 | MAE: 113.22 | R2: 0.5284\n",
|
| 618 |
-
"XGBoost | RMSE: 130.61 | MAE: 100.03 | R2: 0.5931\n",
|
| 619 |
-
"[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000623 seconds.\n",
|
| 620 |
-
"You can set `force_row_wise=true` to remove the overhead.\n",
|
| 621 |
-
"And if memory is not enough, you can set `force_col_wise=true`.\n",
|
| 622 |
-
"[LightGBM] [Info] Total Bins 1908\n",
|
| 623 |
-
"[LightGBM] [Info] Number of data points in the train set: 9600, number of used features: 12\n",
|
| 624 |
-
"[LightGBM] [Info] Start training from score 710.157187\n",
|
| 625 |
-
"LightGBM | RMSE: 126.39 | MAE: 96.84 | R2: 0.6189\n",
|
| 626 |
-
"\n",
|
| 627 |
-
"최종 선정: LightGBM (R2: 0.6189)\n"
|
| 628 |
-
]
|
| 629 |
-
}
|
| 630 |
-
],
|
| 631 |
-
"source": [
|
| 632 |
-
"from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet\n",
|
| 633 |
-
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n",
|
| 634 |
-
"from sklearn.svm import SVR\n",
|
| 635 |
-
"from sklearn.neighbors import KNeighborsRegressor\n",
|
| 636 |
-
"import xgboost as xgb\n",
|
| 637 |
-
"import lightgbm as lgb\n",
|
| 638 |
-
"import numpy as np\n",
|
| 639 |
-
"from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error\n",
|
| 640 |
-
"\n",
|
| 641 |
-
"models = {\n",
|
| 642 |
-
" 'Linear Regression' : LinearRegression(),\n",
|
| 643 |
-
" 'Ridge' : Ridge(),\n",
|
| 644 |
-
" 'Lasso' : Lasso(),\n",
|
| 645 |
-
" 'ElasticNet' : ElasticNet(),\n",
|
| 646 |
-
" 'Random Forest' : RandomForestRegressor(n_estimators=100, random_state=42),\n",
|
| 647 |
-
" 'Gradient Boosting' : GradientBoostingRegressor(random_state=42),\n",
|
| 648 |
-
" 'XGBoost' : xgb.XGBRegressor(random_state=42),\n",
|
| 649 |
-
" 'LightGBM' : lgb.LGBMRegressor(random_state=42),\n",
|
| 650 |
-
"}\n",
|
| 651 |
-
"\n",
|
| 652 |
-
"results = {}\n",
|
| 653 |
-
"\n",
|
| 654 |
-
"for name, model in models.items():\n",
|
| 655 |
-
" model.fit(X_train, y_train)\n",
|
| 656 |
-
" y_pred = model.predict(X_test)\n",
|
| 657 |
-
"\n",
|
| 658 |
-
" rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n",
|
| 659 |
-
" mae = mean_absolute_error(y_test, y_pred)\n",
|
| 660 |
-
" r2 = r2_score(y_test, y_pred)\n",
|
| 661 |
-
"\n",
|
| 662 |
-
" results[name] = {'model': model, 'rmse': rmse, 'mae': mae, 'r2': r2}\n",
|
| 663 |
-
" print(f\"{name:25s} | RMSE: {rmse:.2f} | MAE: {mae:.2f} | R2: {r2:.4f}\")\n",
|
| 664 |
-
"\n",
|
| 665 |
-
"# 최종 선정\n",
|
| 666 |
-
"best_name = max(results, key=lambda x: results[x]['r2'])\n",
|
| 667 |
-
"print(f\"\\n최종 선정: {best_name} (R2: {results[best_name]['r2']:.4f})\")"
|
| 668 |
-
]
|
| 669 |
-
},
|
| 670 |
-
{
|
| 671 |
-
"cell_type": "code",
|
| 672 |
-
"execution_count": null,
|
| 673 |
-
"id": "5ea119e2",
|
| 674 |
-
"metadata": {},
|
| 675 |
-
"outputs": [],
|
| 676 |
-
"source": [
|
| 677 |
-
"import numpy as np\n",
|
| 678 |
-
"import tensorflow as tf\n",
|
| 679 |
-
"from sklearn.tree import DecisionTreeRegressor\n",
|
| 680 |
-
"from sklearn.ensemble import RandomForestRegressor\n",
|
| 681 |
-
"from sklearn.preprocessing import StandardScaler\n",
|
| 682 |
-
"from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error\n",
|
| 683 |
-
"import lightgbm as lgb\n",
|
| 684 |
-
"\n",
|
| 685 |
-
"results = {}\n",
|
| 686 |
-
"\n",
|
| 687 |
-
"# ========================\n",
|
| 688 |
-
"# 1. Decision Tree\n",
|
| 689 |
-
"# ========================\n",
|
| 690 |
-
"dt_model = DecisionTreeRegressor(\n",
|
| 691 |
-
" max_depth=10,\n",
|
| 692 |
-
" min_samples_split=20,\n",
|
| 693 |
-
" random_state=42\n",
|
| 694 |
-
")\n",
|
| 695 |
-
"dt_model.fit(X_train, y_train)\n",
|
| 696 |
-
"y_pred_dt = dt_model.predict(X_test)\n",
|
| 697 |
-
"\n",
|
| 698 |
-
"results['Decision Tree'] = {\n",
|
| 699 |
-
" 'model' : dt_model,\n",
|
| 700 |
-
" 'rmse' : np.sqrt(mean_squared_error(y_test, y_pred_dt)),\n",
|
| 701 |
-
" 'mae' : mean_absolute_error(y_test, y_pred_dt),\n",
|
| 702 |
-
" 'r2' : r2_score(y_test, y_pred_dt)\n",
|
| 703 |
-
"}\n",
|
| 704 |
-
"\n",
|
| 705 |
-
"# ========================\n",
|
| 706 |
-
"# 2. Random Forest\n",
|
| 707 |
-
"# ========================\n",
|
| 708 |
-
"rf_model = RandomForestRegressor(\n",
|
| 709 |
-
" n_estimators=200,\n",
|
| 710 |
-
" max_depth=10,\n",
|
| 711 |
-
" min_samples_split=20,\n",
|
| 712 |
-
" random_state=42\n",
|
| 713 |
-
")\n",
|
| 714 |
-
"rf_model.fit(X_train, y_train)\n",
|
| 715 |
-
"y_pred_rf = rf_model.predict(X_test)\n",
|
| 716 |
-
"\n",
|
| 717 |
-
"results['Random Forest'] = {\n",
|
| 718 |
-
" 'model' : rf_model,\n",
|
| 719 |
-
" 'rmse' : np.sqrt(mean_squared_error(y_test, y_pred_rf)),\n",
|
| 720 |
-
" 'mae' : mean_absolute_error(y_test, y_pred_rf),\n",
|
| 721 |
-
" 'r2' : r2_score(y_test, y_pred_rf)\n",
|
| 722 |
-
"}\n",
|
| 723 |
-
"\n",
|
| 724 |
-
"# ========================\n",
|
| 725 |
-
"# 3. LightGBM\n",
|
| 726 |
-
"# ========================\n",
|
| 727 |
-
"lgb_model = lgb.LGBMRegressor(\n",
|
| 728 |
-
" n_estimators=500,\n",
|
| 729 |
-
" learning_rate=0.05,\n",
|
| 730 |
-
" max_depth=6,\n",
|
| 731 |
-
" num_leaves=31,\n",
|
| 732 |
-
" random_state=42\n",
|
| 733 |
-
")\n",
|
| 734 |
-
"lgb_model.fit(X_train, y_train)\n",
|
| 735 |
-
"y_pred_lgb = lgb_model.predict(X_test)\n",
|
| 736 |
-
"\n",
|
| 737 |
-
"results['LightGBM'] = {\n",
|
| 738 |
-
" 'model' : lgb_model,\n",
|
| 739 |
-
" 'rmse' : np.sqrt(mean_squared_error(y_test, y_pred_lgb)),\n",
|
| 740 |
-
" 'mae' : mean_absolute_error(y_test, y_pred_lgb),\n",
|
| 741 |
-
" 'r2' : r2_score(y_test, y_pred_lgb)\n",
|
| 742 |
-
"}\n",
|
| 743 |
-
"\n",
|
| 744 |
-
"# ========================\n",
|
| 745 |
-
"# 4. TensorFlow DNN 회귀\n",
|
| 746 |
-
"# ========================\n",
|
| 747 |
-
"scaler = StandardScaler()\n",
|
| 748 |
-
"X_train_scaled = scaler.fit_transform(X_train)\n",
|
| 749 |
-
"X_test_scaled = scaler.transform(X_test)\n",
|
| 750 |
-
"\n",
|
| 751 |
-
"# 수정된 DNN 모델\n",
|
| 752 |
-
"tf_model = tf.keras.Sequential([\n",
|
| 753 |
-
" tf.keras.layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)),\n",
|
| 754 |
-
" tf.keras.layers.BatchNormalization(),\n",
|
| 755 |
-
" # 초기 단계에서는 Dropout을 제외하거나 낮게 설정\n",
|
| 756 |
-
" \n",
|
| 757 |
-
" tf.keras.layers.Dense(128, activation='relu'),\n",
|
| 758 |
-
" tf.keras.layers.BatchNormalization(),\n",
|
| 759 |
-
" \n",
|
| 760 |
-
" tf.keras.layers.Dense(64, activation='relu'),\n",
|
| 761 |
-
" tf.keras.layers.Dense(1) # 회귀를 위한 선형 출력\n",
|
| 762 |
-
"])\n",
|
| 763 |
-
"\n",
|
| 764 |
-
"tf_model.compile(\n",
|
| 765 |
-
" optimizer=tf.keras.optimizers.Adam(learning_rate=0.005), # 학습률 상향\n",
|
| 766 |
-
" loss='mse',\n",
|
| 767 |
-
" metrics=['mae']\n",
|
| 768 |
-
")\n",
|
| 769 |
-
"\n",
|
| 770 |
-
"# 조기 종료 설정 (교재 반영)\n",
|
| 771 |
-
"early_stop = tf.keras.callbacks.EarlyStopping(\n",
|
| 772 |
-
" monitor='val_loss', \n",
|
| 773 |
-
" patience=20, # 충분히 학습하도록 인내심 상향\n",
|
| 774 |
-
" restore_best_weights=True\n",
|
| 775 |
-
")\n",
|
| 776 |
-
"\n",
|
| 777 |
-
"tf_model.fit(\n",
|
| 778 |
-
" X_train_scaled, y_train,\n",
|
| 779 |
-
" epochs=500,\n",
|
| 780 |
-
" batch_size=64, # 배치를 작게 하여 더 자주 가중치 업데이트 [cite: 3079]\n",
|
| 781 |
-
" validation_split=0.2,\n",
|
| 782 |
-
" callbacks=[early_stop],\n",
|
| 783 |
-
" verbose=0\n",
|
| 784 |
-
")\n",
|
| 785 |
-
"y_pred_tf = tf_model.predict(X_test_scaled).flatten()\n",
|
| 786 |
-
"\n",
|
| 787 |
-
"results['TensorFlow DNN'] = {\n",
|
| 788 |
-
" 'model' : tf_model,\n",
|
| 789 |
-
" 'rmse' : np.sqrt(mean_squared_error(y_test, y_pred_tf)),\n",
|
| 790 |
-
" 'mae' : mean_absolute_error(y_test, y_pred_tf),\n",
|
| 791 |
-
" 'r2' : r2_score(y_test, y_pred_tf)\n",
|
| 792 |
-
"}\n",
|
| 793 |
-
"\n",
|
| 794 |
-
"# ========================\n",
|
| 795 |
-
"# 최종 결과 비교\n",
|
| 796 |
-
"# ========================\n",
|
| 797 |
-
"print(f\"\\n{'='*65}\")\n",
|
| 798 |
-
"print(f\"{'모델':<20} | {'RMSE':>8} | {'MAE':>8} | {'R2':>8}\")\n",
|
| 799 |
-
"print(f\"{'='*65}\")\n",
|
| 800 |
-
"\n",
|
| 801 |
-
"for name, res in sorted(results.items(), key=lambda x: x[1]['r2'], reverse=True):\n",
|
| 802 |
-
" print(f\"{name:<20} | {res['rmse']:>8.2f} | {res['mae']:>8.2f} | {res['r2']:>8.4f}\")\n",
|
| 803 |
-
"\n",
|
| 804 |
-
"best_name = max(results, key=lambda x: results[x]['r2'])\n",
|
| 805 |
-
"print(f\"\\n최종 선정: {best_name} (R2: {results[best_name]['r2']:.4f})\")"
|
| 806 |
-
]
|
| 807 |
-
}
|
| 808 |
-
],
|
| 809 |
-
"metadata": {
|
| 810 |
-
"kernelspec": {
|
| 811 |
-
"display_name": ".venv",
|
| 812 |
-
"language": "python",
|
| 813 |
-
"name": "python3"
|
| 814 |
-
},
|
| 815 |
-
"language_info": {
|
| 816 |
-
"codemirror_mode": {
|
| 817 |
-
"name": "ipython",
|
| 818 |
-
"version": 3
|
| 819 |
-
},
|
| 820 |
-
"file_extension": ".py",
|
| 821 |
-
"mimetype": "text/x-python",
|
| 822 |
-
"name": "python",
|
| 823 |
-
"nbconvert_exporter": "python",
|
| 824 |
-
"pygments_lexer": "ipython3",
|
| 825 |
-
"version": "3.10.6"
|
| 826 |
-
}
|
| 827 |
-
},
|
| 828 |
-
"nbformat": 4,
|
| 829 |
-
"nbformat_minor": 5
|
| 830 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/model_quantative_eval.ipynb
DELETED
|
@@ -1,142 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": null,
|
| 6 |
-
"id": "520dfcbe",
|
| 7 |
-
"metadata": {},
|
| 8 |
-
"outputs": [
|
| 9 |
-
{
|
| 10 |
-
"name": "stderr",
|
| 11 |
-
"output_type": "stream",
|
| 12 |
-
"text": [
|
| 13 |
-
"/Users/yuje/aiProjects/llm_prj/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 14 |
-
" from .autonotebook import tqdm as notebook_tqdm\n",
|
| 15 |
-
"Using the latest cached version of the dataset since KRX-Data/Won-Instruct couldn't be found on the Hugging Face Hub\n",
|
| 16 |
-
"Found the latest cached dataset configuration 'default' at /Users/yuje/.cache/huggingface/datasets/KRX-Data___won-instruct/default/0.0.0/1a3648dbef97a48736111d646fa29c4d562f8fd5 (last modified on Sun Apr 5 16:29:07 2026).\n"
|
| 17 |
-
]
|
| 18 |
-
},
|
| 19 |
-
{
|
| 20 |
-
"name": "stdout",
|
| 21 |
-
"output_type": "stream",
|
| 22 |
-
"text": [
|
| 23 |
-
"데이터셋 로드 중...\n",
|
| 24 |
-
"\n",
|
| 25 |
-
"🚀 qwen2.5:4b 모델 시험 치는 중...\n"
|
| 26 |
-
]
|
| 27 |
-
},
|
| 28 |
-
{
|
| 29 |
-
"name": "stderr",
|
| 30 |
-
"output_type": "stream",
|
| 31 |
-
"text": [
|
| 32 |
-
"100%|██████████| 50/50 [00:00<00:00, 169.68it/s]\n"
|
| 33 |
-
]
|
| 34 |
-
},
|
| 35 |
-
{
|
| 36 |
-
"name": "stdout",
|
| 37 |
-
"output_type": "stream",
|
| 38 |
-
"text": [
|
| 39 |
-
"\n",
|
| 40 |
-
"🚀 exaone3.5:2.4b 모델 시험 치는 중...\n"
|
| 41 |
-
]
|
| 42 |
-
},
|
| 43 |
-
{
|
| 44 |
-
"name": "stderr",
|
| 45 |
-
"output_type": "stream",
|
| 46 |
-
"text": [
|
| 47 |
-
"100%|██████████| 50/50 [48:27<00:00, 58.15s/it] \n"
|
| 48 |
-
]
|
| 49 |
-
},
|
| 50 |
-
{
|
| 51 |
-
"name": "stdout",
|
| 52 |
-
"output_type": "stream",
|
| 53 |
-
"text": [
|
| 54 |
-
"\n",
|
| 55 |
-
"🚀 exaone3.5:7.8b 모델 시험 치는 중...\n"
|
| 56 |
-
]
|
| 57 |
-
},
|
| 58 |
-
{
|
| 59 |
-
"name": "stderr",
|
| 60 |
-
"output_type": "stream",
|
| 61 |
-
"text": [
|
| 62 |
-
" 4%|▍ | 2/50 [21:00<7:21:57, 552.45s/it] "
|
| 63 |
-
]
|
| 64 |
-
}
|
| 65 |
-
],
|
| 66 |
-
"source": [
|
| 67 |
-
"import ollama\n",
|
| 68 |
-
"from datasets import load_dataset\n",
|
| 69 |
-
"from rouge import Rouge # pip install rouge\n",
|
| 70 |
-
"import pandas as pd\n",
|
| 71 |
-
"from tqdm import tqdm\n",
|
| 72 |
-
"\n",
|
| 73 |
-
"# 1. 정석대로 데이터셋 로드\n",
|
| 74 |
-
"print(\"데이터셋 로드 중...\")\n",
|
| 75 |
-
"ds = load_dataset(\"KRX-Data/Won-Instruct\", split=\"train\")\n",
|
| 76 |
-
"\n",
|
| 77 |
-
"# 2. 평가 모델 리스트 (Ollama)\n",
|
| 78 |
-
"models = [\"exaone3.5:2.4b\", \n",
|
| 79 |
-
" # \"exaone3.5:7.8b\",\n",
|
| 80 |
-
" \"llama3.2:3b\", \n",
|
| 81 |
-
" # \"llama3.1:8b\", \n",
|
| 82 |
-
" \"gemma4:e4b\"] # 일단 이거 사용\n",
|
| 83 |
-
"\n",
|
| 84 |
-
"# 3. 벤치마크 실행 (샘플 50개)\n",
|
| 85 |
-
"test_samples = ds.select(range(50)) \n",
|
| 86 |
-
"rouge = Rouge()\n",
|
| 87 |
-
"results = []\n",
|
| 88 |
-
"\n",
|
| 89 |
-
"for model_name in models:\n",
|
| 90 |
-
" print(f\"\\n🚀 {model_name} 모델 시험 치는 중...\")\n",
|
| 91 |
-
" total_score = 0\n",
|
| 92 |
-
" \n",
|
| 93 |
-
" for item in tqdm(test_samples):\n",
|
| 94 |
-
" # Won-Instruct의 필드명 사용\n",
|
| 95 |
-
" prompt = item['prompt']\n",
|
| 96 |
-
" reference = item['original_response'] # 이게 '정답지' 역할을 합니다.\n",
|
| 97 |
-
" \n",
|
| 98 |
-
" try:\n",
|
| 99 |
-
" response = ollama.chat(\n",
|
| 100 |
-
" model=model_name, \n",
|
| 101 |
-
" messages=[{'role': 'user', 'content': prompt}],\n",
|
| 102 |
-
" options={'temperature': 0} # 벤치마크는 결과가 고정되어야 하므로 0\n",
|
| 103 |
-
" )\n",
|
| 104 |
-
" prediction = response['message']['content']\n",
|
| 105 |
-
" \n",
|
| 106 |
-
" # 정량 지표 계산 (ROUGE-L)\n",
|
| 107 |
-
" if prediction.strip():\n",
|
| 108 |
-
" scores = rouge.get_scores(prediction, reference)\n",
|
| 109 |
-
" total_score += scores[0]['rouge-l']['f']\n",
|
| 110 |
-
" except:\n",
|
| 111 |
-
" continue\n",
|
| 112 |
-
"\n",
|
| 113 |
-
" avg_acc = (total_score / len(test_samples)) * 100\n",
|
| 114 |
-
" results.append({\"Model\": model_name, \"Benchmark_Score\": f\"{avg_acc:.2f}\"})\n",
|
| 115 |
-
"\n",
|
| 116 |
-
"# 4. 결과 출력\n",
|
| 117 |
-
"print(\"\\n\" + pd.DataFrame(results).to_markdown(index=False))"
|
| 118 |
-
]
|
| 119 |
-
}
|
| 120 |
-
],
|
| 121 |
-
"metadata": {
|
| 122 |
-
"kernelspec": {
|
| 123 |
-
"display_name": ".venv",
|
| 124 |
-
"language": "python",
|
| 125 |
-
"name": "python3"
|
| 126 |
-
},
|
| 127 |
-
"language_info": {
|
| 128 |
-
"codemirror_mode": {
|
| 129 |
-
"name": "ipython",
|
| 130 |
-
"version": 3
|
| 131 |
-
},
|
| 132 |
-
"file_extension": ".py",
|
| 133 |
-
"mimetype": "text/x-python",
|
| 134 |
-
"name": "python",
|
| 135 |
-
"nbconvert_exporter": "python",
|
| 136 |
-
"pygments_lexer": "ipython3",
|
| 137 |
-
"version": "3.10.6"
|
| 138 |
-
}
|
| 139 |
-
},
|
| 140 |
-
"nbformat": 4,
|
| 141 |
-
"nbformat_minor": 5
|
| 142 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llm/chabot_chain.py
DELETED
|
@@ -1,58 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import streamlit as st
|
| 3 |
-
from langchain_huggingface import HuggingFaceEmbeddings
|
| 4 |
-
from langchain_community.vectorstores import FAISS
|
| 5 |
-
from langchain_classic.chains import ConversationalRetrievalChain
|
| 6 |
-
from langchain_ollama import OllamaLLM
|
| 7 |
-
from vectorization import make_vectorization
|
| 8 |
-
|
| 9 |
-
st.set_page_config(page_title="Page2: RAG chatbot")
|
| 10 |
-
st.title("Langchain + Gemma4:e4b")
|
| 11 |
-
|
| 12 |
-
@st.cache_resource
def load_models():
    """Create the embedding model and the Ollama LLM used by the chatbot.

    Returns:
        A ``(embedding_model, llm)`` tuple: a ``HuggingFaceEmbeddings``
        instance followed by an ``OllamaLLM`` instance.
    """
    # The tuple elements are evaluated left to right, so the embedding model
    # is still constructed before the LLM client.
    return (
        HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-small"),
        OllamaLLM(model="gemma4:e4b"),
    )
|
| 17 |
-
|
| 18 |
-
@st.cache_resource
def get_vectorstore(_embedding_model):
    """Return the FAISS vector store and a retriever built on top of it.

    Loads the index from ``./faiss`` when ``./faiss/index.faiss`` exists;
    otherwise builds a new one through ``make_vectorization()``.

    Args:
        _embedding_model: Embedding model passed to ``FAISS.load_local``.
            NOTE(review): the leading underscore is presumably there so the
            Streamlit cache does not try to hash this argument - confirm
            against the ``st.cache_resource`` documentation.

    Returns:
        A ``(vectorstore, retriever)`` tuple; the retriever is configured
        with ``k=3``.
    """
    if not os.path.exists("./faiss/index.faiss"):
        store = make_vectorization()
    else:
        store = FAISS.load_local(
            "./faiss",
            _embedding_model,
            allow_dangerous_deserialization=True,
        )
    return store, store.as_retriever(search_kwargs={"k": 3})
|
| 26 |
-
|
| 27 |
-
embedding_model, llm = load_models()
|
| 28 |
-
vectorstore, retriever = get_vectorstore(embedding_model)
|
| 29 |
-
qa_chain = ConversationalRetrievalChain.from_llm(llm, retriever)
|
| 30 |
-
|
| 31 |
-
if "chat_history" not in st.session_state:
|
| 32 |
-
st.session_state.chat_history = []
|
| 33 |
-
if "messages" not in st.session_state:
|
| 34 |
-
st.session_state.messages = []
|
| 35 |
-
|
| 36 |
-
for msg in st.session_state.messages:
|
| 37 |
-
with st.chat_message(msg["role"]):
|
| 38 |
-
st.markdown(msg["content"])
|
| 39 |
-
|
| 40 |
-
if user_question := st.chat_input("Ask your question :)"):
|
| 41 |
-
|
| 42 |
-
with st.chat_message("user"):
|
| 43 |
-
st.markdown(user_question)
|
| 44 |
-
st.session_state.messages.append({"role": "user", "content": user_question})
|
| 45 |
-
|
| 46 |
-
with st.chat_message("assistant"):
|
| 47 |
-
with st.spinner("Thinking..."):
|
| 48 |
-
result = qa_chain.invoke({"question": user_question, "chat_history": st.session_state.chat_history})
|
| 49 |
-
answer = result["answer"]
|
| 50 |
-
st.markdown(answer)
|
| 51 |
-
|
| 52 |
-
st.session_state.messages.append({"role": "assistant", "content": answer})
|
| 53 |
-
st.session_state.chat_history.append((user_question, answer))
|
| 54 |
-
|
| 55 |
-
if st.button("Clear Conversation"):
|
| 56 |
-
st.session_state.chat_history = []
|
| 57 |
-
st.session_state.messages = []
|
| 58 |
-
st.rerun()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llm/prompt.py
CHANGED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_core.prompts import PromptTemplate
|
| 2 |
+
|
| 3 |
+
QA_TEMPLATE = """당신은 KCB 신용평가 전문가이자 따뜻하고 친절한 신용 상담사입니다.
|
| 4 |
+
기계적인 말투를 피하고, 실제 사람과 대화하듯 자연스럽게 상담해 주세요.
|
| 5 |
+
|
| 6 |
+
[참고 지식]
|
| 7 |
+
{context}
|
| 8 |
+
|
| 9 |
+
[고객 상황 및 질문]
|
| 10 |
+
{query}
|
| 11 |
+
|
| 12 |
+
[상담 가이드]
|
| 13 |
+
1. 어조(Tone): 고객의 고민에 공감하며, "~요", "~습니다"를 적절히 섞어 따뜻하고 부드러운 대화체로 답변하세요.
|
| 14 |
+
2. 답변 구조:
|
| 15 |
+
- 인사 및 공감 (예: "안녕하세요! 현재 점수에 대해 고민이 많으셨군요. 제가 차근차근 설명해 드릴게요.")
|
| 16 |
+
- 고객의 현재 지표를 바탕으로 한 구체적인 칭찬 또는 원인 분석 (예: "고객님의 경우 연체 건수가 아쉬운 부분입니다.")
|
| 17 |
+
- [참고 지식]을 활용한 실질적인 점수 상승 팁 제시
|
| 18 |
+
- 마무리 인사 및 격려
|
| 19 |
+
3. 주의사항:
|
| 20 |
+
- "사용자 데이터와 질문을 종합적으로 고려하여" 같은 딱딱하고 기계적인 표현은 절대 쓰지 마세요.
|
| 21 |
+
- 고객이 제공한 데이터 밖의 내용을 지어내지 마세요.
|
| 22 |
+
- 불필요한 시스템 태그("[사용자 질문]", "답변:")를 출력하지 마세요. 바로 사람에게 말하듯 시작하세요.
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
QA_PROMPT = PromptTemplate(template=QA_TEMPLATE, input_variables=["context", "query"])
|
llm/vectorization.py
CHANGED
|
@@ -2,24 +2,22 @@ from langchain_community.document_loaders import PyPDFLoader
|
|
| 2 |
from langchain_community.vectorstores import FAISS
|
| 3 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 4 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
|
|
| 5 |
|
| 6 |
def make_vectorization():
|
| 7 |
-
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
text_splitter = RecursiveCharacterTextSplitter(
|
| 13 |
-
chunk_size = 1000,
|
| 14 |
-
chunk_overlap = 100,
|
| 15 |
-
length_function = len,
|
| 16 |
-
is_separator_regex=False
|
| 17 |
-
)
|
| 18 |
-
documents = text_splitter.split_documents(documents)
|
| 19 |
-
vectorstore = FAISS.from_documents(documents, embedding_model)
|
| 20 |
-
vectorstore.save_local("../faiss")
|
| 21 |
-
|
| 22 |
-
return vectorstore
|
| 23 |
-
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
|
|
|
|
|
| 2 |
from langchain_community.vectorstores import FAISS
|
| 3 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 4 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 5 |
+
from config import EMBEDDING_MODEL, FAISS_PATH, PDF_SOURCE
|
| 6 |
|
| 7 |
def make_vectorization():
    """Build a FAISS index from the configured PDF and save it locally.

    The PDF at ``PDF_SOURCE`` is loaded, split into chunks of 1000
    characters with an overlap of 100, embedded with ``EMBEDDING_MODEL``,
    and the resulting index is written to ``FAISS_PATH``.

    Returns:
        The FAISS vector store that was just built.
    """
    # The embedding model is created before the PDF is read, as in the
    # original statement order.
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    pages = PyPDFLoader(PDF_SOURCE).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = splitter.split_documents(pages)

    index = FAISS.from_documents(chunks, embeddings)
    index.save_local(FAISS_PATH)
    return index
|
evaluation/confusion_matrix.png → models/preprocessor.pkl
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0cafaa6a9a854d12ff332ce454529db31d4f8e6982b91afe81826209de3e9b4
|
| 3 |
+
size 3640
|
evaluation/permutation_importance.png → models/telecom_cb_model.keras
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6535dec164d10666590895d9889acedae334318607a471c192238b6877d847ce
|
| 3 |
+
size 548533
|
predictors/cb_predictors.ipynb
DELETED
|
@@ -1,101 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": 1,
|
| 6 |
-
"id": "9d112a34",
|
| 7 |
-
"metadata": {},
|
| 8 |
-
"outputs": [
|
| 9 |
-
{
|
| 10 |
-
"name": "stderr",
|
| 11 |
-
"output_type": "stream",
|
| 12 |
-
"text": [
|
| 13 |
-
"/Users/yuje/aiProjects/llm_prj/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 14 |
-
" from .autonotebook import tqdm as notebook_tqdm\n",
|
| 15 |
-
"100%|██████████| 23/23 [00:01<00:00, 18.58it/s]1<00:00, 9.42it/s, Describe variable: PYE_SC0000000] \n",
|
| 16 |
-
"Summarize dataset: 100%|██████████| 153/153 [00:19<00:00, 7.95it/s, Completed] \n",
|
| 17 |
-
"Generate report structure: 100%|██████████| 1/1 [00:02<00:00, 2.59s/it]\n",
|
| 18 |
-
"Render HTML: 100%|██████████| 1/1 [00:00<00:00, 1.64it/s]\n",
|
| 19 |
-
"Export report to file: 100%|██████████| 1/1 [00:00<00:00, 118.82it/s]"
|
| 20 |
-
]
|
| 21 |
-
},
|
| 22 |
-
{
|
| 23 |
-
"name": "stdout",
|
| 24 |
-
"output_type": "stream",
|
| 25 |
-
"text": [
|
| 26 |
-
"📊 Profiling Report Created\n"
|
| 27 |
-
]
|
| 28 |
-
},
|
| 29 |
-
{
|
| 30 |
-
"name": "stderr",
|
| 31 |
-
"output_type": "stream",
|
| 32 |
-
"text": [
|
| 33 |
-
"\n"
|
| 34 |
-
]
|
| 35 |
-
}
|
| 36 |
-
],
|
| 37 |
-
"source": [
|
| 38 |
-
"import pandas as pd\n",
|
| 39 |
-
"\n",
|
| 40 |
-
"df= pd.read_csv(\"../dataset/telecom_cb.csv\")\n",
|
| 41 |
-
"\n",
|
| 42 |
-
"# TRIM csv because the number of column is 160... can't open in PC\n",
|
| 43 |
-
"keep_columns = [\n",
|
| 44 |
-
"'SEX',\n",
|
| 45 |
-
"'AGE',\n",
|
| 46 |
-
"'JB_TP',\n",
|
| 47 |
-
"'LIF_STG',\n",
|
| 48 |
-
"'HB_1ST',\n",
|
| 49 |
-
"'BUY_LUX_YN',\n",
|
| 50 |
-
"'CAR_YN',\n",
|
| 51 |
-
"'VIP_CARD_YN',\n",
|
| 52 |
-
"'TRAVEL_OS',\n",
|
| 53 |
-
"'TRAVEL_JJ',\n",
|
| 54 |
-
"'GOLF_INDOOR',\n",
|
| 55 |
-
"'PREFER_SPORTS',\n",
|
| 56 |
-
"'FST_CAR_ELPS',\n",
|
| 57 |
-
"'TOT_ASST',\n",
|
| 58 |
-
"'PYE_FAM_CNT',\n",
|
| 59 |
-
"'OWN_HOUS_CNT',\n",
|
| 60 |
-
"'OWN_LIV_YN',\n",
|
| 61 |
-
"'FAM_OWN_HOUS_CNT',\n",
|
| 62 |
-
"'FAM_OWN_LIV_YN',\n",
|
| 63 |
-
"'HOUS_LN_BAL',\n",
|
| 64 |
-
"'CRDT_LN_BAL',\n",
|
| 65 |
-
"'CD_USE_AMT',\n",
|
| 66 |
-
"'PYE_SC0000000',\n",
|
| 67 |
-
"]\n",
|
| 68 |
-
"df = df[keep_columns]\n",
|
| 69 |
-
"df.to_csv('../dataset/trimmed_telecom_cb.csv', index=False)\n",
|
| 70 |
-
"\n",
|
| 71 |
-
"from ydata_profiling import ProfileReport\n",
|
| 72 |
-
"profile = ProfileReport(df, title=\"Trimmed CB Dataset Profiling Report\")\n",
|
| 73 |
-
"\n",
|
| 74 |
-
"outputfile = '../dataset/report.html'\n",
|
| 75 |
-
"profile.to_file(outputfile)\n",
|
| 76 |
-
"print(\"📊 Profiling Report Created\")\n"
|
| 77 |
-
]
|
| 78 |
-
}
|
| 79 |
-
],
|
| 80 |
-
"metadata": {
|
| 81 |
-
"kernelspec": {
|
| 82 |
-
"display_name": ".venv",
|
| 83 |
-
"language": "python",
|
| 84 |
-
"name": "python3"
|
| 85 |
-
},
|
| 86 |
-
"language_info": {
|
| 87 |
-
"codemirror_mode": {
|
| 88 |
-
"name": "ipython",
|
| 89 |
-
"version": 3
|
| 90 |
-
},
|
| 91 |
-
"file_extension": ".py",
|
| 92 |
-
"mimetype": "text/x-python",
|
| 93 |
-
"name": "python",
|
| 94 |
-
"nbconvert_exporter": "python",
|
| 95 |
-
"pygments_lexer": "ipython3",
|
| 96 |
-
"version": "3.10.6"
|
| 97 |
-
}
|
| 98 |
-
},
|
| 99 |
-
"nbformat": 4,
|
| 100 |
-
"nbformat_minor": 5
|
| 101 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
predictors/score_predict.ipynb
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"metadata": {
|
| 7 |
+
"colab": {
|
| 8 |
+
"base_uri": "https://localhost:8080/"
|
| 9 |
+
},
|
| 10 |
+
"executionInfo": {
|
| 11 |
+
"elapsed": 20954,
|
| 12 |
+
"status": "ok",
|
| 13 |
+
"timestamp": 1776342392847,
|
| 14 |
+
"user": {
|
| 15 |
+
"displayName": "탁유제",
|
| 16 |
+
"userId": "06706106398592029119"
|
| 17 |
+
},
|
| 18 |
+
"user_tz": -540
|
| 19 |
+
},
|
| 20 |
+
"id": "ylszil82Df6M",
|
| 21 |
+
"outputId": "5e0b6c01-08ed-4d3e-c15d-8810a89f42ac"
|
| 22 |
+
},
|
| 23 |
+
"outputs": [
|
| 24 |
+
{
|
| 25 |
+
"name": "stdout",
|
| 26 |
+
"output_type": "stream",
|
| 27 |
+
"text": [
|
| 28 |
+
"Mounted at /content/drive\n"
|
| 29 |
+
]
|
| 30 |
+
}
|
| 31 |
+
],
|
| 32 |
+
"source": [
|
| 33 |
+
"import pandas as pd\n",
|
| 34 |
+
"import numpy as np\n",
|
| 35 |
+
"import seaborn as sns\n",
|
| 36 |
+
"import joblib\n",
|
| 37 |
+
"import matplotlib.pyplot as plt\n",
|
| 38 |
+
"import matplotlib.font_manager as fm\n",
|
| 39 |
+
"import scipy.stats as stats\n",
|
| 40 |
+
"import tensorflow as tf\n",
|
| 41 |
+
"from tensorflow.keras import layers, Sequential, callbacks\n",
|
| 42 |
+
"from sklearn.compose import ColumnTransformer\n",
|
| 43 |
+
"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
|
| 44 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 45 |
+
"from sklearn.metrics import r2_score, mean_absolute_error\n",
|
| 46 |
+
"import os\n",
|
| 47 |
+
"import warnings\n",
|
| 48 |
+
"\n",
|
| 49 |
+
"warnings.filterwarnings('ignore')\n",
|
| 50 |
+
"\n",
|
| 51 |
+
"# !apt-get -qq install fonts-nanum > /dev/null\n",
|
| 52 |
+
"font_path = '/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf'\n",
|
| 53 |
+
"fe = fm.FontEntry(fname=font_path, name='NanumBarunGothic')\n",
|
| 54 |
+
"fm.fontManager.ttflist.insert(0, fe)\n",
|
| 55 |
+
"plt.rc('font', family='NanumBarunGothic')\n",
|
| 56 |
+
"plt.rcParams['axes.unicode_minus'] = False\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"name_mapping = {\n",
|
| 59 |
+
" 'C1Z001386': '1년내카드총이용금액', 'C1M210000': '신용카드건수',\n",
|
| 60 |
+
" 'C18210000': '체크카드건수', 'C1L120001': '카드총한도금액',\n",
|
| 61 |
+
" 'C1L120004': '신용카드개설후경과일수', 'L10210000': '대출건수',\n",
|
| 62 |
+
" 'L90210100': '은행업종대출건수', 'L90210200': '카드업종대출건수',\n",
|
| 63 |
+
" 'L10210B00': '보험업종대출건수', 'L10216000': '신용대출건수',\n",
|
| 64 |
+
" 'L10217000': '담보대출건수', 'D10110000': '연체건수',\n",
|
| 65 |
+
" 'D10133000': '총연체상환금액', 'PERF1': '1년내90일이상연체여부',\n",
|
| 66 |
+
" 'SCORE': '신용평가점수'\n",
|
| 67 |
+
"}\n",
|
| 68 |
+
"\n",
|
| 69 |
+
"path = \"/content/drive/MyDrive/AI-Web/LLMPRJ/CB_predict/dataset/telecom_cb.csv\"\n",
|
| 70 |
+
"use_cols = list(name_mapping.keys())\n",
|
| 71 |
+
"df = pd.read_csv(path, usecols=use_cols, nrows=300000)\n",
|
| 72 |
+
"\n",
|
| 73 |
+
"df['PERF1'] = df['PERF1'].astype(int)\n",
|
| 74 |
+
"df.replace([np.inf, -np.inf], np.nan, inplace=True)\n",
|
| 75 |
+
"df.fillna(0, inplace=True)\n",
|
| 76 |
+
"\n",
|
| 77 |
+
"log_cols = ['C1Z001386', 'C1L120004', 'D10110000', 'D10133000', 'L90210200',\n",
|
| 78 |
+
" 'L10216000', 'L10210B00', 'L10217000', 'L90210100', 'L10210000']\n",
|
| 79 |
+
"df[log_cols] = np.log1p(df[log_cols].clip(lower=0))\n",
|
| 80 |
+
"\n",
|
| 81 |
+
"X = df.drop(columns=['SCORE'])\n",
|
| 82 |
+
"y = df['SCORE']\n",
|
| 83 |
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
| 84 |
+
"\n",
|
| 85 |
+
"mm_cols = ['C1M210000', 'C18210000']\n",
|
| 86 |
+
"std_cols = [c for c in X_train.columns if c not in log_cols + mm_cols]\n",
|
| 87 |
+
"\n",
|
| 88 |
+
"preprocessor = ColumnTransformer([\n",
|
| 89 |
+
" ('log_std', StandardScaler(), log_cols),\n",
|
| 90 |
+
" ('minmax', MinMaxScaler(), mm_cols),\n",
|
| 91 |
+
" ('std', StandardScaler(), std_cols)\n",
|
| 92 |
+
"])\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"X_train_scaled = preprocessor.fit_transform(X_train)\n",
|
| 95 |
+
"X_test_scaled = preprocessor.transform(X_test)\n",
|
| 96 |
+
"\n",
|
| 97 |
+
"best_lr, best_dr, best_epochs = 0.01, 0.3, 100\n",
|
| 98 |
+
"tf.keras.backend.clear_session()\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"model = Sequential([\n",
|
| 101 |
+
" layers.Dense(256, activation='relu', input_shape=(X_train_scaled.shape[1],)),\n",
|
| 102 |
+
" layers.BatchNormalization(),\n",
|
| 103 |
+
" layers.Dropout(best_dr),\n",
|
| 104 |
+
" layers.Dense(128, activation='relu'),\n",
|
| 105 |
+
" layers.BatchNormalization(),\n",
|
| 106 |
+
" layers.Dropout(best_dr),\n",
|
| 107 |
+
" layers.Dense(64, activation='relu'),\n",
|
| 108 |
+
" layers.BatchNormalization(),\n",
|
| 109 |
+
" layers.Dense(32, activation='relu'),\n",
|
| 110 |
+
" layers.Dense(1)\n",
|
| 111 |
+
"])\n",
|
| 112 |
+
"\n",
|
| 113 |
+
"model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=best_lr), loss='mse', metrics=['mae'])\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)\n",
|
| 116 |
+
"history = model.fit(X_train_scaled, y_train, epochs=best_epochs, batch_size=128,\n",
|
| 117 |
+
" validation_split=0.2, callbacks=[early_stop], verbose=1)\n",
|
| 118 |
+
"\n",
|
| 119 |
+
"plt.figure(figsize=(12, 6))\n",
|
| 120 |
+
"plt.plot(history.history['loss'], label='Train Loss', color='orange')\n",
|
| 121 |
+
"plt.plot(history.history['val_loss'], label='Validation Loss', color='green')\n",
|
| 122 |
+
"plt.title('Training and Validation Loss', fontsize=14)\n",
|
| 123 |
+
"plt.xlabel('Epochs')\n",
|
| 124 |
+
"plt.ylabel('Loss')\n",
|
| 125 |
+
"plt.legend(loc='upper right')\n",
|
| 126 |
+
"plt.show()\n",
|
| 127 |
+
"\n",
|
| 128 |
+
"y_pred = model.predict(X_test_scaled).flatten()\n",
|
| 129 |
+
"r2 = r2_score(y_test, y_pred)\n",
|
| 130 |
+
"mae = mean_absolute_error(y_test, y_pred)\n",
|
| 131 |
+
"\n",
|
| 132 |
+
"plt.figure(figsize=(15, 6))\n",
|
| 133 |
+
"sample_range = 440\n",
|
| 134 |
+
"plt.plot(y_test.values[:sample_range], label='Actual Prices', color='blue', linewidth=1.5)\n",
|
| 135 |
+
"plt.plot(y_pred[:sample_range], label='Predicted Prices', color='red', linewidth=1.5)\n",
|
| 136 |
+
"\n",
|
| 137 |
+
"plt.title(f'KCB Score Prediction\\nR2 Score: {r2:.4f} | MAE: {mae:.2f}', fontsize=14)\n",
|
| 138 |
+
"plt.xlabel('Sample Datas')\n",
|
| 139 |
+
"plt.ylabel('KCB Score')\n",
|
| 140 |
+
"plt.legend(loc='upper left')\n",
|
| 141 |
+
"plt.show()\n",
|
| 142 |
+
"\n",
|
| 143 |
+
"save_dir = \"/content/drive/MyDrive/AI-Web/LLMPRJ/CB_predict/model/\"\n",
|
| 144 |
+
"os.makedirs(save_dir, exist_ok=True)\n",
|
| 145 |
+
"model.save(save_dir + \"telecom_cb_model.keras\")\n",
|
| 146 |
+
"joblib.dump(preprocessor, save_dir + \"preprocessor.pkl\")\n",
|
| 147 |
+
"print(f\"✅ 모델 및 전처리기 저장 완료: {save_dir}\")\n",
|
| 148 |
+
"print(f\"📊 최종 성능 - R2: {r2:.4f}, MAE: {mae:.2f}\")"
|
| 149 |
+
]
|
| 150 |
+
}
|
| 151 |
+
],
|
| 152 |
+
"metadata": {
|
| 153 |
+
"colab": {
|
| 154 |
+
"authorship_tag": "ABX9TyNVOknhNFA2EiqGiXNtqGAY",
|
| 155 |
+
"provenance": []
|
| 156 |
+
},
|
| 157 |
+
"kernelspec": {
|
| 158 |
+
"display_name": "Python 3",
|
| 159 |
+
"name": "python3"
|
| 160 |
+
},
|
| 161 |
+
"language_info": {
|
| 162 |
+
"name": "python"
|
| 163 |
+
}
|
| 164 |
+
},
|
| 165 |
+
"nbformat": 4,
|
| 166 |
+
"nbformat_minor": 0
|
| 167 |
+
}
|
predictors/score_prediction.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import joblib
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
import tensorflow as tf
|
| 6 |
+
from typing import Dict, Any
|
| 7 |
+
|
| 8 |
+
class CreditPredictor:
    """Predict a credit score from raw feature values.

    On construction the class tries to load a fitted preprocessor
    (``models/preprocessor.pkl``) and a Keras model
    (``models/telecom_cb_model.keras``). If either is unavailable,
    ``predict`` returns NaN instead of raising.
    """

    # Raw feature columns used to build the single-row input frame.
    ALL_FEATURES = (
        'C1Z001386', 'C1M210000', 'C18210000', 'C1L120001', 'C1L120004',
        'L10210000', 'L90210100', 'L90210200', 'L10210B00', 'L10216000',
        'L10217000', 'D10110000', 'D10133000', 'PERF1',
    )

    # Columns that are clipped at 0 and passed through log1p before the
    # preprocessor is applied.
    LOG1P_FEATURES = (
        'C1Z001386', 'C1L120004', 'D10110000', 'D10133000', 'L90210200',
        'L10216000', 'L10210B00', 'L10217000', 'L90210100', 'L10210000',
    )

    def __init__(self):
        # NOTE(review): both paths are relative to the current working
        # directory, not to this file - confirm how the app is launched.
        self.preprocessor_path = "models/preprocessor.pkl"
        self.model_path = "models/telecom_cb_model.keras"
        self.preprocessor = None
        self.model = None
        self.load_resources()

    def load_resources(self):
        """Load the preprocessor and the model if their files exist.

        Failures are reported on stdout and leave the corresponding
        attribute as ``None``; nothing is raised.
        """
        try:
            if os.path.exists(self.preprocessor_path):
                # NOTE(review): joblib.load unpickles the file, which can run
                # arbitrary code; only load artifacts from a trusted source.
                self.preprocessor = joblib.load(self.preprocessor_path)
                print("Preprocessor loaded successfully.")
            else:
                print(f"Preprocessor file not found: {self.preprocessor_path}")
            if os.path.exists(self.model_path):
                self.model = tf.keras.models.load_model(self.model_path, compile=False)
                print("Model loaded successfully.")
            else:
                print(f"Model file not found: {self.model_path}")
        except Exception as e:
            # Best-effort load: predict() reports NaN when resources are missing.
            print(f"Error loading model/preprocessor: {e}")

    @staticmethod
    def _to_finite_float(value: Any) -> float:
        """Convert *value* to float, mapping None, NaN and +/-inf to 0.0.

        The training notebook (predictors/score_predict.ipynb) replaces
        infinities with NaN and fills NaN with 0 before fitting, so the same
        substitution is applied here at inference time.

        Raises:
            TypeError, ValueError: if *value* cannot be converted to float.
        """
        if value is None:
            return 0.0
        number = float(value)
        return number if np.isfinite(number) else 0.0

    def predict(self, features_dict: Dict[str, Any]) -> float:
        """Return the predicted score for one set of feature values.

        Args:
            features_dict: Mapping from feature name to raw value. Features
                that are absent, None, NaN or infinite are treated as 0.

        Returns:
            The model output as a float, or NaN when the model or the
            preprocessor is not loaded or when prediction fails.
        """
        if self.model is None or self.preprocessor is None:
            return np.nan

        try:
            row = {
                name: self._to_finite_float(features_dict.get(name, 0.0))
                for name in self.ALL_FEATURES
            }
            df = pd.DataFrame([row], columns=list(self.ALL_FEATURES))

            # Same transform as in training: clip negatives to 0, then log1p.
            log_cols = list(self.LOG1P_FEATURES)
            df[log_cols] = np.log1p(df[log_cols].clip(lower=0))

            scaled_data = self.preprocessor.transform(df)
            score = self.model.predict(scaled_data, verbose=0)[0][0]

            return float(score)
        except Exception:
            # Deliberate best-effort boundary: log the failure, report NaN.
            import traceback
            traceback.print_exc()
            return np.nan
|
| 59 |
+
|
| 60 |
+
predictor = CreditPredictor()
|