Spaces:

Corin1998
/

Score

Sleeping

App Files Files Community

Corin1998 committed on Aug 28, 2025

Commit

0a34695

verified ·

1 Parent(s): 92efe44

Create external_scoring.py

Browse files

Files changed (1) hide show

core/external_scoring.py +379 -0

core/external_scoring.py ADDED Viewed

	@@ -0,0 +1,379 @@

+# core/external_scoring.py
+from __future__ import annotations
+from typing import Dict, Any, List, Tuple, Optional
+import pandas as pd
+import math
+import re
+# Public API of this module.
+__all__ = [
+    "get_external_template_df",
+    "fill_missing_with_external",
+    "score_external_from_df",
+    "score_external",          # thin wrapper; the UI only needs to call this one
+]
+# ===== Input template (rows the UI is expected to fill in for the external evaluation) =====
+# Each entry is (category, input item label).
+# NOTE: the label "担保余力評価額(円)" appears under two categories; the scorer's
+# lookup matches on the label only and takes the first matching row.
+_TEMPLATE_ROWS: List[Tuple[str, str]] = [
+    # Management capability
+    ("経営者能力", "予実達成率_3年平均(%)"),
+    ("経営者能力", "監査・内部統制の重大な不備 件数(過去3年)"),
+    ("経営者能力", "重大コンプライアンス件数(過去3年)"),
+    ("経営者能力", "社外取締役比率(%)"),
+    ("経営者能力", "代表者の業界経験年数"),
+    ("経営者能力", "現預金(円)"),
+    ("経営者能力", "月商(円)"),
+    ("経営者能力", "担保余力評価額(円)"),
+    ("経営者能力", "倒産歴の有無(TRUE/FALSE)"),
+    ("経営者能力", "倒産からの経過年数"),
+    ("経営者能力", "重大事件・事故件数(過去10年)"),
+    # Growth
+    ("成長率", "売上_期3(最新期)"),
+    ("成長率", "売上_期2"),
+    ("成長率", "売上_期1(最古期)"),
+    ("成長率", "営業利益_期3(最新期)"),
+    ("成長率", "営業利益_期2"),
+    ("成長率", "営業利益_期1(最古期)"),
+    ("成長率", "主力商品数"),
+    ("成長率", "成長中主力商品数"),
+    # Stability
+    ("安定性", "自己資本比率(%)"),
+    ("安定性", "利益剰余金(円)"),
+    ("安定性", "支払遅延件数(直近12ヶ月)"),
+    ("安定性", "不渡り件数(直近12ヶ月)"),
+    ("安定性", "平均支払遅延日数"),
+    ("安定性", "メインバンク明確か(TRUE/FALSE)"),
+    ("安定性", "借入先数"),
+    ("安定性", "メインバンク借入シェア(%)"),
+    ("安定性", "コミットメントライン等の長期与信枠あり(TRUE/FALSE)"),
+    ("安定性", "担保余力評価額(円)"),
+    ("安定性", "月商(円)_再掲"),
+    ("安定性", "主要顧客上位1社売上比率(%)"),
+    ("安定性", "主要顧客上位3社売上比率(%)"),
+    ("安定性", "主要顧客の平均信用スコア(0-100)"),
+    ("安定性", "不良債権件数(直近12ヶ月)"),
+    ("安定性", "業歴(年)"),
+    # Fairness / overall reputation
+    ("公平性・総合世評", "有価証券報告書提出企業か(TRUE/FALSE)"),
+    ("公平性・総合世評", "決算公告や官報での公開あり(TRUE/FALSE)"),
+    ("公平性・総合世評", "HP/IRサイトで財務資料公開あり(TRUE/FALSE)"),
+    ("公平性・総合世評", "直近更新が定め通りか(TRUE/FALSE)"),
+]
+def get_external_template_df() -> pd.DataFrame:
+    """UI 側で空の雛形を出すときに利用"""
+    return pd.DataFrame([(c, i, "") for c, i in _TEMPLATE_ROWS],
+                        columns=["カテゴリー", "入力項目", "値"])
+def fill_missing_with_external(df: pd.DataFrame, company: str = "", country: str = "") -> pd.DataFrame:
+    """
+    将来：外部DBやLLMで不足値を補完する場所。
+    いまは何もしないでそのまま返す。
+    """
+    return df.copy()
+# ===== Score calculation (quantification & robust unit handling) =====
+# Raw weight of each scored criterion, keyed by (category, criterion name).
+_WEIGHTS = {
+    # Management capability
+    ("経営者能力", "経営姿勢"): 8,
+    ("経営者能力", "事業経験"): 5,
+    ("経営者能力", "資産担保力"): 6,
+    ("経営者能力", "減点事項"): 7,
+    # Growth
+    ("成長率", "売上高伸長性"): 10,
+    ("成長率", "利益伸長性"): 10,
+    ("成長率", "商品"): 6,
+    # Stability
+    ("安定性", "自己資本"): 8,
+    ("安定性", "決済振り"): 10,
+    ("安定性", "金融取引"): 6,
+    ("安定性", "資産担保余力"): 6,
+    ("安定性", "取引先"): 6,
+    ("安定性", "業歴"): 4,
+    # Fairness
+    ("公平性・総合世評", "ディスクロージャー"): 8,
+}
+# Factor that rescales the raw weights above so that they sum to 100.
+_WEIGHT_NORM = 100.0 / float(sum(_WEIGHTS.values()))
+def _clamp(v: float, a: float, b: float) -> float:
+    return max(a, min(b, v))
+def _add(items: List[Dict[str, Any]], cat: str, name: str,
+         raw: float, weight: float, reason: str):
+    items.append({
+        "category": cat,
+        "name": name,
+        "raw": None if raw is None else round(raw, 2),
+        "weight": round(weight * _WEIGHT_NORM, 2),
+        "score": 0.0 if raw is None else round((raw / 10.0) * weight * _WEIGHT_NORM, 2),
+        "reason": reason
+    })
+# ---- 数値パーサ（日本語単位に強い） ----
+_UNIT = {"兆": 1e12, "億": 1e8, "万": 1e4}
+def _to_float(x) -> Optional[float]:
+    if x is None:
+        return None
+    s = str(x).strip()
+    if s == "":
+        return None
+    # ▲, △ は負号扱い
+    sign = -1 if ("▲" in s or s.startswith("-")) else 1
+    # 兆/億/万/千 の単位
+    mul = 1.0
+    for k, v in _UNIT.items():
+        if k in s:
+            mul *= v
+    # 「千円」「3千万円」等
+    if "千" in s:
+        mul *= 1e3
+    # 数字のみ抽出
+    s_num = re.sub(r"[^\d\.]", "", s)
+    if not s_num:
+        return None
+    try:
+        return sign * float(s_num) * mul
+    except Exception:
+        try:
+            return sign * float(s_num)
+        except Exception:
+            return None
+def _to_bool(x) -> Optional[bool]:
+    if x is None:
+        return None
+    s = str(x).strip().lower()
+    if s in ("true", "t", "1", "yes", "y", "有", "あり", "○", "◯"):
+        return True
+    if s in ("false", "f", "0", "no", "n", "無", "なし", "×"):
+        return False
+    return None
+def _ratio(a: Optional[float], b: Optional[float]) -> Optional[float]:
+    if a is None or b is None or b == 0:
+        return None
+    return a / b
+def _ramp(x: Optional[float], good: float, bad: float,
+          lo: float = 0.0, hi: float = 10.0, neutral: Optional[float] = None) -> float:
+    """
+    x が good 側に近いほど高得点（10）、bad 側ほど低得点（0）。
+    欠損は neutral（指定なければ 5）。
+    """
+    if x is None:
+        return neutral if neutral is not None else (lo + hi) / 2.0
+    if good > bad:
+        if x <= bad: return lo
+        if x >= good: return hi
+        return lo + (hi - lo) * (x - bad) / (good - bad)
+    else:
+        if x >= bad: return lo
+        if x <= good: return hi
+        return lo + (hi - lo) * (x - good) / (bad - good)
+# ===== メイン：DataFrame からスコア作成 =====
+def score_external_from_df(df: pd.DataFrame) -> Dict[str, Any]:
+    """
+    df: カラム ["カテゴリー","入力項目","値"] を前提。
+    値は '億', '万', '千円', '▲' などを含んでもOK（自動正規化）。
+    """
+    def ref(label: str):
+        m = df["入力項目"].eq(label)
+        return df.loc[m, "値"].values[0] if m.any() else None
+    items: List[Dict[str, Any]] = []
+    # ---------- 経営者能力 ----------
+    yoy3 = _to_float(ref("予実達成率_3年平均(%)"))
+    audit_bad = _to_float(ref("監査・内部統制の重大な不備 件数(過去3年)"))
+    comp_bad = _to_float(ref("重大コンプライアンス件数(過去3年)"))
+    indep = _to_float(ref("社外取締役比率(%)"))
+    exp_years = _to_float(ref("代表者の業界経験年数"))
+    cash = _to_float(ref("現預金(円)"))
+    sales_m = _to_float(ref("月商(円)"))
+    collat = _to_float(ref("担保余力評価額(円)"))
+    has_bk = _to_bool(ref("倒産歴の有無(TRUE/FALSE)"))
+    bk_years = _to_float(ref("倒産からの経過年数"))
+    incidents = _to_float(ref("重大事件・事故件数(過去10年)"))
+    # ---------- 成長率 ----------
+    s1 = _to_float(ref("売上_期1(最古期)"))
+    s2 = _to_float(ref("売上_期2"))
+    s3 = _to_float(ref("売上_期3(最新期)"))
+    p1 = _to_float(ref("営業利益_期1(最古期)"))
+    p2 = _to_float(ref("営業利益_期2"))
+    p3 = _to_float(ref("営業利益_期3(最新期)"))
+    prod_all = _to_float(ref("主力商品数"))
+    prod_grow = _to_float(ref("成長中主力商品数"))
+    # ---------- 安定性 ----------
+    equity = _to_float(ref("自己資本比率(%)"))
+    delay_cnt = _to_float(ref("支払遅延件数(直近12ヶ月)"))
+    boun_cnt = _to_float(ref("不渡り件数(直近12ヶ月)"))
+    delay_days = _to_float(ref("平均支払遅延日数"))
+    mainbank = _to_bool(ref("メインバンク明確か(TRUE/FALSE)"))
+    lenders = _to_float(ref("借入先数"))
+    main_share = _to_float(ref("メインバンク借入シェア(%)"))
+    has_line = _to_bool(ref("コミットメントライン等の長期与信枠あり(TRUE/FALSE)"))
+    sales_m2 = _to_float(ref("月商(円)_再掲")) or sales_m
+    top1 = _to_float(ref("主要顧客上位1社売上比率(%)"))
+    top3 = _to_float(ref("主要顧客上位3社売上比率(%)"))
+    cust_score = _to_float(ref("主要顧客の平均信用スコア(0-100)"))
+    npl_cnt = _to_float(ref("不良債権件数(直近12ヶ月)"))
+    years = _to_float(ref("業歴(年)"))
+    # ---------- 公平性 ----------
+    has_sec = _to_bool(ref("有価証券報告書提出企業か(TRUE/FALSE)"))
+    pub_off = _to_bool(ref("決算公告や官報での公開あり(TRUE/FALSE)"))
+    pub_web = _to_bool(ref("HP/IRサイトで財務資料公開あり(TRUE/FALSE)"))
+    upd_on = _to_bool(ref("直近更新が定め通りか(TRUE/FALSE)"))
+    # 比率
+    cash_to_ms = _ratio(cash, sales_m2)
+    coll_to_ms = _ratio(collat, sales_m2)
+    def cagr(v1: Optional[float], v3: Optional[float]) -> Optional[float]:
+        if v1 is None or v3 is None or v1 <= 0:
+            return None
+        try:
+            return (v3 / v1) ** (1 / 2) - 1.0
+        except Exception:
+            return None
+    s_cagr = cagr(s1, s3)
+    p_cagr = cagr(p1, p3)
+    # --- 経営者能力 ---
+    mg_att = (
+        _ramp(yoy3, 90, 50) +
+        _ramp(0 if not audit_bad else -audit_bad, 0, -3) +
+        _ramp(0 if not comp_bad else -comp_bad, 0, -2) +
+        _ramp(indep, 33, 0)
+    ) / 4
+    _add(items, "経営者能力", "経営姿勢", mg_att,
+         _WEIGHTS[("経営者能力", "経営姿勢")],
+         f"予実{yoy3 or '—'}%/監査{int(audit_bad or 0)}/違反{int(comp_bad or 0)}/社外{indep or '—'}%")
+    mg_exp = _ramp(exp_years if exp_years is not None else 5.0, 15, 0)
+    _add(items, "経営者能力", "事業経験", mg_exp,
+         _WEIGHTS[("経営者能力", "事業経験")],
+         f"経験{exp_years if exp_years is not None else '不明→中立'}年")
+    mg_asset = _ramp(cash_to_ms, 1.5, 0.2)
+    _add(items, "経営者能力", "資産担保力", mg_asset,
+         _WEIGHTS[("経営者能力", "資産担保力")],
+         f"現預金/月商≈{round(cash_to_ms, 2) if cash_to_ms else '—'}")
+    if incidents and incidents > 0:
+        pen = 0.0; rs = f"重大事故{int(incidents)}件→大幅減点"
+    elif has_bk:
+        pen = 6.0 if (bk_years and bk_years >= 10) else 3.0
+        rs = f"倒産歴あり（{bk_years or '不明'}年）"
+    else:
+        pen = 10.0; rs = "事故/倒産なし"
+    _add(items, "経営者能力", "減点事項", pen,
+         _WEIGHTS[("経営者能力", "減点事項")], rs)
+    # --- 成長率 ---
+    _add(items, "成長率", "売上高伸長性",
+         _ramp(s_cagr, 0.08, -0.05),
+         _WEIGHTS[("成長率", "売上高伸長性")],
+         f"CAGR売上{round((s_cagr or 0)*100,1) if s_cagr is not None else '—'}%")
+    _add(items, "成長率", "利益伸長性",
+         _ramp(p_cagr, 0.08, -0.05),
+         _WEIGHTS[("成長率", "利益伸長性")],
+         f"CAGR営業{round((p_cagr or 0)*100,1) if p_cagr is not None else '—'}%")
+    # 成長中/全体の比率（0〜1）→ スコアへ線形変換
+    prod_ratio = None
+    if prod_all and prod_all > 0 and prod_grow is not None:
+        prod_ratio = max(0.0, min(1.0, prod_grow / prod_all))
+    prod_score = None if prod_ratio is None else 10.0 * prod_ratio
+    _add(items, "成長率", "商品",
+         5.0 if prod_score is None else prod_score,
+         _WEIGHTS[("成長率", "商品")],
+         f"成長中/主力 ≈ {round(prod_ratio,2) if prod_ratio is not None else '—'}")
+    # --- 安定性 ---
+    _add(items, "安定性", "自己資本",
+         _ramp(equity, 40, 5),
+         _WEIGHTS[("安定性", "自己資本")],
+         f"自己資本比率{equity or '—'}%")
+    if (delay_cnt is not None) or (boun_cnt is not None) or (delay_days is not None):
+        sc = (
+            _ramp(- (delay_cnt or 0), 0, -6) +
+            _ramp(- (boun_cnt or 0), 0, -1) +
+            _ramp(- (delay_days or 0), 0, -30)
+        ) / 3
+        rs = f"遅延{int(delay_cnt or 0)}/不渡{int(boun_cnt or 0)}/平均{int(delay_days or 0)}日"
+    else:
+        sc = _ramp(cash_to_ms, 1.0, 0.2)
+        rs = f"代理：現預金/月商≈{round(cash_to_ms,2) if cash_to_ms else '—'}"
+    _add(items, "安定性", "決済振り",
+         sc, _WEIGHTS[("安定性", "決済振り")], rs)
+    sc_mb = 5.0
+    sc_mb += 2.0 if mainbank else (-0.5 if mainbank is False else 0)
+    sc_mb += 1.0 if has_line else 0.0
+    sc_mb = _clamp(sc_mb, 0, 10)
+    _add(items, "安定性", "金融取引",
+         sc_mb, _WEIGHTS[("安定性", "金融取引")],
+         f"メイン{'有' if mainbank else '無' if mainbank is False else '—'}/与信枠{'有' if has_line else '無' if has_line is False else '—'}")
+    _add(items, "安定性", "資産担保余力",
+         _ramp(coll_to_ms, 4.0, 0.0),
+         _WEIGHTS[("安定性", "資産担保余力")],
+         f"担保/月商≈{round(coll_to_ms,2) if coll_to_ms else '—'}")
+    _add(items, "安定性", "取引先",
+         ( _ramp(- (top1 or 50), 0, -80) +
+           _ramp(cust_score, 80, 50) +
+           _ramp(- (npl_cnt or 1), 0, -3) ) / 3,
+         _WEIGHTS[("安定性", "取引先")],
+         f"上位1社{top1 or '—'}%/信用{cust_score or '—'}/不良{int(npl_cnt or 0)}")
+    _add(items, "安定性", "業歴",
+         _ramp(years, 20, 1),
+         _WEIGHTS[("安定性", "業歴")],
+         f"{years or '—'}年")
+    # --- 公平性・総合世評 ---
+    sc_dis = 0.0
+    sc_dis += 10.0 if has_sec else (7.0 if (pub_off or pub_web) else 4.0)
+    if upd_on:
+        sc_dis += 1.0
+    sc_dis = _clamp(sc_dis, 0, 10)
+    _add(items, "公平性・総合世評", "ディスクロージャー",
+         sc_dis, _WEIGHTS[("公平性・総合世評", "ディスクロージャー")],
+         f"{'有報' if has_sec else '公開あり' if (pub_off or pub_web) else '公開乏しい'} / 更新{'◯' if upd_on else '—'}")
+    total = round(sum(x["score"] for x in items), 1)
+    return {
+        "name": "企業評価（外部）",
+        "external_total": total,
+        "items": items,
+        "notes": "欠損は中立、連続スコア×重み（自動正規化）／日本語単位を自動解釈"
+    }
+# ===== ラッパー：UI から呼びやすい形 =====
+def score_external(fin: Dict[str, Any] | None = None,
+                   external_df: Optional[pd.DataFrame] = None,
+                   company: str = "",
+                   country: str = "") -> Dict[str, Any]:
+    """
+    UI 側では基本この関数を呼ぶ想定。
+    - `external_df` が未指定ならテンプレを自動生成して中立値扱いで採点（ばらつきは小さくなる）
+    - 値が入った DataFrame を渡せば、上の `score_external_from_df` で定量スコア化
+    """
+    if external_df is None or external_df.empty:
+        tmpl = get_external_template_df()
+        filled = fill_missing_with_external(tmpl, company=company, country=country)
+        return score_external_from_df(filled)
+    else:
+        filled = fill_missing_with_external(external_df, company=company, country=country)
+        return score_external_from_df(filled)