# ESF_IR_reporter / core.py
# Corin1998's picture
# Upload 11 files
# e521e71 verified
import pandas as pd
import yaml, datetime, hashlib, json
import math
from pathlib import Path
from templating import get_env, render
from models import CompanyMeta, ReportSections, RenderPayload
from render import html_to_pdf, html_to_docx
# Japanese display labels keyed by ESG metric identifier; metrics without an
# entry are shown under their raw identifier by the lookup in esg_table.
DISPLAY_NAME = dict(
    co2_emissions="CO₂排出量",
    energy_renewable_ratio="再生可能エネルギー比率",
    female_management_ratio="女性管理職比率",
)
def _sha256(p: Path) -> str:
h = hashlib.sha256()
with p.open("rb") as f:
for chunk in iter(lambda: f.read(8192), b""):
h.update(chunk)
return h.hexdigest()
def _require_columns(df: pd.DataFrame, required, name: str):
missing = [c for c in required if c not in df.columns]
if missing:
raise ValueError(f"{name} に必須列がありません: {missing}. 例: {required}")
def load_company_meta(path: str) -> CompanyMeta:
    """Load company metadata from a UTF-8 YAML file.

    The parsed YAML document is expanded as keyword arguments to
    ``CompanyMeta``, so it is expected to be a mapping whose keys match that
    model's fields.
    """
    yaml_text = Path(path).read_text(encoding="utf-8")
    fields = yaml.safe_load(yaml_text)
    return CompanyMeta(**fields)
def load_financials(path: str) -> pd.DataFrame:
    """Load the quarterly financials CSV and normalise its key columns.

    Args:
        path: Location of the CSV file.

    Returns:
        The loaded frame with ``year`` coerced to nullable ``Int64``
        (unparseable values become ``<NA>``) and ``quarter`` rewritten as
        ``"Q<digit>"`` using the first ASCII digit found in the original
        cell. Cells without a digit become the bare string ``"Q"``.

    Raises:
        ValueError: If a required column is missing.
    """
    df = pd.read_csv(path)
    _require_columns(
        df,
        ["year", "quarter", "revenue", "ebit", "net_income", "total_assets", "total_equity"],
        "financials.csv",
    )
    df["year"] = pd.to_numeric(df["year"], errors="coerce").astype("Int64")

    # NFKC folds full-width characters ("Q", "4") into their ASCII
    # equivalents. Without it a cell such as "Q4" produced "Q4", which is
    # never recognised as one of Q1..Q4 downstream.
    quarter_text = df["quarter"].astype(str).str.normalize("NFKC")

    # Only the first digit matters, so case and whitespace need no separate
    # clean-up: "q 3", "3Q" and "Q3" all yield "Q3".
    digit = quarter_text.str.extract(r"([0-9])", expand=False).fillna("")
    df["quarter"] = "Q" + digit
    return df
def load_esg(path: str) -> pd.DataFrame:
    """Load the ESG metrics CSV.

    Verifies that the expected columns exist and converts ``year`` to nullable
    ``Int64`` (values that are not numeric become ``<NA>``).

    Raises:
        ValueError: If a required column is missing.
    """
    expected = ["year", "metric", "value", "unit", "scope", "notes"]
    frame = pd.read_csv(path)
    _require_columns(frame, expected, "esg_metrics.csv")
    numeric_year = pd.to_numeric(frame["year"], errors="coerce")
    frame["year"] = numeric_year.astype("Int64")
    return frame
def compute_kpi(fin_df: pd.DataFrame, fiscal_year: int):
    """Compute headline KPIs from the latest recognised quarter of a fiscal year.

    Args:
        fin_df: Financials frame with at least ``year``, ``quarter``,
            ``revenue``, ``ebit`` and ``net_income`` columns; ``quarter``
            values are expected to be ``"Q1"``..``"Q4"``.
        fiscal_year: The year whose latest quarter is reported.

    Returns:
        A dict with ``revenue``, ``ebit``, ``net_income`` (raw values of the
        latest quarter row) and ``ebit_margin``, ``roe``, ``revenue_yoy``
        (percentages; 0.0 when the denominator is zero or unavailable).

    Raises:
        ValueError: If the year is absent from ``fin_df`` or none of its rows
            has a quarter in Q1..Q4.
    """
    quarter_rank = {"Q1": 1, "Q2": 2, "Q3": 3, "Q4": 4}

    def _ranked_quarters(year):
        # Rows of *year* with a recognised quarter, earliest quarter first.
        rows = fin_df[fin_df["year"] == year].copy()
        rows["q_order"] = rows["quarter"].map(quarter_rank)
        # mergesort is stable, so if a quarter appears twice the row listed
        # last in the input is deterministically the one picked by tail(1).
        return rows.dropna(subset=["q_order"]).sort_values("q_order", kind="mergesort")

    known_years = set(fin_df["year"].dropna().astype(int))
    if fiscal_year not in known_years:
        raise ValueError(f"financials.csv に年度 {fiscal_year} のデータがありません。year 列を確認してください。")

    current = _ranked_quarters(fiscal_year)
    if current.empty:
        raise ValueError(f"年度 {fiscal_year} の quarter が Q1〜Q4 として認識できません。例: Q4")
    latest = current.tail(1)
    latest_rank = latest["q_order"].iloc[0]

    # Year-over-year must compare like with like: use the same quarter of the
    # previous year when it exists. Only if it is missing fall back to the
    # previous year's latest recognised quarter.
    previous = _ranked_quarters(fiscal_year - 1)
    same_quarter = previous[previous["q_order"] == latest_rank]
    prev = same_quarter.tail(1) if not same_quarter.empty else previous.tail(1)

    revenue = float(latest["revenue"].iloc[0])
    ebit = float(latest["ebit"].iloc[0])
    net_income = float(latest["net_income"].iloc[0])
    equity = float(latest["total_equity"].iloc[0]) if "total_equity" in latest else 0.0

    ebit_margin = (ebit / revenue * 100) if revenue else 0.0
    roe = (net_income / equity * 100) if equity else 0.0

    revenue_yoy = 0.0
    if not prev.empty:
        prev_revenue = float(prev["revenue"].iloc[0])
        if prev_revenue != 0:
            revenue_yoy = ((revenue / prev_revenue) - 1) * 100

    return {
        "revenue": revenue,
        "ebit": ebit,
        "net_income": net_income,
        "ebit_margin": ebit_margin,
        "roe": roe,
        "revenue_yoy": revenue_yoy,
    }
def esg_table(df: pd.DataFrame, fiscal_year: int):
    """Build the ESG table rows for one fiscal year.

    Args:
        df: ESG metrics frame with ``year``, ``metric`` and ``value`` columns;
            ``unit`` and ``notes`` are used when present.
        fiscal_year: Only rows whose ``year`` equals this value are included.

    Returns:
        A list of dicts with keys ``display``, ``value``, ``unit`` and
        ``notes``, in the frame's row order. ``display`` is the label from
        ``DISPLAY_NAME`` or, when none is registered, the raw metric name.
    """

    def _blank_if_missing(cell):
        # Empty CSV cells arrive as NaN and would be rendered as the literal
        # text "nan"; show them as an empty string instead.
        if cell is None:
            return ""
        if not isinstance(cell, str) and pd.isna(cell):
            return ""
        return cell

    selected = df[df["year"] == fiscal_year]
    return [
        {
            "display": DISPLAY_NAME.get(record["metric"], record["metric"]),
            "value": record["value"],
            "unit": _blank_if_missing(record.get("unit", "")),
            "notes": _blank_if_missing(record.get("notes", "")),
        }
        for _, record in selected.iterrows()
    ]
def build_sections(meta: CompanyMeta, kpi: dict, esg_rows: list, llm=None) -> ReportSections:
    """Produce the narrative sections of the report.

    When *llm* is truthy its ``generate_ceo_message`` and
    ``generate_risk_opportunity`` methods are called (in that order) with
    ``meta``, ``kpi`` and ``esg_rows``. Otherwise fixed Japanese fallback
    texts are used, the CEO message mentioning ``meta.fiscal_year``.
    """
    if not llm:
        return ReportSections(
            ceo_message=f"{meta.fiscal_year}期は、売上成長と収益性の両立に注力しました。",
            risk_opportunity="主要リスクはマクロ環境と規制動向。機会は生成AI活用と脱炭素需要の拡大です。",
        )

    message = llm.generate_ceo_message(meta, kpi, esg_rows)
    risk_text = llm.generate_risk_opportunity(meta, kpi, esg_rows)
    return ReportSections(ceo_message=message, risk_opportunity=risk_text)
def _s(x):
if x is None:
return ""
if isinstance(x, float) and math.isnan(x):
return ""
return str(x)
def _translate_payload_texts(payload: dict, lang: str, llm):
if not llm or lang == "ja":
return payload
texts = []
texts.append(_s(payload["sections"]["ceo_message"]))
texts.append(_s(payload["sections"]["risk_opportunity"]))
# esg_table display/notes
for row in payload["esg_table"]:
texts.append(_s(row.get("display", "")))
texts.append(_s(row.get("notes", "")))
texts.append(_s(payload["meta"]["report_title"]))
for topic in payload["meta"].get("material_topics", []):
texts.append(_s(topic))
translated = llm.translate_texts(texts, target_lang=lang)
it = iter(translated)
payload["sections"]["ceo_message"] = next(it)
payload["sections"]["risk_opportunity"] = next(it)
for row in payload["esg_table"]:
row["display"] = next(it)
row["notes"] = next(it)
payload["meta"]["report_title"] = next(it)
mt = payload["meta"].get("material_topics", [])
for i in range(len(mt)):
mt[i] = next(it)
return payload
# Built-in template written to disk when the requested template is missing.
_DEFAULT_REPORT_TEMPLATE = """<!doctype html>
<html lang="{{ lang }}">
<head>
<meta charset="utf-8" />
<title>{{ meta.report_title }} - {{ meta.company_name }}</title>
<style>
body { font-family: system-ui, -apple-system, "Segoe UI", Helvetica, Arial; line-height: 1.6; }
h1,h2,h3 { margin: 0.6em 0; }
.kpi { display: grid; grid-template-columns: repeat(3, 1fr); gap: 12px; }
.card { border: 1px solid #ddd; border-radius: 12px; padding: 12px; }
.small { color: #666; font-size: 0.9em; }
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #eee; padding: 8px; text-align: right; }
th { background: #fafafa; }
.left { text-align: left; }
</style>
</head>
<body>
<h1>{{ meta.report_title }}({{ meta.fiscal_year }})</h1>
<p class="small">{{ meta.company_name }} / Ticker: {{ meta.ticker }} / 通貨: {{ meta.currency }}</p>
<h2>CEOメッセージ</h2>
<p>{{ sections.ceo_message }}</p>
<h2>ハイライトKPI</h2>
<div class="kpi">
<div class="card">
<div class="small">売上高</div>
<div><strong>{{ kpi.revenue | round(0) | int }} {{ meta.currency }}</strong></div>
<div class="small">前年比: {{ kpi.revenue_yoy | round(1) }}%</div>
</div>
<div class="card">
<div class="small">EBIT</div>
<div><strong>{{ kpi.ebit | round(0) | int }} {{ meta.currency }}</strong></div>
<div class="small">マージン: {{ kpi.ebit_margin | round(1) }}%</div>
</div>
<div class="card">
<div class="small">純利益</div>
<div><strong>{{ kpi.net_income | round(0) | int }} {{ meta.currency }}</strong></div>
<div class="small">ROE: {{ kpi.roe | round(1) }}%</div>
</div>
</div>
<h2>ESGサマリー</h2>
<table>
<thead>
<tr><th class="left">指標</th><th>値</th><th>単位</th><th class="left">備考</th></tr>
</thead>
<tbody>
{% for row in esg_table %}
<tr>
<td class="left">{{ row.display }}</td>
<td>{{ row.value }}</td>
<td>{{ row.unit }}</td>
<td class="left">{{ row.notes }}</td>
</tr>
{% endfor %}
</tbody>
</table>
<h2>リスク & 機会(要約)</h2>
<p>{{ sections.risk_opportunity }}</p>
<footer class="small">Generated on {{ generated_at }}</footer>
</body>
</html>
"""


def _ensure_parent_dirs(*paths):
    """Create the parent directory of each given output path if needed."""
    for target in paths:
        Path(target).parent.mkdir(parents=True, exist_ok=True)


def _ensure_template(templates_dir, template_name):
    """Create the templates directory and, if the template is missing, write the built-in one."""
    tdir = Path(templates_dir)
    tdir.mkdir(parents=True, exist_ok=True)
    tpl = tdir / template_name
    if not tpl.exists():
        tpl.write_text(_DEFAULT_REPORT_TEMPLATE, encoding="utf-8")


def generate_report(
    company_yaml,
    financials_csv,
    esg_csv,
    templates_dir,
    template_name="report.html.j2",
    out_html="output/report.html",
    out_pdf="output/report.pdf",
    out_docx="output/report.docx",
    lang="ja",
    llm=None,
):
    """Render the report to HTML, PDF and DOCX and return provenance metadata.

    Args:
        company_yaml: Path to the company metadata YAML file.
        financials_csv: Path to the quarterly financials CSV.
        esg_csv: Path to the ESG metrics CSV.
        templates_dir: Directory holding the templates; created if absent.
        template_name: Template file name inside *templates_dir*. When the
            file does not exist a built-in default template is written there.
        out_html: Destination of the rendered HTML.
        out_pdf: Destination path handed to ``html_to_pdf``.
        out_docx: Destination path handed to ``html_to_docx``.
        lang: Language code passed to the template; any value other than
            ``"ja"`` triggers translation when *llm* is given.
        llm: Optional object used to generate the narrative sections and to
            translate texts; fixed fallback texts are used when it is None.

    Returns:
        ``(out_html, out_pdf, out_docx, meta_json)`` where ``meta_json`` holds
        SHA-256 digests of the three input files, the language, the output
        paths, the template location and an ISO timestamp.

    Raises:
        ValueError: Propagated from the loaders / KPI computation when the
            input data is incomplete.
    """
    # The three outputs may live in different directories, so prepare the
    # parent of each one rather than only the HTML one.
    _ensure_parent_dirs(out_html, out_pdf, out_docx)
    _ensure_template(templates_dir, template_name)

    meta = load_company_meta(company_yaml)
    fin = load_financials(financials_csv)
    esg = load_esg(esg_csv)

    kpi = compute_kpi(fin, meta.fiscal_year)
    esg_rows = esg_table(esg, meta.fiscal_year)
    sections = build_sections(meta, kpi, esg_rows, llm=llm)

    # One timestamp for both the report footer and the metadata record.
    now = datetime.datetime.now()

    env = get_env(templates_dir)
    payload = RenderPayload(
        meta=meta,
        esg_table=esg_rows,
        kpi=kpi,
        sections=sections,
        generated_at=now.strftime("%Y-%m-%d %H:%M"),
        lang=lang,
    ).model_dump()
    payload = _translate_payload_texts(payload, lang=lang, llm=llm)

    html = render(env, template_name, payload)
    Path(out_html).write_text(html, encoding="utf-8")
    html_to_pdf(html, out_pdf)
    html_to_docx(html, out_docx)

    meta_json = {
        "inputs": {
            "company_yaml_sha": _sha256(Path(company_yaml)),
            "financials_csv_sha": _sha256(Path(financials_csv)),
            "esg_csv_sha": _sha256(Path(esg_csv)),
            "lang": lang,
        },
        "outputs": {"html": out_html, "pdf": out_pdf, "docx": out_docx},
        "template": {"dir": templates_dir, "name": template_name},
        "generated_at": now.isoformat(timespec="seconds"),
    }
    return out_html, out_pdf, out_docx, meta_json