File size: 12,281 Bytes
5b82238 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 |
import pandas as pd
import yaml, datetime, hashlib, json, math
from pathlib import Path
from templating import get_env, render
from models import CompanyMeta, ReportSections, RenderPayload
from render import html_to_pdf, html_to_docx
from charts import line_chart_base64, materiality_base64
from validators import validate_financials, validate_esg
from typing import Dict, Any, List, Optional
# Maps internal ESG metric keys to the Japanese labels shown in the report.
# esg_table() looks metrics up here and falls back to the raw key when a
# metric is not listed.
DISPLAY_NAME = {
    "co2_emissions": "CO₂排出量",
    "energy_renewable_ratio": "再生可能エネルギー比率",
    "female_management_ratio": "女性管理職比率",
}
def _sha256(p: Path) -> str:
h = hashlib.sha256()
with p.open("rb") as f:
for chunk in iter(lambda: f.read(8192), b""):
h.update(chunk)
return h.hexdigest()
def _require_columns(df: pd.DataFrame, required, name: str):
missing = [c for c in required if c not in df.columns]
if missing:
raise ValueError(f"{name} に必須列がありません: {missing}. 例: {required}")
def load_company_meta(path: str) -> CompanyMeta:
    """Build a ``CompanyMeta`` from the YAML file at ``path``.

    The file is read as UTF-8 and parsed with ``yaml.safe_load``; the parsed
    mapping is passed to ``CompanyMeta`` as keyword arguments.
    """
    raw_text = Path(path).read_text(encoding="utf-8")
    fields = yaml.safe_load(raw_text)
    return CompanyMeta(**fields)
def load_financials(path: str) -> pd.DataFrame:
    """Load the financials table and normalise its ``quarter``/``year`` columns.

    Args:
        path: Source file. A name ending in ``.xlsx`` (case-insensitive) is
            read with ``pd.read_excel``; anything else with ``pd.read_csv``.

    Returns:
        The frame returned by ``validate_financials``, with ``quarter``
        rewritten as ``"Q<digit>"`` (just ``"Q"`` when the cell contains no
        digit) and ``year`` converted to nullable ``Int64`` (unparseable
        values become ``<NA>``).
    """
    if str(path).lower().endswith(".xlsx"):
        df = pd.read_excel(path)
    else:
        df = pd.read_csv(path)
    df = validate_financials(df)

    # Only the first digit of the cell ends up in the result, so the one
    # normalisation that matters is folding full-width forms to ASCII.
    # Without NFKC a cell such as "Q1" produced "Q1", which is never
    # recognised as Q1..Q4 downstream.
    quarter_text = df["quarter"].astype(str).str.normalize("NFKC")
    digit = quarter_text.str.extract(r"([0-9])", expand=False).fillna("")
    df["quarter"] = "Q" + digit

    df["year"] = pd.to_numeric(df["year"], errors="coerce").astype("Int64")
    return df
def load_esg(path: str) -> pd.DataFrame:
    """Load the ESG table and coerce its ``year`` column to nullable ``Int64``.

    A path ending in ``.xlsx`` (case-insensitive) is read with
    ``pd.read_excel``; every other path is read with ``pd.read_csv``. The
    frame is passed through ``validate_esg`` before ``year`` is converted;
    values that cannot be parsed as numbers become ``<NA>``.
    """
    is_excel = str(path).lower().endswith(".xlsx")
    reader = pd.read_excel if is_excel else pd.read_csv
    frame = validate_esg(reader(path))
    numeric_year = pd.to_numeric(frame["year"], errors="coerce")
    frame["year"] = numeric_year.astype("Int64")
    return frame
def compute_kpi(fin_df: pd.DataFrame, fiscal_year: int):
    """Compute headline KPIs from the latest quarter of ``fiscal_year``.

    Args:
        fin_df: Financials with ``year``, ``quarter`` (``"Q1"``..``"Q4"``),
            ``revenue``, ``ebit`` and ``net_income`` columns. ``total_equity``
            is optional.
        fiscal_year: Year whose latest recognised quarter is reported.

    Returns:
        A dict with ``revenue``, ``ebit``, ``net_income``, ``ebit_margin``,
        ``roe`` and ``revenue_yoy``. The last three are percentages and are
        ``0.0`` whenever their denominator is missing, zero or NaN.

    Raises:
        ValueError: If ``fiscal_year`` has no rows, or none of its rows has a
            quarter in ``Q1``..``Q4``.
    """
    order = {"Q1": 1, "Q2": 2, "Q3": 3, "Q4": 4}

    def usable(denominator: float) -> bool:
        # NaN is truthy, so a plain truthiness test would let it through
        # and turn the ratio into NaN.
        return denominator != 0 and not math.isnan(denominator)

    years = set(fin_df["year"].dropna().astype(int))
    if fiscal_year not in years:
        raise ValueError(f"financials.csv に年度 {fiscal_year} のデータがありません。year 列を確認してください。")
    fy = fin_df[fin_df["year"] == fiscal_year].copy()
    if fy.empty:
        raise ValueError(f"年度 {fiscal_year} の四半期データが空です。quarter の表記(Q1~Q4)を確認してください。")

    fy["q_order"] = fy["quarter"].map(order)
    latest = fy.sort_values("q_order").dropna(subset=["q_order"]).tail(1)
    if latest.empty:
        raise ValueError(f"年度 {fiscal_year} の quarter が Q1〜Q4 として認識できません。例: Q4")

    # Year-over-year must compare like with like: use the SAME quarter of the
    # previous year. Taking the previous year's latest quarter would compare
    # e.g. Q2 against Q4 whenever the current year is still in progress.
    latest_quarter = latest["quarter"].iloc[0]
    prev_fy = fin_df[fin_df["year"] == fiscal_year - 1]
    prev = prev_fy[prev_fy["quarter"] == latest_quarter].tail(1)

    revenue = float(latest["revenue"].iloc[0])
    ebit = float(latest["ebit"].iloc[0])
    net_income = float(latest["net_income"].iloc[0])
    equity = float(latest["total_equity"].iloc[0]) if "total_equity" in latest else 0.0

    ebit_margin = (ebit / revenue * 100) if usable(revenue) else 0.0
    roe = (net_income / equity * 100) if usable(equity) else 0.0

    revenue_yoy = 0.0
    if not prev.empty:
        prev_revenue = float(prev["revenue"].iloc[0])
        if usable(prev_revenue):
            revenue_yoy = ((revenue / prev_revenue) - 1) * 100

    return {
        "revenue": revenue, "ebit": ebit, "net_income": net_income,
        "ebit_margin": ebit_margin, "roe": roe, "revenue_yoy": revenue_yoy,
    }
def esg_table(df: pd.DataFrame, fiscal_year: int):
    """Build the ESG summary rows for ``fiscal_year``.

    Args:
        df: ESG frame with ``year``, ``metric`` and ``value`` columns;
            ``unit`` and ``notes`` are optional.
        fiscal_year: Only rows whose ``year`` equals this value are included.

    Returns:
        A list of dicts with keys ``display``, ``value``, ``unit`` and
        ``notes``. ``display`` is the ``DISPLAY_NAME`` label for the metric,
        or the raw metric key when no label is registered.
    """

    def blank_if_missing(cell):
        # Empty CSV/Excel cells arrive as NaN, which would otherwise be
        # rendered literally as "nan" in the report table.
        if pd.api.types.is_scalar(cell) and pd.isna(cell):
            return ""
        return cell

    dfy = df[df["year"] == fiscal_year].copy()
    rows = []
    for _, r in dfy.iterrows():
        metric = r["metric"]
        rows.append({
            "display": DISPLAY_NAME.get(metric, metric),
            "value": r["value"],
            "unit": blank_if_missing(r.get("unit", "")),
            "notes": blank_if_missing(r.get("notes", "")),
        })
    return rows
def build_sections(meta: CompanyMeta, kpi: dict, esg_rows: list, llm=None) -> ReportSections:
    """Produce the narrative sections of the report.

    With a truthy ``llm``, the CEO message and the risk/opportunity text come
    from ``llm.generate_ceo_message`` and ``llm.generate_risk_opportunity``
    (called in that order with ``meta``, ``kpi`` and ``esg_rows``). Otherwise
    fixed Japanese placeholder texts are used.
    """
    if not llm:
        # No generator available: use the canned texts.
        return ReportSections(
            ceo_message=f"{meta.fiscal_year}期は、売上成長と収益性の両立に注力しました。",
            risk_opportunity="主要リスクはマクロ環境と規制動向。機会は生成AI活用と脱炭素需要の拡大です。",
        )
    message = llm.generate_ceo_message(meta, kpi, esg_rows)
    outlook = llm.generate_risk_opportunity(meta, kpi, esg_rows)
    return ReportSections(ceo_message=message, risk_opportunity=outlook)
def _s(x):
if x is None: return ""
if isinstance(x, float) and math.isnan(x): return ""
return str(x)
def _translate_payload_texts(payload: dict, lang: str, llm, glossary: Optional[Dict[str,str]]):
    """Translate the human-readable texts of ``payload`` in place.

    Nothing happens when ``llm`` is falsy or ``lang`` is ``"ja"``. Otherwise
    the following texts are sent to ``llm.translate_texts`` in one batch and
    written back in the same order: the two section texts, each ESG row's
    ``display`` and ``notes``, the report title, and every material topic.

    Args:
        payload: Render payload dict with ``sections``, ``esg_table`` and
            ``meta`` entries.
        lang: Target language code.
        llm: Object exposing ``translate_texts(texts, target_lang=, glossary=)``.
        glossary: Optional term glossary; ``None`` is passed on as ``{}``.

    Returns:
        The same ``payload`` object.

    Raises:
        ValueError: If the translator returns fewer texts than were sent.
            The payload is left untouched in that case.
    """
    if not llm or lang == "ja":
        return payload

    sections = payload["sections"]
    meta = payload["meta"]
    rows = payload["esg_table"]
    # The key may be present with a None value; treat that as "no topics".
    topics = meta.get("material_topics") or []

    texts = [_s(sections["ceo_message"]), _s(sections["risk_opportunity"])]
    for row in rows:
        texts.append(_s(row.get("display", "")))
        texts.append(_s(row.get("notes", "")))
    texts.append(_s(meta["report_title"]))
    texts.extend(_s(topic) for topic in topics)

    translated = list(llm.translate_texts(texts, target_lang=lang, glossary=glossary or {}))
    # Check before writing anything back: a short result would otherwise
    # surface as a bare StopIteration with the payload already half-translated.
    if len(translated) < len(texts):
        raise ValueError(
            f"translate_texts returned {len(translated)} texts, expected {len(texts)}"
        )

    it = iter(translated)
    sections["ceo_message"] = next(it)
    sections["risk_opportunity"] = next(it)
    for row in rows:
        row["display"] = next(it)
        row["notes"] = next(it)
    meta["report_title"] = next(it)
    for i, _ in enumerate(topics):
        topics[i] = next(it)
    return payload
def _load_glossary(glossary_path: Optional[str]) -> Dict[str,str]:
if not glossary_path: return {}
try:
g = yaml.safe_load(Path(glossary_path).read_text(encoding="utf-8"))
return g or {}
except Exception:
return {}
def _load_benchmarks(benchmarks_path: Optional[str]) -> Dict[str,Any]:
if not benchmarks_path: return {}
try:
b = yaml.safe_load(Path(benchmarks_path).read_text(encoding="utf-8"))
return b or {}
except Exception:
return {}
def _build_charts(fin: pd.DataFrame, esg: pd.DataFrame, fiscal_year: int) -> Dict[str,str]:
    """Render the report charts via ``line_chart_base64``.

    Three charts are produced: quarterly revenue for ``fiscal_year``, and the
    per-year history of the ``energy_renewable_ratio`` and
    ``female_management_ratio`` ESG metrics.

    Returns:
        A dict with keys ``revenue``, ``renewable`` and ``female`` holding
        whatever ``line_chart_base64`` returns for each chart.
    """
    quarter_rank = {"Q1":1,"Q2":2,"Q3":3,"Q4":4}

    # Revenue trend: the quarters of the requested fiscal year, in Q1..Q4 order.
    in_year = fin[fin["year"]==fiscal_year]
    by_quarter = in_year.assign(q=in_year["quarter"].map(quarter_rank)).sort_values("q")
    charts = {
        "revenue": line_chart_base64(
            by_quarter["quarter"].tolist(),
            by_quarter["revenue"].tolist(),
            xlabel="Quarter", ylabel="Revenue", title=f"Revenue Trend {fiscal_year}",
        )
    }

    # ESG: yearly time series for the renewable-energy and female-management ratios.
    esg_specs = (
        ("renewable", "energy_renewable_ratio", "Renewable Energy Ratio"),
        ("female", "female_management_ratio", "Female Management Ratio"),
    )
    histories = [
        (key, esg[esg["metric"]==metric].sort_values("year"), title)
        for key, metric, title in esg_specs
    ]
    for key, history, title in histories:
        charts[key] = line_chart_base64(
            history["year"].tolist(), history["value"].tolist(),
            xlabel="Year", ylabel="%", title=title,
        )
    return charts
def generate_report(
    company_yaml,
    financials_csv,
    esg_csv,
    templates_dir,
    template_name="base.html.j2",
    out_html="output/report.html",
    out_pdf="output/report.pdf",
    out_docx="output/report.docx",
    lang="ja",
    llm=None,
    glossary_path: Optional[str] = None,
    benchmarks_path: Optional[str] = None,
    tenant: Optional[str] = None,
    rag_index_dir: Optional[str] = None,
):
    """Generate the report as HTML, PDF and DOCX and return audit metadata.

    Steps: load the inputs, compute KPIs, ESG rows, narrative sections and
    charts, optionally summarise benchmarks, optionally translate the texts,
    render the Jinja template, write the HTML file and hand the HTML to
    ``html_to_pdf`` / ``html_to_docx``.

    Args:
        company_yaml: Path to the company metadata YAML.
        financials_csv: Path to the financials table (CSV or ``.xlsx``).
        esg_csv: Path to the ESG table (CSV or ``.xlsx``).
        templates_dir: Directory holding the templates; created if missing.
        template_name: Template to render. If it does not exist in
            ``templates_dir``, ``base.html.j2`` is used instead, and a
            built-in default is written only when no ``base.html.j2`` exists.
        out_html, out_pdf, out_docx: Output paths; their parent directories
            are created as needed.
        lang: Output language; texts are translated when it is not ``"ja"``
            and ``llm`` is given.
        llm: Optional text generator/translator object.
        glossary_path: Optional YAML glossary passed to the translator.
        benchmarks_path: Optional YAML with industry benchmark values.
        tenant: Optional tenant label shown in the footer and audit data.
        rag_index_dir: Accepted for interface compatibility; not used here.

    Returns:
        ``(out_html, out_pdf, out_docx, meta_json, html)`` where ``meta_json``
        is the audit dict (input hashes, outputs, template, timestamps, LLM
        usage, benchmark summary) and ``html`` is the rendered document.
    """
    # Every output may live in a different directory, so create all parents.
    for target in (out_html, out_pdf, out_docx):
        Path(target).parent.mkdir(parents=True, exist_ok=True)

    # Fall back to the base template when the requested one is missing.
    tdir = Path(templates_dir)
    tdir.mkdir(parents=True, exist_ok=True)
    if not (tdir / template_name).exists():
        fallback = tdir / "base.html.j2"
        # Never overwrite an existing (possibly customised) base template
        # just because some other template name could not be found.
        if not fallback.exists():
            fallback.write_text(_DEFAULT_BASE_TEMPLATE, encoding="utf-8")
        template_name = "base.html.j2"

    meta = load_company_meta(company_yaml)
    fin = load_financials(financials_csv)
    esg = load_esg(esg_csv)
    kpi = compute_kpi(fin, meta.fiscal_year)
    esg_rows = esg_table(esg, meta.fiscal_year)
    sections = build_sections(meta, kpi, esg_rows, llm=llm)
    charts = _build_charts(fin, esg, meta.fiscal_year)
    glossary = _load_glossary(glossary_path)
    benchmarks = _load_benchmarks(benchmarks_path)

    # Optional one-line benchmark comparison.
    benchmark_summary = _summarize_benchmarks(kpi, esg, benchmarks)

    env = get_env(templates_dir)
    payload = RenderPayload(
        meta=meta, esg_table=esg_rows, kpi=kpi, sections=sections,
        generated_at=datetime.datetime.now().strftime("%Y-%m-%d %H:%M"),
        lang=lang
    ).model_dump()
    payload["charts"] = charts
    payload["template_name"] = template_name
    payload["tenant"] = tenant or ""
    # The template has a benchmark section guarded by this variable; without
    # it in the payload the computed summary would never be rendered.
    payload["benchmark_summary"] = benchmark_summary

    # Translate the texts when the target language is not Japanese.
    payload = _translate_payload_texts(payload, lang=lang, llm=llm, glossary=glossary)

    html = render(env, template_name, payload)
    Path(out_html).write_text(html, encoding="utf-8")
    html_to_pdf(html, out_pdf)
    html_to_docx(html, out_docx)

    # Audit metadata describing inputs, outputs and generation context.
    meta_json = {
        "inputs": {
            "company_yaml_sha": _sha256(Path(company_yaml)),
            "financials_csv_sha": _sha256(Path(financials_csv)),
            "esg_csv_sha": _sha256(Path(esg_csv)),
            "lang": lang,
            "tenant": tenant,
            "glossary_keys": list(glossary.keys()) if glossary else [],
            "benchmarks": benchmarks,
        },
        "outputs": {"html": out_html, "pdf": out_pdf, "docx": out_docx},
        "template": {"dir": templates_dir, "name": template_name},
        "generated_at": datetime.datetime.now().isoformat(timespec="seconds"),
        "usage": getattr(llm, "last_usage", {}) if llm else {},
        "benchmark_summary": benchmark_summary,
    }
    return out_html, out_pdf, out_docx, meta_json, html


def _summarize_benchmarks(kpi, esg, benchmarks) -> str:
    """Return a one-line company-vs-industry comparison, or ``""`` if none applies.

    Each metric is handled independently and best-effort: a metric whose data
    is missing or malformed is skipped with a logged warning, and the
    remaining metrics are still reported.
    """
    import logging

    if not benchmarks:
        return ""
    log = logging.getLogger(__name__)
    recoverable = (KeyError, IndexError, TypeError, ValueError)
    parts = []

    try:
        if "revenue_yoy" in benchmarks:
            parts.append(f"売上YoY: 当社 {kpi['revenue_yoy']:.1f}% / 業界 {benchmarks['revenue_yoy']:.1f}%")
    except recoverable as exc:
        log.warning("Skipping revenue_yoy benchmark: %s", exc)

    try:
        if "renewable_energy_ratio" in benchmarks:
            history = esg[esg["metric"]=="energy_renewable_ratio"].sort_values("year")
            if not history.empty:
                # Most recent year's value for the company.
                cur = history["value"].iloc[-1]
                parts.append(f"再エネ比率: 当社 {cur:.1f}% / 業界 {benchmarks['renewable_energy_ratio']:.1f}%")
    except recoverable as exc:
        log.warning("Skipping renewable_energy_ratio benchmark: %s", exc)

    return " / ".join(parts)


# Built-in fallback template, written to <templates_dir>/base.html.j2 when the
# requested template is missing and no base template exists yet.
_DEFAULT_BASE_TEMPLATE = """<!doctype html>
<html lang="{{ lang }}"><head><meta charset="utf-8"><title>{{ meta.report_title }}</title></head>
<body>
<h1>{{ meta.report_title }}({{ meta.fiscal_year }})</h1>
<p>{{ meta.company_name }} / Ticker: {{ meta.ticker }} / {{ meta.currency }}</p>
<h2>CEOメッセージ</h2><p>{{ sections.ceo_message }}</p>
<h2>KPI</h2><ul>
<li>売上: {{ kpi.revenue|round(0)|int }} {{ meta.currency }} / YoY {{ kpi.revenue_yoy|round(1) }}%</li>
<li>EBIT: {{ kpi.ebit|round(0)|int }} / Margin {{ kpi.ebit_margin|round(1) }}%</li>
<li>純利益: {{ kpi.net_income|round(0)|int }} / ROE {{ kpi.roe|round(1) }}%</li>
</ul>
<h2>チャート</h2>
<img src="{{ charts.revenue }}" style="max-width:520px"><br/>
<img src="{{ charts.renewable }}" style="max-width:520px">
<img src="{{ charts.female }}" style="max-width:520px">
<h2>ESGサマリー</h2>
<table border="1" cellspacing="0" cellpadding="6">
<tr><th>指標</th><th>値</th><th>単位</th><th>備考</th></tr>
{% for row in esg_table %}
<tr><td>{{ row.display }}</td><td>{{ row.value }}</td><td>{{ row.unit }}</td><td>{{ row.notes }}</td></tr>
{% endfor %}
</table>
<h2>リスク & 機会</h2><p>{{ sections.risk_opportunity }}</p>
{% if benchmark_summary %}<h2>ベンチマーク比較</h2><p>{{ benchmark_summary }}</p>{% endif %}
<footer>Generated on {{ generated_at }} | Template: {{ template_name }} | Tenant: {{ tenant }}</footer>
</body></html>"""
|