"""Report generation pipeline.

Loads company metadata (YAML), financial and ESG tables (CSV or XLSX),
derives KPIs, chart images and narrative sections, renders them through a
Jinja template to HTML, converts the HTML to PDF and DOCX, and returns audit
metadata describing the inputs and outputs of the run.
"""
import datetime
import hashlib
import json
import logging
import math
from pathlib import Path
from typing import Any, Dict, List, Optional

import pandas as pd
import yaml

from templating import get_env, render
from models import CompanyMeta, ReportSections, RenderPayload
from render import html_to_pdf, html_to_docx
from charts import line_chart_base64, materiality_base64
from validators import validate_financials, validate_esg

logger = logging.getLogger(__name__)

# Human-readable labels for known ESG metric identifiers.
DISPLAY_NAME = {
    "co2_emissions": "CO₂排出量",
    "energy_renewable_ratio": "再生可能エネルギー比率",
    "female_management_ratio": "女性管理職比率",
}

# Sort key for the normalised quarter labels produced by load_financials().
_QUARTER_ORDER = {"Q1": 1, "Q2": 2, "Q3": 3, "Q4": 4}

_DEFAULT_TEMPLATE_NAME = "base.html.j2"

# Fallback template written when the requested template does not exist.
# NOTE(review): the KPI list and chart <img> tags are reconstructed; the
# chart tags assume the helpers in `charts` return a bare base64-encoded PNG
# payload (not a full data URI) -- confirm against charts.line_chart_base64.
_DEFAULT_TEMPLATE = """\
<!doctype html>
<html lang="{{ lang }}">
<head>
<meta charset="utf-8">
<title>{{ meta.report_title }}</title>
</head>
<body>
<h1>{{ meta.report_title }}({{ meta.fiscal_year }})</h1>
<p>{{ meta.company_name }} / Ticker: {{ meta.ticker }} / {{ meta.currency }}</p>

<h2>CEOメッセージ</h2>
<p>{{ sections.ceo_message }}</p>

<h2>KPI</h2>
<ul>
  <li>売上高: {{ "%.0f"|format(kpi.revenue) }} {{ meta.currency }}</li>
  <li>EBIT: {{ "%.0f"|format(kpi.ebit) }} {{ meta.currency }}</li>
  <li>純利益: {{ "%.0f"|format(kpi.net_income) }} {{ meta.currency }}</li>
  <li>EBITマージン: {{ "%.1f"|format(kpi.ebit_margin) }}%</li>
  <li>ROE: {{ "%.1f"|format(kpi.roe) }}%</li>
  <li>売上YoY: {{ "%.1f"|format(kpi.revenue_yoy) }}%</li>
</ul>

<h2>チャート</h2>
{% if charts %}
{% if charts.revenue %}<img alt="Revenue Trend" src="data:image/png;base64,{{ charts.revenue }}">{% endif %}
{% if charts.renewable %}<img alt="Renewable Energy Ratio" src="data:image/png;base64,{{ charts.renewable }}">{% endif %}
{% if charts.female %}<img alt="Female Management Ratio" src="data:image/png;base64,{{ charts.female }}">{% endif %}
{% endif %}

<h2>ESGサマリー</h2>
<table>
  <thead>
    <tr><th>指標</th><th>値</th><th>単位</th><th>備考</th></tr>
  </thead>
  <tbody>
  {% for row in esg_table %}
    <tr><td>{{ row.display }}</td><td>{{ row.value }}</td><td>{{ row.unit }}</td><td>{{ row.notes }}</td></tr>
  {% endfor %}
  </tbody>
</table>

<h2>リスク &amp; 機会</h2>
<p>{{ sections.risk_opportunity }}</p>

{% if benchmark_summary %}
<h2>ベンチマーク比較</h2>
<p>{{ benchmark_summary }}</p>
{% endif %}
</body>
</html>
"""


def _sha256(p: Path) -> str:
    """Return the hex SHA-256 digest of the file at *p*, read in 8 KiB chunks."""
    h = hashlib.sha256()
    with p.open("rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            h.update(chunk)
    return h.hexdigest()


def _require_columns(df: pd.DataFrame, required, name: str) -> None:
    """Raise ValueError if any column in *required* is absent from *df*.

    *name* identifies the data set in the error message.
    """
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"{name} に必須列がありません: {missing}. 例: {required}")


def _s(x) -> str:
    """Return *x* as text, mapping None and float NaN to an empty string."""
    if x is None:
        return ""
    if isinstance(x, float) and math.isnan(x):
        return ""
    return str(x)


def _read_table(path) -> pd.DataFrame:
    """Read *path* with read_excel when it ends in ``.xlsx``, else read_csv."""
    if str(path).lower().endswith(".xlsx"):
        return pd.read_excel(path)
    return pd.read_csv(path)


def load_company_meta(path: str) -> CompanyMeta:
    """Load the company YAML at *path* and build a CompanyMeta from its keys.

    Raises:
        ValueError: if the YAML document is empty or not a mapping.
    """
    data = yaml.safe_load(Path(path).read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        # Without this check an empty file fails with an opaque TypeError
        # on the ** expansion below.
        raise ValueError(f"{path} は YAML のマッピング(キー: 値)である必要があります。")
    return CompanyMeta(**data)


def load_financials(path: str) -> pd.DataFrame:
    """Load, validate and normalise the financials table.

    ``quarter`` is rewritten as ``"Q"`` plus the first digit found in the
    cell (just ``"Q"`` when there is none); ``year`` becomes nullable Int64
    with unparseable values coerced to NA.
    """
    df = validate_financials(_read_table(path))
    # NFKC folds full-width letters, digits and spaces to their ASCII forms,
    # so inputs such as full-width "Q1" normalise to "Q1".
    q = (
        df["quarter"]
        .astype(str)
        .str.normalize("NFKC")
        .str.upper()
        .str.replace(" ", "", regex=False)
    )
    df["quarter"] = "Q" + q.str.extract(r"(\d)", expand=False).fillna("")
    df["year"] = pd.to_numeric(df["year"], errors="coerce").astype("Int64")
    return df


def load_esg(path: str) -> pd.DataFrame:
    """Load and validate the ESG table; ``year`` becomes nullable Int64."""
    df = validate_esg(_read_table(path))
    df["year"] = pd.to_numeric(df["year"], errors="coerce").astype("Int64")
    return df


def _latest_quarter_row(df: pd.DataFrame) -> pd.DataFrame:
    """Return the row with the highest recognised quarter (Q1-Q4), or an empty frame."""
    ranked = df.assign(q_order=df["quarter"].map(_QUARTER_ORDER))
    return ranked.sort_values("q_order").dropna(subset=["q_order"]).tail(1)


def _pct(numerator: float, denominator: float) -> float:
    """Return numerator / denominator in percent, or 0.0 when not computable."""
    if not (math.isfinite(numerator) and math.isfinite(denominator)):
        return 0.0
    if denominator == 0:
        return 0.0
    return numerator / denominator * 100


def compute_kpi(fin_df: pd.DataFrame, fiscal_year: int) -> Dict[str, float]:
    """Compute headline KPIs from the latest recognised quarter of *fiscal_year*.

    Returns a dict with ``revenue``, ``ebit``, ``net_income``,
    ``ebit_margin``, ``roe`` and ``revenue_yoy`` (the last three in percent;
    0.0 when they cannot be computed).

    Raises:
        ValueError: if *fiscal_year* has no rows, or none of its rows has a
            quarter recognised as Q1-Q4.
    """
    years = set(fin_df["year"].dropna().astype(int))
    if fiscal_year not in years:
        raise ValueError(
            f"financials.csv に年度 {fiscal_year} のデータがありません。year 列を確認してください。"
        )

    fy = fin_df[fin_df["year"] == fiscal_year]
    latest = _latest_quarter_row(fy)
    if latest.empty:
        raise ValueError(
            f"年度 {fiscal_year} の quarter が Q1〜Q4 として認識できません。例: Q4"
        )

    # Year-over-year should compare like with like: prefer the same quarter
    # of the prior year, and only fall back to the prior year's latest
    # recognised quarter when that quarter is missing.
    prev_fy = fin_df[fin_df["year"] == fiscal_year - 1]
    prev = prev_fy[prev_fy["quarter"] == latest["quarter"].iloc[0]].tail(1)
    if prev.empty:
        prev = _latest_quarter_row(prev_fy)

    revenue = float(latest["revenue"].iloc[0])
    ebit = float(latest["ebit"].iloc[0])
    net_income = float(latest["net_income"].iloc[0])
    if "total_equity" in latest.columns:
        equity = float(latest["total_equity"].iloc[0])
    else:
        equity = 0.0

    revenue_yoy = 0.0
    if not prev.empty:
        prev_revenue = float(prev["revenue"].iloc[0])
        if (
            math.isfinite(revenue)
            and math.isfinite(prev_revenue)
            and prev_revenue != 0
        ):
            revenue_yoy = ((revenue / prev_revenue) - 1) * 100

    return {
        "revenue": revenue,
        "ebit": ebit,
        "net_income": net_income,
        "ebit_margin": _pct(ebit, revenue),
        "roe": _pct(net_income, equity),
        "revenue_yoy": revenue_yoy,
    }


def esg_table(df: pd.DataFrame, fiscal_year: int) -> List[Dict[str, Any]]:
    """Return one display row per ESG record of *fiscal_year*.

    Each row has ``display`` (label from DISPLAY_NAME, falling back to the
    raw metric id), ``value``, ``unit`` and ``notes``. Missing or NaN
    ``unit``/``notes`` cells become empty strings so they do not render as
    "nan".
    """
    dfy = df[df["year"] == fiscal_year]
    rows = []
    for _, r in dfy.iterrows():
        metric = r["metric"]
        rows.append({
            "display": DISPLAY_NAME.get(metric, metric),
            "value": r["value"],
            "unit": _s(r.get("unit", "")),
            "notes": _s(r.get("notes", "")),
        })
    return rows


def build_sections(meta: CompanyMeta, kpi: dict, esg_rows: list, llm=None) -> ReportSections:
    """Build the narrative sections, using *llm* when given, else fixed text."""
    if llm:
        ceo_message = llm.generate_ceo_message(meta, kpi, esg_rows)
        risk = llm.generate_risk_opportunity(meta, kpi, esg_rows)
    else:
        ceo_message = f"{meta.fiscal_year}期は、売上成長と収益性の両立に注力しました。"
        risk = "主要リスクはマクロ環境と規制動向。機会は生成AI活用と脱炭素需要の拡大です。"
    return ReportSections(ceo_message=ceo_message, risk_opportunity=risk)


def _translate_payload_texts(payload: dict, lang: str, llm, glossary: Optional[Dict[str, str]]):
    """Translate the user-visible texts of *payload* in place and return it.

    No-op when *llm* is falsy or *lang* is ``"ja"``. Texts are sent in one
    batch, in this order: CEO message, risk/opportunity, (display, notes)
    per ESG row, report title, then each material topic.

    Raises:
        ValueError: if the LLM returns fewer texts than were sent. This is
            checked before any field is overwritten, so the payload is never
            left half-translated.
    """
    if not llm or lang == "ja":
        return payload

    sections = payload["sections"]
    meta = payload["meta"]
    rows = payload["esg_table"]
    # `or []` also covers an explicit None value for the key.
    topics = meta.get("material_topics") or []

    texts = [_s(sections["ceo_message"]), _s(sections["risk_opportunity"])]
    for row in rows:
        texts.append(_s(row.get("display", "")))
        texts.append(_s(row.get("notes", "")))
    texts.append(_s(meta["report_title"]))
    texts.extend(_s(topic) for topic in topics)

    translated = list(
        llm.translate_texts(texts, target_lang=lang, glossary=glossary or {})
    )
    if len(translated) < len(texts):
        raise ValueError(
            f"翻訳結果の件数が不足しています: 期待 {len(texts)} 件 / 取得 {len(translated)} 件"
        )

    it = iter(translated)
    sections["ceo_message"] = next(it)
    sections["risk_opportunity"] = next(it)
    for row in rows:
        row["display"] = next(it)
        row["notes"] = next(it)
    meta["report_title"] = next(it)
    for i, _ in enumerate(topics):
        topics[i] = next(it)
    return payload


def _load_yaml_mapping(path: Optional[str], label: str) -> Dict[str, Any]:
    """Best-effort load of an optional YAML mapping.

    Returns ``{}`` when *path* is falsy, the file cannot be read or parsed,
    or the document is empty or not a mapping. Failures are logged as
    warnings rather than raised; *label* names the file kind in the log.
    """
    if not path:
        return {}
    try:
        data = yaml.safe_load(Path(path).read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
        logger.warning("Could not load %s from %s: %s", label, path, exc)
        return {}
    if not data:
        return {}
    if not isinstance(data, dict):
        logger.warning(
            "Ignoring %s at %s: expected a mapping, got %s",
            label, path, type(data).__name__,
        )
        return {}
    return data


def _load_glossary(glossary_path: Optional[str]) -> Dict[str, str]:
    """Load the optional translation glossary; ``{}`` when unavailable."""
    return _load_yaml_mapping(glossary_path, "glossary")


def _load_benchmarks(benchmarks_path: Optional[str]) -> Dict[str, Any]:
    """Load the optional benchmarks file; ``{}`` when unavailable."""
    return _load_yaml_mapping(benchmarks_path, "benchmarks")


def _build_charts(fin: pd.DataFrame, esg: pd.DataFrame, fiscal_year: int) -> Dict[str, str]:
    """Build chart images keyed ``revenue``, ``renewable`` and ``female``.

    The revenue chart plots the quarters of *fiscal_year*; the two ESG charts
    plot every available year of the corresponding metric.
    """
    def series(df, y):
        # Revenue by quarter for year y, ordered Q1..Q4.
        d = df[df["year"] == y].copy()
        d["q"] = d["quarter"].map(_QUARTER_ORDER)
        d = d.sort_values("q")
        return d["quarter"].tolist(), d["revenue"].tolist()

    def metric_series(metric):
        # Year-ordered time series for a single ESG metric.
        d = esg[esg["metric"] == metric].sort_values("year")
        return d["year"].tolist(), d["value"].tolist()

    xs, ys = series(fin, fiscal_year)
    rev = line_chart_base64(
        xs, ys, xlabel="Quarter", ylabel="Revenue",
        title=f"Revenue Trend {fiscal_year}",
    )

    xs_re, ys_re = metric_series("energy_renewable_ratio")
    xs_fm, ys_fm = metric_series("female_management_ratio")
    re_img = line_chart_base64(
        xs_re, ys_re, xlabel="Year", ylabel="%", title="Renewable Energy Ratio"
    )
    fm_img = line_chart_base64(
        xs_fm, ys_fm, xlabel="Year", ylabel="%", title="Female Management Ratio"
    )

    # NOTE: the original comment mentions an optional materiality matrix
    # (when meta.targets.weights exists); no such chart is produced here.
    return {"revenue": rev, "renewable": re_img, "female": fm_img}


def _benchmark_summary(kpi: dict, esg: pd.DataFrame, benchmarks: Dict[str, Any]) -> str:
    """Return a one-line company-vs-industry comparison, or ``""``.

    Each comparison is built independently, so a problem with one (missing
    ESG data, non-numeric benchmark value) is logged and does not discard
    the others.
    """
    if not benchmarks:
        return ""

    msgs = []
    if "revenue_yoy" in benchmarks:
        try:
            msgs.append(
                f"売上YoY: 当社 {kpi['revenue_yoy']:.1f}% / 業界 {benchmarks['revenue_yoy']:.1f}%"
            )
        except (KeyError, TypeError, ValueError) as exc:
            logger.warning("Skipping revenue_yoy benchmark: %s", exc)

    if "renewable_energy_ratio" in benchmarks:
        try:
            cur = (
                esg[esg["metric"] == "energy_renewable_ratio"]
                .sort_values("year")
                .tail(1)["value"]
                .iloc[0]
            )
            msgs.append(
                f"再エネ比率: 当社 {cur:.1f}% / 業界 {benchmarks['renewable_energy_ratio']:.1f}%"
            )
        except (KeyError, IndexError, TypeError, ValueError) as exc:
            logger.warning("Skipping renewable_energy_ratio benchmark: %s", exc)

    return " / ".join(msgs)


def _ensure_template(templates_dir, template_name: str) -> str:
    """Return the template name to render, creating the fallback if needed.

    When *template_name* is missing from *templates_dir*, the built-in base
    template is used instead. It is written only if no file with that name
    exists yet, so an existing base template is never overwritten.
    """
    tdir = Path(templates_dir)
    tdir.mkdir(parents=True, exist_ok=True)
    if (tdir / template_name).exists():
        return template_name

    fallback = tdir / _DEFAULT_TEMPLATE_NAME
    if not fallback.exists():
        fallback.write_text(_DEFAULT_TEMPLATE, encoding="utf-8")
    if template_name != _DEFAULT_TEMPLATE_NAME:
        logger.warning(
            "Template %s not found in %s; using %s",
            template_name, tdir, _DEFAULT_TEMPLATE_NAME,
        )
    return _DEFAULT_TEMPLATE_NAME


def generate_report(
    company_yaml,
    financials_csv,
    esg_csv,
    templates_dir,
    template_name="base.html.j2",
    out_html="output/report.html",
    out_pdf="output/report.pdf",
    out_docx="output/report.docx",
    lang="ja",
    llm=None,
    glossary_path: Optional[str] = None,
    benchmarks_path: Optional[str] = None,
    tenant: Optional[str] = None,
    rag_index_dir: Optional[str] = None,
):
    """Generate the report as HTML, PDF and DOCX.

    Args:
        company_yaml: path to the company metadata YAML.
        financials_csv: path to the financials table (CSV or XLSX).
        esg_csv: path to the ESG table (CSV or XLSX).
        templates_dir: directory holding Jinja templates (created if absent).
        template_name: template to render; falls back to the built-in base
            template when the file does not exist.
        out_html, out_pdf, out_docx: output paths; parent directories are
            created as needed.
        lang: output language; texts are translated via *llm* unless "ja".
        llm: optional object used for narrative generation and translation.
        glossary_path: optional YAML glossary passed to the translator.
        benchmarks_path: optional YAML of industry benchmark values.
        tenant: optional tenant label stored in the payload and audit data.
        rag_index_dir: accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(out_html, out_pdf, out_docx, meta_json, html)`` where
        ``meta_json`` is the audit metadata dict and ``html`` the rendered
        document.
    """
    # Each output may live in a different directory.
    for out_path in (out_html, out_pdf, out_docx):
        Path(out_path).parent.mkdir(parents=True, exist_ok=True)

    template_name = _ensure_template(templates_dir, template_name)

    meta = load_company_meta(company_yaml)
    fin = load_financials(financials_csv)
    esg = load_esg(esg_csv)

    kpi = compute_kpi(fin, meta.fiscal_year)
    esg_rows = esg_table(esg, meta.fiscal_year)
    sections = build_sections(meta, kpi, esg_rows, llm=llm)
    charts = _build_charts(fin, esg, meta.fiscal_year)

    glossary = _load_glossary(glossary_path)
    benchmarks = _load_benchmarks(benchmarks_path)
    benchmark_summary = _benchmark_summary(kpi, esg, benchmarks)

    env = get_env(templates_dir)
    payload = RenderPayload(
        meta=meta,
        esg_table=esg_rows,
        kpi=kpi,
        sections=sections,
        generated_at=datetime.datetime.now().strftime("%Y-%m-%d %H:%M"),
        lang=lang,
    ).model_dump()
    payload["charts"] = charts
    payload["template_name"] = template_name
    payload["tenant"] = tenant or ""
    # The template's benchmark section is conditional on this key.
    payload["benchmark_summary"] = benchmark_summary

    # Translate user-visible texts (skipped for Japanese output).
    payload = _translate_payload_texts(payload, lang=lang, llm=llm, glossary=glossary)

    html = render(env, template_name, payload)
    Path(out_html).write_text(html, encoding="utf-8")
    html_to_pdf(html, out_pdf)
    html_to_docx(html, out_docx)

    # Audit metadata: input hashes, outputs, template and LLM usage.
    meta_json = {
        "inputs": {
            "company_yaml_sha": _sha256(Path(company_yaml)),
            "financials_csv_sha": _sha256(Path(financials_csv)),
            "esg_csv_sha": _sha256(Path(esg_csv)),
            "lang": lang,
            "tenant": tenant,
            "glossary_keys": list(glossary.keys()) if glossary else [],
            "benchmarks": benchmarks,
        },
        "outputs": {"html": out_html, "pdf": out_pdf, "docx": out_docx},
        "template": {"dir": templates_dir, "name": template_name},
        "generated_at": datetime.datetime.now().isoformat(timespec="seconds"),
        "usage": getattr(llm, "last_usage", {}) if llm else {},
        "benchmark_summary": benchmark_summary,
    }
    return out_html, out_pdf, out_docx, meta_json, html