Upload 17 files
Browse files- FROM python:3.dockerfile +15 -0
- app.py +158 -0
- benchmark.yaml +2 -0
- charts.py +23 -0
- core.py +301 -0
- hf.yaml +6 -0
- llm.py +82 -0
- models.py +24 -0
- rag.py +55 -0
- render.py +35 -0
- requirements.txt +17 -0
- templates:gir.html.j2 +7 -0
- templates:report.html.j2 +1 -6
- templates:sasb.html.j2 +12 -0
- templates:tcfd.html.j2 +9 -0
- templating.py +12 -0
- validators.py +55 -0
FROM python:3.dockerfile
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Runtime image for the Gradio report generator.
FROM python:3.11-slim

# Native libraries and fonts required by the HTML -> PDF rendering stack.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libcairo2 pango1.0-tools libpango-1.0-0 libgdk-pixbuf2.0-0 libffi-dev \
    fonts-noto fonts-noto-cjk git \
    && rm -rf /var/lib/apt/lists/*

# The app writes generated reports to ./tmp and may create templates at
# runtime, so the working directory must be writable by the (non-root)
# user the container runs as.
RUN useradd --create-home --uid 1000 user

WORKDIR /code
COPY requirements.txt .
RUN pip install --no-cache-dir -U pip wheel && pip install --no-cache-dir -r requirements.txt

COPY --chown=user:user . .
RUN chown user:user /code

USER user
ENV HOME=/home/user \
    PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME=0.0.0.0
EXPOSE 7860
CMD ["python", "app.py"]
|
app.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
import tempfile, datetime, json, shutil, os
|
| 4 |
+
from core import generate_report
|
| 5 |
+
from rag import build_or_update_index, answer_with_context
|
| 6 |
+
|
| 7 |
+
TITLE = "IR/ESG Report Generator (HF Space, Pro)"
|
| 8 |
+
DESC = "CSV/YAML から IR/ESG レポート(HTML/PDF/DOCX)を生成。LLM要約+翻訳・グラフ・ベンチマーク・RAG・Hub保存に対応。"
|
| 9 |
+
|
| 10 |
+
TEMPLATES_DIR = str((Path(__file__).resolve().parent / "templates").absolute())
|
| 11 |
+
|
| 12 |
+
import shutil as _shutil
|
| 13 |
+
def _to_path(upload_obj, tmpdir: Path, filename: str) -> Path:
|
| 14 |
+
dst = tmpdir / filename
|
| 15 |
+
if upload_obj is None:
|
| 16 |
+
raise ValueError(f"{filename} が未指定です。")
|
| 17 |
+
if isinstance(upload_obj, (str, Path)):
|
| 18 |
+
src = Path(upload_obj); _shutil.copy(src, dst); return dst
|
| 19 |
+
if hasattr(upload_obj, "name"): # NamedString
|
| 20 |
+
src = Path(upload_obj.name); _shutil.copy(src, dst); return dst
|
| 21 |
+
if hasattr(upload_obj, "read"):
|
| 22 |
+
data = upload_obj.read()
|
| 23 |
+
if isinstance(data, str): data = data.encode("utf-8")
|
| 24 |
+
dst.write_bytes(data); return dst
|
| 25 |
+
if isinstance(upload_obj, dict):
|
| 26 |
+
if "name" in upload_obj: _shutil.copy(upload_obj["name"], dst); return dst
|
| 27 |
+
if "data" in upload_obj:
|
| 28 |
+
data = upload_obj["data"]
|
| 29 |
+
if isinstance(data, str): data = data.encode("utf-8")
|
| 30 |
+
dst.write_bytes(data); return dst
|
| 31 |
+
raise TypeError(f"Unsupported upload type: {type(upload_obj)}")
|
| 32 |
+
|
| 33 |
+
def run(company_yaml, financials_csv, esg_csv, use_llm, lang, template_key,
        tenant, push_to_hub, glossary_yaml, benchmarks_yaml, past_reports_zip):
    """Gradio callback: build the report and return the six output values.

    Returns a 6-tuple ``(status, html_path, pdf_path, docx_path, meta_path,
    html_text)``. On any failure the four paths are ``None`` and the HTML
    text is ``""``; the status string carries the error (and a traceback for
    unexpected exceptions).

    The three mandatory uploads are staged in a temporary directory, the
    report is generated there, and the results are copied to ``./tmp`` under
    a ``<tenant>-<timestamp>`` prefix so they outlive the temporary
    directory. When ``push_to_hub`` is set, the four artefacts are also
    uploaded to the dataset repo named by ``HF_DATASET_REPO``; an upload
    failure is recorded under ``hub_error`` in the metadata JSON rather than
    failing the run.
    """
    import re  # local import: only this handler needs it

    def _tabular_name(upload_obj, stem):
        """Return ``stem`` with ``.xlsx`` for Excel uploads, else ``.csv``."""
        if isinstance(upload_obj, (str, Path)):
            source = upload_obj
        elif isinstance(upload_obj, dict):
            source = upload_obj.get("name", "")
        else:
            source = getattr(upload_obj, "name", "")
        suffix = Path(str(source)).suffix.lower()
        return stem + (".xlsx" if suffix == ".xlsx" else ".csv")

    try:
        if not company_yaml or not financials_csv or not esg_csv:
            return "全ファイルをアップロードしてください。", None, None, None, None, ""

        # ``tenant`` is free text from the UI and ends up in file names and
        # Hub paths: collapse anything that is not a word character, dot or
        # dash so it cannot contain path separators or ".." segments.
        safe_tenant = re.sub(r"[^\w.-]+", "_", str(tenant or "").strip()).strip(".") or "default"

        with tempfile.TemporaryDirectory() as td:
            tdir = Path(td)
            cpath = _to_path(company_yaml, tdir, "company.yaml")
            # Keep the .xlsx suffix: the loaders choose the Excel or CSV
            # reader from the file extension.
            fpath = _to_path(financials_csv, tdir, _tabular_name(financials_csv, "financials"))
            epath = _to_path(esg_csv, tdir, _tabular_name(esg_csv, "esg"))
            gpath = _to_path(glossary_yaml, tdir, "glossary.yaml") if glossary_yaml else None
            bpath = _to_path(benchmarks_yaml, tdir, "benchmarks.yaml") if benchmarks_yaml else None
            rzip = _to_path(past_reports_zip, tdir, "past_reports.zip") if past_reports_zip else None

            if rzip:
                build_or_update_index(rzip, index_dir=tdir / "index")

            outdir = tdir / "out"
            outdir.mkdir(parents=True, exist_ok=True)

            llm = None
            if use_llm:
                try:
                    from llm import OpenAILLM
                    llm = OpenAILLM()
                except Exception as e:
                    return f"LLM初期化エラー: {e}", None, None, None, None, ""

            html, pdf, docx, meta_json, html_text = generate_report(
                company_yaml=str(cpath),
                financials_csv=str(fpath),
                esg_csv=str(epath),
                templates_dir=TEMPLATES_DIR,
                template_name={
                    "base": "base.html.j2",
                    "tcfd": "tcfd.html.j2",
                    "sasab": "sasb.html.j2",
                    "sasb": "sasb.html.j2",
                    "gri": "gri.html.j2",
                }.get(template_key, "base.html.j2"),
                out_html=str(outdir / "report.html"),
                out_pdf=str(outdir / "report.pdf"),
                out_docx=str(outdir / "report.docx"),
                lang=lang,
                llm=llm,
                glossary_path=str(gpath) if gpath else None,
                benchmarks_path=str(bpath) if bpath else None,
                tenant=tenant,
                rag_index_dir=str(tdir / "index") if rzip else None,
            )

            # Copy results out of the temporary directory before it is
            # removed, so the paths handed back to Gradio remain valid.
            repo_tmp = Path("./tmp")
            repo_tmp.mkdir(exist_ok=True)
            ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
            prefix = f"{safe_tenant}-{ts}"
            html_out = repo_tmp / f"{prefix}.html"
            pdf_out = repo_tmp / f"{prefix}.pdf"
            docx_out = repo_tmp / f"{prefix}.docx"
            meta_out = repo_tmp / f"{prefix}.json"
            _shutil.copy(html, html_out)
            _shutil.copy(pdf, pdf_out)
            _shutil.copy(docx, docx_out)
            meta_out.write_text(json.dumps(meta_json, ensure_ascii=False, indent=2), encoding="utf-8")

            if push_to_hub:
                try:
                    from huggingface_hub import HfApi
                    api = HfApi()
                    repo_id = os.environ.get("HF_DATASET_REPO", "your-org/ir-esg-reports")
                    base_path = f"{safe_tenant}/{ts}"
                    uploads = [
                        (html_out, "report.html"),
                        (pdf_out, "report.pdf"),
                        (docx_out, "report.docx"),
                        (meta_out, "report.json"),
                    ]
                    for p, name in uploads:
                        api.upload_file(
                            path_or_fileobj=str(p),
                            path_in_repo=f"{base_path}/{name}",
                            repo_id=repo_id,
                            repo_type="dataset",
                        )
                except Exception as e:
                    # Best effort: keep the local result and record why the
                    # upload failed in the metadata file.
                    meta_json["hub_error"] = str(e)
                    meta_out.write_text(json.dumps(meta_json, ensure_ascii=False, indent=2), encoding="utf-8")

            return "生成が完了しました。", str(html_out), str(pdf_out), str(docx_out), str(meta_out), html_text

    except Exception as e:
        import traceback
        tb = traceback.format_exc(limit=20)
        return f"エラー: {e}\n--- trace ---\n{tb}", None, None, None, None, ""
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
# Gradio UI: three rows of inputs, one action button and the result widgets.
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}\n{DESC}")

    # Row 1: the three mandatory data files.
    with gr.Row():
        company_yaml = gr.File(label="company.yaml(会社情報・年度等)", file_types=[".yaml", ".yml"])
        financials_csv = gr.File(label="financials.csv(財務KPI)", file_types=[".csv", ".xlsx"])
        esg_csv = gr.File(label="esg_metrics.csv(ESG指標)", file_types=[".csv", ".xlsx"])

    # Row 2: generation options.
    with gr.Row():
        use_llm = gr.Checkbox(label="LLMで要約/翻訳を行う(OPENAI_API_KEY2 必須)", value=True)
        lang = gr.Dropdown(choices=["ja", "en", "zh", "ko", "de", "fr"], value="ja", label="出力言語")
        template_key = gr.Dropdown(choices=["base", "tcfd", "sasb", "gri"], value="base", label="テンプレート")
        tenant = gr.Textbox(label="テナント名(会社識別子)", value="HitC")

    # Row 3: optional inputs and the Hub upload switch.
    with gr.Row():
        glossary_yaml = gr.File(label="glossary.yaml(用語集・任意)", file_types=[".yaml", ".yml"])
        benchmarks_yaml = gr.File(label="benchmarks.yaml(業界平均など・任意)", file_types=[".yaml", ".yml"])
        past_reports_zip = gr.File(label="過去レポートZip(RAG用・任意)", file_types=[".zip"])
        push_to_hub = gr.Checkbox(label="生成物を Hugging Face Hub(Datasets)へ保存", value=False)

    run_btn = gr.Button("レポート生成")

    status = gr.Textbox(label="ステータス", interactive=False)
    html_file = gr.File(label="HTMLダウンロード")
    pdf_file = gr.File(label="PDFダウンロード")
    docx_file = gr.File(label="DOCXダウンロード")
    meta_file = gr.File(label="メタ情報(JSON)")
    html_preview = gr.HTML(label="HTMLプレビュー(抜粋)")

    # Order must match the positional parameters of ``run`` and the 6-tuple
    # it returns.
    run_inputs = [
        company_yaml, financials_csv, esg_csv,
        use_llm, lang, template_key, tenant, push_to_hub,
        glossary_yaml, benchmarks_yaml, past_reports_zip,
    ]
    run_outputs = [status, html_file, pdf_file, docx_file, meta_file, html_preview]
    run_btn.click(fn=run, inputs=run_inputs, outputs=run_outputs)

if __name__ == "__main__":
    demo.launch()
|
benchmark.yaml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
revenue_yoy: 8.5
|
| 2 |
+
renewable_energy_ratio: 35.0
|
charts.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io, base64
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
|
| 4 |
+
def _b64(fig):
    """Render a Matplotlib figure as a PNG ``data:`` URI and close the figure.

    The figure is saved into an in-memory buffer with a tight bounding box
    and returned as ``"data:image/png;base64,<payload>"``.
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches="tight")
    finally:
        # Close even when rendering fails; otherwise the figure stays
        # registered with pyplot and accumulates across requests.
        plt.close(fig)
    return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode("utf-8")
|
| 9 |
+
|
| 10 |
+
def line_chart_base64(xs, ys, xlabel="", ylabel="", title=""):
    """Plot ``ys`` against ``xs`` as a line and return it as a PNG data URI."""
    figure, axes = plt.subplots()
    axes.plot(xs, ys)
    axes.set(title=title, xlabel=xlabel, ylabel=ylabel)
    encoded = _b64(figure)
    return encoded
|
| 15 |
+
|
| 16 |
+
def materiality_base64(labels, x_vals, y_vals, title="Materiality Matrix"):
    """Draw a labelled scatter plot and return it as a PNG data URI.

    Each ``(x, y)`` point is annotated with its label, offset by 5 points so
    the text does not cover the marker.
    """
    figure, axes = plt.subplots()
    axes.scatter(x_vals, y_vals)
    for text, point in zip(labels, zip(x_vals, y_vals)):
        axes.annotate(text, point, xytext=(5, 5), textcoords="offset points")
    axes.set(xlabel="Stakeholder Importance", ylabel="Business Impact", title=title)
    axes.grid(True, alpha=0.3)
    return _b64(figure)
|
core.py
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import yaml, datetime, hashlib, json, math
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from templating import get_env, render
|
| 5 |
+
from models import CompanyMeta, ReportSections, RenderPayload
|
| 6 |
+
from render import html_to_pdf, html_to_docx
|
| 7 |
+
from charts import line_chart_base64, materiality_base64
|
| 8 |
+
from validators import validate_financials, validate_esg
|
| 9 |
+
from typing import Dict, Any, List, Optional
|
| 10 |
+
|
| 11 |
+
DISPLAY_NAME = {
|
| 12 |
+
"co2_emissions": "CO₂排出量",
|
| 13 |
+
"energy_renewable_ratio": "再生可能エネルギー比率",
|
| 14 |
+
"female_management_ratio": "女性管理職比率",
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
def _sha256(p: Path) -> str:
|
| 18 |
+
h = hashlib.sha256()
|
| 19 |
+
with p.open("rb") as f:
|
| 20 |
+
for chunk in iter(lambda: f.read(8192), b""):
|
| 21 |
+
h.update(chunk)
|
| 22 |
+
return h.hexdigest()
|
| 23 |
+
|
| 24 |
+
def _require_columns(df: pd.DataFrame, required, name: str):
|
| 25 |
+
missing = [c for c in required if c not in df.columns]
|
| 26 |
+
if missing:
|
| 27 |
+
raise ValueError(f"{name} に必須列がありません: {missing}. 例: {required}")
|
| 28 |
+
|
| 29 |
+
def load_company_meta(path: str) -> CompanyMeta:
    """Load the company YAML at ``path`` into a ``CompanyMeta``.

    Raises:
        ValueError: if the file is empty or its top level is not a mapping
            (previously this surfaced as an opaque ``TypeError`` from
            ``**`` unpacking).
    """
    data = yaml.safe_load(Path(path).read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        raise ValueError(
            f"company.yaml の内容がマッピング(key: value)ではありません: {path}"
        )
    return CompanyMeta(**data)
|
| 32 |
+
|
| 33 |
+
def load_financials(path: str) -> pd.DataFrame:
    """Read the financials table and normalise ``quarter`` and ``year``.

    Files ending in ``.xlsx`` are read with ``pandas.read_excel``; anything
    else is read as CSV. After ``validate_financials``:

    * ``quarter`` becomes ``"Q"`` + the first digit found in the value
      (just ``"Q"`` when there is none);
    * ``year`` becomes a nullable ``Int64`` column (unparseable -> ``<NA>``).
    """
    import unicodedata  # local import: only needed for quarter clean-up

    if str(path).lower().endswith(".xlsx"):
        df = pd.read_excel(path)
    else:
        df = pd.read_csv(path)
    df = validate_financials(df)

    # NFKC folds full-width forms ("Ｑ１") into ASCII ("Q1") so the digit is
    # one the downstream Q1..Q4 mapping recognises.
    quarter = (
        df["quarter"]
        .astype(str)
        .map(lambda text: unicodedata.normalize("NFKC", text))
        .str.upper()
        .str.replace(" ", "", regex=False)
    )
    df["quarter"] = "Q" + quarter.str.extract(r"(\d)", expand=False).fillna("")
    df["year"] = pd.to_numeric(df["year"], errors="coerce").astype("Int64")
    return df
|
| 44 |
+
|
| 45 |
+
def load_esg(path: str) -> pd.DataFrame:
    """Read the ESG metrics table, validate it and coerce ``year`` to ``Int64``.

    Files ending in ``.xlsx`` are read with ``pandas.read_excel``; anything
    else is read as CSV. Unparseable years become ``<NA>``.
    """
    is_excel = str(path).lower().endswith(".xlsx")
    raw = pd.read_excel(path) if is_excel else pd.read_csv(path)
    frame = validate_esg(raw)
    years = pd.to_numeric(frame["year"], errors="coerce")
    frame["year"] = years.astype("Int64")
    return frame
|
| 53 |
+
|
| 54 |
+
def compute_kpi(fin_df: pd.DataFrame, fiscal_year: int):
    """Compute headline KPIs from the latest quarter of ``fiscal_year``.

    The latest quarter is the highest of Q1..Q4 present for the year.

    Returns a dict with:
        revenue, ebit, net_income: values of that quarter as floats.
        ebit_margin: ``ebit / revenue * 100`` (0.0 when revenue is 0).
        roe: ``net_income / total_equity * 100`` (0.0 when equity is 0 or
            the column is absent).
        revenue_yoy: percentage change against the *same quarter* of the
            previous year; 0.0 when that quarter is missing or its revenue
            is 0.

    Raises:
        ValueError: if the year has no rows or no quarter labelled Q1..Q4.
    """
    years = set(fin_df["year"].dropna().astype(int))
    if fiscal_year not in years:
        raise ValueError(f"financials.csv に年度 {fiscal_year} のデータがありません。year 列を確認してください。")

    fy = fin_df[fin_df["year"] == fiscal_year].copy()
    if fy.empty:
        raise ValueError(f"年度 {fiscal_year} の四半期データが空です。quarter の表記(Q1~Q4)を確認してください。")

    order = {"Q1": 1, "Q2": 2, "Q3": 3, "Q4": 4}
    fy["q_order"] = fy["quarter"].map(order)
    latest = fy.sort_values("q_order").dropna(subset=["q_order"]).tail(1)
    if latest.empty:
        raise ValueError(f"年度 {fiscal_year} の quarter が Q1〜Q4 として認識できません。例: Q4")

    # Year-over-year must compare like with like: use the same quarter of
    # the previous year, not whichever quarter happens to be its latest.
    latest_quarter = latest["quarter"].iloc[0]
    prev = fin_df[
        (fin_df["year"] == fiscal_year - 1) & (fin_df["quarter"] == latest_quarter)
    ].tail(1)

    revenue = float(latest["revenue"].iloc[0])
    ebit = float(latest["ebit"].iloc[0])
    net_income = float(latest["net_income"].iloc[0])
    equity = float(latest["total_equity"].iloc[0]) if "total_equity" in latest.columns else 0.0

    ebit_margin = (ebit / revenue * 100) if revenue else 0.0
    roe = (net_income / equity * 100) if equity else 0.0

    revenue_yoy = 0.0
    if not prev.empty:
        prev_revenue = float(prev["revenue"].iloc[0])
        if prev_revenue != 0:
            revenue_yoy = ((revenue / prev_revenue) - 1) * 100

    return {
        "revenue": revenue, "ebit": ebit, "net_income": net_income,
        "ebit_margin": ebit_margin, "roe": roe, "revenue_yoy": revenue_yoy,
    }
|
| 91 |
+
|
| 92 |
+
def esg_table(df: pd.DataFrame, fiscal_year: int):
    """Return the ESG rows of ``fiscal_year`` as dicts for the template.

    Each dict has ``display`` (the ``DISPLAY_NAME`` label for the metric, or
    the metric key itself when unmapped), ``value``, ``unit`` and ``notes``.
    Missing ``unit`` / ``notes`` cells are returned as ``""`` so the rendered
    table does not show a literal "nan".
    """
    def _blank_if_missing(cell):
        # Blank CSV cells arrive from pandas as float NaN.
        if cell is None or (isinstance(cell, float) and math.isnan(cell)):
            return ""
        return cell

    dfy = df[df["year"] == fiscal_year].copy()
    rows = []
    for _, r in dfy.iterrows():
        metric = r["metric"]
        rows.append({
            "display": DISPLAY_NAME.get(metric, metric),
            "value": r["value"],
            "unit": _blank_if_missing(r.get("unit", "")),
            "notes": _blank_if_missing(r.get("notes", "")),
        })
    return rows
|
| 105 |
+
|
| 106 |
+
def build_sections(meta: CompanyMeta, kpi: dict, esg_rows: list, llm=None) -> ReportSections:
    """Produce the narrative sections of the report.

    With an ``llm``, both texts come from its ``generate_ceo_message`` and
    ``generate_risk_opportunity`` methods; otherwise fixed Japanese
    placeholder sentences are used.
    """
    if not llm:
        return ReportSections(
            ceo_message=f"{meta.fiscal_year}期は、売上成長と収益性の両立に注力しました。",
            risk_opportunity="主要リスクはマクロ環境と規制動向。機会は生成AI活用と脱炭素需要の拡大です。",
        )

    message = llm.generate_ceo_message(meta, kpi, esg_rows)
    outlook = llm.generate_risk_opportunity(meta, kpi, esg_rows)
    return ReportSections(ceo_message=message, risk_opportunity=outlook)
|
| 114 |
+
|
| 115 |
+
def _s(x):
|
| 116 |
+
if x is None: return ""
|
| 117 |
+
if isinstance(x, float) and math.isnan(x): return ""
|
| 118 |
+
return str(x)
|
| 119 |
+
|
| 120 |
+
def _translate_payload_texts(payload: dict, lang: str, llm, glossary: Optional[Dict[str,str]]):
|
| 121 |
+
if not llm or lang == "ja":
|
| 122 |
+
return payload
|
| 123 |
+
|
| 124 |
+
texts = []
|
| 125 |
+
texts.append(_s(payload["sections"]["ceo_message"]))
|
| 126 |
+
texts.append(_s(payload["sections"]["risk_opportunity"]))
|
| 127 |
+
for row in payload["esg_table"]:
|
| 128 |
+
texts.append(_s(row.get("display", "")))
|
| 129 |
+
texts.append(_s(row.get("notes", "")))
|
| 130 |
+
texts.append(_s(payload["meta"]["report_title"]))
|
| 131 |
+
for topic in payload["meta"].get("material_topics", []):
|
| 132 |
+
texts.append(_s(topic))
|
| 133 |
+
|
| 134 |
+
translated = llm.translate_texts(texts, target_lang=lang, glossary=glossary or {})
|
| 135 |
+
it = iter(translated)
|
| 136 |
+
|
| 137 |
+
payload["sections"]["ceo_message"] = next(it)
|
| 138 |
+
payload["sections"]["risk_opportunity"] = next(it)
|
| 139 |
+
for row in payload["esg_table"]:
|
| 140 |
+
row["display"] = next(it)
|
| 141 |
+
row["notes"] = next(it)
|
| 142 |
+
payload["meta"]["report_title"] = next(it)
|
| 143 |
+
mt = payload["meta"].get("material_topics", [])
|
| 144 |
+
for i in range(len(mt)):
|
| 145 |
+
mt[i] = next(it)
|
| 146 |
+
|
| 147 |
+
return payload
|
| 148 |
+
|
| 149 |
+
def _load_glossary(glossary_path: Optional[str]) -> Dict[str,str]:
|
| 150 |
+
if not glossary_path: return {}
|
| 151 |
+
try:
|
| 152 |
+
g = yaml.safe_load(Path(glossary_path).read_text(encoding="utf-8"))
|
| 153 |
+
return g or {}
|
| 154 |
+
except Exception:
|
| 155 |
+
return {}
|
| 156 |
+
|
| 157 |
+
def _load_benchmarks(benchmarks_path: Optional[str]) -> Dict[str,Any]:
|
| 158 |
+
if not benchmarks_path: return {}
|
| 159 |
+
try:
|
| 160 |
+
b = yaml.safe_load(Path(benchmarks_path).read_text(encoding="utf-8"))
|
| 161 |
+
return b or {}
|
| 162 |
+
except Exception:
|
| 163 |
+
return {}
|
| 164 |
+
|
| 165 |
+
def _build_charts(fin: pd.DataFrame, esg: pd.DataFrame, fiscal_year: int) -> Dict[str,str]:
    """Build the report charts and return them keyed by name.

    Keys of the returned dict (values come from ``line_chart_base64``):
        revenue: quarterly revenue of ``fiscal_year``, ordered Q1..Q4.
        renewable: ``energy_renewable_ratio`` over all years in ``esg``.
        female: ``female_management_ratio`` over all years in ``esg``.
    """
    quarter_rank = {"Q1": 1, "Q2": 2, "Q3": 3, "Q4": 4}

    # Revenue trend for the fiscal year.
    current = fin[fin["year"] == fiscal_year].copy()
    current["q"] = current["quarter"].map(quarter_rank)
    current = current.sort_values("q")
    revenue_img = line_chart_base64(
        current["quarter"].tolist(),
        current["revenue"].tolist(),
        xlabel="Quarter",
        ylabel="Revenue",
        title=f"Revenue Trend {fiscal_year}",
    )

    # ESG ratios as yearly time series.
    def _yearly(metric_name):
        subset = esg[esg["metric"] == metric_name].sort_values("year")
        return subset["year"].tolist(), subset["value"].tolist()

    renewable_years, renewable_values = _yearly("energy_renewable_ratio")
    female_years, female_values = _yearly("female_management_ratio")
    renewable_img = line_chart_base64(
        renewable_years, renewable_values,
        xlabel="Year", ylabel="%", title="Renewable Energy Ratio",
    )
    female_img = line_chart_base64(
        female_years, female_values,
        xlabel="Year", ylabel="%", title="Female Management Ratio",
    )

    # The materiality matrix is not produced here.
    return {"revenue": revenue_img, "renewable": renewable_img, "female": female_img}
|
| 189 |
+
|
| 190 |
+
# Built-in fallback template, written only when the templates directory has
# no ``base.html.j2`` of its own.
_DEFAULT_BASE_TEMPLATE = """<!doctype html>
<html lang="{{ lang }}"><head><meta charset="utf-8"><title>{{ meta.report_title }}</title></head>
<body>
<h1>{{ meta.report_title }}({{ meta.fiscal_year }})</h1>
<p>{{ meta.company_name }} / Ticker: {{ meta.ticker }} / {{ meta.currency }}</p>
<h2>CEOメッセージ</h2><p>{{ sections.ceo_message }}</p>
<h2>KPI</h2><ul>
<li>売上: {{ kpi.revenue|round(0)|int }} {{ meta.currency }} / YoY {{ kpi.revenue_yoy|round(1) }}%</li>
<li>EBIT: {{ kpi.ebit|round(0)|int }} / Margin {{ kpi.ebit_margin|round(1) }}%</li>
<li>純利益: {{ kpi.net_income|round(0)|int }} / ROE {{ kpi.roe|round(1) }}%</li>
</ul>
<h2>チャート</h2>
<img src="{{ charts.revenue }}" style="max-width:520px"><br/>
<img src="{{ charts.renewable }}" style="max-width:520px">
<img src="{{ charts.female }}" style="max-width:520px">
<h2>ESGサマリー</h2>
<table border="1" cellspacing="0" cellpadding="6">
<tr><th>指標</th><th>値</th><th>単位</th><th>備考</th></tr>
{% for row in esg_table %}
<tr><td>{{ row.display }}</td><td>{{ row.value }}</td><td>{{ row.unit }}</td><td>{{ row.notes }}</td></tr>
{% endfor %}
</table>
<h2>リスク & 機会</h2><p>{{ sections.risk_opportunity }}</p>
{% if benchmark_summary %}<h2>ベンチマーク比較</h2><p>{{ benchmark_summary }}</p>{% endif %}
<footer>Generated on {{ generated_at }} | Template: {{ template_name }} | Tenant: {{ tenant }}</footer>
</body></html>"""


def _resolve_template(templates_dir, template_name):
    """Return a template name that exists in ``templates_dir``.

    Falls back to ``base.html.j2`` when the requested template is missing,
    creating it from the built-in default only if it does not exist yet.
    """
    tdir = Path(templates_dir)
    tdir.mkdir(parents=True, exist_ok=True)
    if (tdir / template_name).exists():
        return template_name
    base = tdir / "base.html.j2"
    if not base.exists():
        # Never overwrite an existing (possibly customised) base template.
        base.write_text(_DEFAULT_BASE_TEMPLATE, encoding="utf-8")
    return "base.html.j2"


def _summarize_benchmarks(benchmarks, kpi, esg):
    """Build the one-line company-vs-industry summary (``""`` if none)."""
    if not benchmarks:
        return ""
    msgs = []
    # Each comparison is independent: a malformed value or a missing metric
    # skips only that comparison instead of discarding the whole summary.
    try:
        if "revenue_yoy" in benchmarks:
            msgs.append(f"売上YoY: 当社 {kpi['revenue_yoy']:.1f}% / 業界 {benchmarks['revenue_yoy']:.1f}%")
    except (KeyError, TypeError, ValueError):
        pass
    try:
        if "renewable_energy_ratio" in benchmarks:
            cur = esg[esg["metric"] == "energy_renewable_ratio"].sort_values("year").tail(1)["value"].iloc[0]
            msgs.append(f"再エネ比率: 当社 {cur:.1f}% / 業界 {benchmarks['renewable_energy_ratio']:.1f}%")
    except (IndexError, KeyError, TypeError, ValueError):
        pass
    return " / ".join(msgs)


def generate_report(
    company_yaml,
    financials_csv,
    esg_csv,
    templates_dir,
    template_name="base.html.j2",
    out_html="output/report.html",
    out_pdf="output/report.pdf",
    out_docx="output/report.docx",
    lang="ja",
    llm=None,
    glossary_path: Optional[str] = None,
    benchmarks_path: Optional[str] = None,
    tenant: Optional[str] = None,
    rag_index_dir: Optional[str] = None,
):
    """Generate the HTML, PDF and DOCX report plus audit metadata.

    Loads the three inputs, computes KPIs, ESG rows, narrative sections and
    charts, optionally translates the texts (when ``llm`` is given and
    ``lang`` is not ``"ja"``), renders the template and converts the HTML to
    PDF and DOCX.

    ``rag_index_dir`` is accepted but not used by this function.

    Returns:
        ``(out_html, out_pdf, out_docx, meta_json, html)`` where
        ``meta_json`` holds input hashes, output paths, template info, LLM
        usage and the benchmark summary, and ``html`` is the rendered text.
    """
    Path(out_html).parent.mkdir(parents=True, exist_ok=True)

    template_name = _resolve_template(templates_dir, template_name)

    meta = load_company_meta(company_yaml)
    fin = load_financials(financials_csv)
    esg = load_esg(esg_csv)

    kpi = compute_kpi(fin, meta.fiscal_year)
    esg_rows = esg_table(esg, meta.fiscal_year)
    sections = build_sections(meta, kpi, esg_rows, llm=llm)

    charts = _build_charts(fin, esg, meta.fiscal_year)
    glossary = _load_glossary(glossary_path)
    benchmarks = _load_benchmarks(benchmarks_path)

    # Optional one-line benchmark comparison.
    benchmark_summary = _summarize_benchmarks(benchmarks, kpi, esg)

    env = get_env(templates_dir)
    payload = RenderPayload(
        meta=meta, esg_table=esg_rows, kpi=kpi, sections=sections,
        generated_at=datetime.datetime.now().strftime("%Y-%m-%d %H:%M"),
        lang=lang
    ).model_dump()

    payload["charts"] = charts
    payload["template_name"] = template_name
    payload["tenant"] = tenant or ""

    # Translate the texts for non-Japanese output.
    payload = _translate_payload_texts(payload, lang=lang, llm=llm, glossary=glossary)

    html = render(env, template_name, payload)
    Path(out_html).write_text(html, encoding="utf-8")
    html_to_pdf(html, out_pdf)
    html_to_docx(html, out_docx)

    # Audit metadata.
    meta_json = {
        "inputs": {
            "company_yaml_sha": _sha256(Path(company_yaml)),
            "financials_csv_sha": _sha256(Path(financials_csv)),
            "esg_csv_sha": _sha256(Path(esg_csv)),
            "lang": lang,
            "tenant": tenant,
            "glossary_keys": list(glossary.keys()) if glossary else [],
            "benchmarks": benchmarks,
        },
        "outputs": {"html": out_html, "pdf": out_pdf, "docx": out_docx},
        "template": {"dir": templates_dir, "name": template_name},
        "generated_at": datetime.datetime.now().isoformat(timespec="seconds"),
        "usage": getattr(llm, "last_usage", {}) if llm else {},
        "benchmark_summary": benchmark_summary,
    }
    return out_html, out_pdf, out_docx, meta_json, html
|
hf.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
title: IR-ESG-Report-Generator-Pro
|
| 2 |
+
emoji: 📈
|
| 3 |
+
colorFrom: gray
|
| 4 |
+
colorTo: indigo
|
| 5 |
+
sdk: docker
|
| 6 |
+
pinned: false
|
llm.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, math, time
|
| 2 |
+
from typing import List, Dict, Any, Optional
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
|
| 5 |
+
def _norm(x):
|
| 6 |
+
if x is None: return ""
|
| 7 |
+
if isinstance(x, float) and math.isnan(x): return ""
|
| 8 |
+
return str(x)
|
| 9 |
+
|
| 10 |
+
def _backoff(attempt):
|
| 11 |
+
# 0.5, 1, 2, 4 ... (上限 8s)
|
| 12 |
+
return min(0.5 * (2 ** attempt), 8.0)
|
| 13 |
+
|
| 14 |
+
class OpenAILLM:
    """Thin wrapper around the OpenAI chat API for report text generation.

    The API key is read from the ``OPENAI_API_KEY2`` environment variable.
    ``last_usage`` holds the token counts of the most recent completion.
    """

    def __init__(self, model_chat: str = "gpt-4o-mini", model_translate: str = "gpt-4o-mini"):
        """Create the client.

        Raises:
            ValueError: if ``OPENAI_API_KEY2`` is not set.
        """
        api_key = os.environ.get("OPENAI_API_KEY2")
        if not api_key:
            raise ValueError("環境変数 OPENAI_API_KEY2 が設定されていません。")
        self.client = OpenAI(api_key=api_key)
        self.model_chat = model_chat
        self.model_translate = model_translate
        self.last_usage = {"prompt_tokens":0, "completion_tokens":0, "total_tokens":0}

    def _update_usage(self, rsp):
        """Record the token usage of ``rsp`` in ``last_usage`` (best effort)."""
        try:
            u = rsp.usage
            if u:
                self.last_usage = {
                    "prompt_tokens": getattr(u, "prompt_tokens", 0),
                    "completion_tokens": getattr(u, "completion_tokens", 0),
                    "total_tokens": getattr(u, "total_tokens", 0),
                }
        except Exception:
            # Usage is informational only; never fail a request over it.
            pass

    def _chat(self, model, messages, temperature=0.2, max_retries=3):
        """Run one chat completion and return the stripped reply text.

        Any exception triggers a retry after ``_backoff(attempt)`` seconds;
        the last failure is re-raised after ``max_retries`` retries.
        """
        for attempt in range(max_retries + 1):
            try:
                rsp = self.client.chat.completions.create(
                    model=model, messages=messages, temperature=temperature
                )
                self._update_usage(rsp)
                # ``content`` can be None; treat it as an empty reply rather
                # than raising and retrying on ``None.strip()``.
                return (rsp.choices[0].message.content or "").strip()
            except Exception:
                if attempt == max_retries:
                    raise
                time.sleep(_backoff(attempt))

    def generate_ceo_message(self, meta, kpi: Dict[str, float], esg_rows: List[Dict[str, Any]]) -> str:
        """Draft a roughly 200-character Japanese CEO message from the inputs."""
        prompt = (
            "以下の企業情報・KPI・ESG指標をもとに、日本語で200字程度のCEOメッセージ草案を出力。"
            "事実ベース・簡潔・投資家向け。数値は丸め過ぎないこと。\n\n"
            f"企業情報: {meta.model_dump()}\nKPI: {kpi}\nESG: {esg_rows}\n"
        )
        return self._chat(self.model_chat, [{"role":"user","content":prompt}], temperature=0.2)

    def generate_risk_opportunity(self, meta, kpi: Dict[str, float], esg_rows: List[Dict[str, Any]]) -> str:
        """Summarise key risks and opportunities in about 150 Japanese characters."""
        prompt = (
            "以下に基づき主要なリスクと機会を150字程度で日本語要約。具体的観点を1-2点:\n\n"
            f"企業情報: {meta.model_dump()}\nKPI: {kpi}\nESG: {esg_rows}\n"
        )
        return self._chat(self.model_chat, [{"role":"user","content":prompt}], temperature=0.2)

    def _translate_batch(self, segments: List[str], target_lang: str, rules: str) -> str:
        """Send ``segments`` (joined by the separator) for translation."""
        SEP = "\n<<<SEP>>>\n"
        system = (
            "You are a precise financial/ESG translator. Preserve numbers and units. "
            "Follow the glossary strictly. Keep tone concise."
        )
        prompt = f"Translate the following into {target_lang}. Each part is separated by <<<SEP>>>.\n{rules}\n\n" + SEP.join(segments)
        return self._chat(
            self.model_translate,
            [{"role":"system","content":system},{"role":"user","content":prompt}],
            temperature=0.1,
        )

    def translate_texts(self, texts: List[Any], target_lang: str = "en", glossary: Optional[Dict[str,str]] = None) -> List[str]:
        """Translate ``texts`` into ``target_lang``, preserving order and length.

        ``None``/NaN entries are treated as ``""``. Blank entries are
        returned unchanged and never sent to the model, so empty or
        all-blank input makes no API call. The remaining entries go out in
        one batch; if the reply does not split into the same number of
        parts, each entry is translated on its own instead.
        """
        norm = [_norm(t) for t in texts]
        pending = [i for i, text in enumerate(norm) if text.strip()]
        if not pending:
            return norm

        rules = ""
        if glossary:
            rules = "用語統一ルール(厳守):\n" + "\n".join([f"- {k} -> {v}" for k,v in glossary.items()])

        txt = self._translate_batch([norm[i] for i in pending], target_lang, rules)
        parts = [p.strip() for p in txt.split("<<<SEP>>>")]
        if len(parts) != len(pending):
            # The model merged or dropped segments, so the batch cannot be
            # mapped back reliably. Translate one segment per call: more
            # requests, but every field gets its own translation.
            parts = [
                self._translate_batch([norm[i]], target_lang, rules).replace("<<<SEP>>>", "").strip()
                for i in pending
            ]

        result = list(norm)
        for index, translated in zip(pending, parts):
            result[index] = translated
        return result
|
models.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import List, Optional, Dict, Any
|
| 3 |
+
|
| 4 |
+
class CompanyMeta(BaseModel):
    """Company-level metadata for a report, built from the company YAML.

    Only ``company_name`` and ``fiscal_year`` are required; every other
    field has a default.
    """

    # Required identification.
    company_name: str
    fiscal_year: int

    # Optional presentation details.
    currency: str = "JPY"
    ticker: Optional[str] = None
    report_title: str = "Integrated Report"
    ceo_name: Optional[str] = None

    # Free-form sustainability inputs.
    material_topics: List[str] = []
    targets: Dict[str, Any] = {}
|
| 13 |
+
|
| 14 |
+
class ReportSections(BaseModel):
    """Narrative text blocks of the report; both default to empty strings."""

    # CEO message paragraph.
    ceo_message: str = ""
    # Risk and opportunity paragraph.
    risk_opportunity: str = ""
|
| 17 |
+
|
| 18 |
+
class RenderPayload(BaseModel):
    """Bundle of everything handed to a template for one report."""

    meta: CompanyMeta
    esg_table: List[Dict[str, Any]]  # one dict per ESG row
    kpi: Dict[str, float]
    sections: ReportSections
    generated_at: str
    lang: str = "ja"  # language code; defaults to Japanese
|
rag.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandera as pa
|
| 2 |
+
from pandera import Column, DataFrameSchema, Check
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
FIN_REQUIRED = ["year","quarter","revenue","ebit","net_income","total_assets","total_equity"]
ESG_REQUIRED = ["year","metric","value","unit","scope","notes"]

# Canonical column name -> accepted source spellings, tried in order.
ALIASES = {
    "revenue": ["revenue","sales","売上","売上高"],
    "ebit": ["ebit","operating_income","営業利益"],
    "net_income": ["net_income","純利益","profit"],
    "total_equity": ["total_equity","shareholders_equity","自己資本"],
}


def normalize_columns(df: pd.DataFrame, required: list) -> pd.DataFrame:
    """Rename known column spellings to canonical names and check required ones.

    Matching is exact first, then case-insensitive with surrounding whitespace
    ignored. Non-string column labels are tolerated (they are compared via
    ``str``). Besides the ``ALIASES`` table, every name in ``required`` is also
    matched case-insensitively, so a ``Year`` header satisfies ``year``.

    Args:
        df: Input frame; it is not modified in place.
        required: Canonical column names that must exist after renaming.

    Returns:
        ``df`` itself when nothing needed renaming, otherwise a renamed copy.

    Raises:
        ValueError: If any required column is still absent.
    """
    present = set(df.columns)
    # Folded label -> original label; str() keeps integer labels from crashing.
    by_folded = {str(col).strip().lower(): col for col in df.columns}

    renames = {}
    # Normalize aliases: the first spelling found for each canonical key wins.
    for key, names in ALIASES.items():
        if key in present:
            continue
        for alias in names:
            src = alias if alias in present else by_folded.get(alias)
            if src is not None:
                renames[src] = key
                break

    # Required names without an alias entry still get case-insensitive matching.
    claimed = set(renames.values())
    for name in required:
        if name in present or name in claimed:
            continue
        src = by_folded.get(str(name).strip().lower())
        if src is not None and src not in renames:
            renames[src] = name
            claimed.add(name)

    if renames:
        df = df.rename(columns=renames)

    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"必須列不足: {missing}")
    return df
|
| 29 |
+
|
| 30 |
+
# Column contract for financial rows; only the two balance-sheet totals may be null.
fin_schema = DataFrameSchema(
    columns={
        "year": Column(int, checks=Check.ge(1900)),
        "quarter": Column(str),
        "revenue": Column(float, checks=Check.ge(0)),
        "ebit": Column(float),
        "net_income": Column(float),
        "total_assets": Column(float, nullable=True),
        "total_equity": Column(float, nullable=True),
    }
)

# Column contract for ESG rows; unit, scope and notes may be null.
esg_schema = DataFrameSchema(
    columns={
        "year": Column(int, checks=Check.ge(1900)),
        "metric": Column(str),
        "value": Column(float),
        "unit": Column(str, nullable=True),
        "scope": Column(str, nullable=True),
        "notes": Column(object, nullable=True),
    }
)
|
| 48 |
+
|
| 49 |
+
def validate_financials(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize column names for ``FIN_REQUIRED`` and validate with ``fin_schema``.

    Returns whatever ``fin_schema.validate`` returns; ``lazy=True`` is
    forwarded to it. ``normalize_columns`` raises ``ValueError`` when a
    required column is absent.
    """
    normalized = normalize_columns(df, FIN_REQUIRED)
    validated = fin_schema.validate(normalized, lazy=True)
    return validated
|
| 52 |
+
|
| 53 |
+
def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize column names for ``ESG_REQUIRED`` and validate with ``esg_schema``.

    Returns whatever ``esg_schema.validate`` returns; ``lazy=True`` is
    forwarded to it. ``normalize_columns`` raises ``ValueError`` when a
    required column is absent.
    """
    normalized = normalize_columns(df, ESG_REQUIRED)
    validated = esg_schema.validate(normalized, lazy=True)
    return validated
|
render.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from docx import Document
|
| 2 |
+
from docx.shared import Pt
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
|
| 5 |
+
_HAS_WEASY = False
|
| 6 |
+
try:
|
| 7 |
+
from weasyprint import HTML
|
| 8 |
+
_HAS_WEASY = True
|
| 9 |
+
except Exception:
|
| 10 |
+
_HAS_WEASY = False
|
| 11 |
+
|
| 12 |
+
def html_to_pdf(html_str: str, out_pdf_path: str):
    """Write ``html_str`` as a PDF to ``out_pdf_path``.

    WeasyPrint is used when it was importable at module load. If it is
    unavailable, or raises while rendering, xhtml2pdf is used instead.
    Failures on either path are logged rather than raised, so callers keep
    the original best-effort behavior.

    Args:
        html_str: Complete HTML document as a string.
        out_pdf_path: Destination file path; overwritten if it exists.
    """
    import logging

    log = logging.getLogger(__name__)

    if _HAS_WEASY:
        try:
            HTML(string=html_str).write_pdf(out_pdf_path)
            return
        except Exception:
            # Deliberate best-effort: fall through to xhtml2pdf, but record
            # why the preferred renderer was abandoned.
            log.warning("WeasyPrint failed; falling back to xhtml2pdf", exc_info=True)

    from xhtml2pdf import pisa

    with open(out_pdf_path, "wb") as f:
        status = pisa.CreatePDF(src=html_str, dest=f)

    # CreatePDF reports problems through an error counter instead of raising.
    err_count = getattr(status, "err", 0)
    if err_count:
        log.error("xhtml2pdf reported %s error(s) while writing %s", err_count, out_pdf_path)
|
| 22 |
+
|
| 23 |
+
def html_to_docx(html_str: str, out_docx_path: str):
    """Convert the headings, paragraphs and list items of an HTML string to DOCX.

    Only ``h1``/``h2``/``h3``/``p``/``li`` elements are exported, in document
    order. ``h1``-``h3`` become headings at levels 0-2; ``p`` and ``li``
    become plain paragraphs. Every run is set to 11 pt.

    Text is whitespace-normalized the way a browser would show it, so words
    separated by inline markup keep their spaces. When exported elements are
    nested (for example ``<li><p>..</p></li>``), each element contributes only
    the text that belongs to it directly, so nothing is written twice.

    Args:
        html_str: HTML markup to convert.
        out_docx_path: Destination ``.docx`` path.
    """
    block_tags = ["h1", "h2", "h3", "p", "li"]
    heading_levels = {"h1": 0, "h2": 1, "h3": 2}

    doc = Document()
    soup = BeautifulSoup(html_str, "html.parser")

    for tag in soup.find_all(block_tags):
        # Keep only strings whose nearest exported ancestor is this tag; text
        # inside a nested exported element is emitted when that element is visited.
        own = [s for s in tag.strings if s.find_parent(block_tags) is tag]
        txt = " ".join("".join(own).split())
        if not txt:
            continue

        level = heading_levels.get(tag.name)
        if level is None:
            p = doc.add_paragraph(txt)
        else:
            p = doc.add_heading(txt, level=level)

        for run in p.runs:
            run.font.size = Pt(11)

    doc.save(out_docx_path)
|
requirements.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==4.44.0
|
| 2 |
+
pandas==2.2.2
|
| 3 |
+
pydantic==2.7.4
|
| 4 |
+
Jinja2==3.1.4
|
| 5 |
+
python-docx==1.1.2
|
| 6 |
+
WeasyPrint==62.3
|
| 7 |
+
beautifulsoup4==4.12.3
|
| 8 |
+
PyYAML==6.0.2
|
| 9 |
+
openai==1.40.2
|
| 10 |
+
httpx==0.27.2
|
| 11 |
+
matplotlib==3.8.4
|
| 12 |
+
Pillow==10.3.0
|
| 13 |
+
faiss-cpu==1.8.0
|
| 14 |
+
sentence-transformers==3.0.1
|
| 15 |
+
huggingface_hub==0.24.6
|
| 16 |
+
pandera==0.20.3
|
| 17 |
+
xhtml2pdf==0.2.15
|
templates:gir.html.j2
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html.j2" %}
|
| 2 |
+
{% block body %}
|
| 3 |
+
<h1>GRI Report ({{ meta.fiscal_year }})</h1>
|
| 4 |
+
<p>{{ sections.ceo_message }}</p>
|
| 5 |
+
<p>{{ benchmark_summary }}</p>
|
| 6 |
+
<img src="{{ charts.renewable }}"><img src="{{ charts.female }}">
|
| 7 |
+
{% endblock %}
|
templates:report.html.j2
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
<!doctype html>
|
| 2 |
-
<html lang="
|
| 3 |
<head>
|
| 4 |
<meta charset="utf-8" />
|
| 5 |
<title>{{ meta.report_title }} - {{ meta.company_name }}</title>
|
|
@@ -29,11 +29,6 @@
|
|
| 29 |
<div><strong>{{ kpi.revenue | round(0) | int }} {{ meta.currency }}</strong></div>
|
| 30 |
<div class="small">前年比: {{ kpi.revenue_yoy | round(1) }}%</div>
|
| 31 |
</div>
|
| 32 |
-
<div class="card">
|
| 33 |
-
<div class="small">営業利益</div>
|
| 34 |
-
<div><strong>{{ kpi.operating_income | round(0) | int }} {{ meta.currency }}</strong></div>
|
| 35 |
-
<div class="small">マージン: {{ kpi.operating_margin | round(1) }}%</div>
|
| 36 |
-
</div>
|
| 37 |
<div class="card">
|
| 38 |
<div class="small">EBIT</div>
|
| 39 |
<div><strong>{{ kpi.ebit | round(0) | int }} {{ meta.currency }}</strong></div>
|
|
|
|
| 1 |
<!doctype html>
|
| 2 |
+
<html lang="{{ lang }}">
|
| 3 |
<head>
|
| 4 |
<meta charset="utf-8" />
|
| 5 |
<title>{{ meta.report_title }} - {{ meta.company_name }}</title>
|
|
|
|
| 29 |
<div><strong>{{ kpi.revenue | round(0) | int }} {{ meta.currency }}</strong></div>
|
| 30 |
<div class="small">前年比: {{ kpi.revenue_yoy | round(1) }}%</div>
|
| 31 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
<div class="card">
|
| 33 |
<div class="small">EBIT</div>
|
| 34 |
<div><strong>{{ kpi.ebit | round(0) | int }} {{ meta.currency }}</strong></div>
|
templates:sasb.html.j2
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html.j2" %}
|
| 2 |
+
{% block body %}
|
| 3 |
+
<h1>SASB Report ({{ meta.fiscal_year }})</h1>
|
| 4 |
+
<p>{{ sections.ceo_message }}</p>
|
| 5 |
+
<table border="1" cellspacing="0" cellpadding="6">
|
| 6 |
+
<tr><th>指標</th><th>値</th><th>単位</th><th>備考</th></tr>
|
| 7 |
+
{% for row in esg_table %}
|
| 8 |
+
<tr><td>{{ row.display }}</td><td>{{ row.value }}</td><td>{{ row.unit }}</td><td>{{ row.notes }}</td></tr>
|
| 9 |
+
{% endfor %}
|
| 10 |
+
</table>
|
| 11 |
+
<p>{{ sections.risk_opportunity }}</p>
|
| 12 |
+
{% endblock %}
|
templates:tcfd.html.j2
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html.j2" %}
|
| 2 |
+
{% block body %}
|
| 3 |
+
<h1>TCFD Report ({{ meta.fiscal_year }})</h1>
|
| 4 |
+
<h2>1. ガバナンス</h2><p>{{ sections.ceo_message }}</p>
|
| 5 |
+
<h2>2. 戦略</h2><p>{{ benchmark_summary }}</p>
|
| 6 |
+
<h2>3. リスク管理</h2><p>{{ sections.risk_opportunity }}</p>
|
| 7 |
+
<h2>4. 指標・目標</h2>
|
| 8 |
+
<img src="{{ charts.revenue }}"><img src="{{ charts.renewable }}"><img src="{{ charts.female }}">
|
| 9 |
+
{% endblock %}
|
templating.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from jinja2 import Environment, FileSystemLoader, select_autoescape
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
def get_env(templates_dir: str):
    """Build the Jinja2 environment used to render report templates.

    Templates are searched, in order, in ``templates_dir``, in a ``templates``
    folder next to this module, and in ``templates`` under the current
    working directory.

    Autoescaping is decided from the template file name's suffix. The
    ``html.j2`` and ``xml.j2`` suffixes are listed explicitly because a name
    such as ``report.html.j2`` does not end in ``.html`` and would otherwise
    be rendered with escaping switched off.

    NOTE(review): values that are meant to be inserted as raw HTML must now be
    wrapped in ``markupsafe.Markup`` or passed through ``|safe`` — confirm
    against the callers that build the template context.

    Args:
        templates_dir: Preferred directory to load templates from.

    Returns:
        A configured ``jinja2.Environment``.
    """
    here = Path(__file__).resolve().parent
    searchpaths = [
        templates_dir,
        str(here / "templates"),
        str(Path().resolve() / "templates"),
    ]
    env = Environment(
        loader=FileSystemLoader(searchpaths),
        autoescape=select_autoescape(["html", "xml", "html.j2", "xml.j2"]),
    )
    return env
|
| 9 |
+
|
| 10 |
+
def render(env, template_name: str, context: dict) -> str:
    """Load ``template_name`` from ``env`` and render it.

    The entries of ``context`` are passed to the template as keyword
    variables.
    """
    return env.get_template(template_name).render(**context)
|
validators.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandera as pa
|
| 2 |
+
from pandera import Column, DataFrameSchema, Check
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
FIN_REQUIRED = ["year","quarter","revenue","ebit","net_income","total_assets","total_equity"]
ESG_REQUIRED = ["year","metric","value","unit","scope","notes"]

# Canonical column name -> accepted source spellings, tried in order.
ALIASES = {
    "revenue": ["revenue","sales","売上","売上高"],
    "ebit": ["ebit","operating_income","営業利益"],
    "net_income": ["net_income","純利益","profit"],
    "total_equity": ["total_equity","shareholders_equity","自己資本"],
}


def normalize_columns(df: pd.DataFrame, required: list) -> pd.DataFrame:
    """Rename known column spellings to canonical names and check required ones.

    Matching is exact first, then case-insensitive with surrounding whitespace
    ignored. Non-string column labels are tolerated (they are compared via
    ``str``). Besides the ``ALIASES`` table, every name in ``required`` is also
    matched case-insensitively, so a ``Year`` header satisfies ``year``.

    Args:
        df: Input frame; it is not modified in place.
        required: Canonical column names that must exist after renaming.

    Returns:
        ``df`` itself when nothing needed renaming, otherwise a renamed copy.

    Raises:
        ValueError: If any required column is still absent.
    """
    present = set(df.columns)
    # Folded label -> original label; str() keeps integer labels from crashing.
    by_folded = {str(col).strip().lower(): col for col in df.columns}

    renames = {}
    # Normalize aliases: the first spelling found for each canonical key wins.
    for key, names in ALIASES.items():
        if key in present:
            continue
        for alias in names:
            src = alias if alias in present else by_folded.get(alias)
            if src is not None:
                renames[src] = key
                break

    # Required names without an alias entry still get case-insensitive matching.
    claimed = set(renames.values())
    for name in required:
        if name in present or name in claimed:
            continue
        src = by_folded.get(str(name).strip().lower())
        if src is not None and src not in renames:
            renames[src] = name
            claimed.add(name)

    if renames:
        df = df.rename(columns=renames)

    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"必須列不足: {missing}")
    return df
|
| 29 |
+
|
| 30 |
+
# Column contract for financial rows; only the two balance-sheet totals may be null.
fin_schema = DataFrameSchema(
    columns={
        "year": Column(int, checks=Check.ge(1900)),
        "quarter": Column(str),
        "revenue": Column(float, checks=Check.ge(0)),
        "ebit": Column(float),
        "net_income": Column(float),
        "total_assets": Column(float, nullable=True),
        "total_equity": Column(float, nullable=True),
    }
)

# Column contract for ESG rows; unit, scope and notes may be null.
esg_schema = DataFrameSchema(
    columns={
        "year": Column(int, checks=Check.ge(1900)),
        "metric": Column(str),
        "value": Column(float),
        "unit": Column(str, nullable=True),
        "scope": Column(str, nullable=True),
        "notes": Column(object, nullable=True),
    }
)
|
| 48 |
+
|
| 49 |
+
def validate_financials(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize column names for ``FIN_REQUIRED`` and validate with ``fin_schema``.

    Returns whatever ``fin_schema.validate`` returns; ``lazy=True`` is
    forwarded to it. ``normalize_columns`` raises ``ValueError`` when a
    required column is absent.
    """
    normalized = normalize_columns(df, FIN_REQUIRED)
    validated = fin_schema.validate(normalized, lazy=True)
    return validated
|
| 52 |
+
|
| 53 |
+
def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize column names for ``ESG_REQUIRED`` and validate with ``esg_schema``.

    Returns whatever ``esg_schema.validate`` returns; ``lazy=True`` is
    forwarded to it. ``normalize_columns`` raises ``ValueError`` when a
    required column is absent.
    """
    normalized = normalize_columns(df, ESG_REQUIRED)
    validated = esg_schema.validate(normalized, lazy=True)
    return validated
|