Corin1998 committed on
Commit
5b82238
·
verified ·
1 Parent(s): f469bad

Upload 17 files

Browse files
FROM python:3.dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# Native libraries needed by WeasyPrint (cairo/pango/gdk-pixbuf), plus fonts
# so Japanese/CJK text renders in generated PDFs and charts.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libcairo2 pango1.0-tools libpango-1.0-0 libgdk-pixbuf2.0-0 libffi-dev \
    fonts-noto fonts-noto-cjk git \
    && rm -rf /var/lib/apt/lists/*

# The app writes ./tmp and templates/ at run time, so the runtime user must own
# /code. NOTE(review): UID 1000 is assumed to match the user Hugging Face
# Docker Spaces run the container as -- confirm against the Spaces docs.
RUN useradd -m -u 1000 user

WORKDIR /code
COPY requirements.txt .
RUN pip install --no-cache-dir -U pip wheel && pip install --no-cache-dir -r requirements.txt

COPY --chown=user:user . .
# WORKDIR created /code as root; hand the directory itself to the runtime user.
RUN chown user:user /code

USER user
ENV HOME=/home/user
ENV GRADIO_SERVER_NAME=0.0.0.0
EXPOSE 7860
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ import tempfile, datetime, json, shutil, os
4
+ from core import generate_report
5
+ from rag import build_or_update_index, answer_with_context
6
+
7
# Heading and one-line description shown at the top of the Gradio page.
TITLE = "IR/ESG Report Generator (HF Space, Pro)"
DESC = (
    "CSV/YAML から IR/ESG レポート(HTML/PDF/DOCX)を生成。"
    "LLM要約+翻訳・グラフ・ベンチマーク・RAG・Hub保存に対応。"
)

# Absolute path of the ``templates`` directory located next to this module.
_MODULE_DIR = Path(__file__).resolve().parent
TEMPLATES_DIR = str(_MODULE_DIR / "templates")
11
+
12
+ import shutil as _shutil
13
def _to_path(upload_obj, tmpdir: Path, filename: str) -> Path:
    """Materialise an uploaded object as ``tmpdir / filename`` and return that path.

    Accepted shapes, checked in this order: a path (``str``/``Path``), an object
    with a ``name`` attribute (copied from that path), a readable object
    (``read()`` result is written), or a dict carrying ``"name"`` or ``"data"``.

    Raises:
        ValueError: ``upload_obj`` is ``None``.
        TypeError: ``upload_obj`` has none of the supported shapes.
    """
    if upload_obj is None:
        raise ValueError(f"{filename} が未指定です。")

    target = tmpdir / filename

    def _copy_from(source):
        _shutil.copy(source, target)
        return target

    def _write(payload):
        # Text payloads are stored as UTF-8 bytes.
        if isinstance(payload, str):
            payload = payload.encode("utf-8")
        target.write_bytes(payload)
        return target

    if isinstance(upload_obj, (str, Path)):
        return _copy_from(Path(upload_obj))
    if hasattr(upload_obj, "name"):  # NamedString
        return _copy_from(Path(upload_obj.name))
    if hasattr(upload_obj, "read"):
        return _write(upload_obj.read())
    if isinstance(upload_obj, dict):
        if "name" in upload_obj:
            return _copy_from(upload_obj["name"])
        if "data" in upload_obj:
            return _write(upload_obj["data"])
    raise TypeError(f"Unsupported upload type: {type(upload_obj)}")
32
+
33
def _upload_suffix(upload_obj, allowed, default):
    """Return the upload's lower-cased file extension if it is in ``allowed``, else ``default``."""
    if isinstance(upload_obj, dict):
        name = upload_obj.get("name") or ""
    elif isinstance(upload_obj, (str, Path)):
        name = upload_obj
    else:
        name = getattr(upload_obj, "name", "") or ""
    suffix = Path(str(name)).suffix.lower()
    return suffix if suffix in allowed else default


def _safe_component(value, default="default"):
    """Reduce free text to characters that are safe inside a file name or repo path segment."""
    cleaned = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in (value or "").strip()
    ).strip("_")
    return cleaned or default


def run(company_yaml, financials_csv, esg_csv, use_llm, lang, template_key,
        tenant, push_to_hub, glossary_yaml, benchmarks_yaml, past_reports_zip):
    """Gradio callback: build the report from the uploaded files.

    Uploads are copied into a temporary directory, the report is generated
    there, and the results are copied to ``./tmp`` so they outlive the
    temporary directory. Optionally the results are pushed to a Hub dataset.

    Returns:
        A 6-tuple ``(status, html_path, pdf_path, docx_path, meta_path,
        html_text)``. On failure the paths are ``None``, ``html_text`` is
        ``""`` and ``status`` carries the error text; no exception escapes.
    """
    try:
        if not company_yaml or not financials_csv or not esg_csv:
            return "全ファイルをアップロードしてください。", None, None, None, None, ""

        with tempfile.TemporaryDirectory() as td:
            tdir = Path(td)
            # Keep the real extension: the loaders choose read_excel vs read_csv
            # from the file name, and the UI accepts both .csv and .xlsx.
            table_exts = (".csv", ".xlsx")
            fin_name = "financials" + _upload_suffix(financials_csv, table_exts, ".csv")
            esg_name = "esg" + _upload_suffix(esg_csv, table_exts, ".csv")

            cpath = _to_path(company_yaml, tdir, "company.yaml")
            fpath = _to_path(financials_csv, tdir, fin_name)
            epath = _to_path(esg_csv, tdir, esg_name)
            gpath = _to_path(glossary_yaml, tdir, "glossary.yaml") if glossary_yaml else None
            bpath = _to_path(benchmarks_yaml, tdir, "benchmarks.yaml") if benchmarks_yaml else None
            rzip = _to_path(past_reports_zip, tdir, "past_reports.zip") if past_reports_zip else None

            if rzip:
                build_or_update_index(rzip, index_dir=tdir / "index")

            outdir = tdir / "out"
            outdir.mkdir(parents=True, exist_ok=True)

            llm = None
            if use_llm:
                try:
                    from llm import OpenAILLM
                    llm = OpenAILLM()
                except Exception as e:
                    return f"LLM初期化エラー: {e}", None, None, None, None, ""

            # UI key -> template file. "sasab" is a tolerated misspelling of "sasb".
            template_files = {
                "base": "base.html.j2",
                "tcfd": "tcfd.html.j2",
                "sasab": "sasb.html.j2",
                "sasb": "sasb.html.j2",
                "gri": "gri.html.j2",
            }

            html, pdf, docx, meta_json, html_text = generate_report(
                company_yaml=str(cpath),
                financials_csv=str(fpath),
                esg_csv=str(epath),
                templates_dir=TEMPLATES_DIR,
                template_name=template_files.get(template_key, "base.html.j2"),
                out_html=str(outdir / "report.html"),
                out_pdf=str(outdir / "report.pdf"),
                out_docx=str(outdir / "report.docx"),
                lang=lang,
                llm=llm,
                glossary_path=str(gpath) if gpath else None,
                benchmarks_path=str(bpath) if bpath else None,
                tenant=tenant,
                rag_index_dir=str(tdir / "index") if rzip else None,
            )

            # Copy results out of the temp dir before it is deleted.
            repo_tmp = Path("./tmp")
            repo_tmp.mkdir(exist_ok=True)
            ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
            # tenant is free text from the UI; never let it inject path separators.
            tenant_slug = _safe_component(tenant)
            prefix = f"{tenant_slug}-{ts}"
            html_out = repo_tmp / f"{prefix}.html"
            pdf_out = repo_tmp / f"{prefix}.pdf"
            docx_out = repo_tmp / f"{prefix}.docx"
            meta_out = repo_tmp / f"{prefix}.json"
            _shutil.copy(html, html_out)
            _shutil.copy(pdf, pdf_out)
            _shutil.copy(docx, docx_out)
            meta_out.write_text(json.dumps(meta_json, ensure_ascii=False, indent=2), encoding="utf-8")

            if push_to_hub:
                try:
                    from huggingface_hub import HfApi
                    api = HfApi()
                    repo_id = os.environ.get("HF_DATASET_REPO", "your-org/ir-esg-reports")
                    base_path = f"{tenant_slug}/{ts}"
                    uploads = [
                        (html_out, "report.html"),
                        (pdf_out, "report.pdf"),
                        (docx_out, "report.docx"),
                        (meta_out, "report.json"),
                    ]
                    for p, name in uploads:
                        api.upload_file(
                            path_or_fileobj=str(p),
                            path_in_repo=f"{base_path}/{name}",
                            repo_id=repo_id,
                            repo_type="dataset",
                        )
                except Exception as e:
                    # Upload is best effort: record the failure in the metadata file.
                    meta_json["hub_error"] = str(e)
                    meta_out.write_text(json.dumps(meta_json, ensure_ascii=False, indent=2), encoding="utf-8")

            return "生成が完了しました。", str(html_out), str(pdf_out), str(docx_out), str(meta_out), html_text

    except Exception as e:
        # UI boundary: surface the error and a short trace in the status box.
        import traceback
        tb = traceback.format_exc(limit=20)
        return f"エラー: {e}\n--- trace ---\n{tb}", None, None, None, None, ""
120
+
121
+
122
# Gradio page: three rows of inputs, a run button, then the result widgets.
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}\n{DESC}")

    # Row 1: the three mandatory data files.
    with gr.Row():
        company_yaml = gr.File(label="company.yaml(会社情報・年度等)", file_types=[".yaml", ".yml"])
        financials_csv = gr.File(label="financials.csv(財務KPI)", file_types=[".csv", ".xlsx"])
        esg_csv = gr.File(label="esg_metrics.csv(ESG指標)", file_types=[".csv", ".xlsx"])

    # Row 2: generation options.
    with gr.Row():
        use_llm = gr.Checkbox(label="LLMで要約/翻訳を行う(OPENAI_API_KEY2 必須)", value=True)
        lang = gr.Dropdown(choices=["ja", "en", "zh", "ko", "de", "fr"], value="ja", label="出力言語")
        template_key = gr.Dropdown(choices=["base", "tcfd", "sasb", "gri"], value="base", label="テンプレート")
        tenant = gr.Textbox(label="テナント名(会社識別子)", value="HitC")

    # Row 3: optional inputs and the Hub toggle.
    with gr.Row():
        glossary_yaml = gr.File(label="glossary.yaml(用語集・任意)", file_types=[".yaml", ".yml"])
        benchmarks_yaml = gr.File(label="benchmarks.yaml(業界平均など・任意)", file_types=[".yaml", ".yml"])
        past_reports_zip = gr.File(label="過去レポートZip(RAG用・任意)", file_types=[".zip"])
        push_to_hub = gr.Checkbox(label="生成物を Hugging Face Hub(Datasets)へ保存", value=False)

    run_btn = gr.Button("レポート生成")

    status = gr.Textbox(label="ステータス", interactive=False)
    html_file = gr.File(label="HTMLダウンロード")
    pdf_file = gr.File(label="PDFダウンロード")
    docx_file = gr.File(label="DOCXダウンロード")
    meta_file = gr.File(label="メタ情報(JSON)")
    html_preview = gr.HTML(label="HTMLプレビュー(抜粋)")

    # Order must match the positional parameters / return tuple of run().
    report_inputs = [
        company_yaml, financials_csv, esg_csv, use_llm, lang, template_key,
        tenant, push_to_hub, glossary_yaml, benchmarks_yaml, past_reports_zip,
    ]
    report_outputs = [status, html_file, pdf_file, docx_file, meta_file, html_preview]
    run_btn.click(fn=run, inputs=report_inputs, outputs=report_outputs)

if __name__ == "__main__":
    demo.launch()
benchmark.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ revenue_yoy: 8.5
2
+ renewable_energy_ratio: 35.0
charts.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io, base64
2
+ import matplotlib.pyplot as plt
3
+
4
def _b64(fig):
    """Render ``fig`` to PNG, close the figure, and return a base64 ``data:`` URI string."""
    png = io.BytesIO()
    fig.savefig(png, format="png", bbox_inches="tight")
    # Close explicitly so pyplot does not keep the figure registered.
    plt.close(fig)
    encoded = base64.b64encode(png.getvalue()).decode("utf-8")
    return "data:image/png;base64," + encoded
9
+
10
def line_chart_base64(xs, ys, xlabel="", ylabel="", title=""):
    """Plot ``ys`` against ``xs`` as a line and return the chart as a PNG data URI."""
    figure, axes = plt.subplots()
    axes.plot(xs, ys)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_title(title)
    return _b64(figure)
15
+
16
def materiality_base64(labels, x_vals, y_vals, title="Materiality Matrix"):
    """Scatter ``(x_vals, y_vals)``, annotate each point with its label, return a PNG data URI."""
    figure, axes = plt.subplots()
    axes.scatter(x_vals, y_vals)
    for label, x_pos, y_pos in zip(labels, x_vals, y_vals):
        # Offset the text 5pt up/right so it does not sit on the marker.
        axes.annotate(label, (x_pos, y_pos), xytext=(5, 5), textcoords="offset points")
    axes.set_xlabel("Stakeholder Importance")
    axes.set_ylabel("Business Impact")
    axes.set_title(title)
    axes.grid(True, alpha=0.3)
    return _b64(figure)
core.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import yaml, datetime, hashlib, json, math
3
+ from pathlib import Path
4
+ from templating import get_env, render
5
+ from models import CompanyMeta, ReportSections, RenderPayload
6
+ from render import html_to_pdf, html_to_docx
7
+ from charts import line_chart_base64, materiality_base64
8
+ from validators import validate_financials, validate_esg
9
+ from typing import Dict, Any, List, Optional
10
+
11
# ESG metric key -> label shown in the report table; unknown keys are shown as-is.
DISPLAY_NAME = dict(
    co2_emissions="CO₂排出量",
    energy_renewable_ratio="再生可能エネルギー比率",
    female_management_ratio="女性管理職比率",
)
16
+
17
def _sha256(p: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``p``, read in 8 KiB chunks."""
    digest = hashlib.sha256()
    with p.open("rb") as stream:
        while block_bytes := stream.read(8192):
            digest.update(block_bytes)
    return digest.hexdigest()
23
+
24
def _require_columns(df: pd.DataFrame, required, name: str):
    """Raise ``ValueError`` naming every entry of ``required`` that is not a column of ``df``."""
    present = df.columns
    missing = [column for column in required if column not in present]
    if not missing:
        return
    raise ValueError(f"{name} に必須列がありません: {missing}. 例: {required}")
28
+
29
def load_company_meta(path: str) -> CompanyMeta:
    """Parse the company YAML at ``path`` into a ``CompanyMeta``.

    Raises:
        ValueError: the YAML is empty or its top level is not a mapping
            (previously this surfaced as an opaque ``TypeError`` from ``**data``).
    """
    data = yaml.safe_load(Path(path).read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        raise ValueError(
            f"company.yaml の内容がマッピング(key: value 形式)ではありません: {type(data).__name__}"
        )
    return CompanyMeta(**data)
32
+
33
def load_financials(path: str) -> pd.DataFrame:
    """Load and validate the financial KPI table, normalising ``quarter`` and ``year``.

    ``.xlsx`` paths are read with ``read_excel``, anything else with
    ``read_csv``. After validation ``quarter`` becomes ``"Q<digit>"`` using the
    first digit found in the cell (``"Q"`` when there is none) and ``year``
    becomes nullable ``Int64``.
    """
    if str(path).lower().endswith(".xlsx"):
        df = pd.read_excel(path)
    else:
        df = pd.read_csv(path)
    df = validate_financials(df)

    # NFKC folds full-width forms to ASCII. The original
    # ``.str.replace("Q", "Q")`` was a no-op, so full-width quarter labels were
    # never normalised; this assumes that was the intent.
    quarter = (
        df["quarter"].astype(str)
        .str.normalize("NFKC")
        .str.upper()
        .str.replace(" ", "", regex=False)
    )
    df["quarter"] = "Q" + quarter.str.extract(r"(\d)", expand=False).fillna("")
    df["year"] = pd.to_numeric(df["year"], errors="coerce").astype("Int64")
    return df
44
+
45
def load_esg(path: str) -> pd.DataFrame:
    """Load and validate the ESG metrics table; ``year`` is converted to nullable ``Int64``."""
    # Pick the reader from the file extension.
    reader = pd.read_excel if str(path).lower().endswith(".xlsx") else pd.read_csv
    frame = validate_esg(reader(path))
    frame["year"] = pd.to_numeric(frame["year"], errors="coerce").astype("Int64")
    return frame
53
+
54
_QUARTER_ORDER = {"Q1": 1, "Q2": 2, "Q3": 3, "Q4": 4}


def _latest_quarter_row(year_df: pd.DataFrame) -> pd.DataFrame:
    """Return the row (as a 0/1-row frame) with the highest recognised quarter."""
    ranked = year_df.assign(q_order=year_df["quarter"].map(_QUARTER_ORDER))
    return ranked.dropna(subset=["q_order"]).sort_values("q_order").tail(1)


def compute_kpi(fin_df: pd.DataFrame, fiscal_year: int):
    """Compute headline KPIs from the latest quarter of ``fiscal_year``.

    Returns:
        dict with ``revenue``, ``ebit``, ``net_income``, ``ebit_margin`` (%),
        ``roe`` (%) and ``revenue_yoy`` (%). Ratios are ``0.0`` when their
        denominator is zero or missing.

    Raises:
        ValueError: no rows for ``fiscal_year``, or none with a quarter in Q1-Q4.
    """
    years = set(fin_df["year"].dropna().astype(int))
    if fiscal_year not in years:
        raise ValueError(f"financials.csv に年度 {fiscal_year} のデータがありません。year 列を確認してください。")

    fy = fin_df[fin_df["year"] == fiscal_year]
    if fy.empty:
        raise ValueError(f"年度 {fiscal_year} の四半期データが空です。quarter の表記(Q1~Q4)を確認してください。")

    latest = _latest_quarter_row(fy)
    if latest.empty:
        raise ValueError(f"年度 {fiscal_year} の quarter が Q1〜Q4 として認識できません。例: Q4")

    revenue = float(latest["revenue"].iloc[0])
    ebit = float(latest["ebit"].iloc[0])
    net_income = float(latest["net_income"].iloc[0])

    # total_equity is nullable; NaN is truthy and would otherwise yield a NaN ROE.
    equity = 0.0
    if "total_equity" in latest:
        raw_equity = latest["total_equity"].iloc[0]
        if not pd.isna(raw_equity) and math.isfinite(float(raw_equity)):
            equity = float(raw_equity)

    # YoY compares like with like: the same quarter of the previous year. If
    # that quarter is absent, fall back to the previous year's latest quarter
    # (the historical behaviour).
    prev_revenue = 0.0
    prev_fy = fin_df[fin_df["year"] == fiscal_year - 1]
    if not prev_fy.empty:
        same_quarter = prev_fy[prev_fy["quarter"] == latest["quarter"].iloc[0]]
        prev = _latest_quarter_row(prev_fy if same_quarter.empty else same_quarter)
        if not prev.empty:
            prev_revenue = float(prev["revenue"].iloc[0])

    ebit_margin = (ebit / revenue * 100) if revenue else 0.0
    roe = (net_income / equity * 100) if equity else 0.0
    revenue_yoy = ((revenue / prev_revenue) - 1) * 100 if prev_revenue else 0.0

    return {
        "revenue": revenue, "ebit": ebit, "net_income": net_income,
        "ebit_margin": ebit_margin, "roe": roe, "revenue_yoy": revenue_yoy,
    }
91
+
92
def esg_table(df: pd.DataFrame, fiscal_year: int):
    """Return the ESG rows of ``fiscal_year`` as dicts for the report table.

    Each dict has ``display`` (label from ``DISPLAY_NAME`` or the raw metric
    key), ``value``, ``unit`` and ``notes``. Missing ``unit``/``notes`` become
    ``""`` so the report does not print "nan".
    """
    def _text(value):
        # Nullable columns arrive as None/NaN for empty cells.
        if value is None:
            return ""
        try:
            if pd.isna(value):
                return ""
        except (TypeError, ValueError):
            pass
        return str(value)

    selected = df[df["year"] == fiscal_year]
    return [
        {
            "display": DISPLAY_NAME.get(record["metric"], record["metric"]),
            "value": record["value"],
            "unit": _text(record.get("unit", "")),
            "notes": _text(record.get("notes", "")),
        }
        for record in selected.to_dict("records")
    ]
105
+
106
def build_sections(meta: CompanyMeta, kpi: dict, esg_rows: list, llm=None) -> ReportSections:
    """Build the narrative sections: LLM-generated when ``llm`` is given, canned text otherwise."""
    if not llm:
        return ReportSections(
            ceo_message=f"{meta.fiscal_year}期は、売上成長と収益性の両立に注力しました。",
            risk_opportunity="主要リスクはマクロ環境と規制動向。機会は生成AI活用と脱炭素需要の拡大です。",
        )
    # Keyword arguments evaluate left to right: CEO message first, then risks.
    return ReportSections(
        ceo_message=llm.generate_ceo_message(meta, kpi, esg_rows),
        risk_opportunity=llm.generate_risk_opportunity(meta, kpi, esg_rows),
    )
114
+
115
def _s(x):
    """Return ``str(x)``, mapping ``None`` and float NaN to the empty string."""
    if x is None:
        return ""
    if isinstance(x, float) and math.isnan(x):
        return ""
    return str(x)


def _translate_payload_texts(payload: dict, lang: str, llm, glossary: Optional[Dict[str, str]]):
    """Translate the human-readable strings of ``payload`` in place and return it.

    Translated fields: both narrative sections, each ESG row's ``display`` and
    ``notes``, the report title and the material topics. Nothing happens when
    ``llm`` is falsy or ``lang`` is ``"ja"``. If the translator returns a
    different number of strings than it was given, the payload is left
    untranslated rather than failing part-way through the update.
    """
    if not llm or lang == "ja":
        return payload

    sections = payload["sections"]
    meta = payload["meta"]
    rows = payload["esg_table"]
    topics = meta.get("material_topics", [])

    # Flatten into one list; the write-back below consumes the same order.
    texts = [_s(sections["ceo_message"]), _s(sections["risk_opportunity"])]
    for row in rows:
        texts.append(_s(row.get("display", "")))
        texts.append(_s(row.get("notes", "")))
    texts.append(_s(meta["report_title"]))
    texts.extend(_s(topic) for topic in topics)

    translated = list(llm.translate_texts(texts, target_lang=lang, glossary=glossary or {}))
    if len(translated) != len(texts):
        # A short list used to raise StopIteration after some fields had
        # already been overwritten.
        return payload

    it = iter(translated)
    sections["ceo_message"] = next(it)
    sections["risk_opportunity"] = next(it)
    for row in rows:
        row["display"] = next(it)
        row["notes"] = next(it)
    meta["report_title"] = next(it)
    topics[:] = [next(it) for _ in topics]

    return payload
148
+
149
def _load_glossary(glossary_path: Optional[str]) -> Dict[str, str]:
    """Load the optional glossary YAML; return ``{}`` when absent, unreadable or not a mapping.

    Loading is best effort because the glossary is optional, but only file and
    YAML errors are tolerated; programming errors are no longer swallowed.
    """
    if not glossary_path:
        return {}
    try:
        loaded = yaml.safe_load(Path(glossary_path).read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, yaml.YAMLError):
        return {}
    # Callers use .keys()/.items(); a YAML list or scalar would break them.
    return loaded if isinstance(loaded, dict) else {}
156
+
157
def _load_benchmarks(benchmarks_path: Optional[str]) -> Dict[str, Any]:
    """Load the optional benchmarks YAML; return ``{}`` when absent, unreadable or not a mapping.

    Loading is best effort because benchmarks are optional, but only file and
    YAML errors are tolerated; programming errors are no longer swallowed.
    """
    if not benchmarks_path:
        return {}
    try:
        loaded = yaml.safe_load(Path(benchmarks_path).read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, yaml.YAMLError):
        return {}
    # Callers test ``"key" in benchmarks``; a YAML list or scalar would mislead them.
    return loaded if isinstance(loaded, dict) else {}
164
+
165
def _build_charts(fin: pd.DataFrame, esg: pd.DataFrame, fiscal_year: int) -> Dict[str, str]:
    """Render the report charts and return them as PNG data URIs.

    Keys: ``revenue`` (quarterly revenue of ``fiscal_year``), ``renewable`` and
    ``female`` (per-year series of the two ESG ratios).
    """
    quarter_rank = {"Q1": 1, "Q2": 2, "Q3": 3, "Q4": 4}

    def quarterly_revenue(frame, year):
        # Rows of one year, ordered Q1..Q4.
        rows = frame[frame["year"] == year].copy()
        rows["q"] = rows["quarter"].map(quarter_rank)
        rows = rows.sort_values("q")
        return rows["quarter"].tolist(), rows["revenue"].tolist()

    def yearly_values(metric):
        # All years of one ESG metric, oldest first.
        rows = esg[esg["metric"] == metric].sort_values("year")
        return rows["year"].tolist(), rows["value"].tolist()

    quarters, revenues = quarterly_revenue(fin, fiscal_year)
    revenue_img = line_chart_base64(
        quarters, revenues, xlabel="Quarter", ylabel="Revenue", title=f"Revenue Trend {fiscal_year}"
    )

    renewable_years, renewable_vals = yearly_values("energy_renewable_ratio")
    female_years, female_vals = yearly_values("female_management_ratio")
    renewable_img = line_chart_base64(
        renewable_years, renewable_vals, xlabel="Year", ylabel="%", title="Renewable Energy Ratio"
    )
    female_img = line_chart_base64(
        female_years, female_vals, xlabel="Year", ylabel="%", title="Female Management Ratio"
    )

    return {"revenue": revenue_img, "renewable": renewable_img, "female": female_img}
189
+
190
_BASE_TEMPLATE_NAME = "base.html.j2"

# Fallback layout written when the templates directory has no base template.
# The main content sits inside ``{% block body %}`` so that templates which
# ``{% extends "base.html.j2" %}`` can actually replace it.
_FALLBACK_BASE_TEMPLATE = """<!doctype html>
<html lang="{{ lang }}"><head><meta charset="utf-8"><title>{{ meta.report_title }}</title></head>
<body>
{% block body %}
<h1>{{ meta.report_title }}({{ meta.fiscal_year }})</h1>
<p>{{ meta.company_name }} / Ticker: {{ meta.ticker }} / {{ meta.currency }}</p>
<h2>CEOメッセージ</h2><p>{{ sections.ceo_message }}</p>
<h2>KPI</h2><ul>
<li>売上: {{ kpi.revenue|round(0)|int }} {{ meta.currency }} / YoY {{ kpi.revenue_yoy|round(1) }}%</li>
<li>EBIT: {{ kpi.ebit|round(0)|int }} / Margin {{ kpi.ebit_margin|round(1) }}%</li>
<li>純利益: {{ kpi.net_income|round(0)|int }} / ROE {{ kpi.roe|round(1) }}%</li>
</ul>
<h2>チャート</h2>
<img src="{{ charts.revenue }}" style="max-width:520px"><br/>
<img src="{{ charts.renewable }}" style="max-width:520px">
<img src="{{ charts.female }}" style="max-width:520px">
<h2>ESGサマリー</h2>
<table border="1" cellspacing="0" cellpadding="6">
<tr><th>指標</th><th>値</th><th>単位</th><th>備考</th></tr>
{% for row in esg_table %}
<tr><td>{{ row.display }}</td><td>{{ row.value }}</td><td>{{ row.unit }}</td><td>{{ row.notes }}</td></tr>
{% endfor %}
</table>
<h2>リスク & 機会</h2><p>{{ sections.risk_opportunity }}</p>
{% if benchmark_summary %}<h2>ベンチマーク比較</h2><p>{{ benchmark_summary }}</p>{% endif %}
{% endblock %}
<footer>Generated on {{ generated_at }} | Template: {{ template_name }} | Tenant: {{ tenant }}</footer>
</body></html>"""


def _ensure_templates(templates_dir, template_name: str) -> str:
    """Make sure a base template exists and return the template name to render.

    The fallback base is written only when no base template is present (an
    existing one is never overwritten). A requested template that is missing
    from ``templates_dir`` is replaced by the base template.
    """
    tdir = Path(templates_dir)
    tdir.mkdir(parents=True, exist_ok=True)
    base_path = tdir / _BASE_TEMPLATE_NAME
    if not base_path.exists():
        base_path.write_text(_FALLBACK_BASE_TEMPLATE, encoding="utf-8")
    return template_name if (tdir / template_name).exists() else _BASE_TEMPLATE_NAME


def _benchmark_summary(kpi: dict, esg: pd.DataFrame, benchmarks: Dict[str, Any]) -> str:
    """Build the one-line company-vs-industry comparison; unusable entries are skipped."""
    parts = []
    if "revenue_yoy" in benchmarks:
        try:
            parts.append(
                f"売上YoY: 当社 {kpi['revenue_yoy']:.1f}% / 業界 {float(benchmarks['revenue_yoy']):.1f}%"
            )
        except (TypeError, ValueError):
            pass  # non-numeric benchmark value: omit this comparison
    if "renewable_energy_ratio" in benchmarks:
        ratio = esg[esg["metric"] == "energy_renewable_ratio"].sort_values("year")["value"]
        if not ratio.empty:
            try:
                parts.append(
                    f"再エネ比率: 当社 {float(ratio.iloc[-1]):.1f}% / "
                    f"業界 {float(benchmarks['renewable_energy_ratio']):.1f}%"
                )
            except (TypeError, ValueError):
                pass  # non-numeric value: omit this comparison
    return " / ".join(parts)


def generate_report(
    company_yaml,
    financials_csv,
    esg_csv,
    templates_dir,
    template_name="base.html.j2",
    out_html="output/report.html",
    out_pdf="output/report.pdf",
    out_docx="output/report.docx",
    lang="ja",
    llm=None,
    glossary_path: Optional[str] = None,
    benchmarks_path: Optional[str] = None,
    tenant: Optional[str] = None,
    rag_index_dir: Optional[str] = None,
):
    """Generate the HTML, PDF and DOCX report plus an audit-metadata dict.

    Args:
        company_yaml, financials_csv, esg_csv: paths of the input files.
        templates_dir: directory holding the Jinja templates.
        template_name: template to render; falls back to ``base.html.j2``
            when it is missing from ``templates_dir``.
        out_html, out_pdf, out_docx: output paths (parent dirs are created).
        lang: output language; text is translated when ``llm`` is given and
            ``lang`` is not ``"ja"``.
        llm: optional LLM client used for the narrative sections and translation.
        glossary_path, benchmarks_path: optional YAML files.
        tenant: free-text tenant label shown in the footer and metadata.
        rag_index_dir: accepted for interface compatibility; not used here.

    Returns:
        ``(out_html, out_pdf, out_docx, meta_json, html)``.
    """
    for target in (out_html, out_pdf, out_docx):
        Path(target).parent.mkdir(parents=True, exist_ok=True)

    template_name = _ensure_templates(templates_dir, template_name)

    meta = load_company_meta(company_yaml)
    fin = load_financials(financials_csv)
    esg = load_esg(esg_csv)

    kpi = compute_kpi(fin, meta.fiscal_year)
    esg_rows = esg_table(esg, meta.fiscal_year)
    sections = build_sections(meta, kpi, esg_rows, llm=llm)

    charts = _build_charts(fin, esg, meta.fiscal_year)
    glossary = _load_glossary(glossary_path)
    benchmarks = _load_benchmarks(benchmarks_path)
    benchmark_summary = _benchmark_summary(kpi, esg, benchmarks) if benchmarks else ""

    env = get_env(templates_dir)
    payload = RenderPayload(
        meta=meta, esg_table=esg_rows, kpi=kpi, sections=sections,
        generated_at=datetime.datetime.now().strftime("%Y-%m-%d %H:%M"),
        lang=lang,
    ).model_dump()

    payload["charts"] = charts
    payload["template_name"] = template_name
    payload["tenant"] = tenant or ""
    # Templates reference {{ benchmark_summary }}; it was previously never passed.
    payload["benchmark_summary"] = benchmark_summary

    # Translate the text fields for non-Japanese output.
    payload = _translate_payload_texts(payload, lang=lang, llm=llm, glossary=glossary)

    html = render(env, template_name, payload)
    Path(out_html).write_text(html, encoding="utf-8")
    html_to_pdf(html, out_pdf)
    html_to_docx(html, out_docx)

    # Audit metadata: input hashes, settings, and output locations.
    meta_json = {
        "inputs": {
            "company_yaml_sha": _sha256(Path(company_yaml)),
            "financials_csv_sha": _sha256(Path(financials_csv)),
            "esg_csv_sha": _sha256(Path(esg_csv)),
            "lang": lang,
            "tenant": tenant,
            "glossary_keys": list(glossary.keys()) if glossary else [],
            "benchmarks": benchmarks,
        },
        "outputs": {"html": out_html, "pdf": out_pdf, "docx": out_docx},
        "template": {"dir": templates_dir, "name": template_name},
        "generated_at": datetime.datetime.now().isoformat(timespec="seconds"),
        "usage": getattr(llm, "last_usage", {}) if llm else {},
        "benchmark_summary": benchmark_summary,
    }
    return out_html, out_pdf, out_docx, meta_json, html
hf.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ title: IR-ESG-Report-Generator-Pro
2
+ emoji: 📈
3
+ colorFrom: gray
4
+ colorTo: indigo
5
+ sdk: docker
6
+ pinned: false
llm.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, math, time
2
+ from typing import List, Dict, Any, Optional
3
+ from openai import OpenAI
4
+
5
def _norm(x):
    """Return ``str(x)``, mapping ``None`` and float NaN to the empty string."""
    if x is None:
        return ""
    if isinstance(x, float) and math.isnan(x):
        return ""
    return str(x)


def _backoff(attempt):
    """Seconds to wait before retry ``attempt``: 0.5, 1, 2, 4 ... capped at 8."""
    return min(0.5 * (2 ** attempt), 8.0)


class OpenAILLM:
    """Thin wrapper around the OpenAI chat API for report text and translation.

    ``last_usage`` holds the token counts of the most recent successful call.
    """

    _SEP_TOKEN = "<<<SEP>>>"

    def __init__(self, model_chat: str = "gpt-4o-mini", model_translate: str = "gpt-4o-mini"):
        """Create the client from ``OPENAI_API_KEY2``; raise ``ValueError`` when it is unset."""
        api_key = os.environ.get("OPENAI_API_KEY2")
        if not api_key:
            raise ValueError("環境変数 OPENAI_API_KEY2 が設定されていません。")
        self.client = OpenAI(api_key=api_key)
        self.model_chat = model_chat
        self.model_translate = model_translate
        self.last_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

    def _update_usage(self, rsp):
        """Record the token usage of ``rsp``; responses without usage data are ignored."""
        try:
            u = rsp.usage
            if u:
                self.last_usage = {
                    "prompt_tokens": getattr(u, "prompt_tokens", 0),
                    "completion_tokens": getattr(u, "completion_tokens", 0),
                    "total_tokens": getattr(u, "total_tokens", 0),
                }
        except Exception:
            # Usage accounting is best effort and must not fail the request.
            pass

    def _chat(self, model, messages, temperature=0.2, max_retries=3):
        """Send one chat completion and return the stripped reply text.

        Failed requests are retried up to ``max_retries`` times with
        exponential backoff; the last error is re-raised. A reply without
        content yields ``""``.
        """
        attempts = max(0, max_retries) + 1
        for attempt in range(attempts):
            try:
                rsp = self.client.chat.completions.create(
                    model=model, messages=messages, temperature=temperature
                )
            except Exception:
                if attempt == attempts - 1:
                    raise
                time.sleep(_backoff(attempt))
                continue
            self._update_usage(rsp)
            return (rsp.choices[0].message.content or "").strip()

    def generate_ceo_message(self, meta, kpi: Dict[str, float], esg_rows: List[Dict[str, Any]]) -> str:
        """Draft a CEO message of about 200 Japanese characters from the given data."""
        prompt = (
            "以下の企業情報・KPI・ESG指標をもとに、日本語で200字程度のCEOメッセージ草案を出力。"
            "事実ベース・簡潔・投資家向け。数値は丸め過ぎないこと。\n\n"
            f"企業情報: {meta.model_dump()}\nKPI: {kpi}\nESG: {esg_rows}\n"
        )
        return self._chat(self.model_chat, [{"role": "user", "content": prompt}], temperature=0.2)

    def generate_risk_opportunity(self, meta, kpi: Dict[str, float], esg_rows: List[Dict[str, Any]]) -> str:
        """Summarise key risks and opportunities in about 150 Japanese characters."""
        prompt = (
            "以下に基づき主要なリスクと機会を150字程度で日本語要約。具体的観点を1-2点:\n\n"
            f"企業情報: {meta.model_dump()}\nKPI: {kpi}\nESG: {esg_rows}\n"
        )
        return self._chat(self.model_chat, [{"role": "user", "content": prompt}], temperature=0.2)

    def translate_texts(self, texts: List[Any], target_lang: str = "en", glossary: Optional[Dict[str, str]] = None) -> List[str]:
        """Translate ``texts`` into ``target_lang`` and return a list of equal length.

        Blank entries are not sent to the model and come back unchanged. The
        rest go out in one separator-joined request. If the reply does not
        split into the expected number of parts, each text is translated in
        its own request, so the result always lines up with the input.
        """
        norm = [_norm(t) for t in texts]
        # Only non-blank segments are sent; empty segments between separators
        # are a likely cause of part-count mismatches.
        positions = [i for i, text in enumerate(norm) if text.strip()]
        if not positions:
            return norm
        pending = [norm[i] for i in positions]

        sep = f"\n{self._SEP_TOKEN}\n"
        rules = ""
        if glossary:
            rules = "用語統一ルール(厳守):\n" + "\n".join([f"- {k} -> {v}" for k, v in glossary.items()])

        system = (
            "You are a precise financial/ESG translator. Preserve numbers and units. "
            "Follow the glossary strictly. Keep tone concise."
        )

        def _ask(prompt):
            messages = [{"role": "system", "content": system}, {"role": "user", "content": prompt}]
            return self._chat(self.model_translate, messages, temperature=0.1)

        batch_prompt = (
            f"Translate the following into {target_lang}. Each part is separated by <<<SEP>>>.\n{rules}\n\n"
            + sep.join(pending)
        )
        parts = [p.strip() for p in _ask(batch_prompt).split(self._SEP_TOKEN)]
        if len(parts) != len(pending):
            # Batch reply is unusable: translate one by one instead of putting
            # the whole blob (separators included) into the first field.
            parts = [
                _ask(f"Translate the following into {target_lang}.\n{rules}\n\n{text}")
                for text in pending
            ]

        result = list(norm)
        for index, translated in zip(positions, parts):
            result[index] = translated
        return result
models.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List, Optional, Dict, Any
3
+
4
class CompanyMeta(BaseModel):
    """Company-level metadata for one report (constructed from the company YAML)."""
    company_name: str
    fiscal_year: int  # year whose rows are selected from the financial/ESG tables
    currency: str = "JPY"
    ticker: Optional[str] = None
    report_title: str = "Integrated Report"
    ceo_name: Optional[str] = None
    material_topics: List[str] = []  # translated together with the other report text
    targets: Dict[str, Any] = {}
13
+
14
class ReportSections(BaseModel):
    """Narrative text blocks of the report; both default to empty."""
    ceo_message: str = ""
    risk_opportunity: str = ""
17
+
18
class RenderPayload(BaseModel):
    """Template context for rendering; dumped to a plain dict before use."""
    meta: CompanyMeta
    esg_table: List[Dict[str, Any]]  # one dict per ESG row
    kpi: Dict[str, float]
    sections: ReportSections
    generated_at: str  # preformatted timestamp string
    lang: str = "ja"
rag.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandera as pa
2
+ from pandera import Column, DataFrameSchema, Check
3
+ import pandas as pd
4
+
5
# NOTE(review): this file is listed as rag.py but its content mirrors
# validators.py; it does not define the ``build_or_update_index`` /
# ``answer_with_context`` names that app.py imports from ``rag`` -- confirm
# the intended content of this module.

# Columns each input table must provide (after alias normalisation).
FIN_REQUIRED = ["year", "quarter", "revenue", "ebit", "net_income", "total_assets", "total_equity"]
ESG_REQUIRED = ["year", "metric", "value", "unit", "scope", "notes"]

# Canonical column name -> accepted header spellings.
ALIASES = {
    "revenue": ["revenue", "sales", "売上", "売上高"],
    "ebit": ["ebit", "operating_income", "営業利益"],
    "net_income": ["net_income", "純利益", "profit"],
    "total_equity": ["total_equity", "shareholders_equity", "自己資本"],
}


def normalize_columns(df: pd.DataFrame, required: list) -> pd.DataFrame:
    """Rename known header variants to canonical names and check required columns.

    Headers are matched ignoring case and surrounding whitespace, both for the
    spellings in ``ALIASES`` and for the names in ``required`` themselves
    (so ``"Year"`` or ``" quarter "`` are accepted). ``df`` is not modified.

    Raises:
        ValueError: a required column is still missing after renaming.
    """
    # Folded header -> actual header; the first occurrence wins on collisions.
    by_folded = {}
    for column in df.columns:
        by_folded.setdefault(str(column).strip().lower(), column)

    renames = {}
    claimed = set()
    for canonical in dict.fromkeys([*ALIASES, *required]):
        if canonical in df.columns:
            continue
        for candidate in dict.fromkeys([canonical, *ALIASES.get(canonical, [])]):
            source = by_folded.get(candidate.strip().lower())
            if source is None or source in claimed:
                continue
            renames[source] = canonical
            claimed.add(source)
            break

    if renames:
        df = df.rename(columns=renames)

    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"必須列不足: {missing}")
    return df
29
+
30
# Schema of the financial KPI table. Numeric columns and ``quarter`` are
# coerced so that integer-valued CSV columns (read as int64) or numeric
# quarter labels do not fail the dtype check.
fin_schema = DataFrameSchema({
    "year": Column(int, Check.ge(1900), coerce=True),
    "quarter": Column(str, coerce=True),
    "revenue": Column(float, Check.ge(0), coerce=True),
    "ebit": Column(float, coerce=True),
    "net_income": Column(float, coerce=True),
    "total_assets": Column(float, nullable=True, coerce=True),
    "total_equity": Column(float, nullable=True, coerce=True),
})
39
+
40
# Schema of the ESG metrics table. ``year``, ``metric`` and ``value`` are
# coerced so integer-valued CSV columns do not fail the dtype check; the
# nullable text columns are validated as loaded.
esg_schema = DataFrameSchema({
    "year": Column(int, Check.ge(1900), coerce=True),
    "metric": Column(str, coerce=True),
    "value": Column(float, coerce=True),
    "unit": Column(str, nullable=True),
    "scope": Column(str, nullable=True),
    "notes": Column(object, nullable=True),
})
48
+
49
def validate_financials(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise the headers of ``df`` and validate it against ``fin_schema`` (lazy mode)."""
    normalized = normalize_columns(df, FIN_REQUIRED)
    validated = fin_schema.validate(normalized, lazy=True)
    return validated
52
+
53
def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise the headers of ``df`` and validate it against ``esg_schema`` (lazy mode)."""
    normalized = normalize_columns(df, ESG_REQUIRED)
    validated = esg_schema.validate(normalized, lazy=True)
    return validated
render.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from docx import Document
2
+ from docx.shared import Pt
3
+ from bs4 import BeautifulSoup
4
+
5
# Probe for WeasyPrint once at import time. Any failure (missing package or
# missing native libraries) just disables that backend.
try:
    from weasyprint import HTML
except Exception:
    _HAS_WEASY = False
else:
    _HAS_WEASY = True
11
+
12
def html_to_pdf(html_str: str, out_pdf_path: str):
    """Write ``html_str`` as a PDF to ``out_pdf_path``.

    WeasyPrint is tried first when available; if it fails, xhtml2pdf is used.

    Raises:
        RuntimeError: the xhtml2pdf fallback reported conversion errors
            (chained to the WeasyPrint error, if there was one).
    """
    weasy_error = None
    if _HAS_WEASY:
        try:
            HTML(string=html_str).write_pdf(out_pdf_path)
            return
        except Exception as exc:
            # Keep the cause so it is not lost if the fallback fails as well.
            weasy_error = exc

    from xhtml2pdf import pisa
    with open(out_pdf_path, "wb") as f:
        status = pisa.CreatePDF(src=html_str, dest=f)
    # A non-zero ``err`` means the written file is not a usable PDF.
    if getattr(status, "err", 0):
        raise RuntimeError(f"PDF conversion failed (xhtml2pdf errors: {status.err})") from weasy_error
22
+
23
def _append_table(doc, table_tag):
    """Append the text of an HTML ``<table>`` to ``doc`` as a grid table."""
    rows = [
        [cell.get_text(strip=True) for cell in tr.find_all(["th", "td"])]
        for tr in table_tag.find_all("tr")
    ]
    rows = [row for row in rows if row]
    if not rows:
        return
    table = doc.add_table(rows=0, cols=max(len(row) for row in rows))
    table.style = "Table Grid"
    for row in rows:
        for cell, text in zip(table.add_row().cells, row):
            cell.text = text


def html_to_docx(html_str: str, out_docx_path: str):
    """Convert the text content of ``html_str`` to a DOCX at ``out_docx_path``.

    ``h1``-``h3`` become headings, ``p``/``li`` become paragraphs, and tables
    become DOCX tables (previously tables were dropped). Images and styling
    are not carried over.
    """
    doc = Document()
    soup = BeautifulSoup(html_str, "html.parser")
    for tag in soup.find_all(["h1", "h2", "h3", "p", "li", "table"]):
        inside_table = tag.find_parent("table") is not None
        if tag.name == "table":
            # Nested tables are covered by the text of their outer table.
            if not inside_table:
                _append_table(doc, tag)
            continue
        if inside_table:
            # Already emitted as part of a table cell.
            continue
        txt = tag.get_text(strip=True)
        if not txt:
            continue
        if tag.name == "h1":
            p = doc.add_heading(txt, level=0)
        elif tag.name == "h2":
            p = doc.add_heading(txt, level=1)
        elif tag.name == "h3":
            p = doc.add_heading(txt, level=2)
        else:
            p = doc.add_paragraph(txt)
        for run in p.runs:
            run.font.size = Pt(11)
    doc.save(out_docx_path)
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.44.0
2
+ pandas==2.2.2
3
+ pydantic==2.7.4
4
+ Jinja2==3.1.4
5
+ python-docx==1.1.2
6
+ WeasyPrint==62.3
7
+ beautifulsoup4==4.12.3
8
+ PyYAML==6.0.2
9
+ openai==1.40.2
10
+ httpx==0.27.2
11
+ matplotlib==3.8.4
12
+ Pillow==10.3.0
13
+ faiss-cpu==1.8.0
14
+ sentence-transformers==3.0.1
15
+ huggingface_hub==0.24.6
16
+ pandera==0.20.3
17
+ xhtml2pdf==0.2.15
templates:gir.html.j2 ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {% extends "base.html.j2" %}
2
+ {% block body %}
3
+ <h1>GRI Report({{ meta.fiscal_year }})</h1>
4
+ <p>{{ sections.ceo_message }}</p>
5
+ <p>{{ benchmark_summary }}</p>
6
+ <img src="{{ charts.renewable }}"><img src="{{ charts.female }}">
7
+ {% endblock %}
templates:report.html.j2 CHANGED
@@ -1,5 +1,5 @@
1
  <!doctype html>
2
- <html lang="ja">
3
  <head>
4
  <meta charset="utf-8" />
5
  <title>{{ meta.report_title }} - {{ meta.company_name }}</title>
@@ -29,11 +29,6 @@
29
  <div><strong>{{ kpi.revenue | round(0) | int }} {{ meta.currency }}</strong></div>
30
  <div class="small">前年比: {{ kpi.revenue_yoy | round(1) }}%</div>
31
  </div>
32
- <div class="card">
33
- <div class="small">営業利益</div>
34
- <div><strong>{{ kpi.operating_income | round(0) | int }} {{ meta.currency }}</strong></div>
35
- <div class="small">マージン: {{ kpi.operating_margin | round(1) }}%</div>
36
- </div>
37
  <div class="card">
38
  <div class="small">EBIT</div>
39
  <div><strong>{{ kpi.ebit | round(0) | int }} {{ meta.currency }}</strong></div>
 
1
  <!doctype html>
2
+ <html lang="{{ lang }}">
3
  <head>
4
  <meta charset="utf-8" />
5
  <title>{{ meta.report_title }} - {{ meta.company_name }}</title>
 
29
  <div><strong>{{ kpi.revenue | round(0) | int }} {{ meta.currency }}</strong></div>
30
  <div class="small">前年比: {{ kpi.revenue_yoy | round(1) }}%</div>
31
  </div>
 
 
 
 
 
32
  <div class="card">
33
  <div class="small">EBIT</div>
34
  <div><strong>{{ kpi.ebit | round(0) | int }} {{ meta.currency }}</strong></div>
templates:sasb.html.j2 ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html.j2" %}
2
+ {% block body %}
3
+ <h1>SASB Report({{ meta.fiscal_year }})</h1>
4
+ <p>{{ sections.ceo_message }}</p>
5
+ <table border="1" cellspacing="0" cellpadding="6">
6
+ <tr><th>指標</th><th>値</th><th>単位</th><th>備考</th></tr>
7
+ {% for row in esg_table %}
8
+ <tr><td>{{ row.display }}</td><td>{{ row.value }}</td><td>{{ row.unit }}</td><td>{{ row.notes }}</td></tr>
9
+ {% endfor %}
10
+ </table>
11
+ <p>{{ sections.risk_opportunity }}</p>
12
+ {% endblock %}
templates:tcfd.html.j2 ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html.j2" %}
2
+ {% block body %}
3
+ <h1>TCFD Report({{ meta.fiscal_year }})</h1>
4
+ <h2>1. ガバナンス</h2><p>{{ sections.ceo_message }}</p>
5
+ <h2>2. 戦略</h2><p>{{ benchmark_summary }}</p>
6
+ <h2>3. リスク管理</h2><p>{{ sections.risk_opportunity }}</p>
7
+ <h2>4. 指標・目標</h2>
8
+ <img src="{{ charts.revenue }}"><img src="{{ charts.renewable }}"><img src="{{ charts.female }}">
9
+ {% endblock %}
templating.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from jinja2 import Environment, FileSystemLoader, select_autoescape
2
+ from pathlib import Path
3
+
4
def get_env(templates_dir: str):
    """Create the Jinja environment used for report templates.

    Templates are searched in ``templates_dir``, then in ``templates`` next to
    this module, then in ``templates`` under the current working directory.
    Autoescaping covers ``.html``/``.xml`` and their ``.j2`` variants: the
    project's templates are named ``*.html.j2``, which the plain
    ``["html", "xml"]`` list did not match, leaving escaping switched off.
    """
    here = Path(__file__).resolve().parent
    searchpaths = [templates_dir, str(here / "templates"), str(Path().resolve() / "templates")]
    escaped_extensions = ("html", "xml", "html.j2", "xml.j2")
    env = Environment(
        loader=FileSystemLoader(searchpaths),
        autoescape=select_autoescape(enabled_extensions=escaped_extensions),
    )
    return env
9
+
10
def render(env, template_name: str, context: dict) -> str:
    """Look up ``template_name`` in ``env`` and render it with ``context`` as keyword variables."""
    return env.get_template(template_name).render(**context)
validators.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandera as pa
2
+ from pandera import Column, DataFrameSchema, Check
3
+ import pandas as pd
4
+
5
# Columns each input table must provide (after alias normalisation).
FIN_REQUIRED = ["year", "quarter", "revenue", "ebit", "net_income", "total_assets", "total_equity"]
ESG_REQUIRED = ["year", "metric", "value", "unit", "scope", "notes"]

# Canonical column name -> accepted header spellings.
ALIASES = {
    "revenue": ["revenue", "sales", "売上", "売上高"],
    "ebit": ["ebit", "operating_income", "営業利益"],
    "net_income": ["net_income", "純利益", "profit"],
    "total_equity": ["total_equity", "shareholders_equity", "自己資本"],
}


def normalize_columns(df: pd.DataFrame, required: list) -> pd.DataFrame:
    """Rename known header variants to canonical names and check required columns.

    Headers are matched ignoring case and surrounding whitespace, both for the
    spellings in ``ALIASES`` and for the names in ``required`` themselves
    (so ``"Year"`` or ``" quarter "`` are accepted). ``df`` is not modified.

    Raises:
        ValueError: a required column is still missing after renaming.
    """
    # Folded header -> actual header; the first occurrence wins on collisions.
    by_folded = {}
    for column in df.columns:
        by_folded.setdefault(str(column).strip().lower(), column)

    renames = {}
    claimed = set()
    for canonical in dict.fromkeys([*ALIASES, *required]):
        if canonical in df.columns:
            continue
        for candidate in dict.fromkeys([canonical, *ALIASES.get(canonical, [])]):
            source = by_folded.get(candidate.strip().lower())
            if source is None or source in claimed:
                continue
            renames[source] = canonical
            claimed.add(source)
            break

    if renames:
        df = df.rename(columns=renames)

    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"必須列不足: {missing}")
    return df
29
+
30
# Schema of the financial KPI table. Numeric columns and ``quarter`` are
# coerced so that integer-valued CSV columns (read as int64) or numeric
# quarter labels do not fail the dtype check.
fin_schema = DataFrameSchema({
    "year": Column(int, Check.ge(1900), coerce=True),
    "quarter": Column(str, coerce=True),
    "revenue": Column(float, Check.ge(0), coerce=True),
    "ebit": Column(float, coerce=True),
    "net_income": Column(float, coerce=True),
    "total_assets": Column(float, nullable=True, coerce=True),
    "total_equity": Column(float, nullable=True, coerce=True),
})
39
+
40
# Schema of the ESG metrics table. ``year``, ``metric`` and ``value`` are
# coerced so integer-valued CSV columns do not fail the dtype check; the
# nullable text columns are validated as loaded.
esg_schema = DataFrameSchema({
    "year": Column(int, Check.ge(1900), coerce=True),
    "metric": Column(str, coerce=True),
    "value": Column(float, coerce=True),
    "unit": Column(str, nullable=True),
    "scope": Column(str, nullable=True),
    "notes": Column(object, nullable=True),
})
48
+
49
def validate_financials(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise the headers of ``df`` and validate it against ``fin_schema`` (lazy mode)."""
    normalized = normalize_columns(df, FIN_REQUIRED)
    validated = fin_schema.validate(normalized, lazy=True)
    return validated
52
+
53
def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise the headers of ``df`` and validate it against ``esg_schema`` (lazy mode)."""
    normalized = normalize_columns(df, ESG_REQUIRED)
    validated = esg_schema.validate(normalized, lazy=True)
    return validated