File size: 7,675 Bytes
5b82238 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
import gradio as gr
from pathlib import Path
import tempfile, datetime, json, shutil, os
from core import generate_report
from rag import build_or_update_index, answer_with_context
# Page title shown in the browser tab and as the top-level heading.
TITLE = "IR/ESG Report Generator (HF Space, Pro)"
# One-line description rendered under the heading (user-facing, Japanese).
DESC = "CSV/YAML から IR/ESG レポート(HTML/PDF/DOCX)を生成。LLM要約+翻訳・グラフ・ベンチマーク・RAG・Hub保存に対応。"
# Absolute path of the "templates" directory that sits next to this file.
_APP_DIR = Path(__file__).resolve().parent
TEMPLATES_DIR = str(_APP_DIR / "templates")
import shutil as _shutil
def _to_path(upload_obj, tmpdir: Path, filename: str) -> Path:
    """Materialise an upload as ``tmpdir / filename`` and return that path.

    The upload may arrive in several shapes; they are tried in this order:

    1. ``str`` / ``Path``: treated as a source file path and copied.
    2. An object with a ``name`` attribute: ``name`` is treated as a source
       file path and copied. If the object is also readable and ``name`` does
       not point to an existing file (e.g. an in-memory stream that merely
       carries a display name), its content is read instead of failing.
    3. An object with ``read()``: its content is written out (text is
       encoded as UTF-8).
    4. A ``dict`` with a ``"name"`` key (copied as a path) or a ``"data"``
       key (written out; text is encoded as UTF-8).

    Args:
        upload_obj: The uploaded value in any of the shapes above.
        tmpdir: Directory in which the file is created.
        filename: Name of the file to create inside ``tmpdir``.

    Returns:
        The destination path ``tmpdir / filename``.

    Raises:
        ValueError: If ``upload_obj`` is ``None``.
        TypeError: If ``upload_obj`` matches none of the supported shapes.
        OSError: Propagated from the copy/write (e.g. missing source file).
    """
    dst = tmpdir / filename
    if upload_obj is None:
        raise ValueError(f"{filename} が未指定です。")

    if isinstance(upload_obj, (str, Path)):
        _shutil.copy(Path(upload_obj), dst)
        return dst

    readable = hasattr(upload_obj, "read")
    if hasattr(upload_obj, "name"):
        name = upload_obj.name
        try:
            on_disk = Path(name).is_file()
        except TypeError:
            # e.g. name is None or an integer file descriptor.
            on_disk = False
        # Copy by name when it is a real file. When it is not, only insist on
        # the copy (and let it raise as before) if there is no stream to fall
        # back to.
        if on_disk or not readable:
            _shutil.copy(Path(name), dst)
            return dst

    if readable:
        data = upload_obj.read()
        if isinstance(data, str):
            data = data.encode("utf-8")
        dst.write_bytes(data)
        return dst

    if isinstance(upload_obj, dict):
        if "name" in upload_obj:
            _shutil.copy(upload_obj["name"], dst)
            return dst
        if "data" in upload_obj:
            data = upload_obj["data"]
            if isinstance(data, str):
                data = data.encode("utf-8")
            dst.write_bytes(data)
            return dst

    raise TypeError(f"Unsupported upload type: {type(upload_obj)}")
def _tenant_slug(tenant) -> str:
    """Return a path-safe identifier for *tenant* ("default" when empty)."""
    raw = str(tenant).strip() if tenant else ""
    # Keep letters/digits (any script) plus "-", "_" and "."; everything else,
    # notably path separators, becomes "_".
    cleaned = "".join(ch if (ch.isalnum() or ch in "-_.") else "_" for ch in raw)
    # Leading/trailing dots are dropped so "." / ".." can never be a path part.
    return cleaned.strip(".") or "default"


def run(company_yaml, financials_csv, esg_csv, use_llm, lang, template_key,
        tenant, push_to_hub, glossary_yaml, benchmarks_yaml, past_reports_zip):
    """Generate the report from the uploaded files (Gradio click handler).

    The uploads are staged into a temporary directory, ``generate_report`` is
    called, and the results are copied to ``./tmp`` under a
    ``<tenant>-<timestamp>`` prefix so they outlive the temporary directory.
    Optionally the outputs are uploaded to a Hugging Face dataset repo.

    Args:
        company_yaml, financials_csv, esg_csv: Required uploads.
        use_llm: When truthy, ``llm.OpenAILLM`` is instantiated and passed on.
        lang: Output language, forwarded to ``generate_report``.
        template_key: One of "base", "tcfd", "sasb" ("sasab" is accepted as
            an alias), "gri"; unknown keys fall back to "base.html.j2".
        tenant: Free-text tenant name. It is forwarded unchanged to
            ``generate_report``; for file names and the Hub path it is
            reduced to a path-safe slug because it comes straight from a UI
            text box.
        push_to_hub: When truthy, upload the four outputs to the dataset repo
            named by the ``HF_DATASET_REPO`` environment variable
            (default "your-org/ir-esg-reports").
        glossary_yaml, benchmarks_yaml, past_reports_zip: Optional uploads.
            When the zip is given, an index is built from it in the temporary
            directory and its location is passed to ``generate_report``.

    Returns:
        A 6-tuple ``(status, html_path, pdf_path, docx_path, meta_path,
        html_text)``. On any failure the status carries the error message,
        the four paths are ``None`` and ``html_text`` is ``""``; this function
        does not raise.
    """
    try:
        if not company_yaml or not financials_csv or not esg_csv:
            return "全ファイルをアップロードしてください。", None, None, None, None, ""

        with tempfile.TemporaryDirectory() as td:
            tdir = Path(td)
            cpath = _to_path(company_yaml, tdir, "company.yaml")
            fpath = _to_path(financials_csv, tdir, "financials.csv")
            epath = _to_path(esg_csv, tdir, "esg.csv")
            gpath = _to_path(glossary_yaml, tdir, "glossary.yaml") if glossary_yaml else None
            bpath = _to_path(benchmarks_yaml, tdir, "benchmarks.yaml") if benchmarks_yaml else None
            rzip = _to_path(past_reports_zip, tdir, "past_reports.zip") if past_reports_zip else None
            if rzip:
                build_or_update_index(rzip, index_dir=tdir / "index")

            outdir = tdir / "out"
            outdir.mkdir(parents=True, exist_ok=True)

            llm = None
            if use_llm:
                try:
                    # Imported lazily so the app still works without the LLM
                    # backend when the checkbox is off.
                    from llm import OpenAILLM
                    llm = OpenAILLM()
                except Exception as e:
                    return f"LLM初期化エラー: {e}", None, None, None, None, ""

            html, pdf, docx, meta_json, html_text = generate_report(
                company_yaml=str(cpath),
                financials_csv=str(fpath),
                esg_csv=str(epath),
                templates_dir=TEMPLATES_DIR,
                template_name={
                    "base": "base.html.j2",
                    "tcfd": "tcfd.html.j2",
                    "sasab": "sasb.html.j2",
                    "sasb": "sasb.html.j2",
                    "gri": "gri.html.j2",
                }.get(template_key, "base.html.j2"),
                out_html=str(outdir / "report.html"),
                out_pdf=str(outdir / "report.pdf"),
                out_docx=str(outdir / "report.docx"),
                lang=lang,
                llm=llm,
                glossary_path=str(gpath) if gpath else None,
                benchmarks_path=str(bpath) if bpath else None,
                tenant=tenant,
                rag_index_dir=str(tdir / "index") if rzip else None,
            )

            # Copy results out of the temporary directory before it is removed.
            repo_tmp = Path("./tmp")
            repo_tmp.mkdir(exist_ok=True)
            ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
            # Never build paths from the raw tenant text: "/" or ".." in it
            # would escape ./tmp (or the tenant folder in the Hub repo).
            safe_tenant = _tenant_slug(tenant)
            prefix = f"{safe_tenant}-{ts}"
            html_out = repo_tmp / f"{prefix}.html"
            pdf_out = repo_tmp / f"{prefix}.pdf"
            docx_out = repo_tmp / f"{prefix}.docx"
            meta_out = repo_tmp / f"{prefix}.json"
            _shutil.copy(html, html_out)
            _shutil.copy(pdf, pdf_out)
            _shutil.copy(docx, docx_out)
            meta_out.write_text(json.dumps(meta_json, ensure_ascii=False, indent=2), encoding="utf-8")

            if push_to_hub:
                try:
                    from huggingface_hub import HfApi
                    api = HfApi()
                    repo_id = os.environ.get("HF_DATASET_REPO", "your-org/ir-esg-reports")
                    base_path = f"{safe_tenant}/{ts}"
                    uploads = [
                        (html_out, "report.html"),
                        (pdf_out, "report.pdf"),
                        (docx_out, "report.docx"),
                        (meta_out, "report.json"),
                    ]
                    for p, name in uploads:
                        api.upload_file(
                            path_or_fileobj=str(p),
                            path_in_repo=f"{base_path}/{name}",
                            repo_id=repo_id,
                            repo_type="dataset",
                        )
                except Exception as e:
                    # Upload is best-effort: the local files are still
                    # returned, and the failure is recorded in the metadata.
                    meta_json["hub_error"] = str(e)
                    meta_out.write_text(json.dumps(meta_json, ensure_ascii=False, indent=2), encoding="utf-8")

            return "生成が完了しました。", str(html_out), str(pdf_out), str(docx_out), str(meta_out), html_text
    except Exception as e:
        # Top-level UI boundary: report the failure in the status box instead
        # of letting the exception propagate.
        import traceback
        tb = traceback.format_exc(limit=20)
        return f"エラー: {e}\n--- trace ---\n{tb}", None, None, None, None, ""
# Gradio UI definition. Components are declared in display order.
# NOTE(review): the original indentation was lost in this copy of the file;
# which components sit inside each gr.Row() is reconstructed, so confirm it
# against the deployed layout.
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown("# " + TITLE + "\n" + DESC)

    # Required inputs.
    with gr.Row():
        company_yaml = gr.File(
            label="company.yaml(会社情報・年度等)",
            file_types=[".yaml", ".yml"],
        )
        financials_csv = gr.File(
            label="financials.csv(財務KPI)",
            file_types=[".csv", ".xlsx"],
        )
        esg_csv = gr.File(
            label="esg_metrics.csv(ESG指標)",
            file_types=[".csv", ".xlsx"],
        )

    # Generation options.
    with gr.Row():
        use_llm = gr.Checkbox(
            label="LLMで要約/翻訳を行う(OPENAI_API_KEY2 必須)",
            value=True,
        )
        lang = gr.Dropdown(
            choices=["ja", "en", "zh", "ko", "de", "fr"],
            value="ja",
            label="出力言語",
        )
        template_key = gr.Dropdown(
            choices=["base", "tcfd", "sasb", "gri"],
            value="base",
            label="テンプレート",
        )
        tenant = gr.Textbox(
            label="テナント名(会社識別子)",
            value="HitC",
        )

    # Optional inputs.
    with gr.Row():
        glossary_yaml = gr.File(
            label="glossary.yaml(用語集・任意)",
            file_types=[".yaml", ".yml"],
        )
        benchmarks_yaml = gr.File(
            label="benchmarks.yaml(業界平均など・任意)",
            file_types=[".yaml", ".yml"],
        )
        past_reports_zip = gr.File(
            label="過去レポートZip(RAG用・任意)",
            file_types=[".zip"],
        )

    push_to_hub = gr.Checkbox(
        label="生成物を Hugging Face Hub(Datasets)へ保存",
        value=False,
    )
    run_btn = gr.Button("レポート生成")

    # Outputs, in the same order as the tuple returned by run().
    status = gr.Textbox(label="ステータス", interactive=False)
    html_file = gr.File(label="HTMLダウンロード")
    pdf_file = gr.File(label="PDFダウンロード")
    docx_file = gr.File(label="DOCXダウンロード")
    meta_file = gr.File(label="メタ情報(JSON)")
    html_preview = gr.HTML(label="HTMLプレビュー(抜粋)")

    # The inputs list must match run()'s positional parameter order.
    run_btn.click(
        fn=run,
        inputs=[
            company_yaml,
            financials_csv,
            esg_csv,
            use_llm,
            lang,
            template_key,
            tenant,
            push_to_hub,
            glossary_yaml,
            benchmarks_yaml,
            past_reports_zip,
        ],
        outputs=[
            status,
            html_file,
            pdf_file,
            docx_file,
            meta_file,
            html_preview,
        ],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|