# Corin1998's picture
# Upload 17 files
# 5823ed6 verified
import gradio as gr
from pathlib import Path
import tempfile, datetime, json, shutil, os
from core import generate_report
from rag import build_or_update_index, answer_with_context
# Application title: used as the Blocks page title and as the Markdown heading.
TITLE = "IR/ESG Report Generator (HF Space, Pro)"
# User-facing description rendered directly below the heading.
DESC = "CSV/YAML から IR/ESG レポート(HTML/PDF/DOCX)を生成。LLM要約+翻訳・グラフ・ベンチマーク・RAG・Hub保存に対応。"
# Absolute path (as a string) of the "templates" directory located next to this file.
_MODULE_DIR = Path(__file__).resolve().parent
TEMPLATES_DIR = str(_MODULE_DIR.joinpath("templates").absolute())
import shutil as _shutil
def _as_bytes(data):
    """Return *data* unchanged unless it is ``str``, which is UTF-8 encoded."""
    return data.encode("utf-8") if isinstance(data, str) else data


def _to_path(upload_obj, tmpdir: Path, filename: str) -> Path:
    """Materialise an uploaded object as the file ``tmpdir / filename``.

    Supported shapes of ``upload_obj``, checked in this order:

    * ``str`` / ``Path``: treated as a path on disk and copied.
    * ``bytes`` / ``bytearray``: written verbatim.
    * object with a ``name`` attribute: ``name`` is treated as a path and
      copied.  If ``name`` does not point at an existing file and the object
      also offers ``read()``, the content is read from the object instead.
    * object with ``read()``: its content is written (``str`` is encoded
      as UTF-8).
    * ``dict`` with a ``"name"`` key (path to copy) or a ``"data"`` key
      (``str`` or bytes content).

    Args:
        upload_obj: The uploaded value in one of the shapes above.
        tmpdir: Directory in which the file is created.
        filename: Name of the file to create inside ``tmpdir``.

    Returns:
        The path ``tmpdir / filename`` that now holds the content.

    Raises:
        ValueError: If ``upload_obj`` is ``None``.
        TypeError: If ``upload_obj`` has none of the supported shapes.
    """
    if upload_obj is None:
        raise ValueError(f"{filename} が未指定です。")

    dst = tmpdir / filename

    if isinstance(upload_obj, (str, Path)):
        _shutil.copy(Path(upload_obj), dst)
        return dst

    if isinstance(upload_obj, (bytes, bytearray)):
        dst.write_bytes(bytes(upload_obj))
        return dst

    readable = hasattr(upload_obj, "read")
    if hasattr(upload_obj, "name"):
        name = upload_obj.name
        on_disk = isinstance(name, (str, Path)) and Path(name).is_file()
        # Prefer copying the backing file.  Without a readable stream there is
        # no alternative, so attempt the copy anyway and let it raise exactly
        # as before when the path is unusable.
        if on_disk or not readable:
            _shutil.copy(Path(name), dst)
            return dst

    if readable:
        # Reached for pure streams and for streams whose ``name`` is only a
        # label (not an existing file).
        dst.write_bytes(_as_bytes(upload_obj.read()))
        return dst

    if isinstance(upload_obj, dict):
        if "name" in upload_obj:
            _shutil.copy(upload_obj["name"], dst)
            return dst
        if "data" in upload_obj:
            dst.write_bytes(_as_bytes(upload_obj["data"]))
            return dst

    raise TypeError(f"Unsupported upload type: {type(upload_obj)}")
def _safe_tenant(tenant) -> str:
    """Reduce *tenant* to a single path component safe for file and Hub paths."""
    import re  # function-scope import so the module's import header is untouched

    # Everything except word characters, dots and hyphens (notably "/" and
    # "\\") becomes "_"; leading/trailing dots are removed so the result can
    # never be "." or "..".  ``\w`` is Unicode-aware, so non-ASCII names survive.
    cleaned = re.sub(r"[^\w.-]+", "_", str(tenant or "").strip()).strip(".")
    return cleaned or "default"


def _upload_outputs_to_hub(named_files, base_path: str) -> None:
    """Upload each ``(local_path, name)`` pair to ``base_path/name`` in the dataset repo.

    The target repository is read from the ``HF_DATASET_REPO`` environment
    variable (default ``"your-org/ir-esg-reports"``).
    """
    from huggingface_hub import HfApi

    api = HfApi()
    repo_id = os.environ.get("HF_DATASET_REPO", "your-org/ir-esg-reports")
    for local_path, name in named_files:
        api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=f"{base_path}/{name}",
            repo_id=repo_id,
            repo_type="dataset",
        )


def run(company_yaml, financials_csv, esg_csv, use_llm, lang, template_key,
        tenant, push_to_hub, glossary_yaml, benchmarks_yaml, past_reports_zip):
    """Generate the report files from the uploaded inputs (Gradio click handler).

    Steps: stage the uploads in a temporary directory, optionally build the
    RAG index from the past-reports zip, optionally create the LLM client,
    call ``generate_report``, copy the results to ``./tmp`` and optionally
    upload them to the Hugging Face Hub.

    Args:
        company_yaml, financials_csv, esg_csv: Required uploads.
        use_llm: When truthy, ``llm.OpenAILLM`` is instantiated and passed on.
        lang: Output language, forwarded to ``generate_report``.
        template_key: Key selecting the template file; unknown keys fall back
            to ``base.html.j2``.
        tenant: Tenant identifier.  Forwarded unchanged to ``generate_report``;
            a sanitised form is used for output file names and the Hub path.
        push_to_hub: When truthy, the outputs are uploaded to the dataset repo.
        glossary_yaml, benchmarks_yaml, past_reports_zip: Optional uploads.

    Returns:
        A 6-tuple ``(status, html_path, pdf_path, docx_path, meta_path,
        html_text)``.  On any failure the status carries the error message,
        the four paths are ``None`` and the last element is ``""``.
    """
    try:
        if not company_yaml or not financials_csv or not esg_csv:
            return "全ファイルをアップロードしてください。", None, None, None, None, ""

        with tempfile.TemporaryDirectory() as td:
            tdir = Path(td)

            # Required inputs.
            cpath = _to_path(company_yaml, tdir, "company.yaml")
            fpath = _to_path(financials_csv, tdir, "financials.csv")
            epath = _to_path(esg_csv, tdir, "esg.csv")

            # Optional inputs stay None when nothing was uploaded.
            gpath = _to_path(glossary_yaml, tdir, "glossary.yaml") if glossary_yaml else None
            bpath = _to_path(benchmarks_yaml, tdir, "benchmarks.yaml") if benchmarks_yaml else None
            rzip = _to_path(past_reports_zip, tdir, "past_reports.zip") if past_reports_zip else None

            index_dir = tdir / "index"
            if rzip:
                build_or_update_index(rzip, index_dir=index_dir)

            outdir = tdir / "out"
            outdir.mkdir(parents=True, exist_ok=True)

            llm = None
            if use_llm:
                try:
                    from llm import OpenAILLM
                    llm = OpenAILLM()
                except Exception as e:
                    return f"LLM初期化エラー: {e}", None, None, None, None, ""

            template_files = {
                "base": "base.html.j2",
                "tcfd": "tcfd.html.j2",
                # NOTE(review): "sasab" looks like a misspelt alias of "sasb";
                # kept so that any caller already sending it keeps working.
                "sasab": "sasb.html.j2",
                "sasb": "sasb.html.j2",
                "gri": "gri.html.j2",
            }

            html, pdf, docx, meta_json, html_text = generate_report(
                company_yaml=str(cpath),
                financials_csv=str(fpath),
                esg_csv=str(epath),
                templates_dir=TEMPLATES_DIR,
                template_name=template_files.get(template_key, "base.html.j2"),
                out_html=str(outdir / "report.html"),
                out_pdf=str(outdir / "report.pdf"),
                out_docx=str(outdir / "report.docx"),
                lang=lang,
                llm=llm,
                glossary_path=str(gpath) if gpath else None,
                benchmarks_path=str(bpath) if bpath else None,
                tenant=tenant,
                rag_index_dir=str(index_dir) if rzip else None,
            )

            # The temporary directory disappears when the ``with`` block ends,
            # so the results are copied to ./tmp before returning their paths.
            repo_tmp = Path("./tmp")
            repo_tmp.mkdir(exist_ok=True)
            ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

            # ``tenant`` is free text from the UI: never let it contribute
            # path separators or ".." to a file name or a repo path.
            tenant_part = _safe_tenant(tenant)
            prefix = f"{tenant_part}-{ts}"
            html_out = repo_tmp / f"{prefix}.html"
            pdf_out = repo_tmp / f"{prefix}.pdf"
            docx_out = repo_tmp / f"{prefix}.docx"
            meta_out = repo_tmp / f"{prefix}.json"

            _shutil.copy(html, html_out)
            _shutil.copy(pdf, pdf_out)
            _shutil.copy(docx, docx_out)
            meta_out.write_text(
                json.dumps(meta_json, ensure_ascii=False, indent=2), encoding="utf-8"
            )

            if push_to_hub:
                try:
                    _upload_outputs_to_hub(
                        [
                            (html_out, "report.html"),
                            (pdf_out, "report.pdf"),
                            (docx_out, "report.docx"),
                            (meta_out, "report.json"),
                        ],
                        base_path=f"{tenant_part}/{ts}",
                    )
                except Exception as e:
                    # Best effort: a failed upload must not discard the local
                    # results, so the error is recorded in the metadata file.
                    meta_json["hub_error"] = str(e)
                    meta_out.write_text(
                        json.dumps(meta_json, ensure_ascii=False, indent=2), encoding="utf-8"
                    )

            return "生成が完了しました。", str(html_out), str(pdf_out), str(docx_out), str(meta_out), html_text
    except Exception as e:
        # UI boundary: report every failure in the status box instead of raising.
        import traceback
        tb = traceback.format_exc(limit=20)
        return f"エラー: {e}\n--- trace ---\n{tb}", None, None, None, None, ""
# Build the Gradio UI; the Blocks app is bound to the module-level name `demo`.
with gr.Blocks(title=TITLE) as demo:
gr.Markdown(f"# {TITLE}\n{DESC}")
# Required uploads: company info, financial KPIs and ESG metrics.
# NOTE(review): .xlsx is accepted for the two data files, but run() stores the
# uploads under fixed ".csv" names — confirm generate_report copes with that.
with gr.Row():
company_yaml = gr.File(label="company.yaml(会社情報・年度等)", file_types=[".yaml", ".yml"])
financials_csv = gr.File(label="financials.csv(財務KPI)", file_types=[".csv", ".xlsx"])
esg_csv = gr.File(label="esg_metrics.csv(ESG指標)", file_types=[".csv", ".xlsx"])
# Generation options: LLM toggle, output language, template key and tenant name.
with gr.Row():
use_llm = gr.Checkbox(label="LLMで要約/翻訳を行う(OPENAI_API_KEY2 必須)", value=True)
lang = gr.Dropdown(choices=["ja","en","zh","ko","de","fr"], value="ja", label="出力言語")
template_key = gr.Dropdown(choices=["base","tcfd","sasb","gri"], value="base", label="テンプレート")
tenant = gr.Textbox(label="テナント名(会社識別子)", value="HitC")
# Optional uploads: glossary, benchmarks and a zip of past reports (labelled as RAG input).
with gr.Row():
glossary_yaml = gr.File(label="glossary.yaml(用語集・任意)", file_types=[".yaml", ".yml"])
benchmarks_yaml = gr.File(label="benchmarks.yaml(業界平均など・任意)", file_types=[".yaml", ".yml"])
past_reports_zip = gr.File(label="過去レポートZip(RAG用・任意)", file_types=[".zip"])
# Opt-in upload of the generated files to the Hugging Face Hub (off by default).
push_to_hub = gr.Checkbox(label="生成物を Hugging Face Hub(Datasets)へ保存", value=False)
run_btn = gr.Button("レポート生成")
# Output widgets; their order matches the 6-tuple returned by run().
status = gr.Textbox(label="ステータス", interactive=False)
html_file = gr.File(label="HTMLダウンロード")
pdf_file = gr.File(label="PDFダウンロード")
docx_file = gr.File(label="DOCXダウンロード")
meta_file = gr.File(label="メタ情報(JSON)")
html_preview = gr.HTML(label="HTMLプレビュー(抜粋)")
# Wire the button to run(); `inputs` follows run()'s positional parameter order.
run_btn.click(
fn=run,
inputs=[company_yaml, financials_csv, esg_csv, use_llm, lang, template_key, tenant, push_to_hub, glossary_yaml, benchmarks_yaml, past_reports_zip],
outputs=[status, html_file, pdf_file, docx_file, meta_file, html_preview]
)
# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()