# 3
# File size: 7,675 Bytes
# 5b82238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import gradio as gr
from pathlib import Path
import tempfile, datetime, json, shutil, os
from core import generate_report
from rag import build_or_update_index, answer_with_context

# Heading shown at the top of the UI and used as the Blocks page title.
TITLE = "IR/ESG Report Generator (HF Space, Pro)"
# One-line description rendered under the heading.
DESC = (
    "CSV/YAML から IR/ESG レポート(HTML/PDF/DOCX)を生成。"
    "LLM要約+翻訳・グラフ・ベンチマーク・RAG・Hub保存に対応。"
)

# Absolute path (as a string) of the "templates" directory next to this file.
TEMPLATES_DIR = str(Path(__file__).resolve().parent.joinpath("templates").absolute())

import shutil as _shutil
def _to_path(upload_obj, tmpdir: Path, filename: str) -> Path:
    """Materialize an uploaded object as ``tmpdir / filename`` and return it.

    Supported forms of *upload_obj*:
      * ``str`` / ``Path``: treated as a source file path and copied.
      * Object with a ``name`` attribute (e.g. a NamedString): ``name`` is
        treated as the source path and copied. If ``name`` does not point at
        an existing file and the object is readable, the content is taken
        from ``read()`` instead.
      * Object with ``read()``: its content is written out; text content is
        encoded as UTF-8.
      * ``dict`` with a ``"name"`` key (source path) or a ``"data"`` key
        (content; text is encoded as UTF-8).

    Args:
        upload_obj: The uploaded value in one of the forms above.
        tmpdir: Directory in which the copy is created.
        filename: File name to use inside *tmpdir*.

    Returns:
        The destination path ``tmpdir / filename``.

    Raises:
        ValueError: If *upload_obj* is ``None``.
        TypeError: If *upload_obj* matches none of the supported forms.
    """
    if upload_obj is None:
        raise ValueError(f"{filename} が未指定です。")
    dst = tmpdir / filename

    if isinstance(upload_obj, (str, Path)):
        _shutil.copy(Path(upload_obj), dst)
        return dst

    readable = hasattr(upload_obj, "read")
    if hasattr(upload_obj, "name"):  # NamedString and file wrappers
        name = upload_obj.name
        on_disk = isinstance(name, (str, Path)) and Path(name).is_file()
        # Copy by name when it is a real file. When it is not, only fall
        # through if read() is available; otherwise keep the copy attempt so
        # the caller still sees the underlying error.
        if on_disk or not readable:
            _shutil.copy(Path(name), dst)
            return dst

    if readable:
        data = upload_obj.read()
        if isinstance(data, str):
            data = data.encode("utf-8")
        dst.write_bytes(data)
        return dst

    if isinstance(upload_obj, dict):
        if "name" in upload_obj:
            _shutil.copy(upload_obj["name"], dst)
            return dst
        if "data" in upload_obj:
            data = upload_obj["data"]
            if isinstance(data, str):
                data = data.encode("utf-8")
            dst.write_bytes(data)
            return dst

    raise TypeError(f"Unsupported upload type: {type(upload_obj)}")

# Maps the UI template key to the Jinja template file name.
# "sasab" is kept as an alias of "sasb" for backward compatibility.
_TEMPLATE_FILES = {
    "base": "base.html.j2",
    "tcfd": "tcfd.html.j2",
    "sasab": "sasb.html.j2",
    "sasb": "sasb.html.j2",
    "gri": "gri.html.j2",
}


def _failure(message):
    """Return the six-value output tuple for a run that produced no files."""
    return message, None, None, None, None, ""


def _safe_tenant(tenant) -> str:
    """Return a path-safe identifier derived from *tenant*.

    The tenant is free text from the UI and is interpolated into local file
    names and Hub repo paths. Every character that is not alphanumeric or
    one of ``-_.`` is replaced with ``_``, and leading/trailing dots and
    underscores are stripped so that values such as ``../x`` cannot escape
    the output directory. Falls back to ``"default"`` when nothing remains.
    """
    raw = str(tenant or "").strip()
    cleaned = "".join(ch if (ch.isalnum() or ch in "-_.") else "_" for ch in raw)
    return cleaned.strip("._") or "default"


def run(company_yaml, financials_csv, esg_csv, use_llm, lang, template_key,
        tenant, push_to_hub, glossary_yaml, benchmarks_yaml, past_reports_zip):
    """Generate the report files and return the values for the six UI outputs.

    Args:
        company_yaml, financials_csv, esg_csv: Required uploads; each is
            staged into a temporary directory via ``_to_path``.
        use_llm: When truthy, an ``OpenAILLM`` instance is created and passed
            to ``generate_report``.
        lang: Passed through to ``generate_report``.
        template_key: Key selecting the template file; unknown keys fall back
            to ``base.html.j2``.
        tenant: Passed unchanged to ``generate_report``; a sanitized form is
            used for output file names and the Hub path.
        push_to_hub: When truthy, the four output files are uploaded to the
            dataset repo named by the ``HF_DATASET_REPO`` environment
            variable (default ``your-org/ir-esg-reports``).
        glossary_yaml, benchmarks_yaml, past_reports_zip: Optional uploads.
            When the zip is given, an index is built from it and its
            directory is passed to ``generate_report``.

    Returns:
        ``(status, html_path, pdf_path, docx_path, meta_path, html_text)``.
        On any failure the four paths are ``None``, ``html_text`` is ``""``
        and ``status`` carries the error message; exceptions are not
        propagated to the caller.
    """
    try:
        if not company_yaml or not financials_csv or not esg_csv:
            return _failure("全ファイルをアップロードしてください。")

        with tempfile.TemporaryDirectory() as td:
            tdir = Path(td)
            cpath = _to_path(company_yaml, tdir, "company.yaml")
            fpath = _to_path(financials_csv, tdir, "financials.csv")
            epath = _to_path(esg_csv, tdir, "esg.csv")
            gpath = _to_path(glossary_yaml, tdir, "glossary.yaml") if glossary_yaml else None
            bpath = _to_path(benchmarks_yaml, tdir, "benchmarks.yaml") if benchmarks_yaml else None
            rzip = _to_path(past_reports_zip, tdir, "past_reports.zip") if past_reports_zip else None

            index_dir = tdir / "index"
            if rzip:
                build_or_update_index(rzip, index_dir=index_dir)

            outdir = tdir / "out"
            outdir.mkdir(parents=True, exist_ok=True)

            llm = None
            if use_llm:
                try:
                    # Imported lazily so the app still works without the LLM
                    # backend when the checkbox is off.
                    from llm import OpenAILLM
                    llm = OpenAILLM()
                except Exception as e:
                    return _failure(f"LLM初期化エラー: {e}")

            html, pdf, docx, meta_json, html_text = generate_report(
                company_yaml=str(cpath),
                financials_csv=str(fpath),
                esg_csv=str(epath),
                templates_dir=TEMPLATES_DIR,
                template_name=_TEMPLATE_FILES.get(template_key, "base.html.j2"),
                out_html=str(outdir / "report.html"),
                out_pdf=str(outdir / "report.pdf"),
                out_docx=str(outdir / "report.docx"),
                lang=lang,
                llm=llm,
                glossary_path=str(gpath) if gpath else None,
                benchmarks_path=str(bpath) if bpath else None,
                tenant=tenant,
                rag_index_dir=str(index_dir) if rzip else None,
            )

            # The temporary directory is removed when the `with` block exits,
            # so the results are copied to ./tmp before returning their paths.
            repo_tmp = Path("./tmp")
            repo_tmp.mkdir(exist_ok=True)
            ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
            safe_tenant = _safe_tenant(tenant)
            prefix = f"{safe_tenant}-{ts}"
            html_out = repo_tmp / f"{prefix}.html"
            pdf_out = repo_tmp / f"{prefix}.pdf"
            docx_out = repo_tmp / f"{prefix}.docx"
            meta_out = repo_tmp / f"{prefix}.json"
            _shutil.copy(html, html_out)
            _shutil.copy(pdf, pdf_out)
            _shutil.copy(docx, docx_out)
            meta_out.write_text(
                json.dumps(meta_json, ensure_ascii=False, indent=2), encoding="utf-8"
            )

            if push_to_hub:
                try:
                    from huggingface_hub import HfApi
                    api = HfApi()
                    repo_id = os.environ.get("HF_DATASET_REPO", "your-org/ir-esg-reports")
                    base_path = f"{safe_tenant}/{ts}"
                    uploads = [
                        (html_out, "report.html"),
                        (pdf_out, "report.pdf"),
                        (docx_out, "report.docx"),
                        (meta_out, "report.json"),
                    ]
                    for p, name in uploads:
                        api.upload_file(
                            path_or_fileobj=str(p),
                            path_in_repo=f"{base_path}/{name}",
                            repo_id=repo_id,
                            repo_type="dataset",
                        )
                except Exception as e:
                    # Best effort: a failed upload does not fail the run; the
                    # error is recorded in the metadata file instead.
                    meta_json["hub_error"] = str(e)
                    meta_out.write_text(
                        json.dumps(meta_json, ensure_ascii=False, indent=2), encoding="utf-8"
                    )

            return "生成が完了しました。", str(html_out), str(pdf_out), str(docx_out), str(meta_out), html_text

    except Exception as e:
        # UI boundary: report the error and a trimmed traceback in the status box.
        import traceback
        tb = traceback.format_exc(limit=20)
        return _failure(f"エラー: {e}\n--- trace ---\n{tb}")


# Build the Gradio UI. Components are created in display order.
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}\n{DESC}")

    # Row 1: the three required input files.
    with gr.Row():
        company_yaml = gr.File(
            file_types=[".yaml", ".yml"],
            label="company.yaml(会社情報・年度等)",
        )
        financials_csv = gr.File(
            file_types=[".csv", ".xlsx"],
            label="financials.csv(財務KPI)",
        )
        esg_csv = gr.File(
            file_types=[".csv", ".xlsx"],
            label="esg_metrics.csv(ESG指標)",
        )

    # Row 2: generation options.
    with gr.Row():
        use_llm = gr.Checkbox(
            value=True,
            label="LLMで要約/翻訳を行う(OPENAI_API_KEY2 必須)",
        )
        lang = gr.Dropdown(
            label="出力言語",
            choices=["ja", "en", "zh", "ko", "de", "fr"],
            value="ja",
        )
        template_key = gr.Dropdown(
            label="テンプレート",
            choices=["base", "tcfd", "sasb", "gri"],
            value="base",
        )
        tenant = gr.Textbox(value="HitC", label="テナント名(会社識別子)")

    # Row 3: optional inputs and the Hub upload switch.
    with gr.Row():
        glossary_yaml = gr.File(
            file_types=[".yaml", ".yml"],
            label="glossary.yaml(用語集・任意)",
        )
        benchmarks_yaml = gr.File(
            file_types=[".yaml", ".yml"],
            label="benchmarks.yaml(業界平均など・任意)",
        )
        past_reports_zip = gr.File(
            file_types=[".zip"],
            label="過去レポートZip(RAG用・任意)",
        )
        push_to_hub = gr.Checkbox(
            value=False,
            label="生成物を Hugging Face Hub(Datasets)へ保存",
        )

    run_btn = gr.Button("レポート生成")

    # Output widgets, in the order `run` returns its values.
    status = gr.Textbox(interactive=False, label="ステータス")
    html_file = gr.File(label="HTMLダウンロード")
    pdf_file = gr.File(label="PDFダウンロード")
    docx_file = gr.File(label="DOCXダウンロード")
    meta_file = gr.File(label="メタ情報(JSON)")
    html_preview = gr.HTML(label="HTMLプレビュー(抜粋)")

    # The input order must match the parameter order of `run`.
    _run_inputs = [
        company_yaml,
        financials_csv,
        esg_csv,
        use_llm,
        lang,
        template_key,
        tenant,
        push_to_hub,
        glossary_yaml,
        benchmarks_yaml,
        past_reports_zip,
    ]
    _run_outputs = [
        status,
        html_file,
        pdf_file,
        docx_file,
        meta_file,
        html_preview,
    ]
    run_btn.click(fn=run, inputs=_run_inputs, outputs=_run_outputs)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()