# Source: 3 / Hugging Face / core.py
# Uploader: Corin1998
# Commit message: Upload 11 files
# Commit: f469bad (verified)
import datetime
import hashlib
import json
from pathlib import Path

import pandas as pd
import yaml

from models import CompanyMeta, RenderPayload, ReportSections
from render import html_to_docx, html_to_pdf
from templating import get_env, render
# Maps raw ESG metric identifiers to the Japanese labels used for display.
DISPLAY_NAME = {
    "co2_emission": "Co2排出量",
    "energy_renewable_ratio": "再生可能エネルギー比率",
    "female_management_ratio": "女性管理職比率",
}
def sha256(p: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *p*.

    The file is opened in binary mode and consumed in 8192-byte chunks, so
    the whole content is never held in memory at once.

    Args:
        p: Path of the file to hash.

    Returns:
        The digest as a lowercase hex string.
    """
    h = hashlib.sha256()
    with p.open("rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(8192), b""):
            h.update(chunk)
    return h.hexdigest()


# Underscore-prefixed alias: this helper is also referred to as ``_sha256``
# elsewhere in this module.
_sha256 = sha256
def load_company_meta(path: str) -> CompanyMeta:
    """Load company metadata from a UTF-8 YAML file.

    Args:
        path: Location of the YAML document.

    Returns:
        A ``CompanyMeta`` built from the document's top-level keys.

    Raises:
        ValueError: If the document's top level is not a mapping (for
            example, the file is empty), since it cannot be expanded into
            keyword arguments.
    """
    data = yaml.safe_load(Path(path).read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        raise ValueError(
            f"{path}: expected a YAML mapping at the top level, "
            f"got {type(data).__name__}"
        )
    return CompanyMeta(**data)
def load_financials(path: str) -> pd.DataFrame:
    """Read the financials CSV at *path* into a DataFrame.

    The file is parsed with ``pandas.read_csv`` defaults; no columns are
    validated or converted here.
    """
    return pd.read_csv(path)
def compute_kpi(fin_df: pd.DataFrame, fiscal_year: int) -> dict:
    """Compute headline KPIs for *fiscal_year* from the financials table.

    For both *fiscal_year* and the year before it, the row with the highest
    ``quarter`` value is used. The frame must provide the columns ``year``,
    ``quarter``, ``revenue``, ``ebit``, ``net_income`` and ``equity``.

    Args:
        fin_df: Financial rows, one per (year, quarter).
        fiscal_year: Year to report on.

    Returns:
        A dict with the float entries ``revenue``, ``ebit``, ``ebit_margin``
        (percent), ``net_income``, ``equity`` and ``revenue_yoy`` (percent).
        Values that cannot be computed are ``0.0``: every entry when
        *fiscal_year* has no rows, ``ebit_margin`` when revenue is zero, and
        ``revenue_yoy`` when either year is missing or the previous revenue
        is zero.
    """

    def last_quarter(year: int) -> pd.DataFrame:
        # At most one row: the latest quarter recorded for that year.
        return fin_df[fin_df["year"] == year].sort_values("quarter").tail(1)

    def value(row: pd.DataFrame, column: str) -> float:
        return float(row[column].iloc[0]) if not row.empty else 0.0

    latest = last_quarter(fiscal_year)
    prev = last_quarter(fiscal_year - 1)

    revenue = value(latest, "revenue")
    prev_revenue = value(prev, "revenue")
    ebit = value(latest, "ebit")
    net_income = value(latest, "net_income")
    equity = value(latest, "equity")

    ebit_margin = ebit / revenue * 100 if revenue else 0.0
    # Without current-year data a growth figure would be meaningless
    # (it would read as -100%), so it is reported as 0.0 instead.
    if latest.empty or not prev_revenue:
        revenue_yoy = 0.0
    else:
        revenue_yoy = (revenue / prev_revenue - 1) * 100

    return {
        "revenue": revenue,
        "ebit": ebit,
        "ebit_margin": ebit_margin,
        "net_income": net_income,
        "equity": equity,
        "revenue_yoy": revenue_yoy,
    }
def esg_table(df: pd.DataFrame, fiscal_year: int) -> list:
    """Build the ESG table rows for *fiscal_year*.

    Args:
        df: ESG metrics with at least the columns ``year``, ``metric`` and
            ``value``; ``unit`` and ``notes`` are optional.
        fiscal_year: Year whose rows are selected.

    Returns:
        One dict per matching row with the keys ``display``, ``value``,
        ``unit`` and ``notes``. ``display`` is the ``DISPLAY_NAME`` label
        for the metric, or the raw metric name when no label is registered.
        ``unit`` and ``notes`` default to ``""`` when the column is absent.
    """
    # Rows are only read, never modified, so the filtered frame needs no copy.
    dfy = df[df["year"] == fiscal_year]
    return [
        {
            "display": DISPLAY_NAME.get(r["metric"], r["metric"]),
            "value": r["value"],
            "unit": r.get("unit", ""),
            "notes": r.get("notes", ""),
        }
        for _, r in dfy.iterrows()
    ]
def build_sections(meta: CompanyMeta, kpi: dict, esg_rows: list, llm=None) -> ReportSections:
    """Produce the narrative sections of the report.

    Args:
        meta: Company metadata; ``fiscal_year`` is used in the fallback text.
        kpi: KPI values passed to the LLM client.
        esg_rows: ESG table rows passed to the LLM client.
        llm: Optional text generator. When falsy, fixed Japanese fallback
            texts are used instead.

    Returns:
        A ``ReportSections`` holding the CEO message and the
        risk/opportunity text.
    """
    if llm:
        # NOTE(review): assumes the client methods accept
        # (meta, kpi, esg_rows) — confirm against the LLM client.
        ceo_message = llm.generate_ceo_message(meta, kpi, esg_rows)
        risk = llm.generate_risk_opportunities(meta, kpi, esg_rows)
    else:
        ceo_message = f"[{meta.fiscal_year}]期は、売上成長と収益性の両立に注力しました。"
        risk = "主要リスクはマクロ環境と規制動向。機会は生成AI活用と脱炭素需要の拡大です。"
    return ReportSections(ceo_message=ceo_message, risk_opportunity=risk)
def _translate_payload(payload: dict, lang: str, llm) -> dict:
    """Translate the text fields of *payload* into *lang*; numbers are untouched.

    The translated fields are the two narrative sections, each ESG row's
    ``display`` and ``notes``, the report title and the material topics.
    All texts are sent to ``llm.translate_text`` in one batch and written
    back in the same order.

    Args:
        payload: Render payload with the keys ``sections``, ``esg_table``
            and ``meta``. It is modified in place.
        lang: Target language code. ``"ja"`` means no translation.
        llm: Client exposing ``translate_text(texts, target_lang=...)``.
            When falsy, nothing is translated.

    Returns:
        The same *payload* object.

    Raises:
        ValueError: If the client returns a different number of texts than
            it was given. The payload is left unmodified in that case.
    """
    if not llm or lang == "ja":
        return payload

    sections = payload["sections"]
    meta = payload["meta"]
    esg_rows = payload["esg_table"]
    # A missing or None topic list is treated as "no topics".
    topics = meta.get("material_topics") or []

    texts = [sections["ceo_message"], sections["risk_opportunity"]]
    for row in esg_rows:
        texts.append(row["display"])
        texts.append(row["notes"] or "")
    texts.append(meta["report_title"])
    texts.extend(topics)

    translated = list(llm.translate_text(texts, target_lang=lang))
    # Check before writing anything back so a short reply cannot leave the
    # payload half translated.
    if len(translated) != len(texts):
        raise ValueError(
            f"translate_text returned {len(translated)} texts, "
            f"expected {len(texts)}"
        )

    it = iter(translated)
    sections["ceo_message"] = next(it)
    sections["risk_opportunity"] = next(it)
    for row in esg_rows:
        row["display"] = next(it)
        row["notes"] = next(it)
    meta["report_title"] = next(it)
    # Whatever remains is exactly the translated topics; replace in place.
    topics[:] = it
    return payload


# Longer alias: this helper is also referred to as
# ``_translate_payload_texts`` elsewhere in this module.
_translate_payload_texts = _translate_payload
def load_esg(path: str) -> pd.DataFrame:
    """Read the ESG metrics CSV at *path* into a DataFrame."""
    return pd.read_csv(path)


def generate_report(company_yaml, financials_csv, esg_csv,
                    template_dir, template_name="report.html.j2",
                    out_html="output/report.html", out_pdf="output/report.pdf",
                    out_docx="output/report.docx", lang="ja", llm=None):
    """Render the report as HTML, PDF and DOCX and describe the run.

    Args:
        company_yaml: Path of the company metadata YAML file.
        financials_csv: Path of the financials CSV file.
        esg_csv: Path of the ESG metrics CSV file.
        template_dir: Directory handed to ``get_env`` to locate templates.
        template_name: Name of the template to render.
        out_html: Destination of the HTML output.
        out_pdf: Destination of the PDF output.
        out_docx: Destination of the DOCX output.
        lang: Language of the report; texts are translated unless ``"ja"``.
        llm: Optional client used for text generation and translation.

    Returns:
        A tuple ``(out_html, out_pdf, out_docx, meta_json)``. ``meta_json``
        is a dict recording the SHA-256 digests of the three input files,
        the language, the output paths, the template used and the
        generation timestamp.
    """
    # Every output may live in its own directory, so create all of them.
    for target in (out_html, out_pdf, out_docx):
        Path(target).parent.mkdir(parents=True, exist_ok=True)

    meta = load_company_meta(company_yaml)
    fin = load_financials(financials_csv)
    esg = load_esg(esg_csv)
    kpi = compute_kpi(fin, meta.fiscal_year)
    esg_rows = esg_table(esg, meta.fiscal_year)
    sections = build_sections(meta, kpi, esg_rows, llm=llm)

    # One timestamp for both the rendered report and the run metadata.
    now = datetime.datetime.now()

    env = get_env(template_dir)
    payload = RenderPayload(
        meta=meta, kpi=kpi, esg_table=esg_rows, sections=sections,
        generated_at=now.strftime("%Y-%m-%d %H:%M:%S"), lang=lang,
    ).model_dump()
    payload = _translate_payload_texts(payload, lang=lang, llm=llm)

    html = render(env, template_name, payload)
    Path(out_html).write_text(html, encoding="utf-8")
    html_to_pdf(html, out_pdf)
    html_to_docx(html, out_docx)

    meta_json = {
        "inputs": {
            "company_yaml_sha": sha256(Path(company_yaml)),
            "financials_csv_sha": sha256(Path(financials_csv)),
            "esg_csv_sha": sha256(Path(esg_csv)),
            "lang": lang,
        },
        "outputs": {"html": out_html, "pdf": out_pdf, "docx": out_docx},
        "template": {"dir": template_dir, "name": template_name},
        "generated_at": now.isoformat(timespec="seconds"),
    }
    return out_html, out_pdf, out_docx, meta_json