Spaces:

Corin1998
/

Score

Sleeping

App Files Files Community

Corin1998 committed on Aug 27, 2025

Commit

f2c5804

verified ·

1 Parent(s): 7761278

Update ui/ui_app.py

Browse files

Files changed (1) hide show

ui/ui_app.py +52 -157

ui/ui_app.py CHANGED Viewed

@@ -1,122 +1,21 @@
-from __future__ import annotations
-import os, io, base64, json, traceback, shutil
-from typing import List, Dict, Any
 import gradio as gr
 import pandas as pd
-from pdf2image import convert_from_path
-import pdfplumber
-from openai import OpenAI
 import plotly.graph_objects as go
-from api.scorer import score_company  # スコア計算は /api/scorer.py を利用
-OPENAI_MODEL_VISION = os.environ.get("OPENAI_VISION_MODEL", "gpt-4o-mini")
-OPENAI_MODEL_TEXT   = os.environ.get("OPENAI_TEXT_MODEL",   "gpt-4o-mini")
-def _b64(img: bytes) -> str:
-    return base64.b64encode(img).decode("utf-8")
-def _client() -> OpenAI:
-    key = os.environ.get("OPENAI_API_KEY")
-    if not key:
-        raise gr.Error("OPENAI_API_KEY が未設定です。Spaces → Settings → Variables and secrets に追加してください。")
-    return OpenAI(api_key=key, timeout=60)  # proxiesは渡さない
-def _health_html() -> str:
-    msgs = []
-    msgs.append("✅ OPENAI_API_KEY: " + ("検出" if os.environ.get("OPENAI_API_KEY") else "未設定"))
-    for b in ("pdftoppm", "pdftocairo"):
-        ok = bool(shutil.which(b))
-        msgs.append(("✅" if ok else "❌") + f" {b}: " + ("検出" if ok else "見つからず（packages.txt に poppler-utils が必要）"))
-    msgs.append(f"ℹ️ Vision={OPENAI_MODEL_VISION} / Text={OPENAI_MODEL_TEXT}")
-    return "<br>".join(msgs)
-def pdf_to_images(pdf_path: str, dpi: int = 220, max_pages: int = 6) -> List[bytes]:
-    pages = convert_from_path(pdf_path, dpi=dpi, fmt="png")
-    imgs: List[bytes] = []
-    for i, p in enumerate(pages):
-        if i >= max_pages:
-            break
-        buf = io.BytesIO()
-        p.save(buf, format="PNG")
-        imgs.append(buf.getvalue())
-    return imgs
-def pdf_to_text(pdf_path: str, max_chars: int = 15000) -> str:
-    parts: List[str] = []
-    with pdfplumber.open(pdf_path) as pdf:
-        for i, page in enumerate(pdf.pages):
-            t = (page.extract_text() or "").strip()
-            if t:
-                parts.append(f"[page {i+1}]\n{t}")
-            if sum(len(x) for x in parts) > max_chars:
-                break
-    return "\n\n".join(parts)[:max_chars]
-SYSTEM_JSON = """あなたは有能な財務アナリストです。
-与えられた決算書（画像またはテキスト）から、次の厳密な JSON 構造のみを日本語の単位なし・半角数値で返してください。分からない項目は null。
-{
-  "company": {"name": null},
-  "period": {"start_date": null, "end_date": null},
-  "balance_sheet": {
-    "total_assets": null, "total_liabilities": null, "total_equity": null,
-    "current_assets": null, "fixed_assets": null,
-    "current_liabilities": null, "long_term_liabilities": null
-  },
-  "income_statement": {
-    "sales": null, "cost_of_sales": null, "gross_profit": null,
-    "operating_expenses": null, "operating_income": null,
-    "ordinary_income": null, "net_income": null
-  },
-  "cash_flows": {
-    "operating_cash_flow": null, "investing_cash_flow": null, "financing_cash_flow": null
-  }
-}
-"""
-def extract_financials(images: List[bytes] | None, text_blob: str | None, company_hint: str) -> Dict[str, Any]:
-    client = _client()
-    if images:
-        content = [{"type": "text", "text": SYSTEM_JSON}]
-        if company_hint:
-            content.append({"type": "text", "text": f"会社名の候補: {company_hint}"})
-        for im in images:
-            content.append({"type": "input_image", "image_url": f"data:image/png;base64,{_b64(im)}"})
-        resp = client.chat.completions.create(
-            model=OPENAI_MODEL_VISION,
-            messages=[
-                {"role": "system", "content": "返答は必ず有効な JSON オブジェクトのみ。説明を含めない。"},
-                {"role": "user", "content": content},
-            ],
-            response_format={"type": "json_object"},
-            temperature=0.1,
-        )
-        return json.loads(resp.choices[0].message.content)
-    else:
-        prompt = f"{SYSTEM_JSON}\n\n以下は決算書のテキストです。上記の JSON だけを返してください。\n\n{text_blob or ''}"
-        resp = client.chat.completions.create(
-            model=OPENAI_MODEL_TEXT,
-            messages=[
-                {"role": "system", "content": "返答は必ず有効な JSON オブジェクトのみ。"},
-                {"role": "user", "content": prompt},
-            ],
-            response_format={"type": "json_object"},
-            temperature=0.1,
-        )
-        return json.loads(resp.choices[0].message.content)
-def fin_to_df(fin: Dict[str, Any]) -> pd.DataFrame:
     rows = []
-    def add(cat, d):
-        for k, v in (d or {}).items():
             rows.append({"category": cat, "item": k, "value": v})
-    add("balance_sheet", fin.get("balance_sheet"))
-    add("income_statement", fin.get("income_statement"))
-    add("cash_flows", fin.get("cash_flows"))
-    return pd.DataFrame(rows, columns=["category", "item", "value"])
-def df_to_fin(df: pd.DataFrame) -> Dict[str, Any]:
     out = {"balance_sheet": {}, "income_statement": {}, "cash_flows": {}}
     for _, r in df.iterrows():
         cat, item, val = str(r["category"]), str(r["item"]), r["value"]
@@ -128,25 +27,33 @@ def df_to_fin(df: pd.DataFrame) -> Dict[str, Any]:
             out[cat][item] = parsed
     return out
-def radar(score: Dict[str, Any]) -> go.Figure:
     labels = [d["metric"] for d in score["details"]]
     values = [d["score"] for d in score["details"]]
     fig = go.Figure()
     fig.add_trace(go.Scatterpolar(r=values + values[:1], theta=labels + labels[:1], fill="toself"))
     fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
-                      showlegend=False, margin=dict(l=20, r=20, t=30, b=20),
-                      height=380, title=f"総合スコア: {score['total_score']}（グレード: {score['grade']}）")
     return fig
 def run_analyze(company: str, use_vision: bool, files: list[str]):
     if not files:
         raise gr.Error("PDF をアップロードしてください。")
     try:
-        images: List[bytes] = []
-        for p in files:
-            images += pdf_to_images(p, dpi=220, max_pages=6)
-        fin = extract_financials(images if use_vision else None, None, company or "")
-    except Exception:
         text_blob = ""
         for p in files:
             text_blob += pdf_to_text(p) + "\n\n"
@@ -155,53 +62,29 @@ def run_analyze(company: str, use_vision: bool, files: list[str]):
     df = fin_to_df(fin)
     score = score_company(fin)
     fig = radar(score)
-    insight = ""
-    try:
-        client = _client()
-        prompt = f"""次の財務データとスコア結果から、箇条書きで短く日本語でコメントしてください。
-- 良い点 3つ
-- 懸念点 3つ
-- 総評（100字以内）
-[財務データ]
-{json.dumps(fin, ensure_ascii=False)}
-[スコア]
-{json.dumps(score, ensure_ascii=False)}
-"""
-        resp = client.chat.completions.create(
-            model=OPENAI_MODEL_TEXT,
-            messages=[{"role": "system", "content": "簡潔で公正な財務アナリスト。"},
-                      {"role": "user", "content": prompt}],
-            temperature=0.3,
-        )
-        insight = resp.choices[0].message.content
-    except Exception as e:
-        insight = f"AI所見の生成に失敗: {e}"
     return (json.dumps(fin, ensure_ascii=False, indent=2),
             df,
             json.dumps(score, ensure_ascii=False, indent=2),
             fig,
-            insight)
 def run_recalc(df: pd.DataFrame):
     try:
         fin = df_to_fin(df)
         score = score_company(fin)
         fig = radar(score)
-        return (json.dumps(score, ensure_ascii=False, indent=2),
-                fig,
-                json.dumps(fin, ensure_ascii=False, indent=2))
     except Exception as e:
         tb = traceback.format_exc(limit=6)
         raise gr.Error(f"再計算に失敗しました: {e}\n\n<pre style='white-space:pre-wrap'>{tb}</pre>")
-def build_ui() -> gr.Blocks:
-    with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"),
-                   fill_height=True, analytics_enabled=False) as demo:
-        gr.Markdown("## 🧮 企業スコアリング（PDF解析 × OpenAI Vision）")
         with gr.Row():
             with gr.Column(scale=1):
                 company = gr.Textbox(label="企業名（任意）", placeholder="例：株式会社OO")
@@ -211,7 +94,7 @@ def build_ui() -> gr.Blocks:
                 recalc_btn = gr.Button("🔁 この表の値で再計算")
                 health_btn = gr.Button("🩺 環境チェック")
                 health_out = gr.HTML()
-                gr.Markdown("※ 画像化やVisionに失敗した場合はテキスト抽出に自動フォールバックします。")
             with gr.Column(scale=1):
                 fin_json = gr.Code(label="抽出JSON", language="json", interactive=False)
@@ -224,11 +107,23 @@ def build_ui() -> gr.Blocks:
                 chart = gr.Plot(label="スコアレーダー")
             with gr.Tab("AI診断（日本語）"):
                 insight_md = gr.Markdown()
         run_btn.click(run_analyze, inputs=[company, use_vision, files],
-                      outputs=[fin_json, df_out, score_json, chart, insight_md],
                       concurrency_limit=1)
-        recalc_btn.click(run_recalc, inputs=[df_out], outputs=[score_json, chart, fin_json],
-                         concurrency_limit=1)
-        health_btn.click(_health_html, outputs=health_out, concurrency_limit=1)
     return demo

+import json, traceback
 import gradio as gr
 import pandas as pd
 import plotly.graph_objects as go
+from core.pdf_utils import pdf_to_images, pdf_to_text
+from core.ai_client import extract_financials, short_insight
+from core.scorer import score_company
+from core.health import health_html
+def fin_to_df(fin: dict) -> pd.DataFrame:
     rows = []
+    for cat in ("balance_sheet","income_statement","cash_flows"):
+        for k, v in (fin.get(cat) or {}).items():
             rows.append({"category": cat, "item": k, "value": v})
+    return pd.DataFrame(rows, columns=["category","item","value"])
+def df_to_fin(df: pd.DataFrame) -> dict:
     out = {"balance_sheet": {}, "income_statement": {}, "cash_flows": {}}
     for _, r in df.iterrows():
         cat, item, val = str(r["category"]), str(r["item"]), r["value"]
             out[cat][item] = parsed
     return out
+def radar(score: dict) -> go.Figure:
     labels = [d["metric"] for d in score["details"]]
     values = [d["score"] for d in score["details"]]
     fig = go.Figure()
     fig.add_trace(go.Scatterpolar(r=values + values[:1], theta=labels + labels[:1], fill="toself"))
     fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
+                      showlegend=False, margin=dict(l=20,r=20,t=30,b=20), height=380,
+                      title=f"総合スコア: {score['total_score']}（グレード: {score['grade']}）")
     return fig
 def run_analyze(company: str, use_vision: bool, files: list[str]):
     if not files:
         raise gr.Error("PDF をアップロードしてください。")
+    # 1) 画像化→Vision をまず試す。失敗したら 2) テキスト抽出に自動フォールバック
+    fin = None
+    errs = []
     try:
+        if use_vision:
+            imgs = []
+            for p in files:
+                imgs += pdf_to_images(p, dpi=220, max_pages=6)
+            fin = extract_financials(imgs, None, company or "")
+    except Exception as e:
+        errs.append(f"[Visionスキップ] {e}")
+    if fin is None:
         text_blob = ""
         for p in files:
             text_blob += pdf_to_text(p) + "\n\n"
     df = fin_to_df(fin)
     score = score_company(fin)
     fig = radar(score)
+    insight = short_insight(fin, score)
+    debug = "<br>".join(errs) if errs else "OK"
     return (json.dumps(fin, ensure_ascii=False, indent=2),
             df,
             json.dumps(score, ensure_ascii=False, indent=2),
             fig,
+            insight,
+            debug)
 def run_recalc(df: pd.DataFrame):
     try:
         fin = df_to_fin(df)
         score = score_company(fin)
         fig = radar(score)
+        return (json.dumps(score, ensure_ascii=False, indent=2), fig, json.dumps(fin, ensure_ascii=False, indent=2))
     except Exception as e:
         tb = traceback.format_exc(limit=6)
         raise gr.Error(f"再計算に失敗しました: {e}\n\n<pre style='white-space:pre-wrap'>{tb}</pre>")
+def create_demo():
+    with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), fill_height=True, analytics_enabled=False) as demo:
+        gr.Markdown("## 🧮 企業スコアリング（PDF解析 × OpenAI）")
         with gr.Row():
             with gr.Column(scale=1):
                 company = gr.Textbox(label="企業名（任意）", placeholder="例：株式会社OO")
                 recalc_btn = gr.Button("🔁 この表の値で再計算")
                 health_btn = gr.Button("🩺 環境チェック")
                 health_out = gr.HTML()
+                gr.Markdown("※ 画像化/Visionに失敗しても**自動でテキスト抽出にフォールバック**します。")
             with gr.Column(scale=1):
                 fin_json = gr.Code(label="抽出JSON", language="json", interactive=False)
                 chart = gr.Plot(label="スコアレーダー")
             with gr.Tab("AI診断（日本語）"):
                 insight_md = gr.Markdown()
+            with gr.Tab("デバッグ情報"):
+                debug_md = gr.Markdown()
+                health_live = gr.HTML(value=health_html())
         run_btn.click(run_analyze, inputs=[company, use_vision, files],
+                      outputs=[fin_json, df_out, score_json, chart, insight_md, debug_md],
                       concurrency_limit=1)
+        recalc_btn.click(run_recalc, inputs=[df_out], outputs=[score_json, chart, fin_json], concurrency_limit=1)
+        health_btn.click(health_html, outputs=health_out, concurrency_limit=1)
     return demo
+def main():
+    demo = create_demo()
+    # DeprecationWarning を避けるため queue() は使わない（重い処理のみイベント側で制限）
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+if __name__ == "__main__":
+    main()