Corin1998 committed on
Commit
f2c5804
·
verified ·
1 Parent(s): 7761278

Update ui/ui_app.py

Browse files
Files changed (1) hide show
  1. ui/ui_app.py +52 -157
ui/ui_app.py CHANGED
@@ -1,122 +1,21 @@
1
- from __future__ import annotations
2
- import os, io, base64, json, traceback, shutil
3
- from typing import List, Dict, Any
4
-
5
  import gradio as gr
6
  import pandas as pd
7
- from pdf2image import convert_from_path
8
- import pdfplumber
9
- from openai import OpenAI
10
  import plotly.graph_objects as go
11
 
12
- from api.scorer import score_company # スコア計算は /api/scorer.py を利用
13
-
14
- OPENAI_MODEL_VISION = os.environ.get("OPENAI_VISION_MODEL", "gpt-4o-mini")
15
- OPENAI_MODEL_TEXT = os.environ.get("OPENAI_TEXT_MODEL", "gpt-4o-mini")
16
-
17
- def _b64(img: bytes) -> str:
18
- return base64.b64encode(img).decode("utf-8")
19
-
20
- def _client() -> OpenAI:
21
- key = os.environ.get("OPENAI_API_KEY")
22
- if not key:
23
- raise gr.Error("OPENAI_API_KEY が未設定です。Spaces → Settings → Variables and secrets に追加してください。")
24
- return OpenAI(api_key=key, timeout=60) # proxiesは渡さない
25
-
26
- def _health_html() -> str:
27
- msgs = []
28
- msgs.append("✅ OPENAI_API_KEY: " + ("検出" if os.environ.get("OPENAI_API_KEY") else "未設定"))
29
- for b in ("pdftoppm", "pdftocairo"):
30
- ok = bool(shutil.which(b))
31
- msgs.append(("✅" if ok else "❌") + f" {b}: " + ("検出" if ok else "見つからず(packages.txt に poppler-utils が必要)"))
32
- msgs.append(f"ℹ️ Vision={OPENAI_MODEL_VISION} / Text={OPENAI_MODEL_TEXT}")
33
- return "<br>".join(msgs)
34
-
35
- def pdf_to_images(pdf_path: str, dpi: int = 220, max_pages: int = 6) -> List[bytes]:
36
- pages = convert_from_path(pdf_path, dpi=dpi, fmt="png")
37
- imgs: List[bytes] = []
38
- for i, p in enumerate(pages):
39
- if i >= max_pages:
40
- break
41
- buf = io.BytesIO()
42
- p.save(buf, format="PNG")
43
- imgs.append(buf.getvalue())
44
- return imgs
45
-
46
- def pdf_to_text(pdf_path: str, max_chars: int = 15000) -> str:
47
- parts: List[str] = []
48
- with pdfplumber.open(pdf_path) as pdf:
49
- for i, page in enumerate(pdf.pages):
50
- t = (page.extract_text() or "").strip()
51
- if t:
52
- parts.append(f"[page {i+1}]\n{t}")
53
- if sum(len(x) for x in parts) > max_chars:
54
- break
55
- return "\n\n".join(parts)[:max_chars]
56
-
57
- SYSTEM_JSON = """あなたは有能な財務アナリストです。
58
- 与えられた決算書(画像またはテキスト)から、次の厳密な JSON 構造のみを日本語の単位なし・半角数値で返してください。分からない項目は null。
59
- {
60
- "company": {"name": null},
61
- "period": {"start_date": null, "end_date": null},
62
- "balance_sheet": {
63
- "total_assets": null, "total_liabilities": null, "total_equity": null,
64
- "current_assets": null, "fixed_assets": null,
65
- "current_liabilities": null, "long_term_liabilities": null
66
- },
67
- "income_statement": {
68
- "sales": null, "cost_of_sales": null, "gross_profit": null,
69
- "operating_expenses": null, "operating_income": null,
70
- "ordinary_income": null, "net_income": null
71
- },
72
- "cash_flows": {
73
- "operating_cash_flow": null, "investing_cash_flow": null, "financing_cash_flow": null
74
- }
75
- }
76
- """
77
-
78
- def extract_financials(images: List[bytes] | None, text_blob: str | None, company_hint: str) -> Dict[str, Any]:
79
- client = _client()
80
- if images:
81
- content = [{"type": "text", "text": SYSTEM_JSON}]
82
- if company_hint:
83
- content.append({"type": "text", "text": f"会社名の候補: {company_hint}"})
84
- for im in images:
85
- content.append({"type": "input_image", "image_url": f"data:image/png;base64,{_b64(im)}"})
86
- resp = client.chat.completions.create(
87
- model=OPENAI_MODEL_VISION,
88
- messages=[
89
- {"role": "system", "content": "返答は必ず有効な JSON オブジェクトのみ。説明を含めない。"},
90
- {"role": "user", "content": content},
91
- ],
92
- response_format={"type": "json_object"},
93
- temperature=0.1,
94
- )
95
- return json.loads(resp.choices[0].message.content)
96
- else:
97
- prompt = f"{SYSTEM_JSON}\n\n以下は決算書のテキストです。上記の JSON だけを返してください。\n\n{text_blob or ''}"
98
- resp = client.chat.completions.create(
99
- model=OPENAI_MODEL_TEXT,
100
- messages=[
101
- {"role": "system", "content": "返答は必ず有効な JSON オブジェクトのみ。"},
102
- {"role": "user", "content": prompt},
103
- ],
104
- response_format={"type": "json_object"},
105
- temperature=0.1,
106
- )
107
- return json.loads(resp.choices[0].message.content)
108
-
109
- def fin_to_df(fin: Dict[str, Any]) -> pd.DataFrame:
110
  rows = []
111
- def add(cat, d):
112
- for k, v in (d or {}).items():
113
  rows.append({"category": cat, "item": k, "value": v})
114
- add("balance_sheet", fin.get("balance_sheet"))
115
- add("income_statement", fin.get("income_statement"))
116
- add("cash_flows", fin.get("cash_flows"))
117
- return pd.DataFrame(rows, columns=["category", "item", "value"])
118
 
119
- def df_to_fin(df: pd.DataFrame) -> Dict[str, Any]:
120
  out = {"balance_sheet": {}, "income_statement": {}, "cash_flows": {}}
121
  for _, r in df.iterrows():
122
  cat, item, val = str(r["category"]), str(r["item"]), r["value"]
@@ -128,25 +27,33 @@ def df_to_fin(df: pd.DataFrame) -> Dict[str, Any]:
128
  out[cat][item] = parsed
129
  return out
130
 
131
- def radar(score: Dict[str, Any]) -> go.Figure:
132
  labels = [d["metric"] for d in score["details"]]
133
  values = [d["score"] for d in score["details"]]
134
  fig = go.Figure()
135
  fig.add_trace(go.Scatterpolar(r=values + values[:1], theta=labels + labels[:1], fill="toself"))
136
  fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
137
- showlegend=False, margin=dict(l=20, r=20, t=30, b=20),
138
- height=380, title=f"総合スコア: {score['total_score']}(グレード: {score['grade']})")
139
  return fig
140
 
141
  def run_analyze(company: str, use_vision: bool, files: list[str]):
142
  if not files:
143
  raise gr.Error("PDF をアップロードしてください。")
 
 
 
 
144
  try:
145
- images: List[bytes] = []
146
- for p in files:
147
- images += pdf_to_images(p, dpi=220, max_pages=6)
148
- fin = extract_financials(images if use_vision else None, None, company or "")
149
- except Exception:
 
 
 
 
150
  text_blob = ""
151
  for p in files:
152
  text_blob += pdf_to_text(p) + "\n\n"
@@ -155,53 +62,29 @@ def run_analyze(company: str, use_vision: bool, files: list[str]):
155
  df = fin_to_df(fin)
156
  score = score_company(fin)
157
  fig = radar(score)
 
158
 
159
- insight = ""
160
- try:
161
- client = _client()
162
- prompt = f"""次の財務データとスコア結果から、箇条書きで短く日本語でコメントしてください。
163
- - 良い点 3つ
164
- - 懸念点 3つ
165
- - 総評(100字以内)
166
-
167
- [財務データ]
168
- {json.dumps(fin, ensure_ascii=False)}
169
-
170
- [スコア]
171
- {json.dumps(score, ensure_ascii=False)}
172
- """
173
- resp = client.chat.completions.create(
174
- model=OPENAI_MODEL_TEXT,
175
- messages=[{"role": "system", "content": "簡潔で公正な財務アナリスト。"},
176
- {"role": "user", "content": prompt}],
177
- temperature=0.3,
178
- )
179
- insight = resp.choices[0].message.content
180
- except Exception as e:
181
- insight = f"AI所見の生成に失敗: {e}"
182
-
183
  return (json.dumps(fin, ensure_ascii=False, indent=2),
184
  df,
185
  json.dumps(score, ensure_ascii=False, indent=2),
186
  fig,
187
- insight)
 
188
 
189
  def run_recalc(df: pd.DataFrame):
190
  try:
191
  fin = df_to_fin(df)
192
  score = score_company(fin)
193
  fig = radar(score)
194
- return (json.dumps(score, ensure_ascii=False, indent=2),
195
- fig,
196
- json.dumps(fin, ensure_ascii=False, indent=2))
197
  except Exception as e:
198
  tb = traceback.format_exc(limit=6)
199
  raise gr.Error(f"再計算に失敗しました: {e}\n\n<pre style='white-space:pre-wrap'>{tb}</pre>")
200
 
201
- def build_ui() -> gr.Blocks:
202
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"),
203
- fill_height=True, analytics_enabled=False) as demo:
204
- gr.Markdown("## 🧮 企業スコアリング(PDF解析 × OpenAI Vision)")
205
  with gr.Row():
206
  with gr.Column(scale=1):
207
  company = gr.Textbox(label="企業名(任意)", placeholder="例:株式会社OO")
@@ -211,7 +94,7 @@ def build_ui() -> gr.Blocks:
211
  recalc_btn = gr.Button("🔁 この表の値で再計算")
212
  health_btn = gr.Button("🩺 環境チェック")
213
  health_out = gr.HTML()
214
- gr.Markdown("※ 画像化やVisionに失敗した場合はテキスト抽出に自動フォールバックします。")
215
 
216
  with gr.Column(scale=1):
217
  fin_json = gr.Code(label="抽出JSON", language="json", interactive=False)
@@ -224,11 +107,23 @@ def build_ui() -> gr.Blocks:
224
  chart = gr.Plot(label="スコアレーダー")
225
  with gr.Tab("AI診断(日本語)"):
226
  insight_md = gr.Markdown()
 
 
 
227
 
228
  run_btn.click(run_analyze, inputs=[company, use_vision, files],
229
- outputs=[fin_json, df_out, score_json, chart, insight_md],
230
  concurrency_limit=1)
231
- recalc_btn.click(run_recalc, inputs=[df_out], outputs=[score_json, chart, fin_json],
232
- concurrency_limit=1)
233
- health_btn.click(_health_html, outputs=health_out, concurrency_limit=1)
 
234
  return demo
 
 
 
 
 
 
 
 
 
1
+ import json, traceback
 
 
 
2
  import gradio as gr
3
  import pandas as pd
 
 
 
4
  import plotly.graph_objects as go
5
 
6
+ from core.pdf_utils import pdf_to_images, pdf_to_text
7
+ from core.ai_client import extract_financials, short_insight
8
+ from core.scorer import score_company
9
+ from core.health import health_html
10
+
11
+ def fin_to_df(fin: dict) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  rows = []
13
+ for cat in ("balance_sheet","income_statement","cash_flows"):
14
+ for k, v in (fin.get(cat) or {}).items():
15
  rows.append({"category": cat, "item": k, "value": v})
16
+ return pd.DataFrame(rows, columns=["category","item","value"])
 
 
 
17
 
18
+ def df_to_fin(df: pd.DataFrame) -> dict:
19
  out = {"balance_sheet": {}, "income_statement": {}, "cash_flows": {}}
20
  for _, r in df.iterrows():
21
  cat, item, val = str(r["category"]), str(r["item"]), r["value"]
 
27
  out[cat][item] = parsed
28
  return out
29
 
30
+ def radar(score: dict) -> go.Figure:
31
  labels = [d["metric"] for d in score["details"]]
32
  values = [d["score"] for d in score["details"]]
33
  fig = go.Figure()
34
  fig.add_trace(go.Scatterpolar(r=values + values[:1], theta=labels + labels[:1], fill="toself"))
35
  fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
36
+ showlegend=False, margin=dict(l=20,r=20,t=30,b=20), height=380,
37
+ title=f"総合スコア: {score['total_score']}(グレード: {score['grade']})")
38
  return fig
39
 
40
  def run_analyze(company: str, use_vision: bool, files: list[str]):
41
  if not files:
42
  raise gr.Error("PDF をアップロードしてください。")
43
+
44
+ # 1) 画像化→Vision をまず試す。失敗したら 2) テキスト抽出に自動フォールバック
45
+ fin = None
46
+ errs = []
47
  try:
48
+ if use_vision:
49
+ imgs = []
50
+ for p in files:
51
+ imgs += pdf_to_images(p, dpi=220, max_pages=6)
52
+ fin = extract_financials(imgs, None, company or "")
53
+ except Exception as e:
54
+ errs.append(f"[Visionスキップ] {e}")
55
+
56
+ if fin is None:
57
  text_blob = ""
58
  for p in files:
59
  text_blob += pdf_to_text(p) + "\n\n"
 
62
  df = fin_to_df(fin)
63
  score = score_company(fin)
64
  fig = radar(score)
65
+ insight = short_insight(fin, score)
66
 
67
+ debug = "<br>".join(errs) if errs else "OK"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  return (json.dumps(fin, ensure_ascii=False, indent=2),
69
  df,
70
  json.dumps(score, ensure_ascii=False, indent=2),
71
  fig,
72
+ insight,
73
+ debug)
74
 
75
  def run_recalc(df: pd.DataFrame):
76
  try:
77
  fin = df_to_fin(df)
78
  score = score_company(fin)
79
  fig = radar(score)
80
+ return (json.dumps(score, ensure_ascii=False, indent=2), fig, json.dumps(fin, ensure_ascii=False, indent=2))
 
 
81
  except Exception as e:
82
  tb = traceback.format_exc(limit=6)
83
  raise gr.Error(f"再計算に失敗しました: {e}\n\n<pre style='white-space:pre-wrap'>{tb}</pre>")
84
 
85
+ def create_demo():
86
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), fill_height=True, analytics_enabled=False) as demo:
87
+ gr.Markdown("## 🧮 企業スコアリング(PDF解析 × OpenAI)")
 
88
  with gr.Row():
89
  with gr.Column(scale=1):
90
  company = gr.Textbox(label="企業名(任意)", placeholder="例:株式会社OO")
 
94
  recalc_btn = gr.Button("🔁 この表の値で再計算")
95
  health_btn = gr.Button("🩺 環境チェック")
96
  health_out = gr.HTML()
97
+ gr.Markdown("※ 画像化/Visionに失敗しても**自動でテキスト抽出にフォールバック**します。")
98
 
99
  with gr.Column(scale=1):
100
  fin_json = gr.Code(label="抽出JSON", language="json", interactive=False)
 
107
  chart = gr.Plot(label="スコアレーダー")
108
  with gr.Tab("AI診断(日本語)"):
109
  insight_md = gr.Markdown()
110
+ with gr.Tab("デバッグ情報"):
111
+ debug_md = gr.Markdown()
112
+ health_live = gr.HTML(value=health_html())
113
 
114
  run_btn.click(run_analyze, inputs=[company, use_vision, files],
115
+ outputs=[fin_json, df_out, score_json, chart, insight_md, debug_md],
116
  concurrency_limit=1)
117
+
118
+ recalc_btn.click(run_recalc, inputs=[df_out], outputs=[score_json, chart, fin_json], concurrency_limit=1)
119
+ health_btn.click(health_html, outputs=health_out, concurrency_limit=1)
120
+
121
  return demo
122
+
123
+ def main():
124
+ demo = create_demo()
125
+ # DeprecationWarning を避けるため queue() は使わない(重い処理のみイベント側で制限)
126
+ demo.launch(server_name="0.0.0.0", server_port=7860)
127
+
128
+ if __name__ == "__main__":
129
+ main()