| | |
| | from __future__ import annotations |
| | import os, json |
| | from typing import List, Optional |
| | from openai import OpenAI |
| | from schemas import FinancialExtract, MultipleSuggestion, MarketOutlook |
| |
|
# Model used for the file-based (vision) extraction request; the
# OPENAI_VISION_MODEL environment variable overrides the default.
VISION_MODEL = os.getenv("OPENAI_VISION_MODEL", "gpt-4o-mini")
# Model used for text-only requests; the OPENAI_TEXT_MODEL environment
# variable overrides the default.
TEXT_MODEL = os.getenv("OPENAI_TEXT_MODEL", "gpt-4o-mini")
| |
|
def get_client() -> OpenAI:
    """Build an OpenAI client from the ``OPENAI_API_KEY`` environment variable.

    Returns:
        An ``OpenAI`` instance constructed with the key and ``timeout=120``.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if api_key:
        return OpenAI(api_key=api_key, timeout=120)
    # Message (Japanese): the key is not set; add it under
    # Spaces -> Settings -> Variables and secrets.
    raise RuntimeError("OPENAI_API_KEY が未設定です。Spaces → Settings → Variables and secrets で追加してください。")
| |
|
def upload_file_to_openai(client: OpenAI, filename: str, file_bytes: bytes) -> str:
    """Upload raw bytes through ``client.files.create`` and return the file id.

    The upload is first attempted with ``purpose="vision"``; if that call
    raises, it is retried once with ``purpose="assistants"``.

    Args:
        client: Client object exposing ``files.create``.
        filename: Name sent with the upload; ``"uploaded"`` is used when falsy.
        file_bytes: Content of the file to upload.

    Returns:
        The ``id`` attribute of the object returned by ``files.create``.

    Raises:
        Exception: Whatever the second (``"assistants"``) attempt raises.
    """
    from io import BytesIO

    upload_name = filename or "uploaded"
    try:
        created = client.files.create(
            file=(upload_name, BytesIO(file_bytes)),
            purpose="vision",
        )
    except Exception:
        # Use a fresh stream for the retry: the failed attempt may already
        # have read the first one to its end, in which case reusing it would
        # upload zero bytes.
        created = client.files.create(
            file=(upload_name, BytesIO(file_bytes)),
            purpose="assistants",
        )
    return created.id
| |
|
| | def _safe_output_text(resp) -> str: |
| | try: |
| | return resp.output_text |
| | except Exception: |
| | try: |
| | return resp.output[0].content[0].text |
| | except Exception: |
| | return "" |
| |
|
| | def _strip_code_fences(s: str) -> str: |
| | s2 = s.strip() |
| | if s2.startswith("```"): |
| | s2 = s2.strip("`") |
| | if "\n" in s2: s2 = s2.split("\n", 1)[-1] |
| | if s2.endswith("```"): |
| | s2 = s2[:-3] |
| | return s2.strip() |
| |
|
| | def _pdf_text_concat(paths: List[str], max_chars: int = 180_000) -> str: |
| | try: |
| | from pypdf import PdfReader |
| | except Exception: |
| | return "" |
| | out, total = [], 0 |
| | for p in paths: |
| | try: |
| | r = PdfReader(p) |
| | for page in r.pages: |
| | t = page.extract_text() or "" |
| | if t: |
| | out.append(t) |
| | total += len(t) |
| | if total > max_chars: break |
| | except Exception: |
| | continue |
| | if total > max_chars: break |
| | return "\n\n".join(out)[:max_chars] |
| |
|
| | def _json_loads_strict(raw: str) -> dict: |
| | import json |
| | try: |
| | return json.loads(raw) |
| | except json.JSONDecodeError: |
| | return json.loads(_strip_code_fences(raw)) |
| |
|
def extract_financials_from_files(
    client: OpenAI,
    file_ids: List[str],
    company_hint: Optional[str],
    currency_hint: Optional[str],
    model: str = VISION_MODEL,
    debug: bool = False,
    local_paths: Optional[List[str]] = None,
) -> FinancialExtract:
    """Extract financial figures from uploaded files, with a text fallback.

    A first request sends the uploaded files (by id) to *model*. If anything
    in that attempt raises (the request, JSON parsing or validation), the text
    of the PDFs in *local_paths* is extracted locally and sent to the text
    model together with the JSON schema of ``FinancialExtract``.

    Args:
        client: Client object exposing ``responses.create``.
        file_ids: Ids of already-uploaded files, attached as ``input_file`` parts.
        company_hint: Optional company name appended to the prompt.
        currency_hint: Optional currency appended to the prompt.
        model: Model used for the first (file-based) request.
        debug: Accepted but not used by this function.
        local_paths: Local PDF paths used for the text fallback.

    Returns:
        The validated ``FinancialExtract``.

    Raises:
        RuntimeError: If the first attempt fails and no *local_paths* were
            given, if no text can be extracted from them, or if the fallback
            response is empty. The first two are chained to the original error.
        Exception: Parsing/validation errors of the fallback response propagate.
    """
    schema = FinancialExtract.model_json_schema()
    system = (
        "You are a meticulous financial analyst. Prefer consolidated and annual figures. "
        "Return strictly valid JSON only."
    )
    base_user = (
        "Extract: company_name, currency, fiscal_year_end, and the latest two periods' numbers "
        "(revenue, COGS, EBIT, depreciation, EBITDA, net_income, cash, receivables, inventory, payables, "
        "current_assets, current_liabilities, total_assets, total_equity, total_debt, interest_expense)."
    )
    if company_hint:
        base_user += f"\nCompany hint: {company_hint}"
    if currency_hint:
        base_user += f"\nCurrency hint: {currency_hint}"

    # NOTE(review): unlike the fallback below, this prompt does not include
    # the JSON schema - confirm whether that is intentional.
    try:
        resp = client.responses.create(
            model=model,
            input=[
                {"role": "system", "content": [{"type": "input_text", "text": system}]},
                {"role": "user",
                 "content": [{"type": "input_text", "text": base_user}] + [{"type":"input_file","file_id": fid} for fid in file_ids]},
            ],
            max_output_tokens=2048,
        )
        raw = _safe_output_text(resp)
        data = _json_loads_strict(raw)
        return FinancialExtract.model_validate(data)
    except Exception as e_vision:
        if not local_paths:
            # Message (Japanese): vision extraction failed and there is no
            # local PDF text to fall back on.
            raise RuntimeError(f"Vision抽出に失敗し、かつローカルPDFテキストがありません: {e_vision}") from e_vision

        text = _pdf_text_concat(local_paths)
        if not text:
            # Message (Japanese): the PDF may be image-based; no text could be
            # extracted. The vision-side error is appended.
            raise RuntimeError(f"PDFが画像ベースの可能性があり、テキスト抽出できません。Vision側エラー: {e_vision}") from e_vision

        user2 = (
            base_user
            + "\n\nSOURCE TEXT (truncated):\n" + text[:150000]
            + "\n\nReturn a single JSON object that VALIDATES this JSON Schema (no extra keys):\n"
            + json.dumps(schema, ensure_ascii=False)
        )
        resp2 = client.responses.create(
            # The environment is read again at call time; TEXT_MODEL is the default.
            model=os.environ.get("OPENAI_TEXT_MODEL", TEXT_MODEL),
            input=[
                {"role": "system", "content": [{"type": "input_text", "text": system}]},
                {"role": "user", "content": [{"type": "input_text", "text": user2}]},
            ],
            max_output_tokens=2048,
        )
        raw2 = _safe_output_text(resp2)
        if not raw2:
            # Message (Japanese): the text-extraction fallback also returned
            # an empty response.
            raise RuntimeError("テキスト抽出フォールバックでも応答が空でした。")
        data2 = _json_loads_strict(raw2)
        return FinancialExtract.model_validate(data2)
| |
|
def suggest_multiples_with_llm(client: OpenAI, text_model: str, industry: str, region: str = "JP", debug: bool = False) -> Optional[MultipleSuggestion]:
    """Ask the text model for revenue and EBITDA valuation multiples.

    Args:
        client: Client object exposing ``responses.create``.
        text_model: Model name passed to the request.
        industry: Industry description; when falsy no request is made.
        region: Region string included in the prompt.
        debug: When true, errors are re-raised instead of being swallowed.

    Returns:
        The validated ``MultipleSuggestion``, or ``None`` when *industry* is
        falsy or (with ``debug`` false) any step fails.
    """
    if not industry:
        return None

    system = "You are an equity analyst. Provide conservative valuation multiple estimates."
    user = (
        f"Industry: {industry}\nRegion: {region}\n"
        "Return STRICT JSON: {\"revenue_multiple\": float, \"ebitda_multiple\": float}"
    )
    messages = [
        {"role": "system", "content": [{"type": "input_text", "text": system}]},
        {"role": "user", "content": [{"type": "input_text", "text": user}]},
    ]
    try:
        resp = client.responses.create(
            model=text_model,
            input=messages,
            max_output_tokens=200,
        )
        payload = _json_loads_strict(_safe_output_text(resp))
        return MultipleSuggestion.model_validate(payload)
    except Exception:
        if not debug:
            return None
        raise
| |
|
def suggest_market_outlook_with_llm(
    client: OpenAI,
    text_model: str,
    industry: str,
    market_notes: str,
    region: str = "JP",
    debug: bool = False,
) -> Optional[MarketOutlook]:
    """Return a Japanese-language summary of market-expansion expectations (3-5 years).

    Args:
        client: Client object exposing ``responses.create``.
        text_model: Model name passed to the request.
        industry: Industry description; may be empty if *market_notes* is given.
        market_notes: Free-form notes included in the prompt.
        region: Region string included in the prompt.
        debug: When true, errors are re-raised instead of being swallowed.

    Returns:
        The validated ``MarketOutlook``, or ``None`` when both *industry* and
        *market_notes* are falsy, when the response text is empty, or (with
        ``debug`` false) when any step fails.
    """
    if not industry and not market_notes:
        return None

    system = "日本語で簡潔に出力してください。You are a market analyst. Rate market expansion expectations for the next 3-5 years."
    industry_label = industry or '不明'
    notes_label = market_notes or 'なし'
    user = (
        f"業種: {industry_label}\n地域: {region}\n補足メモ: {notes_label}\n"
        "以下のキーのみ、**日本語**で厳格なJSONを返してください。余計な説明は出力しない:\n"
        " expectation_label: とても低い / 低い / 中程度 / 高い / とても高い のいずれか\n"
        " expectation_score: 整数 1-5\n"
        " expected_market_cagr: 小数(%)\n"
        " rationale: 60文字以内の日本語要約\n"
    )
    messages = [
        {"role": "system", "content": [{"type": "input_text", "text": system}]},
        {"role": "user", "content": [{"type": "input_text", "text": user}]},
    ]

    try:
        resp = client.responses.create(
            model=text_model,
            input=messages,
            max_output_tokens=250,
        )
        raw = _safe_output_text(resp)
        if not raw:
            return None
        data = _json_loads_strict(raw)
        # Accept the alternative key name when the expected one is absent.
        if "expectation_label" not in data and "expectation_label_ja" in data:
            data["expectation_label"] = data.pop("expectation_label_ja")
        return MarketOutlook.model_validate(data)
    except Exception:
        if not debug:
            return None
        raise
| |
|