# -*- coding: utf-8 -*- from __future__ import annotations import os, json from typing import List, Optional from openai import OpenAI from schemas import FinancialExtract, MultipleSuggestion, MarketOutlook VISION_MODEL = os.environ.get("OPENAI_VISION_MODEL", "gpt-4o-mini") TEXT_MODEL = os.environ.get("OPENAI_TEXT_MODEL", "gpt-4o-mini") def get_client() -> OpenAI: key = os.environ.get("OPENAI_API_KEY") if not key: raise RuntimeError("OPENAI_API_KEY が未設定です。Spaces → Settings → Variables and secrets で追加してください。") return OpenAI(api_key=key, timeout=120) def upload_file_to_openai(client: OpenAI, filename: str, file_bytes: bytes) -> str: from io import BytesIO bio = BytesIO(file_bytes) try: f = client.files.create(file=(filename or "uploaded", bio), purpose="vision") except Exception: f = client.files.create(file=(filename or "uploaded", bio), purpose="assistants") return f.id def _safe_output_text(resp) -> str: try: return resp.output_text except Exception: try: return resp.output[0].content[0].text except Exception: return "" def _strip_code_fences(s: str) -> str: s2 = s.strip() if s2.startswith("```"): s2 = s2.strip("`") if "\n" in s2: s2 = s2.split("\n", 1)[-1] if s2.endswith("```"): s2 = s2[:-3] return s2.strip() def _pdf_text_concat(paths: List[str], max_chars: int = 180_000) -> str: try: from pypdf import PdfReader except Exception: return "" out, total = [], 0 for p in paths: try: r = PdfReader(p) for page in r.pages: t = page.extract_text() or "" if t: out.append(t) total += len(t) if total > max_chars: break except Exception: continue if total > max_chars: break return "\n\n".join(out)[:max_chars] def _json_loads_strict(raw: str) -> dict: import json try: return json.loads(raw) except json.JSONDecodeError: return json.loads(_strip_code_fences(raw)) def extract_financials_from_files( client: OpenAI, file_ids: List[str], company_hint: Optional[str], currency_hint: Optional[str], model: str = VISION_MODEL, debug: bool = False, local_paths: Optional[List[str]] = None, ) -> FinancialExtract: schema = FinancialExtract.model_json_schema() system = ( "You are a meticulous financial analyst. Prefer consolidated and annual figures. " "Return strictly valid JSON only." ) base_user = ( "Extract: company_name, currency, fiscal_year_end, and the latest two periods' numbers " "(revenue, COGS, EBIT, depreciation, EBITDA, net_income, cash, receivables, inventory, payables, " "current_assets, current_liabilities, total_assets, total_equity, total_debt, interest_expense)." ) if company_hint: base_user += f"\nCompany hint: {company_hint}" if currency_hint: base_user += f"\nCurrency hint: {currency_hint}" try: resp = client.responses.create( model=model, input=[ {"role": "system", "content": [{"type": "input_text", "text": system}]}, {"role": "user", "content": [{"type": "input_text", "text": base_user}] + [{"type":"input_file","file_id": fid} for fid in file_ids]}, ], max_output_tokens=2048, ) raw = _safe_output_text(resp) data = _json_loads_strict(raw) return FinancialExtract.model_validate(data) except Exception as e_vision: if not local_paths: raise RuntimeError(f"Vision抽出に失敗し、かつローカルPDFテキストがありません: {e_vision}") text = _pdf_text_concat(local_paths) if not text: raise RuntimeError(f"PDFが画像ベースの可能性があり、テキスト抽出できません。Vision側エラー: {e_vision}") user2 = ( base_user + "\n\nSOURCE TEXT (truncated):\n" + text[:150000] + "\n\nReturn a single JSON object that VALIDATES this JSON Schema (no extra keys):\n" + json.dumps(schema, ensure_ascii=False) ) resp2 = client.responses.create( model=os.environ.get("OPENAI_TEXT_MODEL", TEXT_MODEL), input=[ {"role": "system", "content": [{"type": "input_text", "text": system}]}, {"role": "user", "content": [{"type": "input_text", "text": user2}]}, ], max_output_tokens=2048, ) raw2 = _safe_output_text(resp2) if not raw2: raise RuntimeError("テキスト抽出フォールバックでも応答が空でした。") data2 = _json_loads_strict(raw2) return FinancialExtract.model_validate(data2) def suggest_multiples_with_llm(client: OpenAI, text_model: str, industry: str, region: str = "JP", debug: bool = False) -> Optional[MultipleSuggestion]: if not industry: return None system = "You are an equity analyst. Provide conservative valuation multiple estimates." user = ( f"Industry: {industry}\nRegion: {region}\n" "Return STRICT JSON: {\"revenue_multiple\": float, \"ebitda_multiple\": float}" ) try: resp = client.responses.create( model=text_model, input=[ {"role": "system", "content": [{"type": "input_text", "text": system}]}, {"role": "user", "content": [{"type": "input_text", "text": user}]}, ], max_output_tokens=200, ) raw = _safe_output_text(resp) data = _json_loads_strict(raw) return MultipleSuggestion.model_validate(data) except Exception: if debug: raise return None def suggest_market_outlook_with_llm( client: OpenAI, text_model: str, industry: str, market_notes: str, region: str = "JP", debug: bool = False, ) -> Optional[MarketOutlook]: """市場拡大期待(3-5年)の日本語サマリを返す。""" if not (industry or market_notes): return None system = "日本語で簡潔に出力してください。You are a market analyst. Rate market expansion expectations for the next 3-5 years." user = ( f"業種: {industry or '不明'}\n地域: {region}\n補足メモ: {market_notes or 'なし'}\n" "以下のキーのみ、**日本語**で厳格なJSONを返してください。余計な説明は出力しない:\n" " expectation_label: とても低い / 低い / 中程度 / 高い / とても高い のいずれか\n" " expectation_score: 整数 1-5\n" " expected_market_cagr: 小数(%)\n" " rationale: 60文字以内の日本語要約\n" ) try: resp = client.responses.create( model=text_model, input=[ {"role": "system", "content": [{"type": "input_text", "text": system}]}, {"role": "user", "content": [{"type": "input_text", "text": user}]}, ], max_output_tokens=250, ) raw = _safe_output_text(resp) if not raw: return None data = _json_loads_strict(raw) # 保険:英語キーで来た場合の受け皿 if "expectation_label_ja" in data and "expectation_label" not in data: data["expectation_label"] = data.pop("expectation_label_ja") return MarketOutlook.model_validate(data) except Exception: if debug: raise return None