Update core/extract.py
Browse files
- core/extract.py +17 -9
core/extract.py
CHANGED
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
| 2 |
import json
|
| 3 |
from typing import Any, Dict, List, Optional
|
| 4 |
from .openai_client import get_client, VISION_MODEL, TEXT_MODEL
|
| 5 |
-
from .pdf_io import b64
|
| 6 |
|
| 7 |
SYSTEM_JSON = """あなたは有能な財務アナリストです。
|
| 8 |
与えられた決算書(画像またはテキスト)から、次の厳密な JSON 構造のみを日本語の単位なし・半角数値で返してください。分からない項目は null。
|
|
@@ -27,19 +26,28 @@ SYSTEM_JSON = """あなたは有能な財務アナリストです。
|
|
| 27 |
|
| 28 |
def extract_financials(images: Optional[List[bytes]], text_blob: Optional[str], company_hint: str="") -> Dict[str, Any]:
|
| 29 |
client = get_client()
|
| 30 |
-
if images
|
| 31 |
content = [{"type": "text", "text": SYSTEM_JSON}]
|
| 32 |
if company_hint:
|
| 33 |
content.append({"type": "text", "text": f"会社名の候補: {company_hint}"})
|
| 34 |
for im in images:
|
| 35 |
-
content.append({"type": "input_image", "image_url": f"data:image/png;base64,{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
resp = client.chat.completions.create(
|
| 37 |
model=VISION_MODEL,
|
| 38 |
messages=[
|
| 39 |
-
{"role":
|
| 40 |
-
{"role":
|
| 41 |
],
|
| 42 |
-
response_format={"type":
|
| 43 |
temperature=0.1,
|
| 44 |
)
|
| 45 |
return json.loads(resp.choices[0].message.content)
|
|
@@ -48,10 +56,10 @@ def extract_financials(images: Optional[List[bytes]], text_blob: Optional[str],
|
|
| 48 |
resp = client.chat.completions.create(
|
| 49 |
model=TEXT_MODEL,
|
| 50 |
messages=[
|
| 51 |
-
{"role":
|
| 52 |
-
{"role":
|
| 53 |
],
|
| 54 |
-
response_format={"type":
|
| 55 |
temperature=0.1,
|
| 56 |
)
|
| 57 |
return json.loads(resp.choices[0].message.content)
|
|
|
|
| 2 |
import json
|
| 3 |
from typing import Any, Dict, List, Optional
|
| 4 |
from .openai_client import get_client, VISION_MODEL, TEXT_MODEL
|
|
|
|
| 5 |
|
| 6 |
SYSTEM_JSON = """あなたは有能な財務アナリストです。
|
| 7 |
与えられた決算書(画像またはテキスト)から、次の厳密な JSON 構造のみを日本語の単位なし・半角数値で返してください。分からない項目は null。
|
|
|
|
| 26 |
|
| 27 |
def extract_financials(images: Optional[List[bytes]], text_blob: Optional[str], company_hint: str="") -> Dict[str, Any]:
|
| 28 |
client = get_client()
|
| 29 |
+
if images:
|
| 30 |
content = [{"type": "text", "text": SYSTEM_JSON}]
|
| 31 |
if company_hint:
|
| 32 |
content.append({"type": "text", "text": f"会社名の候補: {company_hint}"})
|
| 33 |
for im in images:
|
| 34 |
+
content.append({"type": "input_image", "image_url": f"data:image/png;base64,{im.decode('latin1') if isinstance(im, str) else 'data'}"})
|
| 35 |
+
# 上のデータ URI 生成は UI 側で行うためここでは未使用
|
| 36 |
+
# (UIでdata:image/png;base64,xxxを組む実装に合わせる場合は差し替え)
|
| 37 |
+
pass
|
| 38 |
+
# 実運用では UI 側で Vision を呼ぶ形にせず、ここで共通化
|
| 39 |
+
if images:
|
| 40 |
+
content = [{"type":"text","text":SYSTEM_JSON}]
|
| 41 |
+
for im in images:
|
| 42 |
+
import base64
|
| 43 |
+
content.append({"type":"input_image","image_url":f"data:image/png;base64,{base64.b64encode(im).decode('utf-8')}"})
|
| 44 |
resp = client.chat.completions.create(
|
| 45 |
model=VISION_MODEL,
|
| 46 |
messages=[
|
| 47 |
+
{"role":"system","content":"返答は必ず有効な JSON オブジェクトのみ。説明は不要。"},
|
| 48 |
+
{"role":"user","content":content},
|
| 49 |
],
|
| 50 |
+
response_format={"type":"json_object"},
|
| 51 |
temperature=0.1,
|
| 52 |
)
|
| 53 |
return json.loads(resp.choices[0].message.content)
|
|
|
|
| 56 |
resp = client.chat.completions.create(
|
| 57 |
model=TEXT_MODEL,
|
| 58 |
messages=[
|
| 59 |
+
{"role":"system","content":"返答は必ず有効な JSON オブジェクトのみ。説明は不要。"},
|
| 60 |
+
{"role":"user","content":prompt},
|
| 61 |
],
|
| 62 |
+
response_format={"type":"json_object"},
|
| 63 |
temperature=0.1,
|
| 64 |
)
|
| 65 |
return json.loads(resp.choices[0].message.content)
|