# multilingual_sns_analyzerV2 / lib / openai_client.py
# Corin1998's picture
# Update lib/openai_client.py
# 8c873fe verified
import json
import logging
import os
from typing import Dict, List

from openai import OpenAI
# Model identifiers, each overridable through an environment variable.
# EMBED_MODEL is used for embeddings requests, CHAT_MODEL for chat completions;
# the second argument is the default applied when the variable is not set.
_EMBED_MODEL = os.environ.get("EMBED_MODEL", "text-embedding-3-large")
_CHAT_MODEL = os.environ.get("CHAT_MODEL", "gpt-4o-mini")
def get_client() -> OpenAI:
    """Create an OpenAI client, failing fast when no API key is configured.

    Returns:
        A new ``OpenAI`` client built with default arguments.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is unset
            or empty.
    """
    # The key is expected to be configured in the Space's Secrets settings.
    api_key = os.getenv("OPENAI_API_KEY")
    if api_key:
        return OpenAI()
    raise RuntimeError("OPENAI_API_KEY が未設定です(Settings → Secrets に追加してください)。")
def embed_texts(texts: List[str], batch_size: int = 128) -> List[List[float]]:
    """Embed ``texts`` with the configured embedding model, in batches.

    Args:
        texts: Strings to embed.
        batch_size: Maximum number of strings sent in one embeddings request.

    Returns:
        The embedding vectors collected from every batch response, appended
        in the order the batches were sent. An empty ``texts`` yields ``[]``
        without creating a client or issuing any request.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        RuntimeError: If ``OPENAI_API_KEY`` is not set (raised by
            ``get_client``) and ``texts`` is non-empty.
    """
    # A negative step would make range() empty and silently drop every text;
    # reject it up front together with zero (which range() rejects anyway).
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Nothing to embed: no reason to require credentials or build a client.
    if len(texts) == 0:
        return []

    client = get_client()
    vectors: List[List[float]] = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        resp = client.embeddings.create(model=_EMBED_MODEL, input=batch)
        vectors.extend(item.embedding for item in resp.data)
    return vectors
def _fallback_summary() -> Dict:
    """Return a fresh placeholder summary used when no real one is available."""
    return {
        "title": "クラスタ",
        "overview": "要約に失敗しました。",
        "actions": [],
        "sentiment": {"positive": 0.33, "neutral": 0.34, "negative": 0.33},
    }


def summarize_cluster(samples: List[str], output_lang: str = "ja") -> Dict:
    """Summarize a cluster of user comments into a structured dict.

    Sends the first 30 comments to the chat model and asks for a JSON object
    holding a title, an overview, suggested actions and sentiment proportions.

    Args:
        samples: Comments belonging to the cluster. Only the first 30 are
            included in the prompt.
        output_lang: ``'ja'`` requests Japanese, ``'en'`` requests English;
            any other value (e.g. ``'auto'``) asks the model to pick the
            language that best fits the comments.

    Returns:
        A dict that always contains the keys ``title``, ``overview``,
        ``actions`` and ``sentiment``. A placeholder summary is returned when
        ``samples`` is empty, when the request or JSON parsing fails, or when
        the reply is not a JSON object. Keys missing from the reply are filled
        in from the placeholder; extra keys in the reply are kept.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set (raised by
            ``get_client``) and ``samples`` is non-empty.
    """
    # With no comments there is nothing to summarize; skip the request rather
    # than let the model invent a summary from an empty list.
    if len(samples) == 0:
        return _fallback_summary()

    client = get_client()

    if output_lang == "ja":
        lang_inst = "回答は必ず日本語で。"
    elif output_lang == "en":
        lang_inst = "Respond in English."
    else:
        lang_inst = "Respond in the most appropriate language for the comments."

    comment_lines = "\n".join(f"- {s}" for s in samples[:30])
    prompt = "\n".join(
        (
            "You are an insightful product analyst. Read user comments and output a concise JSON with:",
            "- title: short cluster title",
            "- overview: 2-3 sentences",
            "- actions: 2-5 actionable suggestions (array of strings)",
            "- sentiment: proportions of positive/neutral/negative (each 0-1 float; sum≈1)",
            lang_inst,
            "Comments:",
            comment_lines,
            "Return ONLY strict JSON with keys: title, overview, actions, sentiment.",
        )
    )

    log = logging.getLogger(__name__)
    try:
        resp = client.chat.completions.create(
            model=_CHAT_MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            response_format={"type": "json_object"},
        )
        parsed = json.loads(resp.choices[0].message.content)
    except Exception:
        # Deliberately best-effort: a failed summary must not abort the
        # caller, but the cause is logged instead of being discarded.
        log.exception("summarize_cluster: request or JSON parsing failed; using fallback")
        return _fallback_summary()

    if not isinstance(parsed, dict):
        log.warning(
            "summarize_cluster: expected a JSON object, got %s; using fallback",
            type(parsed).__name__,
        )
        return _fallback_summary()

    # Guarantee the documented keys so callers can index the result safely.
    return {**_fallback_summary(), **parsed}