import json
import logging
import os
from typing import List, Dict

from openai import OpenAI
|
|
# Model identifiers, resolved once at import time. Each one can be overridden
# through the environment variable named in the lookup; otherwise the literal
# default next to it is used.
_EMBED_MODEL = os.environ.get("EMBED_MODEL", "text-embedding-3-large")
_CHAT_MODEL = os.environ.get("CHAT_MODEL", "gpt-4o-mini")
|
|
def get_client() -> OpenAI:
    """Build an OpenAI client, failing fast when no API key is configured.

    Returns:
        A new ``OpenAI`` instance constructed without explicit arguments.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is
            unset or empty.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if api_key:
        return OpenAI()
    # The message is user-facing (Japanese): it tells the operator to add the
    # key under Settings -> Secrets.
    raise RuntimeError("OPENAI_API_KEY が未設定です(Settings → Secrets に追加してください)。")
|
|
def embed_texts(texts: List[str], batch_size: int = 128) -> List[List[float]]:
    """Embed ``texts`` with the configured embedding model, batch by batch.

    Args:
        texts: Strings to embed.
        batch_size: Maximum number of texts sent in one embeddings request.
            Must be at least 1.

    Returns:
        The embedding vectors collected from every response, with batches
        concatenated in input order. An empty list when ``texts`` is empty.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        RuntimeError: Propagated from ``get_client`` when there is work to do
            but no API key is configured.
    """
    if batch_size < 1:
        # A negative step makes range() empty, which would silently drop every
        # input and return []; reject it up front together with zero.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    if len(texts) == 0:
        # Nothing to embed, so no client (and no credentials) is needed.
        return []

    client = get_client()
    vectors: List[List[float]] = []
    for start in range(0, len(texts), batch_size):
        chunk = texts[start:start + batch_size]
        resp = client.embeddings.create(model=_EMBED_MODEL, input=chunk)
        vectors.extend(item.embedding for item in resp.data)
    return vectors
|
|
_LOGGER = logging.getLogger(__name__)

# Sentiment classes every summary reports a proportion for.
_SENTIMENT_KEYS = ("positive", "neutral", "negative")

# Upper bound on how many comments are pasted into the prompt.
_MAX_PROMPT_SAMPLES = 30

# Language instruction appended to the prompt, keyed by ``output_lang``.
# Any other value falls back to _AUTO_LANG_INSTRUCTION.
_LANG_INSTRUCTIONS = {
    "ja": "回答は必ず日本語で。",
    "en": "Respond in English.",
}
_AUTO_LANG_INSTRUCTION = "Respond in the most appropriate language for the comments."


def _fallback_summary() -> Dict:
    """Return a fresh placeholder summary for when no real one is available."""
    return {
        "title": "クラスタ",
        "overview": "要約に失敗しました。",
        "actions": [],
        "sentiment": {"positive": 0.33, "neutral": 0.34, "negative": 0.33},
    }


def _build_summary_prompt(samples: List[str], lang_inst: str) -> str:
    """Render the analyst prompt for at most ``_MAX_PROMPT_SAMPLES`` comments."""
    comments = "\n".join(f"- {s}" for s in samples[:_MAX_PROMPT_SAMPLES])
    return (
        "You are an insightful product analyst. Read user comments and output a concise JSON with:\n"
        "- title: short cluster title\n"
        "- overview: 2-3 sentences\n"
        "- actions: 2-5 actionable suggestions (array of strings)\n"
        "- sentiment: proportions of positive/neutral/negative (each 0-1 float; sum≈1)\n"
        "\n"
        f"{lang_inst}\n"
        "\n"
        "Comments:\n"
        f"{comments}\n"
        "\n"
        "Return ONLY strict JSON with keys: title, overview, actions, sentiment."
    )


def _normalize_summary(parsed: object) -> Dict:
    """Coerce a parsed model reply into the documented summary shape.

    Extra top-level keys are kept. Missing or wrongly typed fields are
    replaced by the corresponding placeholder values.

    Raises:
        ValueError: If ``parsed`` is not a JSON object (``dict``).
    """
    if not isinstance(parsed, dict):
        raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")

    fallback = _fallback_summary()
    summary = dict(parsed)

    for key in ("title", "overview"):
        if not isinstance(summary.get(key), str):
            summary[key] = fallback[key]

    actions = summary.get("actions")
    if isinstance(actions, list):
        summary["actions"] = [str(action) for action in actions]
    else:
        summary["actions"] = fallback["actions"]

    sentiment = summary.get("sentiment")
    try:
        summary["sentiment"] = {key: float(sentiment[key]) for key in _SENTIMENT_KEYS}
    except (KeyError, TypeError, ValueError):
        # Missing class, non-mapping value, or non-numeric proportion.
        summary["sentiment"] = fallback["sentiment"]

    return summary


def summarize_cluster(samples: List[str], output_lang: str = "ja") -> Dict:
    """Summarize a cluster of user comments into a structured dict.

    Sends up to the first 30 comments to the chat model and asks for a JSON
    object, which is then normalized so callers always get the same keys.

    Args:
        samples: Comments belonging to one cluster.
        output_lang: ``'ja'`` or ``'en'`` to force the reply language; any
            other value (e.g. ``'auto'``) lets the model choose.

    Returns:
        A dict with ``title`` (str), ``overview`` (str), ``actions`` (list of
        str) and ``sentiment`` (dict of ``positive``/``neutral``/``negative``
        floats). When ``samples`` is empty, or the request or parsing fails,
        a fixed placeholder summary is returned instead.

    Raises:
        RuntimeError: Propagated from ``get_client`` when there are comments
            to summarize but no API key is configured.
    """
    if len(samples) == 0:
        # No comments means nothing to summarize; skip the API round trip.
        return _fallback_summary()

    client = get_client()
    lang_inst = _LANG_INSTRUCTIONS.get(output_lang, _AUTO_LANG_INSTRUCTION)
    prompt = _build_summary_prompt(samples, lang_inst)

    try:
        resp = client.chat.completions.create(
            model=_CHAT_MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            response_format={"type": "json_object"},
        )
        return _normalize_summary(json.loads(resp.choices[0].message.content))
    except Exception:
        # Deliberate best-effort boundary: a failed summary must not abort the
        # caller, but the cause is logged instead of being discarded.
        _LOGGER.exception("summarize_cluster failed; returning fallback summary")
        return _fallback_summary()
|
|