import os, orjson as json import regex as re # Try optional .env so local runs can pick up keys without exporting try: from dotenv import load_dotenv # type: ignore load_dotenv() except Exception: pass PROMPT_PERSONALIZATION = """Return JSON only describing personalization cues in the email. Subject: {subject} {body} Definition: - Personalization cues: explicit references to the recipient (name, role, company, location, prior context), dynamic fields, or tailored details that indicate the message is written for a specific person/situation. - Relevance: whether the cue logically relates to the email’s topic/ask. - Intrusive: cues that feel privacy-invasive (e.g., excessive personal history) or out of place for the context. Output JSON strictly in this schema (no prose): {{ "cues": [ {{"text": , "type": <"name"|"company"|"role"|"location"|"context">, "relevant": }} , ... ], "too_intrusive": }} """ def _extract_json(text: str): t = (text or "").strip().strip("```").strip() try: return json.loads(t) except Exception: pass start = t.find("{"); end = t.rfind("}") if start != -1 and end != -1 and end > start: try: return json.loads(t[start:end+1]) except Exception: pass return {"cues": [], "too_intrusive": False} def _openai_call(prompt): from openai import OpenAI client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) r = client.chat.completions.create(model="gpt-4o-mini", temperature=0, messages=[{"role":"user","content":prompt}]) usage = r.usage return r.choices[0].message.content, {"prompt_tokens": getattr(usage, "prompt_tokens", 0), "completion_tokens": getattr(usage, "completion_tokens", 0)} def _claude_call(prompt): import anthropic client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")) m = client.messages.create(model="claude-3-5-sonnet-20240620", max_tokens=600, temperature=0, messages=[{"role":"user","content":prompt}]) return m.content[0].text, {"prompt_tokens": getattr(m.usage, "input_tokens", 0), "completion_tokens": getattr(m.usage, "output_tokens", 0)} def _heuristic_personalization(subject: str, body: str): text = f"{subject or ''}\n{body or ''}" cues = [] # Name/greeting detection (simple heuristic) m = re.search(r"(?i)^(hi|hello|dear)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)\b", (body or "").strip()) if m: cues.append({"text": m.group(2), "type": "name", "relevant": True}) # Role/company keywords if re.search(r"(?i)\b(ceo|cto|founder|manager|director|engineer|designer)\b", text): cues.append({"text": "role_reference", "type": "role", "relevant": True}) if re.search(r"(?i)\b(inc\.|llc|limited|ltd|corp\.|company|startup)\b", text): cues.append({"text": "company_reference", "type": "company", "relevant": True}) # Contextual references if re.search(r"(?i)\b(follow\s*up|as discussed|regarding|re:|meeting|last week|yesterday)\b", text): cues.append({"text": "context_reference", "type": "context", "relevant": True}) too_intrusive = bool(re.search(r"(?i)\b(ssn|social security|salary|home address|private|confidential)\b", text)) return {"cues": cues, "too_intrusive": too_intrusive} def personalization_flags(subject: str, body: str, engine: str = "openai"): prompt = PROMPT_PERSONALIZATION.format(subject=subject or "", body=body or "") try: raw, usage = (_openai_call if engine == "openai" else _claude_call)(prompt) data = _extract_json(raw) # Validate structure cues = data.get("cues", []) if isinstance(data, dict) else [] if not isinstance(cues, list): cues = [] too_intrusive = bool(data.get("too_intrusive", False)) if isinstance(data, dict) else False return {"cues": cues, "too_intrusive": too_intrusive}, usage except Exception: # Fallback to heuristics with zero usage return _heuristic_personalization(subject, body), {"prompt_tokens": 0, "completion_tokens": 0}