Spaces:

yourcomms
/

n8n

Running

niwayandm committed on Nov 13, 2025

Commit

9625e2f

1 Parent(s): 0fe3308

Update emails to include normalized subject

Files changed (1) hide show

python/hubspot_emails.py CHANGED Viewed

@@ -80,7 +80,7 @@ EMAIL_PROPERTIES = [
 # Email parsing
 # -----------------------------------------------------------------------------
 EMAIL_RE = re.compile(r'[\w\.\+\-]+@[\w\.\-]+\.\w+')
 def parse_emails(raw: Optional[object]) -> List[str]:
     if raw is None:
@@ -100,6 +100,24 @@ def parse_emails(raw: Optional[object]) -> List[str]:
         candidates.extend(EMAIL_RE.findall(str(raw)))
     return sorted({c.strip().lower() for c in candidates if c and c.strip()})
 # -----------------------------------------------------------------------------
 # Time helpers
 # -----------------------------------------------------------------------------
@@ -263,6 +281,7 @@ def read_emails_by_ids(
             email_metadata_data.append({
                 "email_id": record.id,
                 "subject": props.get("hs_email_subject"),
                 "from_email": props.get("hs_email_from_email") or "",
                 "to_emails": parse_emails(props.get("hs_email_to_email")),
                 "sent_at": sent_at_iso,

 # Email parsing
 # -----------------------------------------------------------------------------
 EMAIL_RE = re.compile(r'[\w\.\+\-]+@[\w\.\-]+\.\w+')
+SUBJECT_PREFIX_RE = re.compile(r'^(re:|fw:|fwd:)\s*', re.IGNORECASE)
 def parse_emails(raw: Optional[object]) -> List[str]:
     if raw is None:
         candidates.extend(EMAIL_RE.findall(str(raw)))
     return sorted({c.strip().lower() for c in candidates if c and c.strip()})
+def normalize_subject(raw: Optional[str]) -> Optional[str]:
+    if not raw:
+        return None
+    s = raw.strip().lower()
+    # remove multiple prefixes
+    while True:
+        new_s = SUBJECT_PREFIX_RE.sub("", s)
+        if new_s == s:
+            break
+        s = new_s.strip()
+    # collapse whitespace
+    s = re.sub(r"\s+", " ", s).strip()
+    return s or None
 # -----------------------------------------------------------------------------
 # Time helpers
 # -----------------------------------------------------------------------------
             email_metadata_data.append({
                 "email_id": record.id,
                 "subject": props.get("hs_email_subject"),
+                "normalized_subject": normalize_subject(props.get("hs_email_subject")),
                 "from_email": props.get("hs_email_from_email") or "",
                 "to_emails": parse_emails(props.get("hs_email_to_email")),
                 "sent_at": sent_at_iso,