Spaces:
Sleeping
Sleeping
Update app.py
Browse files
Always send to Wholix, even if an e-mail address is missing, to avoid endless retries caused by the exclude_hash not being stored.
app.py
CHANGED
|
@@ -10,6 +10,9 @@ Gradio App – robuste Async-Ausführung via Status-Endpoints
|
|
| 10 |
- Logging in Konsole + logs/<job_id>.log
|
| 11 |
- FIX: exclude_hash richtig laden/berücksichtigen (paginierte Wholix-Suche)
|
| 12 |
- FIX: pro Lead harte Fehlerisolierung -> kein Gradio-Absturz
|
|
|
|
|
|
|
|
|
|
| 13 |
"""
|
| 14 |
|
| 15 |
import json
|
|
@@ -342,8 +345,6 @@ def email_generate_async(token_id: str, variables: dict, items: List[dict]) -> d
|
|
| 342 |
if not items:
|
| 343 |
raise ValueError("items fehlt (mindestens 1 Lead erforderlich).")
|
| 344 |
|
| 345 |
-
# Nur die Keys weiterreichen, die dein Backend tatsächlich kennt/erwartet.
|
| 346 |
-
# Wir filtern NICHT um – wir vertrauen der übergebenen payload (keine Fantasie-Felder).
|
| 347 |
root_allowed = {
|
| 348 |
"Produkt_und_Dienstleistungsbeschreibung",
|
| 349 |
"CTA",
|
|
@@ -356,14 +357,11 @@ def email_generate_async(token_id: str, variables: dict, items: List[dict]) -> d
|
|
| 356 |
|
| 357 |
safe_vars = {}
|
| 358 |
for k, v in (variables or {}).items():
|
| 359 |
-
# 1:1 durchlassen für die bekannten Felder
|
| 360 |
if k in root_allowed:
|
| 361 |
safe_vars[k] = v
|
| 362 |
|
| 363 |
-
# Payload für den echten Endpoint: Root-Variablen + Items
|
| 364 |
start_body = {**safe_vars, "items": items, "item_index": 0}
|
| 365 |
|
| 366 |
-
# Debug-Log: zeigen, welche Keys wir wirklich senden (inkl. Signatur)
|
| 367 |
try:
|
| 368 |
sent_keys = sorted(list(start_body.keys()))
|
| 369 |
sig_preview = str(start_body.get("Signatur") or "")[:120]
|
|
@@ -372,7 +370,6 @@ def email_generate_async(token_id: str, variables: dict, items: List[dict]) -> d
|
|
| 372 |
except Exception:
|
| 373 |
pass
|
| 374 |
|
| 375 |
-
# Async-Start & Poll
|
| 376 |
res_job_id = start_async_job(
|
| 377 |
url=f"{LEAD_BASE_URL}/email/generate?async=1",
|
| 378 |
body=start_body,
|
|
@@ -385,7 +382,6 @@ def email_generate_async(token_id: str, variables: dict, items: List[dict]) -> d
|
|
| 385 |
session=GLOBAL_SES,
|
| 386 |
)
|
| 387 |
|
| 388 |
-
# Ergebnis 1:1 auswerten – nur echte Felder
|
| 389 |
raw = unwrap_result(st) or {}
|
| 390 |
results = raw.get("results") if isinstance(raw, dict) else None
|
| 391 |
first = (results[0] if isinstance(results, list) and results else {}) or {}
|
|
@@ -412,27 +408,60 @@ def wholix_login(email: str, password: str) -> str:
|
|
| 412 |
raise RuntimeError("Wholix-Login fehlgeschlagen.")
|
| 413 |
return token
|
| 414 |
|
| 415 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
"""
|
| 417 |
-
Sendet NUR erlaubte Felder an Wholix und saniert problematische Werte
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
- Leere Strings -> None und weglassen
|
| 422 |
"""
|
| 423 |
import urllib.parse as _urlparse
|
| 424 |
|
| 425 |
if not isinstance(record, dict):
|
| 426 |
raise ValueError("Wholix: record muss ein dict sein.")
|
| 427 |
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
|
| 432 |
ALLOWED = {
|
| 433 |
"firstname",
|
| 434 |
"lastname",
|
| 435 |
-
"email", # Pflichtfeld
|
| 436 |
"adress", # (sic) genau so
|
| 437 |
"city",
|
| 438 |
"postcode",
|
|
@@ -451,12 +480,6 @@ def wholix_store_contact(token: str, record: dict, module: str = "Contacts") ->
|
|
| 451 |
"tags", # Multi-Select: {keys:[], values:[]}
|
| 452 |
}
|
| 453 |
|
| 454 |
-
def _clean_str(v):
|
| 455 |
-
if v is None:
|
| 456 |
-
return None
|
| 457 |
-
s = str(v).strip()
|
| 458 |
-
return s if s else None
|
| 459 |
-
|
| 460 |
def _coerce_departments(v):
|
| 461 |
if v is None:
|
| 462 |
return None
|
|
@@ -464,7 +487,6 @@ def wholix_store_contact(token: str, record: dict, module: str = "Contacts") ->
|
|
| 464 |
v = ", ".join(str(x).strip() for x in v if str(x).strip())
|
| 465 |
else:
|
| 466 |
v = str(v).strip()
|
| 467 |
-
# String wie "[Marketing]" -> "Marketing"
|
| 468 |
if v.startswith("[") and v.endswith("]"):
|
| 469 |
v = v[1:-1].strip().strip("'\"")
|
| 470 |
return _clean_str(v)
|
|
@@ -494,7 +516,6 @@ def wholix_store_contact(token: str, record: dict, module: str = "Contacts") ->
|
|
| 494 |
elif k in ("linkedin_url", "company_url"):
|
| 495 |
val = _normalize_url(val)
|
| 496 |
elif k in ("status_field", "tags"):
|
| 497 |
-
# nur korrektes Schema durchlassen
|
| 498 |
if not (isinstance(val, dict) and "keys" in val and "values" in val):
|
| 499 |
val = None
|
| 500 |
else:
|
|
@@ -503,7 +524,7 @@ def wholix_store_contact(token: str, record: dict, module: str = "Contacts") ->
|
|
| 503 |
if val is not None:
|
| 504 |
out[k] = val
|
| 505 |
|
| 506 |
-
# Pflichtfeld sicher
|
| 507 |
out["email"] = _clean_str(email)
|
| 508 |
|
| 509 |
url = f"{WHOLIX_BASE_URL}/api/v1/table-object-data/store-objects"
|
|
@@ -615,8 +636,8 @@ def _gc_jobs():
|
|
| 615 |
|
| 616 |
LEAD_COUNTS = [1, 2, 3, 4, 5, 10, 15, 20, 40, 80, 100, 200, 300, 400, 500, 1000]
|
| 617 |
|
| 618 |
-
CURL_DATA_RE = re.compile(r"""--data(?:-raw)?\s+(?P<q>['"])(?P<body>.*?)(?P=q)""", re.DOTALL)
|
| 619 |
-
HDR_XTOKEN_RE = re.compile(r"""-H\s+(?P<q>['"])X-Token-Id:\s*(?P<val>[^'"]+)(?P=q)""", re.IGNORECASE)
|
| 620 |
|
| 621 |
def _find_data_quote_start(s: str) -> Tuple[int, Optional[str]]:
|
| 622 |
m = re.search(r"--data(?:-raw)?\s+(['\"])", s)
|
|
@@ -653,14 +674,11 @@ def parse_curl(curl_text: str) -> Tuple[str, Dict[str, Any]]:
|
|
| 653 |
if not isinstance(curl_text, str) or not curl_text.strip():
|
| 654 |
raise ValueError("Leerer curl-Text.")
|
| 655 |
|
| 656 |
-
# 1) Token aus Header holen (robust für einfache/doppelte Quotes)
|
| 657 |
token_id = ""
|
| 658 |
-
# -H 'X-Token-Id: abc' oder -H "X-Token-Id: abc"
|
| 659 |
m = re.search(r"""-H\s+(["'])X-Token-Id:\s*([^"']+)\1""", curl_text, re.IGNORECASE)
|
| 660 |
if m:
|
| 661 |
token_id = m.group(2).strip()
|
| 662 |
|
| 663 |
-
# fallback: unquoted header
|
| 664 |
if not token_id:
|
| 665 |
m2 = re.search(r"""X-Token-Id:\s*([A-Za-z0-9\-\._]+)""", curl_text, re.IGNORECASE)
|
| 666 |
if m2:
|
|
@@ -673,19 +691,16 @@ def parse_curl(curl_text: str) -> Tuple[str, Dict[str, Any]]:
|
|
| 673 |
if not token_id:
|
| 674 |
raise ValueError("Konnte keinen X-Token-Id Header im curl (oder env X_TOKEN_ID) finden.")
|
| 675 |
|
| 676 |
-
|
| 677 |
-
qmatch = re.search(r"""--data(?:-raw)?\s+(['"])""", curl_text)
|
| 678 |
if not qmatch:
|
| 679 |
raise ValueError("Konnte den JSON Body nicht finden (erwarte --data-raw '...').")
|
| 680 |
quote = qmatch.group(1)
|
| 681 |
start = qmatch.end(1)
|
| 682 |
|
| 683 |
-
# Payload bis zum passenden schließenden Quote lesen (beachtet Escapes)
|
| 684 |
out = []
|
| 685 |
i = start
|
| 686 |
n = len(curl_text)
|
| 687 |
if quote == "'":
|
| 688 |
-
# Bash-Rule: in single quotes sind nur Sequenzen '\"'\"' als eingebettetes einzelnes '
|
| 689 |
while i < n:
|
| 690 |
if i + 4 < n and curl_text[i:i+5] == "'\"'\"'":
|
| 691 |
out.append("'")
|
|
@@ -697,7 +712,6 @@ def parse_curl(curl_text: str) -> Tuple[str, Dict[str, Any]]:
|
|
| 697 |
out.append(ch)
|
| 698 |
i += 1
|
| 699 |
else:
|
| 700 |
-
# Doppel-Quotes: Backslashes beachten
|
| 701 |
while i < n:
|
| 702 |
ch = curl_text[i]
|
| 703 |
if ch == "\\" and i + 1 < n:
|
|
@@ -710,11 +724,9 @@ def parse_curl(curl_text: str) -> Tuple[str, Dict[str, Any]]:
|
|
| 710 |
i += 1
|
| 711 |
|
| 712 |
body_str = "".join(out).strip()
|
| 713 |
-
# 3) JSON laden (ohne Felder zu „erdenken“)
|
| 714 |
try:
|
| 715 |
payload = json.loads(body_str)
|
| 716 |
except json.JSONDecodeError:
|
| 717 |
-
# CRLF -> LF und nochmal versuchen
|
| 718 |
payload = json.loads(body_str.replace("\r\n", "\n").replace("\r", "\n"))
|
| 719 |
|
| 720 |
return token_id, payload
|
|
@@ -801,18 +813,11 @@ def run_pipeline_bg(job_id: str, curl_text: str, n_leads_ui: int):
|
|
| 801 |
return None
|
| 802 |
|
| 803 |
def norm_tags(raw) -> dict:
|
| 804 |
-
"""
|
| 805 |
-
Multi-Select für Wholix: {keys:[…],values:[…]}
|
| 806 |
-
- String "[AI]" -> "AI"
|
| 807 |
-
- String "AI" -> "AI"
|
| 808 |
-
- Liste ["AI","X"] -> entsprechend erweitern
|
| 809 |
-
"""
|
| 810 |
if raw is None:
|
| 811 |
return {"keys": [], "values": []}
|
| 812 |
vals: List[str] = []
|
| 813 |
if isinstance(raw, str):
|
| 814 |
s = raw.strip()
|
| 815 |
-
# wenn JSON-Array als String übergeben wurde
|
| 816 |
if (s.startswith("[") and s.endswith("]")):
|
| 817 |
try:
|
| 818 |
arr = json.loads(s)
|
|
@@ -828,8 +833,6 @@ def run_pipeline_bg(job_id: str, curl_text: str, n_leads_ui: int):
|
|
| 828 |
vals = [str(x).strip() for x in raw if str(x).strip()]
|
| 829 |
else:
|
| 830 |
vals = [str(raw).strip()]
|
| 831 |
-
|
| 832 |
-
# leere filtern
|
| 833 |
vals = [v for v in vals if v]
|
| 834 |
return {"keys": vals, "values": vals}
|
| 835 |
|
|
@@ -913,75 +916,69 @@ def run_pipeline_bg(job_id: str, curl_text: str, n_leads_ui: int):
|
|
| 913 |
_job_emit(job_id, f" ✉️ Subject: {subj_flat}")
|
| 914 |
_job_emit(job_id, f" ✉️ Message: {body_flat}")
|
| 915 |
|
| 916 |
-
# 4) Speichern in Wholix
|
| 917 |
step += 1
|
| 918 |
_job_emit(job_id, " → Speichere in Wholix …", progress=int(step / total_steps * 100))
|
| 919 |
|
| 920 |
email_to = str(person.get("email") or "").strip()
|
| 921 |
-
if not email_to:
|
| 922 |
-
_job_emit(job_id, " ⚠️ Keine E-Mail vorhanden – Speichern übersprungen.")
|
| 923 |
-
stored_ok = False
|
| 924 |
-
else:
|
| 925 |
-
try:
|
| 926 |
-
# --- Felder befüllen NUR wenn real vorhanden -----------------
|
| 927 |
-
firstname = pick(person, "first_name")
|
| 928 |
-
lastname = pick(person, "last_name")
|
| 929 |
-
job_title = pick(person, "job_title", "title", "position")
|
| 930 |
-
departments = norm_departments(pick(person, "departments"))
|
| 931 |
-
linkedin = pick(person, "linkedin_url")
|
| 932 |
-
|
| 933 |
-
# Kontakt-Adressdaten: erst person, dann company (falls vorhanden)
|
| 934 |
-
phonenumber = pick(person, "phone", "phonenumber") or pick(company, "phone", "company_phone", "phonenumber")
|
| 935 |
-
adress = pick(person, "adress", "address", "street") or pick(company, "adress", "address", "street")
|
| 936 |
-
city = pick(person, "city") or pick(company, "city")
|
| 937 |
-
postcode = pick(person, "postcode", "postal_code", "zip") or pick(company, "postcode", "postal_code", "zip")
|
| 938 |
-
|
| 939 |
-
company_name = pick(company, "name", "company_name")
|
| 940 |
-
company_url = norm_url(pick(company, "url", "website_url", "website", "domain"))
|
| 941 |
-
|
| 942 |
-
# Tags korrekt als Multi-Select (Dropdown-Wert ohne [])
|
| 943 |
-
tags_ms = norm_tags(raw_tag)
|
| 944 |
-
|
| 945 |
-
record = {
|
| 946 |
-
"firstname": firstname,
|
| 947 |
-
"lastname": lastname,
|
| 948 |
-
"email": email_to,
|
| 949 |
-
"phonenumber": phonenumber,
|
| 950 |
-
"adress": adress,
|
| 951 |
-
"city": city,
|
| 952 |
-
"postcode": postcode,
|
| 953 |
-
|
| 954 |
-
"job_title": job_title,
|
| 955 |
-
"departments": departments,
|
| 956 |
-
"linkedin_url": linkedin,
|
| 957 |
-
|
| 958 |
-
"company_name": company_name,
|
| 959 |
-
"company_url": company_url,
|
| 960 |
-
|
| 961 |
-
"message_mail_subject": subj_raw or None,
|
| 962 |
-
"message_mail": body_raw or None,
|
| 963 |
-
"message_followup1": draft.get("followup1") or None,
|
| 964 |
-
"message_followup2": draft.get("followup2") or None,
|
| 965 |
-
|
| 966 |
-
"exclude_hash": combined_id or None,
|
| 967 |
-
|
| 968 |
-
"status_field": {"keys": ["Kontakt aufgenommen"], "values": ["Kontakt aufgenommen"]},
|
| 969 |
-
"tags": tags_ms,
|
| 970 |
-
}
|
| 971 |
-
|
| 972 |
-
# Log: kurze Vorschau der wichtigsten Felder
|
| 973 |
-
try:
|
| 974 |
-
APP_LOG.info(f"Store-> firstname={firstname} lastname={lastname} email={email_to} job_title={job_title} dept={departments} phone={phonenumber} city={city} postcode={postcode} company_url={company_url}")
|
| 975 |
-
APP_LOG.info(f"Store-> tags={tags_ms}")
|
| 976 |
-
except Exception:
|
| 977 |
-
pass
|
| 978 |
|
| 979 |
-
|
| 980 |
-
|
| 981 |
-
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 985 |
|
| 986 |
if combined_id:
|
| 987 |
excludes_set.add(combined_id)
|
|
@@ -989,10 +986,10 @@ def run_pipeline_bg(job_id: str, curl_text: str, n_leads_ui: int):
|
|
| 989 |
|
| 990 |
_job_emit(job_id, rows_append={
|
| 991 |
"person": f"{person.get('first_name','')} {person.get('last_name','')}".strip(),
|
| 992 |
-
"email": email_to,
|
| 993 |
"company": company.get("name") or company.get("company_name") or "",
|
| 994 |
"subject": subj_flat,
|
| 995 |
-
"message": body_flat,
|
| 996 |
"stored_ok": stored_ok,
|
| 997 |
})
|
| 998 |
|
|
|
|
| 10 |
- Logging in Konsole + logs/<job_id>.log
|
| 11 |
- FIX: exclude_hash richtig laden/berücksichtigen (paginierte Wholix-Suche)
|
| 12 |
- FIX: pro Lead harte Fehlerisolierung -> kein Gradio-Absturz
|
| 13 |
+
- NEU: Speichert IMMER in Wholix. Falls keine echte E-Mail vorhanden ist,
|
| 14 |
+
wird eine stabile Platzhalter-Adresse generiert (no-email.invalid)
|
| 15 |
+
und ein Tag "no-email" gesetzt.
|
| 16 |
"""
|
| 17 |
|
| 18 |
import json
|
|
|
|
| 345 |
if not items:
|
| 346 |
raise ValueError("items fehlt (mindestens 1 Lead erforderlich).")
|
| 347 |
|
|
|
|
|
|
|
| 348 |
root_allowed = {
|
| 349 |
"Produkt_und_Dienstleistungsbeschreibung",
|
| 350 |
"CTA",
|
|
|
|
| 357 |
|
| 358 |
safe_vars = {}
|
| 359 |
for k, v in (variables or {}).items():
|
|
|
|
| 360 |
if k in root_allowed:
|
| 361 |
safe_vars[k] = v
|
| 362 |
|
|
|
|
| 363 |
start_body = {**safe_vars, "items": items, "item_index": 0}
|
| 364 |
|
|
|
|
| 365 |
try:
|
| 366 |
sent_keys = sorted(list(start_body.keys()))
|
| 367 |
sig_preview = str(start_body.get("Signatur") or "")[:120]
|
|
|
|
| 370 |
except Exception:
|
| 371 |
pass
|
| 372 |
|
|
|
|
| 373 |
res_job_id = start_async_job(
|
| 374 |
url=f"{LEAD_BASE_URL}/email/generate?async=1",
|
| 375 |
body=start_body,
|
|
|
|
| 382 |
session=GLOBAL_SES,
|
| 383 |
)
|
| 384 |
|
|
|
|
| 385 |
raw = unwrap_result(st) or {}
|
| 386 |
results = raw.get("results") if isinstance(raw, dict) else None
|
| 387 |
first = (results[0] if isinstance(results, list) and results else {}) or {}
|
|
|
|
| 408 |
raise RuntimeError("Wholix-Login fehlgeschlagen.")
|
| 409 |
return token
|
| 410 |
|
| 411 |
+
# ===================== Helfer für Platzhalter-E-Mail =======================
|
| 412 |
+
|
| 413 |
+
def _slug(val: Any, maxlen: int = 24) -> str:
|
| 414 |
+
s = "" if val is None else str(val)
|
| 415 |
+
s = s.lower().strip()
|
| 416 |
+
s = re.sub(r"[^a-z0-9]+", "-", s)
|
| 417 |
+
s = re.sub(r"-+", "-", s).strip("-")
|
| 418 |
+
return s[:maxlen] or "x"
|
| 419 |
+
|
| 420 |
+
|
| 421 |
+
def _make_placeholder_email(record: dict) -> str:
    """Build a placeholder address under ``no-email.invalid`` for *record*.

    The local part is ``<firstname>.<lastname>.<company_name>.<id>``, using
    only those record fields that are non-empty (``lead`` if none is).
    ``<id>`` is the slugged ``exclude_hash``; when that is missing a random
    8-character hex token is used, so the address is not reproducible in
    that case.
    """
    parts = []
    for key in ("firstname", "lastname", "company_name"):
        value = record.get(key)
        # Skip empty fields up front: _slug() maps empty input to "x", which
        # would otherwise appear as a bogus name component and make the
        # "lead" fallback below unreachable.
        if value is not None and str(value).strip():
            parts.append(_slug(value))
    cid = _slug(record.get("exclude_hash") or uuid.uuid4().hex[:8], maxlen=16)
    local = ".".join(parts).replace("-", ".") or "lead"
    # Collapse dot runs, then trim dots *after* truncating so the local part
    # never ends in "." and never produces ".." in front of the id.
    local = re.sub(r"\.+", ".", local)[:48].strip(".") or "lead"
    return f"{local}.{cid}@no-email.invalid"
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
def wholix_store_contact(token: str, record: dict, module: str = "Contacts", allow_placeholder: bool = True) -> dict:
|
| 433 |
"""
|
| 434 |
+
Sendet NUR erlaubte Felder an Wholix und saniert problematische Werte.
|
| 435 |
+
NEU: Wenn keine E-Mail vorhanden ist und allow_placeholder=True,
|
| 436 |
+
wird automatisch eine Platzhalter-Adresse erzeugt (no-email.invalid),
|
| 437 |
+
damit der Datensatz *immer* gespeichert werden kann.
|
|
|
|
| 438 |
"""
|
| 439 |
import urllib.parse as _urlparse
|
| 440 |
|
| 441 |
if not isinstance(record, dict):
|
| 442 |
raise ValueError("Wholix: record muss ein dict sein.")
|
| 443 |
|
| 444 |
+
# E-Mail prüfen / ggf. generieren
|
| 445 |
+
def _clean_str(v):
|
| 446 |
+
if v is None:
|
| 447 |
+
return None
|
| 448 |
+
s = str(v).strip()
|
| 449 |
+
return s if s else None
|
| 450 |
+
|
| 451 |
+
email = _clean_str(record.get("email"))
|
| 452 |
+
if not email and allow_placeholder:
|
| 453 |
+
email = _make_placeholder_email(record)
|
| 454 |
+
record["email"] = email
|
| 455 |
+
# Kennzeichnungs-Tag ergänzen (wird später sauber normiert)
|
| 456 |
+
if isinstance(record.get("tags"), dict):
|
| 457 |
+
keys = list({*record["tags"].get("keys", []), "no-email"})
|
| 458 |
+
vals = list({*record["tags"].get("values", []), "no-email"})
|
| 459 |
+
record["tags"] = {"keys": keys, "values": vals}
|
| 460 |
|
| 461 |
ALLOWED = {
|
| 462 |
"firstname",
|
| 463 |
"lastname",
|
| 464 |
+
"email", # Pflichtfeld (ggf. Platzhalter)
|
| 465 |
"adress", # (sic) genau so
|
| 466 |
"city",
|
| 467 |
"postcode",
|
|
|
|
| 480 |
"tags", # Multi-Select: {keys:[], values:[]}
|
| 481 |
}
|
| 482 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
def _coerce_departments(v):
|
| 484 |
if v is None:
|
| 485 |
return None
|
|
|
|
| 487 |
v = ", ".join(str(x).strip() for x in v if str(x).strip())
|
| 488 |
else:
|
| 489 |
v = str(v).strip()
|
|
|
|
| 490 |
if v.startswith("[") and v.endswith("]"):
|
| 491 |
v = v[1:-1].strip().strip("'\"")
|
| 492 |
return _clean_str(v)
|
|
|
|
| 516 |
elif k in ("linkedin_url", "company_url"):
|
| 517 |
val = _normalize_url(val)
|
| 518 |
elif k in ("status_field", "tags"):
|
|
|
|
| 519 |
if not (isinstance(val, dict) and "keys" in val and "values" in val):
|
| 520 |
val = None
|
| 521 |
else:
|
|
|
|
| 524 |
if val is not None:
|
| 525 |
out[k] = val
|
| 526 |
|
| 527 |
+
# Pflichtfeld sicher (jetzt inkl. Platzhalter möglich)
|
| 528 |
out["email"] = _clean_str(email)
|
| 529 |
|
| 530 |
url = f"{WHOLIX_BASE_URL}/api/v1/table-object-data/store-objects"
|
|
|
|
| 636 |
|
| 637 |
LEAD_COUNTS = [1, 2, 3, 4, 5, 10, 15, 20, 40, 80, 100, 200, 300, 400, 500, 1000]
|
| 638 |
|
| 639 |
+
CURL_DATA_RE = re.compile(r"""--data(?:-raw)?\s+(?P<q>['\"])(?P<body>.*?)(?P=q)""", re.DOTALL)
|
| 640 |
+
HDR_XTOKEN_RE = re.compile(r"""-H\s+(?P<q>['\"])X-Token-Id:\s*(?P<val>[^'\"]+)(?P=q)""", re.IGNORECASE)
|
| 641 |
|
| 642 |
def _find_data_quote_start(s: str) -> Tuple[int, Optional[str]]:
|
| 643 |
m = re.search(r"--data(?:-raw)?\s+(['\"])", s)
|
|
|
|
| 674 |
if not isinstance(curl_text, str) or not curl_text.strip():
|
| 675 |
raise ValueError("Leerer curl-Text.")
|
| 676 |
|
|
|
|
| 677 |
token_id = ""
|
|
|
|
| 678 |
m = re.search(r"""-H\s+(["'])X-Token-Id:\s*([^"']+)\1""", curl_text, re.IGNORECASE)
|
| 679 |
if m:
|
| 680 |
token_id = m.group(2).strip()
|
| 681 |
|
|
|
|
| 682 |
if not token_id:
|
| 683 |
m2 = re.search(r"""X-Token-Id:\s*([A-Za-z0-9\-\._]+)""", curl_text, re.IGNORECASE)
|
| 684 |
if m2:
|
|
|
|
| 691 |
if not token_id:
|
| 692 |
raise ValueError("Konnte keinen X-Token-Id Header im curl (oder env X_TOKEN_ID) finden.")
|
| 693 |
|
| 694 |
+
qmatch = re.search(r"""--data(?:-raw)?\s+(['\"])""", curl_text)
|
|
|
|
| 695 |
if not qmatch:
|
| 696 |
raise ValueError("Konnte den JSON Body nicht finden (erwarte --data-raw '...').")
|
| 697 |
quote = qmatch.group(1)
|
| 698 |
start = qmatch.end(1)
|
| 699 |
|
|
|
|
| 700 |
out = []
|
| 701 |
i = start
|
| 702 |
n = len(curl_text)
|
| 703 |
if quote == "'":
|
|
|
|
| 704 |
while i < n:
|
| 705 |
if i + 4 < n and curl_text[i:i+5] == "'\"'\"'":
|
| 706 |
out.append("'")
|
|
|
|
| 712 |
out.append(ch)
|
| 713 |
i += 1
|
| 714 |
else:
|
|
|
|
| 715 |
while i < n:
|
| 716 |
ch = curl_text[i]
|
| 717 |
if ch == "\\" and i + 1 < n:
|
|
|
|
| 724 |
i += 1
|
| 725 |
|
| 726 |
body_str = "".join(out).strip()
|
|
|
|
| 727 |
try:
|
| 728 |
payload = json.loads(body_str)
|
| 729 |
except json.JSONDecodeError:
|
|
|
|
| 730 |
payload = json.loads(body_str.replace("\r\n", "\n").replace("\r", "\n"))
|
| 731 |
|
| 732 |
return token_id, payload
|
|
|
|
| 813 |
return None
|
| 814 |
|
| 815 |
def norm_tags(raw) -> dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 816 |
if raw is None:
|
| 817 |
return {"keys": [], "values": []}
|
| 818 |
vals: List[str] = []
|
| 819 |
if isinstance(raw, str):
|
| 820 |
s = raw.strip()
|
|
|
|
| 821 |
if (s.startswith("[") and s.endswith("]")):
|
| 822 |
try:
|
| 823 |
arr = json.loads(s)
|
|
|
|
| 833 |
vals = [str(x).strip() for x in raw if str(x).strip()]
|
| 834 |
else:
|
| 835 |
vals = [str(raw).strip()]
|
|
|
|
|
|
|
| 836 |
vals = [v for v in vals if v]
|
| 837 |
return {"keys": vals, "values": vals}
|
| 838 |
|
|
|
|
| 916 |
_job_emit(job_id, f" ✉️ Subject: {subj_flat}")
|
| 917 |
_job_emit(job_id, f" ✉️ Message: {body_flat}")
|
| 918 |
|
| 919 |
+
# 4) Speichern in Wholix (IMMER – auch ohne echte E-Mail)
|
| 920 |
step += 1
|
| 921 |
_job_emit(job_id, " → Speichere in Wholix …", progress=int(step / total_steps * 100))
|
| 922 |
|
| 923 |
email_to = str(person.get("email") or "").strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 924 |
|
| 925 |
+
try:
|
| 926 |
+
# --- Felder befüllen NUR wenn real vorhanden -----------------
|
| 927 |
+
firstname = pick(person, "first_name")
|
| 928 |
+
lastname = pick(person, "last_name")
|
| 929 |
+
job_title = pick(person, "job_title", "title", "position")
|
| 930 |
+
departments = norm_departments(pick(person, "departments"))
|
| 931 |
+
linkedin = pick(person, "linkedin_url")
|
| 932 |
+
|
| 933 |
+
phonenumber = pick(person, "phone", "phonenumber") or pick(company, "phone", "company_phone", "phonenumber")
|
| 934 |
+
adress = pick(person, "adress", "address", "street") or pick(company, "adress", "address", "street")
|
| 935 |
+
city = pick(person, "city") or pick(company, "city")
|
| 936 |
+
postcode = pick(person, "postcode", "postal_code", "zip") or pick(company, "postcode", "postal_code", "zip")
|
| 937 |
+
|
| 938 |
+
company_name = pick(company, "name", "company_name")
|
| 939 |
+
company_url = norm_url(pick(company, "url", "website_url", "website", "domain"))
|
| 940 |
+
|
| 941 |
+
tags_ms = norm_tags(raw_tag)
|
| 942 |
+
# Wenn keine echte E-Mail: Tag ergänzen
|
| 943 |
+
if not email_to:
|
| 944 |
+
tags_ms = {"keys": list({*tags_ms.get("keys", []), "no-email"}),
|
| 945 |
+
"values": list({*tags_ms.get("values", []), "no-email"})}
|
| 946 |
+
|
| 947 |
+
record = {
|
| 948 |
+
"firstname": firstname,
|
| 949 |
+
"lastname": lastname,
|
| 950 |
+
"email": email_to or None, # kann None sein → Platzhalter wird im Store erzeugt
|
| 951 |
+
"phonenumber": phonenumber,
|
| 952 |
+
"adress": adress,
|
| 953 |
+
"city": city,
|
| 954 |
+
"postcode": postcode,
|
| 955 |
+
|
| 956 |
+
"job_title": job_title,
|
| 957 |
+
"departments": departments,
|
| 958 |
+
"linkedin_url": linkedin,
|
| 959 |
+
|
| 960 |
+
"company_name": company_name,
|
| 961 |
+
"company_url": company_url,
|
| 962 |
+
|
| 963 |
+
"message_mail_subject": subj_raw or None,
|
| 964 |
+
"message_mail": body_raw or None,
|
| 965 |
+
"message_followup1": draft.get("followup1") or None,
|
| 966 |
+
"message_followup2": draft.get("followup2") or None,
|
| 967 |
+
|
| 968 |
+
"exclude_hash": combined_id or None,
|
| 969 |
+
|
| 970 |
+
"status_field": {"keys": ["Kontakt aufgenommen"], "values": ["Kontakt aufgenommen"]},
|
| 971 |
+
"tags": tags_ms,
|
| 972 |
+
}
|
| 973 |
+
|
| 974 |
+
store_res = wholix_store_contact(wh_token, record, allow_placeholder=True)
|
| 975 |
+
stored_ok = bool(store_res)
|
| 976 |
+
if not email_to:
|
| 977 |
+
_job_emit(job_id, " ℹ️ Keine echte E-Mail – Platzhalter gespeichert und Tag 'no-email' gesetzt.")
|
| 978 |
+
except Exception:
|
| 979 |
+
logging.exception(f"[{job_id}] Wholix-Store-Fehler")
|
| 980 |
+
_job_emit(job_id, "❌ Wholix-Store-Fehler: siehe Logs")
|
| 981 |
+
stored_ok = False
|
| 982 |
|
| 983 |
if combined_id:
|
| 984 |
excludes_set.add(combined_id)
|
|
|
|
| 986 |
|
| 987 |
_job_emit(job_id, rows_append={
|
| 988 |
"person": f"{person.get('first_name','')} {person.get('last_name','')}".strip(),
|
| 989 |
+
"email": email_to or "(placeholder)",
|
| 990 |
"company": company.get("name") or company.get("company_name") or "",
|
| 991 |
"subject": subj_flat,
|
| 992 |
+
"message": body_flat,
|
| 993 |
"stored_ok": stored_ok,
|
| 994 |
})
|
| 995 |
|