MichaelWelsch commited on
Commit
c1c5ea5
·
verified ·
1 Parent(s): f4915e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +886 -724
app.py CHANGED
@@ -2,861 +2,1031 @@
2
  # -*- coding: utf-8 -*-
3
 
4
  """
5
- Gradio UI (robust gegen kurzzeitigen Browser-Verbindungsverlust):
6
- - Startet einen Hintergrund-Job und speichert Status/Progress/Ergebnisse per job_id.
7
- - UI pollt den Status (Button „Aktualisieren“), kein Streaming/Yield mehr.
8
-
9
- Run:
10
- pip install gradio requests
11
- python app.py
 
12
  """
13
 
14
  import json
15
- import math
16
  import os
17
  import random
18
  import re
19
  import time
20
- import threading
21
  import uuid
 
22
  from concurrent.futures import ThreadPoolExecutor
23
- from typing import Any, Dict, List, Optional, Tuple
 
 
 
 
24
 
25
  import gradio as gr
26
  import requests
 
 
 
27
 
28
  LEAD_BASE_URL = "https://wholixleadgenbackend.ngrok.io"
29
  WHOLIX_BASE_URL = "https://api.wholix.ai"
30
 
31
- # ============================== HTTP helper ===============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  class HTTPError(Exception):
34
- def __init__(self, status: int, url: str, body: Any):
35
- body_str = body if isinstance(body, str) else json.dumps(body, ensure_ascii=False)
36
- super().__init__(f"HTTP {status} on {url}: {body_str[:500]}")
 
 
 
 
 
 
37
  self.status = status
38
  self.url = url
39
  self.body = body
 
40
 
41
  def _is_json_ct(ct: str) -> bool:
42
  return bool(ct and ("application/json" in ct or ct.endswith("+json")))
43
 
 
 
 
 
 
 
 
44
  def req(
45
  url: str,
46
  method: str = "GET",
47
  headers: Optional[Dict[str, str]] = None,
48
  json_body: Any = None,
49
  data: Any = None,
50
- timeout: float = 60.0,
51
- retries: int = 2,
52
- retry_on: Tuple[int, ...] = (408, 425, 429, 500, 502, 503, 504, 520, 522, 524),
53
  session: Optional[requests.Session] = None,
54
- ):
55
- s = session or requests.Session()
56
- attempt = 0
57
- while True:
58
- try:
59
- r = s.request(
60
- method=method.upper(),
61
- url=url,
62
- headers=headers,
63
- json=json_body,
64
- data=data,
65
- timeout=timeout,
66
- )
67
- ct = r.headers.get("content-type", "")
68
- text = r.text or ""
69
- if not r.ok:
 
 
70
  body = text
71
- try:
72
- if _is_json_ct(ct):
73
- body = r.json()
74
- except Exception:
75
- pass
76
- raise HTTPError(r.status_code, url, body)
77
- if text == "":
78
- return None
79
- if _is_json_ct(ct) or (not ct and text.strip().startswith(("{", "["))):
80
- try:
81
- return r.json()
82
- except Exception:
83
- repaired = text.replace("NaN", "null").replace("Infinity", "null").replace("-Infinity", "null")
84
- return json.loads(repaired)
85
- return text
86
- except (requests.Timeout, requests.ConnectionError, HTTPError) as e:
87
- code = e.status if isinstance(e, HTTPError) else 0
88
- should = attempt < retries and (code in retry_on or code == 0)
89
- if not should:
90
- raise
91
- attempt += 1
92
- base = min(2.5, 0.4 * (2 ** attempt)) # full-jitter
93
- delay = random.random() * base
94
- time.sleep(delay)
95
-
96
- # ============================= Curl parsing ===============================
97
-
98
- CURL_DATA_RE = re.compile(
99
- r"""--data(?:-raw)?\s+(?P<quote>['"])(?P<body>.*?)(?P=quote)""",
100
- re.DOTALL
101
- )
102
- HDR_XTOKEN_RE = re.compile(r"""-H\s+(?P<q>['"])X-Token-Id:\s*(?P<val>[^'"]+)(?P=q)""", re.IGNORECASE)
103
-
104
- def parse_curl(curl_text: str) -> Tuple[str, Dict[str, Any]]:
105
- """
106
- Extract X-Token-Id and JSON body from a curl command.
107
- """
108
- token_id = ""
109
- body_str = ""
110
-
111
- m = HDR_XTOKEN_RE.search(curl_text)
112
- if m:
113
- token_id = m.group("val").strip()
114
-
115
- if not token_id:
116
- hdr_inline = re.search(r"X-Token-Id:\s*([a-zA-Z0-9\-\._]+)", curl_text, re.IGNORECASE)
117
- if hdr_inline:
118
- token_id = hdr_inline.group(1).strip()
119
-
120
- md = CURL_DATA_RE.search(curl_text)
121
- if md:
122
- body_str = md.group("body").strip()
123
- else:
124
- md2 = re.search(r"-d\s+(?P<q>['\"])(?P<body>.*?)(?P=q)", curl_text, re.DOTALL)
125
- if md2:
126
- body_str = md2.group("body").strip()
127
 
128
- if not body_str:
129
- raise ValueError("Konnte den JSON Body aus dem curl nicht finden (erwarte --data-raw '...').")
130
 
 
131
  try:
132
- payload = json.loads(body_str)
133
- except json.JSONDecodeError:
134
- candidate = body_str.replace("\r\n", "\n").replace("\r", "\n")
135
- payload = json.loads(candidate)
136
-
137
- if not token_id:
138
- env_token = os.getenv("X_TOKEN_ID", "").strip()
139
- if env_token:
140
- token_id = env_token
141
-
142
- if not token_id:
143
- raise ValueError("Konnte keinen X-Token-Id Header im curl (oder env X_TOKEN_ID) finden.")
144
-
145
- return token_id, payload
146
-
147
- # ============================ Wholix helpers ==============================
148
 
149
- def wholix_login(email: str, password: str) -> str:
150
- url = f"{WHOLIX_BASE_URL}/api/v1/auth/login"
151
- res = req(url, method="POST", json_body={"email": email, "password": password}, timeout=30)
152
- token = (res or {}).get("token")
153
- if not token:
154
- raise RuntimeError("Wholix-Login fehlgeschlagen: kein token in der Response.")
155
- return token
156
 
157
- def wholix_fetch_excludes(token: str, module: str = "Contacts", per_page: int = 500, max_pages: int = 100) -> List[str]:
158
- url = f"{WHOLIX_BASE_URL}/api/v1/table-object-data/fetch-paginated-results"
159
- headers = {"Authorization": f"Bearer {token}"}
160
- exclude_hashes: List[str] = []
161
- seen = set()
162
- last_page = math.inf
163
- session = requests.Session()
164
 
165
- for page in range(1, max_pages + 1):
166
- if page > last_page:
167
- break
168
- payload = {"module": module, "action": "search", "page": page, "per_page": per_page}
169
- res = req(url, method="POST", headers=headers, json_body=payload, timeout=60, session=session) or {}
170
- data_block = res.get("data") or {}
171
- rows = data_block.get("data") or data_block.get("results") or []
172
- last_page = data_block.get("last_page", page)
173
- if not rows:
174
- break
175
- for row in rows:
176
- h = str(row.get("exclude_hash") or row.get("excludeHash") or row.get("exclude_id") or "").strip()
177
- if not h or h in seen:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  continue
179
- seen.add(h)
180
- exclude_hashes.append(h)
181
- return exclude_hashes
182
-
183
- # ======================= Lead + Email generation =========================
184
-
185
- def _pick_single(payload: Any) -> Dict[str, Any]:
186
- def unwrap(x):
187
- if isinstance(x, dict) and x.get("ok") and any(k in x for k in ("result", "data", "content")):
188
- return x.get("result") or x.get("data") or x.get("content")
189
- return x
190
- p = unwrap(payload)
191
- if isinstance(p, dict) and "items" in p and isinstance(p["items"], list) and p["items"]:
192
- return p["items"][0]
193
- if isinstance(p, list) and p:
194
- return p[0]
195
- if isinstance(p, dict):
196
- return p
197
- raise RuntimeError("Unerwartete Lead-Payload-Form.")
198
-
199
- def lead_suggest(token_id: str, filters: Dict[str, Any], icp_text: str, exclude_ids: List[str]) -> Dict[str, Any]:
200
- start_url = f"{LEAD_BASE_URL}/lead/suggest?async=1"
201
- headers = {"X-Token-Id": token_id, "Prefer": "respond-async"}
202
- start_body = {"filters": filters, "icp_text": icp_text or "", "exclude_ids": exclude_ids or []}
203
-
204
- try:
205
- start = req(start_url, method="POST", headers=headers, json_body=start_body, timeout=60)
206
- except HTTPError:
207
- start = None
208
 
209
- if not start or not start.get("ok") or not start.get("job_id"):
210
- sync = req(f"{LEAD_BASE_URL}/lead/suggest", method="POST", headers={"X-Token-Id": token_id}, json_body=start_body, timeout=180)
211
- return _pick_single(sync)
212
 
213
- job_id = start["job_id"]
 
 
 
 
 
 
 
 
214
  t0 = time.time()
215
- max_wait = 10 * 60
216
  attempt = 0
 
217
  while True:
218
- st = req(f"{LEAD_BASE_URL}/lead/suggest/status/{job_id}?t={int(time.time()*1000)}",
219
- method="GET", headers={"X-Token-Id": token_id}, timeout=60)
220
- if st and st.get("status") == "done" and st.get("ok"):
221
- return _pick_single(st)
222
- if time.time() - t0 > max_wait:
223
- raise TimeoutError("Timeout beim Warten auf Lead-Ergebnis.")
224
- attempt += 1
225
- time.sleep(min(2.4, 0.6 * attempt))
226
-
227
- def _ci_get(d: Dict[str, Any], key: str) -> Any:
228
- if not isinstance(d, dict):
229
- return None
230
- if key in d and str(d[key]).strip() != "":
231
- return d[key]
232
- k = next((k for k in d.keys() if isinstance(k, str) and k.lower() == key.lower() and str(d[k]).strip() != ""), None)
233
- return d.get(k) if k else None
234
-
235
- def _normalize_draft_result(raw: Any) -> Dict[str, Any]:
236
- r = raw
237
- if isinstance(r, str):
238
  try:
239
- r = json.loads(r)
240
- except Exception:
241
- r = {"Text": r}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
- if isinstance(r, dict) and r.get("ok") and any(k in r for k in ("result", "content", "data")):
244
- r = r.get("result") or r.get("content") or r.get("data") or r
 
 
 
245
 
246
- if isinstance(r, dict) and "message" in r and isinstance(r["message"], dict):
247
- r = r["message"]
248
- elif isinstance(r, dict) and "results" in r and isinstance(r["results"], list) and r["results"]:
249
- cand = r["results"][0]
250
- if isinstance(cand, dict) and "message" in cand and isinstance(cand["message"], dict):
251
- r = cand["message"]
252
- else:
253
- r = cand
254
 
255
- email_obj = r.get("email") if isinstance(r, dict) else {}
256
- messages_obj = r.get("messages") if isinstance(r, dict) else {}
257
-
258
- def pick(*keys):
259
- for src in (r, email_obj, messages_obj):
260
- if isinstance(src, dict):
261
- for k in keys:
262
- v = _ci_get(src, k)
263
- if v is not None and str(v).strip() != "":
264
- return v
265
- return ""
266
 
267
- subject = (pick("Betreff", "subject", "email_subject") or "Kurzer Austausch").strip()
268
- body = (pick("Text", "text", "Body", "body", "email_body", "content") or "").strip()
269
- fu1 = (pick("FollowUp1", "followup1", "LinkedIn", "linkedin", "li") or "").strip()
270
- fu2 = (pick("FollowUp2", "followup2", "Facebook", "facebook", "fb") or "").strip()
271
 
272
- if not body:
273
- body = "Hallo,\n\nkurzer Kontaktaufbau – gerne Austausch, ob wir unterstützen können.\n\nBeste Grüße"
 
 
 
274
 
275
- to = ""
276
- if isinstance(email_obj, dict):
277
- to = str(email_obj.get("to") or "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
279
- return {
280
- "email": {"to": to, "subject": subject, "body": body},
281
- "followup1": fu1,
282
- "followup2": fu2,
283
- }
284
 
285
- def email_generate(token_id: str, variables: Dict[str, Any], items: List[Dict[str, Any]]) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  if not items:
287
- raise ValueError("items ist erforderlich und muss mind. 1 Lead enthalten.")
288
- body = {**variables, "items": items, "item_index": 0}
 
 
 
 
 
 
 
 
 
 
 
289
 
290
- start = None
291
- try:
292
- start = req(f"{LEAD_BASE_URL}/email/generate?async=1",
293
- method="POST",
294
- headers={"X-Token-Id": token_id, "Prefer": "respond-async"},
295
- json_body=body, timeout=60)
296
- except HTTPError:
297
- pass
298
 
299
- if not start or not start.get("ok") or not start.get("job_id"):
300
- res = req(f"{LEAD_BASE_URL}/email/generate",
301
- method="POST",
302
- headers={"X-Token-Id": token_id},
303
- json_body=body, timeout=180)
304
- return _normalize_draft_result(res)
305
 
306
- job_id = start["job_id"]
307
- t0 = time.time()
308
- max_wait = 10 * 60
309
- attempt = 0
310
- while True:
311
- st = req(f"{LEAD_BASE_URL}/email/generate/status/{job_id}?t={int(time.time()*1000)}",
312
- method="GET", headers={"X-Token-Id": token_id}, timeout=60)
313
- if st and st.get("ok") and st.get("status") == "done":
314
- return _normalize_draft_result(st)
315
- if time.time() - t0 > max_wait:
316
- raise TimeoutError("Timeout bei der E-Mail-Generierung.")
317
- attempt += 1
318
- time.sleep(min(2.4, 0.6 * attempt))
319
-
320
- # ======== DROP-IN: Wholix-Mapping & Store (FIXED) ========================
321
-
322
- ALLOWED_FIELDS = {
323
- "firstname",
324
- "lastname",
325
- "email", # Pflichtfeld
326
- "adress", # (sic) exakt so geschrieben
327
- "city",
328
- "postcode",
329
- "phonenumber",
330
- "job_title",
331
- "departments", # Text, KEINE Liste
332
- "linkedin_url",
333
- "company_name",
334
- "company_url",
335
- "message_mail",
336
- "message_mail_subject",
337
- "message_followup1",
338
- "message_followup2",
339
- "exclude_hash",
340
- "status_field", # Multi-Select: { keys:[], values:[] }
341
- "tags", # Multi-Select: { keys:[], values:[] }
342
- }
343
-
344
- def filter_wholix_contact_fields(obj: dict) -> dict:
345
- """
346
- - nur erlaubte Felder
347
- - email immer getrimmt
348
- - Strings getrimmt; leere Werte raus
349
- """
350
- out = {}
351
- for k, v in (obj or {}).items():
352
- if k not in ALLOWED_FIELDS:
353
- continue
354
- if k == "email":
355
- out["email"] = str(v or "").strip()
356
- continue
357
- if v is None:
358
- continue
359
- s = v.strip() if isinstance(v, str) else v
360
- if isinstance(s, str) and s == "":
361
- continue
362
- out[k] = s
363
- return out
364
-
365
- def normalize_wholix_dropdown(val):
366
- """
367
- akzeptiert {keys,values}, Array oder String
368
- → normalisiert zu {keys:[...], values:[...]} oder None
369
- """
370
- if isinstance(val, dict) and ("keys" in val or "values" in val):
371
- ks = [str(x).strip() for x in (val.get("keys") or []) if str(x).strip()]
372
- vs = [str(x).strip() for x in (val.get("values") or []) if str(x).strip()]
373
- if not vs and ks:
374
- vs = ks[:]
375
- return {"keys": ks, "values": vs} if (ks or vs) else None
376
- if isinstance(val, list):
377
- ks = [str(x).strip() for x in val if str(x).strip()]
378
- return {"keys": ks, "values": ks} if ks else None
379
- if isinstance(val, str) and val.strip():
380
- s = val.strip()
381
- return {"keys": [s], "values": [s]}
382
- return None
383
-
384
- # ---------- helpers for robust mapping ----------
385
-
386
- def _first_non_empty(*vals):
387
- for v in vals:
388
- if isinstance(v, str) and v.strip():
389
- return v.strip()
390
- if v not in (None, "", [], {}):
391
- return v
392
- return None
393
-
394
- def _from_ci(d: dict, *keys, default=None):
395
- if not isinstance(d, dict):
396
- return default
397
- for k in keys:
398
- if k in d and str(d[k]).strip() != "":
399
- return d[k]
400
- for dk in d.keys():
401
- if isinstance(dk, str) and dk.lower() == k.lower() and str(d[dk]).strip() != "":
402
- return d[dk]
403
- return default
404
-
405
- def _join_nonempty(parts, sep=" "):
406
- return sep.join([str(x).strip() for x in parts if str(x or "").strip()])
407
-
408
- def _parse_maybe_json_list(value):
409
- """
410
- Accept list, JSON-string list, or bracketed string → return list[str]
411
- """
412
- if value is None:
413
- return []
414
- if isinstance(value, list):
415
- return [str(x).strip() for x in value if str(x).strip()]
416
- s = str(value).strip()
417
- if not s:
418
- return []
419
- # try JSON
420
- if (s.startswith("[") and s.endswith("]")) or (s.startswith("(") and s.endswith(")")):
421
- try:
422
- arr = json.loads(s.replace("(", "[").replace(")", "]"))
423
- if isinstance(arr, list):
424
- return [str(x).strip() for x in arr if str(x).strip()]
425
- except Exception:
426
- # crude fallback
427
- s2 = s.strip("[]()")
428
- parts = [p.strip().strip("'").strip('"') for p in s2.split(",")]
429
- return [p for p in parts if p]
430
- # plain string, maybe delimited
431
- if "," in s:
432
- return [p.strip() for p in s.split(",") if p.strip()]
433
- return [s]
434
-
435
- def _normalize_tag_items(tag_text):
436
- """
437
- Accepts: "AI", "[AI]", "AI, Sales", '["AI","Sales"]' → returns list[str]
438
- """
439
- if isinstance(tag_text, list):
440
- return [str(x).strip() for x in tag_text if str(x).strip()]
441
- if tag_text is None:
442
- return []
443
- s = str(tag_text).strip()
444
- if not s:
445
- return []
446
  try:
447
- if s.startswith("[") and s.endswith("]"):
448
- arr = json.loads(s)
449
- if isinstance(arr, list):
450
- return [str(x).strip() for x in arr if str(x).strip()]
451
  except Exception:
452
  pass
453
- for sep in [",", "|", ";"]:
454
- if sep in s:
455
- return [p.strip() for p in s.split(sep) if p.strip()]
456
- return [s.strip("[]")]
457
-
458
- # ---------- the fixed mapper ----------
459
-
460
- def map_to_wholix_record(lead: dict, draft: dict, tag_text: str = "AI") -> dict:
461
- """
462
- FIXED:
463
- - nutzt jetzt 'draft' für message_mail/subject/followups
464
- - bereinigt departments (keine ["..."] Reste)
465
- - breite Fallbacks für job_title / linkedin_url / adress / city / postcode / company_url / exclude_hash
466
- - Tags: nur Benutzer-Tags, KEINE Departments mehr
467
- """
468
- p = (lead or {}).get("person") or {}
469
- c = (lead or {}).get("company") or {}
470
- m = (lead or {}).get("messages") or {}
471
- ctx = (lead or {}).get("context") or {}
472
-
473
- # --- Email (REQUIRED) ---
474
- email = str(_first_non_empty(p.get("email"), _from_ci(p, "mail", "email_address")) or "").strip()
475
- if not email:
476
- e = ValueError("E-Mail-Adresse fehlt – Wholix benötigt 'email' als Pflichtfeld.")
477
- e.name = "ValidationError"
478
- raise e
479
-
480
- # --- Departments (TEXT) — clean up list-like strings ---
481
- depts_list = _parse_maybe_json_list(p.get("departments"))
482
- departments_txt = ", ".join(depts_list) if depts_list else None
483
-
484
- # --- Company URL with wide fallbacks ---
485
- company_url = _first_non_empty(
486
- c.get("url"), c.get("website"), c.get("domain"),
487
- c.get("homepage_url"), c.get("website_url"), c.get("url_normalized"),
488
- ctx.get("url"), (lead or {}).get("homepage_url"),
489
- )
490
-
491
- # --- Message from generated draft ---
492
- draft = draft or {}
493
- draft_email = draft.get("email") if isinstance(draft, dict) else {}
494
- msg_subject = _first_non_empty(
495
- _from_ci(draft_email, "subject", "email_subject"),
496
- _from_ci(draft, "subject", "email_subject", "Betreff"),
497
- _from_ci(m, "message_mail_subject"),
498
- )
499
- msg_body = _first_non_empty(
500
- _from_ci(draft_email, "body", "text", "content"),
501
- _from_ci(draft, "body", "Text", "content", "email_body"),
502
- _from_ci(m, "message_mail"),
503
- )
504
- followup1 = _first_non_empty(
505
- _from_ci(draft, "followup1", "FollowUp1", "LinkedIn", "linkedin", "li"),
506
- _from_ci(m, "followup1", "message_followup1"),
507
- )
508
- followup2 = _first_non_empty(
509
- _from_ci(draft, "followup2", "FollowUp2", "Facebook", "facebook", "fb"),
510
- _from_ci(m, "followup2", "message_followup2"),
511
- )
512
 
513
- # --- Address / City / Postcode fallbacks ---
514
- street = _first_non_empty(
515
- _join_nonempty([c.get("street_name"), c.get("street_number")]),
516
- c.get("address"), c.get("address1"), c.get("address_line1"),
517
- c.get("street"), c.get("street_address"),
518
  )
519
- city = _first_non_empty(c.get("city"), c.get("town"), c.get("locality"))
520
- postcode = _first_non_empty(c.get("zip_code"), c.get("postal_code"), c.get("postcode"), c.get("zip"))
521
-
522
- # --- Job title with fallbacks ---
523
- job_title = _first_non_empty(
524
- p.get("job_title"),
525
- p.get("job_title_de_DE"),
526
- p.get("title"),
527
- p.get("position"),
528
- _from_ci(p, "role"),
529
  )
530
 
531
- # --- LinkedIn URL with fallbacks ---
532
- linkedin_url = _first_non_empty(
533
- p.get("linkedin_url"),
534
- p.get("linkedin"),
535
- p.get("linkedin_profile"),
536
- p.get("linkedinUrl"),
537
- p.get("li"),
538
- p.get("li_url"),
539
- )
540
 
541
- # --- exclude_hash fallbacks ---
542
- exclude_hash = _first_non_empty(
543
- lead.get("exclude_hash"),
544
- c.get("exclude_hash"),
545
- p.get("exclude_hash"),
546
- lead.get("combined_id"),
547
- )
548
 
549
- # --- Tags: ONLY what user provided (no departments mirroring) ---
550
- tag_items = _normalize_tag_items(tag_text)
551
- tags_dropdown = {"keys": tag_items, "values": tag_items} if tag_items else None
552
-
553
- payload = {
554
- # Person
555
- "firstname": p.get("first_name") or None,
556
- "lastname": p.get("last_name") or None,
557
- "email": email,
558
- "adress": street or None, # (sic)
559
- "city": city or None,
560
- "postcode": postcode or None,
561
- "phonenumber": p.get("phone") or None,
562
- "job_title": job_title or None,
563
- "departments": departments_txt,
564
- "linkedin_url": linkedin_url or None,
565
-
566
- # Company
567
- "company_name": _first_non_empty(c.get("name"), c.get("company_name")),
568
- "company_url": company_url or None,
569
-
570
- # Message
571
- "message_mail": msg_body or None,
572
- "message_mail_subject": msg_subject or None,
573
- "message_followup1": followup1 or None,
574
- "message_followup2": followup2 or None,
575
-
576
- # Other
577
- "exclude_hash": exclude_hash or None,
578
-
579
- # Dropdowns
580
- "status_field": { "keys": ["Kontakt aufgenommen"], "values": ["Kontakt aufgenommen"] },
581
- "tags": tags_dropdown,
582
  }
583
 
584
- normalized = filter_wholix_contact_fields(payload)
585
-
586
- # Normalize dropdowns
587
- if "status_field" in normalized:
588
- fixed = normalize_wholix_dropdown(normalized["status_field"])
589
- if fixed: normalized["status_field"] = fixed
590
- else: normalized.pop("status_field", None)
591
 
592
- if "tags" in normalized:
593
- fixed = normalize_wholix_dropdown(normalized["tags"])
594
- if fixed: normalized["tags"] = fixed
595
- else: normalized.pop("tags", None)
596
-
597
- return normalized
 
598
 
599
  def wholix_store_contact(token: str, record: dict, module: str = "Contacts") -> dict:
600
  """
601
- Wholix-Store mit Dropdown-Fallbacks:
602
- 1) Normales {keys,values}
603
- 2) Legacy {value}
604
- 3) Ohne problematische Felder
 
605
  """
606
- email = str((record or {}).get("email") or "").strip()
 
 
 
 
 
607
  if not email:
608
- e = ValueError("Wholix: 'email' ist Pflichtfeld und darf nicht leer sein.")
609
- e.name = "ValidationError"
610
- raise e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
 
612
- normalized = filter_wholix_contact_fields({ **record, "email": email })
 
 
 
 
613
 
614
- if "status_field" in normalized:
615
- fixed = normalize_wholix_dropdown(normalized["status_field"])
616
- if fixed: normalized["status_field"] = fixed
617
- else: normalized.pop("status_field", None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
618
 
619
- if "tags" in normalized:
620
- fixed = normalize_wholix_dropdown(normalized["tags"])
621
- if fixed: normalized["tags"] = fixed
622
- else: normalized.pop("tags", None)
623
 
624
  url = f"{WHOLIX_BASE_URL}/api/v1/table-object-data/store-objects"
625
  headers = {"Authorization": f"Bearer {token}"}
 
 
626
 
627
- body = {"module": module, "action": "store", "data": [normalized]}
628
- try:
629
- return req(url, method="POST", headers=headers, json_body=body, timeout=60)
630
- except Exception as e1:
631
- from requests import HTTPError as _ReqHTTPError
632
- status = getattr(e1, "status", None) or (e1.response.status_code if isinstance(e1, _ReqHTTPError) and e1.response else None)
633
- msg = ""
634
- try:
635
- msg = json.dumps(getattr(e1, "body", "") or "").lower()
636
- except Exception:
637
- pass
638
- if status != 422 or not any(k in (msg or "") for k in ("status_field", "tags")):
639
- raise
640
 
641
- try:
642
- legacy = dict(normalized)
643
- def to_value(v):
644
- if isinstance(v, dict) and "keys" in v and v["keys"]:
645
- return v["keys"][0]
646
- if isinstance(v, dict) and "values" in v and v["values"]:
647
- return v["values"][0]
648
- if isinstance(v, list) and v:
649
- return v[0]
650
- if isinstance(v, str) and v.strip():
651
- return v.strip()
652
- return None
653
 
654
- if "status_field" in legacy and legacy["status_field"]:
655
- legacy["status_field"] = {"value": to_value(legacy["status_field"])}
656
- if "tags" in legacy and legacy["tags"]:
657
- legacy["tags"] = {"value": to_value(legacy["tags"])}
658
-
659
- body2 = {"module": module, "action": "store", "data": [legacy]}
660
- return req(url, method="POST", headers=headers, json_body=body2, timeout=60)
661
- except Exception as e2:
662
- from requests import HTTPError as _ReqHTTPError
663
- status2 = getattr(e2, "status", None) or (e2.response.status_code if isinstance(e2, _ReqHTTPError) and e2.response else None)
664
- msg2 = ""
665
  try:
666
- msg2 = json.dumps(getattr(e2, "body", "") or "").lower()
667
- except Exception:
668
- pass
669
- if status2 != 422 or not any(k in (msg2 or "") for k in ("status_field", "tags")):
670
- raise
671
 
672
- stripped = dict(normalized)
673
- stripped.pop("status_field", None)
674
- stripped.pop("tags", None)
675
- body3 = {"module": module, "action": "store", "data": [stripped]}
676
- return req(url, method="POST", headers=headers, json_body=body3, timeout=60)
 
 
 
 
677
 
678
- # ====================== Background-Jobs (robust UI) =======================
 
 
 
 
 
 
 
 
 
 
679
 
680
- LEAD_COUNTS = [1, 2, 3, 5, 10, 20, 50, 100, 200]
681
 
682
- # In-Memory Job Store
683
- EXEC = ThreadPoolExecutor(max_workers=16)
684
- JOBS: Dict[str, Dict[str, Any]] = {} # job_id -> state dict
685
 
686
  def _job_init(job_id: str):
687
  JOBS[job_id] = {
688
- "log": [], # List[str]
689
- "progress": 0.0, # 0..1
690
- "rows": [], # Ergebnisse (List[Dict])
691
  "done": False,
692
- "error": None, # str | None
693
  "lock": threading.RLock(),
 
 
694
  }
695
 
696
- def _job_emit(job_id: str, msg: str = "", progress: Optional[float] = None, rows: Optional[List[Dict[str, Any]]] = None):
697
  st = JOBS.get(job_id)
698
- if not st:
699
- return
700
  with st["lock"]:
701
  if msg:
702
  st["log"].append(msg)
703
- st["log"] = st["log"][-1000:] # cap
704
- if progress is not None:
705
- st["progress"] = max(0.0, min(1.0, progress))
706
- if rows is not None:
707
- st["rows"] = rows[-1000:] # cap
 
 
 
 
 
708
 
709
  def _job_finish(job_id: str, error: Optional[str] = None):
710
  st = JOBS.get(job_id)
711
- if not st:
712
- return
713
  with st["lock"]:
714
  st["error"] = error
715
- st["done"] = True
716
- if error:
717
- st["log"].append(f"❌ {error}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
718
  else:
719
- st["log"].append("✅ Alles erledigt.")
 
 
 
 
 
720
 
721
- def run_pipeline_bg(job_id: str, curl_text: str, n_leads: int):
722
  """
723
- Background-Pipeline:
724
- 1) Wholix-Login
725
- 2) Excludes laden
726
- 3..N) Lead holen → Nachricht generieren → in Wholix speichern
727
  """
728
- results: List[Dict[str, Any]] = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
 
730
- def log(msg: str):
731
- st = JOBS.get(job_id, {})
732
- prog = st.get("progress", 0.0)
733
- _job_emit(job_id, msg=msg, progress=prog, rows=results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
 
735
- # parse curl
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
736
  try:
737
  token_id, payload = parse_curl(curl_text)
738
- except Exception as e:
739
- _job_finish(job_id, error=f"Parse-Fehler: {e}")
 
740
  return
741
 
742
  wh_email = payload.get("wholix_email") or payload.get("Wholix_email") or ""
743
  wh_pass = payload.get("wholix_passwort") or payload.get("wholix_password") or ""
744
  if not wh_email or not wh_pass:
745
- _job_finish(job_id, error="In der JSON-Payload fehlen wholix_email / wholix_passwort.")
746
  return
747
 
 
748
  filters = payload.get("filters") or {}
749
- icp_text = payload.get("Produkt_und_Dienstleistungsbeschreibung") or payload.get("icp_text") or ""
750
  checklist = payload.get("Checkliste_Landingpage") or ""
751
  signature = payload.get("Signatur") or ""
752
  cta = payload.get("CTA") or ""
753
  homepage_url = payload.get("icp_homepage_url") or ""
754
- # Tags: in beliebigen Formen erlauben (AI | [AI] | "AI, Sales" | ["AI","Sales"])
755
  raw_tag = payload.get("Wholic_tag") or payload.get("Wholix_tag") or "AI"
756
- tag_text = raw_tag # Mapper parst das robust
757
 
758
- # optional limit aus Payload
759
  try:
760
- n_leads = int(payload.get("limit", n_leads))
761
  except Exception:
762
- pass
763
- n_leads = max(1, n_leads)
764
 
765
- total_steps = max(1, n_leads) * 4 + 2 # login + excludes + (lead + email + store)*N
 
766
  step = 0
767
 
768
- # 1) Login
769
  step += 1
770
- _job_emit(job_id, msg="→ Logge bei Wholix ein …", progress=step/total_steps, rows=results)
771
  try:
772
  wh_token = wholix_login(wh_email, wh_pass)
773
- except Exception as e:
774
- _job_finish(job_id, error=f"Wholix-Login: {e}")
 
775
  return
776
 
777
- # 2) Excludes
778
- step += 1
779
- _job_emit(job_id, msg="→ Lade Exclude-Hashes …", progress=step/total_steps, rows=results)
780
  try:
781
- excludes = wholix_fetch_excludes(wh_token)
782
- _job_emit(job_id, msg=f" {len(excludes)} Exclude-Hashes gefunden.")
783
- except Exception as e:
784
- _job_emit(job_id, msg=f"⚠️ Excludes konnten nicht geladen werden: {e} — fahre ohne fort.")
785
- excludes = []
786
 
787
- for i in range(1, n_leads + 1):
788
- # 3) Lead
789
- step += 1
790
- _job_emit(job_id, msg=f"→ [{i}/{n_leads}] Fordere Lead an …", progress=step/total_steps, rows=results)
791
- try:
792
- lead = lead_suggest(token_id, filters, icp_text, excludes)
793
- except Exception as e:
794
- _job_emit(job_id, msg=f"❌ Lead-Fehler: {e}", progress=step/total_steps, rows=results)
795
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796
 
797
- person = (lead or {}).get("person") or {}
798
- company = (lead or {}).get("company") or {}
799
- _job_emit(job_id, msg=f" Lead: {person.get('first_name','?')} {person.get('last_name','?')} @ {company.get('name') or company.get('company_name','?')}")
800
-
801
- # 4) Email draft
802
- step += 1
803
- _job_emit(job_id, msg=" → Generiere Nachricht …", progress=step/total_steps, rows=results)
804
- items = [{"combined_id": lead.get("combined_id"), "company": company, "person": person}]
805
- variables = {
806
- "Touch_Point": "LinkedIn DM",
807
- **({"homepage_url": homepage_url} if homepage_url else {}),
808
- "Produkt_und_Dienstleistungsbeschreibung": icp_text,
809
- "Checkliste_Landingpage": checklist,
810
- "CTA": cta,
811
- "Signatur": signature,
812
- }
813
  try:
814
- draft = email_generate(token_id, variables, items)
815
- except Exception as e:
816
- _job_emit(job_id, msg=f"❌ Email-Generate-Fehler: {e}", progress=step/total_steps, rows=results)
817
- continue
 
 
 
818
 
819
- # 5) Store (MAPPER FIXED)
820
- step += 1
821
- _job_emit(job_id, msg=" → Speichere Kontakt + Nachricht in Wholix …", progress=step/total_steps, rows=results)
822
  try:
823
- record = map_to_wholix_record(lead, draft, tag_text=tag_text)
824
- store_res = wholix_store_contact(wh_token, record)
825
- except Exception as e:
826
- _job_emit(job_id, msg=f"❌ Wholix-Store-Fehler: {e}", progress=step/total_steps, rows=results)
827
- continue
 
 
 
 
 
828
 
829
- if lead.get("exclude_hash"):
830
- excludes.append(lead["exclude_hash"])
 
831
 
832
- result_row = {
833
- "person": f"{person.get('first_name','')} {person.get('last_name','')}".strip(),
834
- "email": (draft.get("email") or {}).get("to") or person.get("email") or "",
835
- "company": company.get("name") or company.get("company_name") or "",
836
- "subject": (draft.get("email") or {}).get("subject") or "",
837
- "stored_ok": bool(store_res),
838
- }
839
- results.append(result_row)
840
 
841
- # 6) small completion step for this iteration
842
- step += 1
843
- _job_emit(job_id, msg=" ✓ Fertig für diesen Lead.", progress=step/total_steps, rows=results)
844
 
845
- _job_finish(job_id, error=None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
846
 
847
- # ================================ Gradio UI ===============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
848
 
849
  def build_ui():
850
  with gr.Blocks(theme=gr.themes.Soft(), css="""
851
  .logbox textarea { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace; font-size: 12.5px; line-height: 1.35; }
852
  """) as demo:
853
- gr.Markdown("## Wholix Lead → Message → Store (robust, Background-Job + Polling)")
854
- gr.Markdown("Füge unten deinen **gesamten `curl`** ein (inkl. `X-Token-Id` Header und JSON `--data-raw`), wähle die Lead-Anzahl und klicke **Start**. "
855
- "Die Verarbeitung läuft als Hintergrund-Job weiter selbst wenn der Browser kurz offline ist. "
856
- "Mit „Aktualisieren“ holst du den aktuellen Status ab.")
 
 
857
 
858
  with gr.Row():
859
- curl_in = gr.Textbox(label="curl Befehl", placeholder="curl -sS -N -X POST 'https://.../stream' -H 'X-Token-Id: ...' --data-raw '{...}'", lines=12)
 
 
 
 
860
  with gr.Row():
861
  count = gr.Dropdown(choices=[str(x) for x in LEAD_COUNTS], value="1", label="Anzahl Leads")
862
 
@@ -865,7 +1035,7 @@ def build_ui():
865
  poll_btn = gr.Button("🔄 Aktualisieren")
866
 
867
  with gr.Row():
868
- job_id_tb = gr.Textbox(label="Job-ID", interactive=False)
869
 
870
  with gr.Row():
871
  status = gr.Textbox(label="Status / Log", lines=18, interactive=False, elem_classes=["logbox"])
@@ -873,61 +1043,53 @@ def build_ui():
873
  progress = gr.Slider(label="Progress", minimum=0, maximum=100, value=0, interactive=False)
874
  with gr.Row():
875
  out = gr.Dataframe(
876
- headers=["person", "email", "company", "subject", "stored_ok"],
877
  label="Ergebnisse",
878
  interactive=False,
879
  wrap=True,
880
  row_count=(0, "dynamic"),
881
- col_count=(5, "fixed"),
882
  )
883
 
884
- # ----- Handlers -----
885
-
886
  def start_job(curl_text: str, n: str):
887
  try:
888
  n_int = int(n)
889
  except Exception:
890
  n_int = 1
 
 
891
  job_id = str(uuid.uuid4())
892
  _job_init(job_id)
893
- _job_emit(job_id, msg=f"Job gestartet: {job_id}")
894
 
895
- # Start background work
896
  EXEC.submit(run_pipeline_bg, job_id, curl_text, n_int)
897
 
898
- # Return initial state
899
  st = JOBS[job_id]
900
  with st["lock"]:
901
  log = "\n".join(st["log"])
902
- prog = int(round(st["progress"] * 100))
903
  rows = st["rows"]
904
  return log, prog, rows, job_id
905
 
906
  def poll_job(job_id: str):
907
  st = JOBS.get(job_id)
908
  if not st:
909
- return "Unbekannte Job-ID.", 0, []
910
  with st["lock"]:
911
  log = "\n".join(st["log"][-500:])
912
- prog = int(round(st["progress"] * 100))
913
  rows = st["rows"]
914
  return log, prog, rows
915
 
916
- start_btn.click(
917
- start_job,
918
- inputs=[curl_in, count],
919
- outputs=[status, progress, out, job_id_tb]
920
- )
921
-
922
- poll_btn.click(
923
- poll_job,
924
- inputs=[job_id_tb],
925
- outputs=[status, progress, out]
926
- )
927
 
928
  return demo
929
 
 
 
930
  if __name__ == "__main__":
931
  app = build_ui()
932
- # Du kannst server_name="0.0.0.0" setzen, wenn du LAN-Zugriff brauchst
933
- app.launch(share=True, debug=True)
 
2
  # -*- coding: utf-8 -*-
3
 
4
  """
5
+ Gradio App robuste Async-Ausführung via Status-Endpoints
6
+ - Rein asynchron mit Status-Endpoints
7
+ - Läuft serverseitig weiter, auch wenn UI/Verbindung weg ist
8
+ - Globale HTTP-Session mit Keep-Alive
9
+ - Polling mit Backoff + Retry-After
10
+ - Logging in Konsole + logs/<job_id>.log
11
+ - FIX: exclude_hash richtig laden/berücksichtigen (paginierte Wholix-Suche)
12
+ - FIX: pro Lead harte Fehlerisolierung -> kein Gradio-Absturz
13
  """
14
 
15
  import json
 
16
  import os
17
  import random
18
  import re
19
  import time
 
20
  import uuid
21
+ import threading
22
  from concurrent.futures import ThreadPoolExecutor
23
+ from typing import Any, Dict, List, Optional, Tuple, Set
24
+
25
+ import logging
26
+ from logging.handlers import RotatingFileHandler
27
+ from pathlib import Path
28
 
29
  import gradio as gr
30
  import requests
31
+ from requests.adapters import HTTPAdapter
32
+
33
+ # ============================== CONFIG ====================================
34
 
35
  LEAD_BASE_URL = "https://wholixleadgenbackend.ngrok.io"
36
  WHOLIX_BASE_URL = "https://api.wholix.ai"
37
 
38
+ MAX_LEADS = 100 # Sicherheitskappe
39
+ MAX_WORKERS = 16 # gleichzeitige Pipelines am Server
40
+ JOB_TTL_SEC = 60 * 60 # wie lange fertige Jobs im Speicher bleiben (1h)
41
+
42
+ # ============================== LOGGING ====================================
43
+
44
+ LOG_DIR = Path("logs")
45
+ LOG_DIR.mkdir(parents=True, exist_ok=True)
46
+
47
+ logging.basicConfig(
48
+ level=logging.INFO,
49
+ format="%(asctime)s | %(levelname)s | %(threadName)s | %(message)s",
50
+ )
51
+ APP_LOG = logging.getLogger("app")
52
+
53
+ def get_job_logger(job_id: str) -> logging.Logger:
54
+ lg = logging.getLogger(f"job.{job_id}")
55
+ if not any(isinstance(h, RotatingFileHandler) for h in lg.handlers):
56
+ fh = RotatingFileHandler(
57
+ LOG_DIR / f"{job_id}.log",
58
+ maxBytes=512_000,
59
+ backupCount=2,
60
+ encoding="utf-8",
61
+ )
62
+ fh.setFormatter(logging.Formatter("%(asctime)s | %(levelname)s | %(message)s"))
63
+ lg.addHandler(fh)
64
+ lg.setLevel(logging.INFO)
65
+ lg.propagate = True
66
+ return lg
67
+
68
+ # ========================== GLOBAL HTTP SESSION ============================
69
+
70
+ GLOBAL_SES = requests.Session()
71
+ GLOBAL_SES.headers.update({"Accept": "application/json, text/plain;q=0.9, */*;q=0.8"})
72
+ adapter = HTTPAdapter(pool_connections=50, pool_maxsize=50, max_retries=0)
73
+ GLOBAL_SES.mount("https://", adapter)
74
+ GLOBAL_SES.mount("http://", adapter)
75
+
76
+ try:
77
+ from requests_toolbelt.adapters.socket_options import TCPKeepAliveAdapter
78
+ ka = TCPKeepAliveAdapter(idle=30, interval=30, count=3)
79
+ GLOBAL_SES.mount("https://", ka)
80
+ GLOBAL_SES.mount("http://", ka)
81
+ except Exception:
82
+ pass
83
+
84
+ # ================================ HTTP =====================================
85
 
86
  class HTTPError(Exception):
87
+ def __init__(self, status: int, url: str, body: Any = None, headers: Dict[str, str] = None):
88
+ msg = f"HTTP {status} on {url}"
89
+ if body is not None:
90
+ try:
91
+ bs = body if isinstance(body, str) else json.dumps(body, ensure_ascii=False)
92
+ msg += f": {bs[:500]}"
93
+ except Exception:
94
+ pass
95
+ super().__init__(msg)
96
  self.status = status
97
  self.url = url
98
  self.body = body
99
+ self.headers = headers or {}
100
 
101
  def _is_json_ct(ct: str) -> bool:
102
  return bool(ct and ("application/json" in ct or ct.endswith("+json")))
103
 
104
+ def _safe_json(text: str):
105
+ try:
106
+ return json.loads(text)
107
+ except Exception:
108
+ repaired = text.replace("NaN", "null").replace("Infinity", "null").replace("-Infinity", "null")
109
+ return json.loads(repaired)
110
+
111
  def req(
112
  url: str,
113
  method: str = "GET",
114
  headers: Optional[Dict[str, str]] = None,
115
  json_body: Any = None,
116
  data: Any = None,
117
+ timeout: Tuple[float, float] = (5.0, 15.0),
 
 
118
  session: Optional[requests.Session] = None,
119
+ ) -> Any:
120
+ s = session or GLOBAL_SES
121
+ r = s.request(
122
+ method=method.upper(),
123
+ url=url,
124
+ headers=headers,
125
+ json=json_body,
126
+ data=data,
127
+ timeout=timeout,
128
+ )
129
+ ct = r.headers.get("content-type", "")
130
+ text = r.text or ""
131
+ if not r.ok:
132
+ body = None
133
+ if text:
134
+ try:
135
+ body = r.json() if _is_json_ct(ct) or (not ct and text.strip().startswith(("{", "["))) else text
136
+ except Exception:
137
  body = text
138
+ raise HTTPError(r.status_code, url, body=body, headers=dict(r.headers))
139
+ if text == "":
140
+ return None
141
+ if _is_json_ct(ct) or (not ct and text.strip().startswith(("{", "["))):
142
+ try:
143
+ return r.json()
144
+ except Exception:
145
+ return _safe_json(text)
146
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
+ # ========================= Async Start & Poll ==============================
 
149
 
150
+ def _looks_like_ngrok_html(body: Any) -> bool:
151
  try:
152
+ s = body if isinstance(body, str) else json.dumps(body, ensure_ascii=False)
153
+ except Exception:
154
+ s = str(body)
155
+ s_lower = (s or "").lower()
156
+ return ("cdn.ngrok.com" in s_lower) or ("ngrok" in s_lower and "<html" in s_lower)
 
 
 
 
 
 
 
 
 
 
 
157
 
158
+ def start_async_job(url: str, body: dict, headers: dict, session: Optional[requests.Session] = None) -> str:
159
+ """
160
+ Startet einen asynchronen Backend-Job und gibt die job_id zurück.
161
+ Neu: interne Retries (Backoff + Jitter) bei 404/408/425/429/5xx und ngrok-HTML.
162
+ Macht vorher einen Warmup-Ping auf die Base-URL.
163
+ """
164
+ s = session or GLOBAL_SES
165
 
166
+ # --- Warmup: ngrok/Server "aufwecken" (ignoriert Fehler) ---
167
+ try:
168
+ base = re.split(r"(?<=://[^/]+)", url)[0] # "https://host.tld"
169
+ s.get(base, timeout=(3.0, 3.0))
170
+ except Exception:
171
+ pass
 
172
 
173
+ # --- POST mit Retry ---
174
+ attempts = 0
175
+ max_attempts = 6
176
+ delay = 0.8
177
+ while True:
178
+ attempts += 1
179
+ try:
180
+ res = req(
181
+ url=url,
182
+ method="POST",
183
+ headers={**headers, "Prefer": "respond-async"},
184
+ json_body=body,
185
+ timeout=(5.0, 15.0),
186
+ session=s,
187
+ )
188
+ job_id = (res or {}).get("job_id")
189
+ ok = (res or {}).get("ok", False)
190
+ if not ok or not job_id:
191
+ raise RuntimeError(f"Backend hat keine job_id geliefert (res={res!r})")
192
+ return job_id
193
+
194
+ except HTTPError as e:
195
+ transient_codes = {404, 408, 425, 429, 500, 502, 503, 504, 520, 522, 524}
196
+ is_transient = (e.status in transient_codes) or _looks_like_ngrok_html(e.body)
197
+ if attempts < max_attempts and is_transient:
198
+ # Retry-After respektieren
199
+ ra = 0.0
200
+ try:
201
+ if isinstance(e.body, dict):
202
+ ra = float(e.body.get("retry_after", 0) or 0)
203
+ except Exception:
204
+ ra = 0.0
205
+ if not ra:
206
+ try:
207
+ ra_hdr = (e.headers or {}).get("Retry-After", "")
208
+ ra = float(ra_hdr) if ra_hdr else 0.0
209
+ except Exception:
210
+ ra = 0.0
211
+ sleep_for = ra or (delay + random.uniform(0, 0.5 * delay))
212
+ logging.warning(f"start_async_job: transient {e.status}, retry {attempts}/{max_attempts} in {sleep_for:.1f}s …")
213
+ time.sleep(sleep_for)
214
+ delay = min(8.0, delay * 1.8)
215
  continue
216
+ # nicht transient oder Versuche aufgebraucht
217
+ raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
 
 
 
219
 
220
+ def poll_status(
221
+ url: str,
222
+ headers: dict,
223
+ max_wait_sec: int = 60 * 20,
224
+ min_delay: float = 1.0,
225
+ max_delay: float = 30.0,
226
+ session: Optional[requests.Session] = None,
227
+ on_tick=None,
228
+ ) -> dict:
229
  t0 = time.time()
 
230
  attempt = 0
231
+ delay = min_delay
232
  while True:
233
+ if time.time() - t0 > max_wait_sec:
234
+ raise TimeoutError("Timeout beim Warten auf Ergebnis.")
235
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  try:
237
+ st = req(url, method="GET", headers=headers, timeout=(5.0, 6.0), session=session)
238
+ except HTTPError as e:
239
+ if e.status in (408, 425, 429, 500, 502, 503, 504, 520, 522, 524):
240
+ ra = 0.0
241
+ if isinstance(e.body, dict):
242
+ try:
243
+ ra = float(e.body.get("retry_after", 0))
244
+ except Exception:
245
+ ra = 0.0
246
+ if not ra:
247
+ try:
248
+ ra_hdr = (e.headers or {}).get("Retry-After", "")
249
+ ra = float(ra_hdr) if ra_hdr else 0.0
250
+ except Exception:
251
+ ra = 0.0
252
+ attempt += 1
253
+ delay = min(max_delay, max(min_delay, 0.7 * (2 ** attempt)))
254
+ time.sleep(ra or (delay + random.uniform(0, 0.4 * delay)))
255
+ continue
256
+ raise
257
 
258
+ if callable(on_tick):
259
+ try:
260
+ on_tick(st)
261
+ except Exception:
262
+ pass
263
 
264
+ if st and st.get("ok") and st.get("status") == "done":
265
+ return st
 
 
 
 
 
 
266
 
267
+ attempt = min(attempt + 1, 20)
268
+ delay = min(max_delay, max(min_delay, delay * 1.6))
269
+ time.sleep(delay)
 
 
 
 
 
 
 
 
270
 
271
+ # ============================= Services ====================================
 
 
 
272
 
273
+ def unwrap_result(payload: Any) -> Any:
274
+ p = payload or {}
275
+ if isinstance(p, dict) and p.get("ok") and any(k in p for k in ("result", "data", "content")):
276
+ p = p.get("result") or p.get("data") or p.get("content")
277
+ return p
278
 
279
+ def suggest_single_lead(token_id: str, filters: dict, icp_text: str, exclude_ids: List[str]) -> dict:
280
+ res_job_id = start_async_job(
281
+ url=f"{LEAD_BASE_URL}/lead/suggest?async=1",
282
+ body={"filters": filters, "icp_text": icp_text or "", "exclude_ids": exclude_ids or []},
283
+ headers={"X-Token-Id": token_id},
284
+ )
285
+ st = poll_status(
286
+ url=f"{LEAD_BASE_URL}/lead/suggest/status/{res_job_id}?t={int(time.time()*1000)}",
287
+ headers={"X-Token-Id": token_id},
288
+ max_wait_sec=60 * 20,
289
+ session=GLOBAL_SES,
290
+ )
291
+ res = unwrap_result(st)
292
+ if isinstance(res, dict) and isinstance(res.get("items"), list) and res["items"]:
293
+ return res["items"][0]
294
+ if isinstance(res, list) and res:
295
+ return res[0]
296
+ return res if isinstance(res, dict) else {"result": res}
297
+
298
+ def _flatten_text(val: Any) -> str:
299
+ """
300
+ Newlines/Tabs/Mehrfach-Whitespace sauber zu einem Space zusammenziehen.
301
+ Keine Feldraten, nur Plain-String-Verarbeitung.
302
+ """
303
+ s = "" if val is None else str(val)
304
+ s = s.replace("\r\n", "\n").replace("\r", "\n")
305
+ s = re.sub(r"\s+", " ", s).strip()
306
+ return s
307
 
 
 
 
 
 
308
 
309
+ def normalize_draft(raw: Any) -> Dict[str, Any]:
310
+ r = unwrap_result(raw) or raw or {}
311
+ if isinstance(r, dict) and "message" in r and isinstance(r["message"], dict):
312
+ r = r["message"]
313
+ email_obj = r.get("email") if isinstance(r, dict) else {}
314
+ def pick(obj, *keys):
315
+ if not isinstance(obj, dict): return ""
316
+ for k in keys:
317
+ for kk, vv in obj.items():
318
+ if isinstance(kk, str) and kk.lower() == k.lower():
319
+ if isinstance(vv, str) and vv.strip(): return vv.strip()
320
+ if vv: return vv
321
+ return ""
322
+ subject = pick(email_obj, "subject", "email_subject", "Betreff") or pick(r, "subject", "email_subject", "Betreff")
323
+ body = pick(email_obj, "body", "text", "content") or pick(r, "body", "text", "content")
324
+ fu1 = pick(r, "followup1", "FollowUp1", "LinkedIn")
325
+ fu2 = pick(r, "followup2", "FollowUp2", "Facebook")
326
+ to = pick(email_obj, "to")
327
+ return {"email": {"to": to, "subject": subject, "body": body}, "followup1": fu1, "followup2": fu2}
328
+
329
+ def email_generate_async(token_id: str, variables: dict, items: List[dict]) -> dict:
330
+ """
331
+ Startet /email/generate (async) und gibt nur ECHTE Endpoint-Daten zurück.
332
+ - Übergibt globale Variablen 1:1 (inkl. Signatur) auf Root-Ebene.
333
+ - Keine Feldumbenennung, keine Heuristik.
334
+ Rückgabe:
335
+ {
336
+ "email": {"subject": <Betreff>, "body": <Text>},
337
+ "followup1": <FollowUp1>,
338
+ "followup2": <FollowUp2>,
339
+ "raw": <unwrap_result(...)>
340
+ }
341
+ """
342
  if not items:
343
+ raise ValueError("items fehlt (mindestens 1 Lead erforderlich).")
344
+
345
+ # Nur die Keys weiterreichen, die dein Backend tatsächlich kennt/erwartet.
346
+ # Wir filtern NICHT um – wir vertrauen der übergebenen payload (keine Fantasie-Felder).
347
+ root_allowed = {
348
+ "Produkt_und_Dienstleistungsbeschreibung",
349
+ "CTA",
350
+ "Signatur",
351
+ "Checkliste_Landingpage",
352
+ "homepage_url", # optional, falls im Frontend genutzt
353
+ "tags", # wird serverseitig ignoriert, aber ok
354
+ "Touch_Point", # optional – stört den Backend-Kontrakt nicht
355
+ }
356
 
357
+ safe_vars = {}
358
+ for k, v in (variables or {}).items():
359
+ # 1:1 durchlassen für die bekannten Felder
360
+ if k in root_allowed:
361
+ safe_vars[k] = v
 
 
 
362
 
363
+ # Payload für den echten Endpoint: Root-Variablen + Items
364
+ start_body = {**safe_vars, "items": items, "item_index": 0}
 
 
 
 
365
 
366
+ # Debug-Log: zeigen, welche Keys wir wirklich senden (inkl. Signatur)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  try:
368
+ sent_keys = sorted(list(start_body.keys()))
369
+ sig_preview = str(start_body.get("Signatur") or "")[:120]
370
+ APP_LOG.info(f"/email/generate body keys: {sent_keys}")
371
+ APP_LOG.info(f"/email/generate Signatur (preview): {sig_preview}")
372
  except Exception:
373
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
+ # Async-Start & Poll
376
+ res_job_id = start_async_job(
377
+ url=f"{LEAD_BASE_URL}/email/generate?async=1",
378
+ body=start_body,
379
+ headers={"X-Token-Id": token_id},
380
  )
381
+ st = poll_status(
382
+ url=f"{LEAD_BASE_URL}/email/generate/status/{res_job_id}?t={int(time.time()*1000)}",
383
+ headers={"X-Token-Id": token_id},
384
+ max_wait_sec=60 * 20,
385
+ session=GLOBAL_SES,
 
 
 
 
 
386
  )
387
 
388
+ # Ergebnis 1:1 auswerten nur echte Felder
389
+ raw = unwrap_result(st) or {}
390
+ results = raw.get("results") if isinstance(raw, dict) else None
391
+ first = (results[0] if isinstance(results, list) and results else {}) or {}
392
+ msg = first.get("message") if isinstance(first, dict) else {}
 
 
 
 
393
 
394
+ subj = str((msg or {}).get("Betreff") or "")
395
+ text = str((msg or {}).get("Text") or "")
396
+ fu1 = str((msg or {}).get("FollowUp1") or "")
397
+ fu2 = str((msg or {}).get("FollowUp2") or "")
 
 
 
398
 
399
+ return {
400
+ "email": {"subject": subj, "body": text},
401
+ "followup1": fu1,
402
+ "followup2": fu2,
403
+ "raw": raw,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
  }
405
 
 
 
 
 
 
 
 
406
 
407
+ def wholix_login(email: str, password: str) -> str:
408
+ res = req(f"{WHOLIX_BASE_URL}/api/v1/auth/login", method="POST",
409
+ json_body={"email": email, "password": password}, timeout=(5.0, 15.0))
410
+ token = (res or {}).get("token")
411
+ if not token:
412
+ raise RuntimeError("Wholix-Login fehlgeschlagen.")
413
+ return token
414
 
415
  def wholix_store_contact(token: str, record: dict, module: str = "Contacts") -> dict:
416
  """
417
+ Sendet NUR erlaubte Felder an Wholix und saniert problematische Werte:
418
+ - URLs (linkedin_url/company_url): Schema ergänzen, invalid droppen
419
+ - departments: immer Plain-Text
420
+ - Multi-Select (status_field/tags): nur korrektes {keys,values}
421
+ - Leere Strings -> None und weglassen
422
  """
423
+ import urllib.parse as _urlparse
424
+
425
+ if not isinstance(record, dict):
426
+ raise ValueError("Wholix: record muss ein dict sein.")
427
+
428
+ email = str((record.get("email") or "")).strip()
429
  if not email:
430
+ raise ValueError("Wholix: 'email' ist Pflichtfeld.")
431
+
432
+ ALLOWED = {
433
+ "firstname",
434
+ "lastname",
435
+ "email", # Pflichtfeld
436
+ "adress", # (sic) genau so
437
+ "city",
438
+ "postcode",
439
+ "phonenumber",
440
+ "job_title",
441
+ "departments", # Text
442
+ "linkedin_url",
443
+ "company_name",
444
+ "company_url",
445
+ "message_mail",
446
+ "message_mail_subject",
447
+ "message_followup1",
448
+ "message_followup2",
449
+ "exclude_hash",
450
+ "status_field", # Multi-Select: {keys:[], values:[]}
451
+ "tags", # Multi-Select: {keys:[], values:[]}
452
+ }
453
 
454
+ def _clean_str(v):
455
+ if v is None:
456
+ return None
457
+ s = str(v).strip()
458
+ return s if s else None
459
 
460
+ def _coerce_departments(v):
461
+ if v is None:
462
+ return None
463
+ if isinstance(v, (list, tuple, set)):
464
+ v = ", ".join(str(x).strip() for x in v if str(x).strip())
465
+ else:
466
+ v = str(v).strip()
467
+ # String wie "[Marketing]" -> "Marketing"
468
+ if v.startswith("[") and v.endswith("]"):
469
+ v = v[1:-1].strip().strip("'\"")
470
+ return _clean_str(v)
471
+
472
+ def _normalize_url(u):
473
+ u = _clean_str(u)
474
+ if not u:
475
+ return None
476
+ if not re.match(r"^[a-zA-Z][a-zA-Z0-9+\-.]*://", u):
477
+ u = "https://" + u
478
+ try:
479
+ pr = _urlparse.urlparse(u)
480
+ if not pr.scheme or not pr.netloc:
481
+ return None
482
+ return u
483
+ except Exception:
484
+ return None
485
+
486
+ out = {}
487
+ for k in ALLOWED:
488
+ if k not in record:
489
+ continue
490
+ val = record[k]
491
+
492
+ if k == "departments":
493
+ val = _coerce_departments(val)
494
+ elif k in ("linkedin_url", "company_url"):
495
+ val = _normalize_url(val)
496
+ elif k in ("status_field", "tags"):
497
+ # nur korrektes Schema durchlassen
498
+ if not (isinstance(val, dict) and "keys" in val and "values" in val):
499
+ val = None
500
+ else:
501
+ val = _clean_str(val)
502
+
503
+ if val is not None:
504
+ out[k] = val
505
 
506
+ # Pflichtfeld sicher
507
+ out["email"] = _clean_str(email)
 
 
508
 
509
  url = f"{WHOLIX_BASE_URL}/api/v1/table-object-data/store-objects"
510
  headers = {"Authorization": f"Bearer {token}"}
511
+ body = {"module": module, "action": "store", "data": [out]}
512
+ return req(url, method="POST", headers=headers, json_body=body, timeout=(5.0, 30.0))
513
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
 
515
+ # ======= NEW: Wholix-Excludes paginiert laden (wie in deinem JS) ===========
516
+
517
+ def wholix_fetch_excludes(token: str,
518
+ module_name: str = "Contacts",
519
+ per_page: int = 500,
520
+ max_pages: int = 100,
521
+ dedupe: bool = True) -> List[Dict[str, str]]:
522
+ path = f"{WHOLIX_BASE_URL}/api/v1/table-object-data/fetch-paginated-results"
523
+ headers = {"Authorization": f"Bearer {token}"}
524
+ out: List[Dict[str, str]] = []
525
+ seen: Set[str] = set()
 
526
 
527
+ last_page = 10**9
528
+ for page in range(1, max_pages + 1):
529
+ if page > last_page:
530
+ break
531
+ payload = {"module": module_name, "action": "search", "page": page, "per_page": per_page}
 
 
 
 
 
 
532
  try:
533
+ res = req(path, method="POST", headers=headers, json_body=payload, timeout=(5.0, 30.0))
534
+ except Exception as e:
535
+ APP_LOG.warning(f"Wholix-Excludes Page {page} Fehler: {e}")
536
+ break
 
537
 
538
+ data_block = (res or {}).get("data") or {}
539
+ rows = data_block.get("data") or data_block.get("results") or []
540
+ if isinstance(data_block.get("last_page"), (int, float)):
541
+ last_page = int(data_block.get("last_page"))
542
+ else:
543
+ last_page = page
544
+
545
+ if not isinstance(rows, list) or not rows:
546
+ break
547
 
548
+ for row in rows:
549
+ ex = (row.get("exclude_hash") or row.get("excludeHash") or row.get("exclude_id") or "").strip()
550
+ if not ex:
551
+ continue
552
+ if dedupe and ex in seen:
553
+ continue
554
+ if dedupe:
555
+ seen.add(ex)
556
+ cname = (row.get("company_name") or row.get("companyName") or "").strip()
557
+ out.append({"exclude_hash": ex, "company_name": cname})
558
+ return out
559
 
560
+ # ========================== Job Management =================================
561
 
562
+ EXEC = ThreadPoolExecutor(max_workers=MAX_WORKERS)
563
+ JOBS: Dict[str, Dict[str, Any]] = {}
 
564
 
565
  def _job_init(job_id: str):
566
  JOBS[job_id] = {
567
+ "log": [],
568
+ "progress": 0,
569
+ "rows": [],
570
  "done": False,
571
+ "error": None,
572
  "lock": threading.RLock(),
573
+ "created_at": time.time(),
574
+ "finished_at": None,
575
  }
576
 
577
+ def _job_emit(job_id: str, msg: str = None, progress: Optional[int] = None, rows_append: Optional[dict] = None):
578
  st = JOBS.get(job_id)
579
+ if not st: return
580
+ lg = get_job_logger(job_id)
581
  with st["lock"]:
582
  if msg:
583
  st["log"].append(msg)
584
+ st["log"] = st["log"][-1000:]
585
+ APP_LOG.info(f"[{job_id}] {msg}")
586
+ lg.info(msg)
587
+ if isinstance(progress, int):
588
+ st["progress"] = max(0, min(100, progress))
589
+ if rows_append:
590
+ st["rows"].append(rows_append)
591
+ st["rows"] = st["rows"][-1000:]
592
+ APP_LOG.info(f"[{job_id}] row: {rows_append}")
593
+ lg.info(f"row: {rows_append}")
594
 
595
  def _job_finish(job_id: str, error: Optional[str] = None):
596
  st = JOBS.get(job_id)
597
+ if not st: return
598
+ lg = get_job_logger(job_id)
599
  with st["lock"]:
600
  st["error"] = error
601
+ st["done"] = True
602
+ st["finished_at"] = time.time()
603
+ final = "✅ Fertig." if not error else f"❌ {error}"
604
+ st["log"].append(final)
605
+ APP_LOG.info(f"[{job_id}] {final}")
606
+ (lg.error if error else lg.info)(final)
607
+
608
+ def _gc_jobs():
609
+ now = time.time()
610
+ for jid, st in list(JOBS.items()):
611
+ if st.get("done") and st.get("finished_at") and now - st["finished_at"] > JOB_TTL_SEC:
612
+ JOBS.pop(jid, None)
613
+
614
+ # ========================== Pipeline (Background) ===========================
615
+
616
+ LEAD_COUNTS = [1, 2, 3, 4, 5, 10, 15, 20, 40, 80, 100, 200, 300, 400, 500, 1000]
617
+
618
+ CURL_DATA_RE = re.compile(r"""--data(?:-raw)?\s+(?P<q>['"])(?P<body>.*?)(?P=q)""", re.DOTALL)
619
+ HDR_XTOKEN_RE = re.compile(r"""-H\s+(?P<q>['"])X-Token-Id:\s*(?P<val>[^'"]+)(?P=q)""", re.IGNORECASE)
620
+
621
+ def _find_data_quote_start(s: str) -> Tuple[int, Optional[str]]:
622
+ m = re.search(r"--data(?:-raw)?\s+(['\"])", s)
623
+ if not m:
624
+ return -1, None
625
+ return m.end(1), m.group(1)
626
+
627
+ def _scan_quoted_payload(s: str, i: int, q: str) -> Tuple[str, int]:
628
+ out = []
629
+ n = len(s)
630
+ while i < n:
631
+ ch = s[i]
632
+ if q == "'":
633
+ if i + 4 < n and s[i:i+5] == "'\"'\"'":
634
+ out.append("'"); i += 5; continue
635
+ if ch == "'":
636
+ return "".join(out), i + 1
637
+ out.append(ch); i += 1
638
  else:
639
+ if ch == "\\" and i + 1 < n:
640
+ out.append(s[i+1]); i += 2; continue
641
+ if ch == '"':
642
+ return "".join(out), i + 1
643
+ out.append(ch); i += 1
644
+ return "".join(out), i
645
 
646
+ def parse_curl(curl_text: str) -> Tuple[str, Dict[str, Any]]:
647
  """
648
+ Extrahiert X-Token-Id und JSON aus einem curl mit --data-raw '...'.
649
+ Keine Feldumbenennungen, keine Heuristik – JSON wird 1:1 übernommen.
 
 
650
  """
651
+ import shlex
652
+
653
+ if not isinstance(curl_text, str) or not curl_text.strip():
654
+ raise ValueError("Leerer curl-Text.")
655
+
656
+ # 1) Token aus Header holen (robust für einfache/doppelte Quotes)
657
+ token_id = ""
658
+ # -H 'X-Token-Id: abc' oder -H "X-Token-Id: abc"
659
+ m = re.search(r"""-H\s+(["'])X-Token-Id:\s*([^"']+)\1""", curl_text, re.IGNORECASE)
660
+ if m:
661
+ token_id = m.group(2).strip()
662
+
663
+ # fallback: unquoted header
664
+ if not token_id:
665
+ m2 = re.search(r"""X-Token-Id:\s*([A-Za-z0-9\-\._]+)""", curl_text, re.IGNORECASE)
666
+ if m2:
667
+ token_id = m2.group(1).strip()
668
+
669
+ if not token_id:
670
+ env_token = os.getenv("X_TOKEN_ID", "").strip()
671
+ if env_token:
672
+ token_id = env_token
673
+ if not token_id:
674
+ raise ValueError("Konnte keinen X-Token-Id Header im curl (oder env X_TOKEN_ID) finden.")
675
 
676
+ # 2) JSON-Body nach --data / --data-raw extrahieren (einfach/doppelt-quoted)
677
+ qmatch = re.search(r"""--data(?:-raw)?\s+(['"])""", curl_text)
678
+ if not qmatch:
679
+ raise ValueError("Konnte den JSON Body nicht finden (erwarte --data-raw '...').")
680
+ quote = qmatch.group(1)
681
+ start = qmatch.end(1)
682
+
683
+ # Payload bis zum passenden schließenden Quote lesen (beachtet Escapes)
684
+ out = []
685
+ i = start
686
+ n = len(curl_text)
687
+ if quote == "'":
688
+ # Bash-Rule: in single quotes sind nur Sequenzen '\"'\"' als eingebettetes einzelnes '
689
+ while i < n:
690
+ if i + 4 < n and curl_text[i:i+5] == "'\"'\"'":
691
+ out.append("'")
692
+ i += 5
693
+ continue
694
+ ch = curl_text[i]
695
+ if ch == "'":
696
+ break
697
+ out.append(ch)
698
+ i += 1
699
+ else:
700
+ # Doppel-Quotes: Backslashes beachten
701
+ while i < n:
702
+ ch = curl_text[i]
703
+ if ch == "\\" and i + 1 < n:
704
+ out.append(curl_text[i+1])
705
+ i += 2
706
+ continue
707
+ if ch == '"':
708
+ break
709
+ out.append(ch)
710
+ i += 1
711
 
712
+ body_str = "".join(out).strip()
713
+ # 3) JSON laden (ohne Felder zu „erdenken“)
714
+ try:
715
+ payload = json.loads(body_str)
716
+ except json.JSONDecodeError:
717
+ # CRLF -> LF und nochmal versuchen
718
+ payload = json.loads(body_str.replace("\r\n", "\n").replace("\r", "\n"))
719
+
720
+ return token_id, payload
721
+
722
+
723
+ def run_pipeline_bg(job_id: str, curl_text: str, n_leads_ui: int):
724
+ lg = get_job_logger(job_id)
725
+ APP_LOG.info(f"[{job_id}] Background job gestartet (n={n_leads_ui})")
726
+ lg.info(f"Background job gestartet (n={n_leads_ui})")
727
+
728
+ _gc_jobs()
729
+ _job_emit(job_id, "Job gestartet.")
730
  try:
731
  token_id, payload = parse_curl(curl_text)
732
+ except Exception:
733
+ logging.exception(f"[{job_id}] Parse-Fehler")
734
+ _job_finish(job_id, "Parse-Fehler: siehe Logs (Stacktrace).")
735
  return
736
 
737
  wh_email = payload.get("wholix_email") or payload.get("Wholix_email") or ""
738
  wh_pass = payload.get("wholix_passwort") or payload.get("wholix_password") or ""
739
  if not wh_email or not wh_pass:
740
+ _job_finish(job_id, "In der JSON-Payload fehlen wholix_email / wholix_passwort.")
741
  return
742
 
743
+ # Globale Felder exakt wie im Curl (1:1)
744
  filters = payload.get("filters") or {}
745
+ icp_text = payload.get("Produkt_und_Dienstleistungsbeschreibung") or ""
746
  checklist = payload.get("Checkliste_Landingpage") or ""
747
  signature = payload.get("Signatur") or ""
748
  cta = payload.get("CTA") or ""
749
  homepage_url = payload.get("icp_homepage_url") or ""
 
750
  raw_tag = payload.get("Wholic_tag") or payload.get("Wholix_tag") or "AI"
 
751
 
752
+ # Lead-Anzahl
753
  try:
754
+ n_leads = int(n_leads_ui)
755
  except Exception:
756
+ n_leads = 1
757
+ n_leads = max(1, min(n_leads, MAX_LEADS))
758
 
759
+ # Fortschritt: 1 (Login) + pro Lead 4 Schritte
760
+ total_steps = (n_leads * 4) + 1
761
  step = 0
762
 
763
+ # 1) Wholix Login
764
  step += 1
765
+ _job_emit(job_id, "→ Wholix Login …", progress=int(step / total_steps * 100))
766
  try:
767
  wh_token = wholix_login(wh_email, wh_pass)
768
+ except Exception:
769
+ logging.exception(f"[{job_id}] Wholix-Login fehlgeschlagen")
770
+ _job_finish(job_id, "Wholix-Login fehlgeschlagen (Details in Logs).")
771
  return
772
 
773
+ # ======= EXCLUDES LADEN / MERGEN =======================================
774
+ initial_excludes = set()
 
775
  try:
776
+ for x in (payload.get("exclude_ids") or []):
777
+ if x: initial_excludes.add(str(x).strip())
778
+ except Exception:
779
+ pass
 
780
 
781
+ _job_emit(job_id, f"→ Wholix-Excludes laden (Seed: {len(initial_excludes)})")
782
+ try:
783
+ wh_excludes = wholix_fetch_excludes(wh_token) # paginiert
784
+ for row in wh_excludes:
785
+ ex = (row.get("exclude_hash") or "").strip()
786
+ if ex:
787
+ initial_excludes.add(ex)
788
+ _job_emit(job_id, f" {len(wh_excludes)} Excludes aus Wholix, gesamt {len(initial_excludes)}")
789
+ except Exception:
790
+ logging.exception(f"[{job_id}] Wholix-Excludes laden fehlgeschlagen")
791
+ _job_emit(job_id, " Excludes laden übersprungen (Fehler).")
792
+
793
+ excludes: List[str] = list(initial_excludes)
794
+ excludes_set: Set[str] = set(initial_excludes)
795
+
796
+ # ---- Helper zum sicheren Auslesen echter Felder (ohne Raten) -----------
797
+ def pick(d: dict, *keys):
798
+ for k in keys:
799
+ if isinstance(d, dict) and k in d and d[k] not in (None, "", [], {}):
800
+ return d[k]
801
+ return None
802
+
803
+ def norm_tags(raw) -> dict:
804
+ """
805
+ Multi-Select für Wholix: {keys:[…],values:[…]}
806
+ - String "[AI]" -> "AI"
807
+ - String "AI" -> "AI"
808
+ - Liste ["AI","X"] -> entsprechend erweitern
809
+ """
810
+ if raw is None:
811
+ return {"keys": [], "values": []}
812
+ vals: List[str] = []
813
+ if isinstance(raw, str):
814
+ s = raw.strip()
815
+ # wenn JSON-Array als String übergeben wurde
816
+ if (s.startswith("[") and s.endswith("]")):
817
+ try:
818
+ arr = json.loads(s)
819
+ if isinstance(arr, list):
820
+ vals = [str(x).strip() for x in arr if str(x).strip()]
821
+ else:
822
+ vals = [s.strip("[] ").strip().strip("'\"")]
823
+ except Exception:
824
+ vals = [s.strip("[] ").strip().strip("'\"")]
825
+ else:
826
+ vals = [s]
827
+ elif isinstance(raw, (list, tuple, set)):
828
+ vals = [str(x).strip() for x in raw if str(x).strip()]
829
+ else:
830
+ vals = [str(raw).strip()]
831
+
832
+ # leere filtern
833
+ vals = [v for v in vals if v]
834
+ return {"keys": vals, "values": vals}
835
+
836
+ def norm_departments(v) -> str:
837
+ if v is None:
838
+ return None
839
+ if isinstance(v, (list, tuple, set)):
840
+ return ", ".join(str(x).strip() for x in v if str(x).strip()) or None
841
+ s = str(v).strip()
842
+ if s.startswith("[") and s.endswith("]"):
843
+ s = s[1:-1].strip().strip("'\"")
844
+ return s or None
845
 
846
def norm_url(u: Any) -> Optional[str]:
    """Normalize *u* into an absolute URL string, or return None.

    A value without a scheme is prefixed with "https://". The candidate
    is validated by parsing: it must yield both a scheme and a netloc.
    """
    text = str(u).strip() if u is not None else ""
    if not text:
        return None
    has_scheme = re.match(r"^[a-zA-Z][a-zA-Z0-9+\-.]*://", text) is not None
    candidate = text if has_scheme else f"https://{text}"
    try:
        from urllib.parse import urlparse
        parts = urlparse(candidate)
    except Exception:
        return None
    return candidate if parts.scheme and parts.netloc else None
860
 
861
    # Per-lead pipeline: suggest -> generate message -> store -> record row.
    # NOTE(review): i, step, total_steps, excludes/excludes_set, token_id,
    # filters, icp_text, cta, signature, checklist, homepage_url, raw_tag and
    # wh_token are defined earlier in the enclosing pipeline function
    # (outside this chunk) — confirm their initialization there.
    for i in range(1, n_leads + 1):
        try:
            # 2) Fetch a suggested lead from the backend.
            step += 1
            _job_emit(job_id, f"→ [{i}/{n_leads}] Lead vorschlagen … (excludes={len(excludes_set)})",
                      progress=int(step / total_steps * 100))
            try:
                lead = suggest_single_lead(token_id, filters, icp_text, excludes)
            except Exception:
                logging.exception(f"[{job_id}] Lead-Fehler")
                _job_emit(job_id, "❌ Lead-Fehler: siehe Logs")
                continue

            if not isinstance(lead, dict):
                _job_emit(job_id, "❌ Ungültige Lead-Struktur")
                continue

            person = (lead.get("person") or {})
            company = (lead.get("company") or {})
            combined_id = str(lead.get("combined_id") or "").strip()

            # Skip leads the backend already returned (dedup by combined_id).
            if combined_id and combined_id in excludes_set:
                _job_emit(job_id, f" ⚠️ Lead übersprungen (bereits excluded): {combined_id}")
                continue

            _job_emit(job_id, f" Lead: {person.get('first_name','?')} {person.get('last_name','?')} @ {company.get('name') or company.get('company_name','?')}")

            # 3) Generate the outreach message (using the real endpoint data).
            step += 1
            _job_emit(job_id, " → Nachricht generieren …", progress=int(step / total_steps * 100))
            items = [{"combined_id": combined_id, "company": company, "person": person}]
            variables = {
                "Produkt_und_Dienstleistungsbeschreibung": icp_text,
                "CTA": cta,
                "Signatur": signature,
                "Checkliste_Landingpage": checklist,
                "homepage_url": homepage_url,  # optional
                "tags": raw_tag,  # arrives as "[AI]" — normalized later via norm_tags
                "Touch_Point": "LinkedIn DM",  # optional
            }
            try:
                draft = email_generate_async(token_id, variables, items)
            except Exception:
                logging.exception(f"[{job_id}] Email-Generate-Fehler")
                _job_emit(job_id, "❌ Email-Generate-Fehler: siehe Logs")
                continue

            subj_raw = (draft.get("email") or {}).get("subject", "")
            body_raw = (draft.get("email") or {}).get("body", "")
            # Flattened copies are for single-line log/table display only;
            # the raw text is what gets stored.
            subj_flat = _flatten_text(subj_raw)
            body_flat = _flatten_text(body_raw)

            _job_emit(job_id, f" ✉️ Subject: {subj_flat}")
            _job_emit(job_id, f" ✉️ Message: {body_flat}")

            # 4) Store the contact in Wholix.
            step += 1
            _job_emit(job_id, " → Speichere in Wholix …", progress=int(step / total_steps * 100))

            email_to = str(person.get("email") or "").strip()
            if not email_to:
                # Without an e-mail address the record cannot be stored.
                _job_emit(job_id, " ⚠️ Keine E-Mail vorhanden – Speichern übersprungen.")
                stored_ok = False
            else:
                try:
                    # --- fill fields ONLY when actually present (no guessing) ----
                    firstname = pick(person, "first_name")
                    lastname = pick(person, "last_name")
                    job_title = pick(person, "job_title", "title", "position")
                    departments = norm_departments(pick(person, "departments"))
                    linkedin = pick(person, "linkedin_url")

                    # Contact address data: prefer person, fall back to company.
                    phonenumber = pick(person, "phone", "phonenumber") or pick(company, "phone", "company_phone", "phonenumber")
                    adress = pick(person, "adress", "address", "street") or pick(company, "adress", "address", "street")
                    city = pick(person, "city") or pick(company, "city")
                    postcode = pick(person, "postcode", "postal_code", "zip") or pick(company, "postcode", "postal_code", "zip")

                    company_name = pick(company, "name", "company_name")
                    company_url = norm_url(pick(company, "url", "website_url", "website", "domain"))

                    # Tags as a proper multi-select (dropdown value without brackets).
                    tags_ms = norm_tags(raw_tag)

                    record = {
                        "firstname": firstname,
                        "lastname": lastname,
                        "email": email_to,
                        "phonenumber": phonenumber,
                        "adress": adress,
                        "city": city,
                        "postcode": postcode,

                        "job_title": job_title,
                        "departments": departments,
                        "linkedin_url": linkedin,

                        "company_name": company_name,
                        "company_url": company_url,

                        "message_mail_subject": subj_raw or None,
                        "message_mail": body_raw or None,
                        "message_followup1": draft.get("followup1") or None,
                        "message_followup2": draft.get("followup2") or None,

                        # combined_id doubles as the dedup hash on the Wholix side.
                        "exclude_hash": combined_id or None,

                        "status_field": {"keys": ["Kontakt aufgenommen"], "values": ["Kontakt aufgenommen"]},
                        "tags": tags_ms,
                    }

                    # Log a short preview of the key fields (best-effort only).
                    try:
                        APP_LOG.info(f"Store-> firstname={firstname} lastname={lastname} email={email_to} job_title={job_title} dept={departments} phone={phonenumber} city={city} postcode={postcode} company_url={company_url}")
                        APP_LOG.info(f"Store-> tags={tags_ms}")
                    except Exception:
                        pass

                    store_res = wholix_store_contact(wh_token, record)
                    stored_ok = bool(store_res)
                except Exception:
                    logging.exception(f"[{job_id}] Wholix-Store-Fehler")
                    _job_emit(job_id, "❌ Wholix-Store-Fehler: siehe Logs")
                    stored_ok = False

            # Remember this lead so later suggestions exclude it —
            # even when storing failed, to avoid reprocessing.
            if combined_id:
                excludes_set.add(combined_id)
                excludes.append(combined_id)

            _job_emit(job_id, rows_append={
                "person": f"{person.get('first_name','')} {person.get('last_name','')}".strip(),
                "email": email_to,
                "company": company.get("name") or company.get("company_name") or "",
                "subject": subj_flat,
                "message": body_flat,  # body stays the body
                "stored_ok": stored_ok,
            })

            # 5) Wrap up this lead; cap at 99% until the whole job finishes.
            step += 1
            _job_emit(job_id, " ✓ Lead abgeschlossen.", progress=int(min(99, step / total_steps * 100)))

        except Exception:
            # Catch-all per lead so one failure never aborts the whole job.
            logging.exception(f"[{job_id}] Unerwarteter Fehler im Lead-Durchlauf")
            _job_emit(job_id, "❌ Unerwarteter Fehler – Details in Logs.")

    _job_emit(job_id, progress=100)
    _job_finish(job_id, None)
1009
+
1010
+
1011
+ # ================================ UI =======================================
1012
 
1013
  def build_ui():
1014
  with gr.Blocks(theme=gr.themes.Soft(), css="""
1015
  .logbox textarea { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace; font-size: 12.5px; line-height: 1.35; }
1016
  """) as demo:
1017
+ gr.Markdown("## Wholix Lead → Message → Store (rein asynchron, robust für lange Jobs)")
1018
+ gr.Markdown(
1019
+ "Füge deinen kompletten **`curl`** (mit `X-Token-Id` und JSON `--data-raw`) ein, wähle die Anzahl Leads und klicke **Start**. "
1020
+ "Die Verarbeitung läuft serverseitig weiter selbst wenn der Browser/Tab schließt. "
1021
+ "Mit **Aktualisieren** holst du den aktuellen Status ab."
1022
+ )
1023
 
1024
  with gr.Row():
1025
+ curl_in = gr.Textbox(
1026
+ label="curl Befehl",
1027
+ placeholder="curl -sS -N -X POST 'https://.../stream' -H 'X-Token-Id: ...' --data-raw '{...}'",
1028
+ lines=12
1029
+ )
1030
  with gr.Row():
1031
  count = gr.Dropdown(choices=[str(x) for x in LEAD_COUNTS], value="1", label="Anzahl Leads")
1032
 
 
1035
  poll_btn = gr.Button("🔄 Aktualisieren")
1036
 
1037
  with gr.Row():
1038
+ job_id_tb = gr.Textbox(label="Job-ID", interactive=True)
1039
 
1040
  with gr.Row():
1041
  status = gr.Textbox(label="Status / Log", lines=18, interactive=False, elem_classes=["logbox"])
 
1043
  progress = gr.Slider(label="Progress", minimum=0, maximum=100, value=0, interactive=False)
1044
  with gr.Row():
1045
  out = gr.Dataframe(
1046
+ headers=["person", "email", "company", "subject", "message", "stored_ok"],
1047
  label="Ergebnisse",
1048
  interactive=False,
1049
  wrap=True,
1050
  row_count=(0, "dynamic"),
1051
+ col_count=(6, "fixed"),
1052
  )
1053
 
 
 
1054
  def start_job(curl_text: str, n: str):
1055
  try:
1056
  n_int = int(n)
1057
  except Exception:
1058
  n_int = 1
1059
+ n_int = max(1, min(n_int, MAX_LEADS))
1060
+
1061
  job_id = str(uuid.uuid4())
1062
  _job_init(job_id)
1063
+ _job_emit(job_id, f"Job gestartet: {job_id}")
1064
 
 
1065
  EXEC.submit(run_pipeline_bg, job_id, curl_text, n_int)
1066
 
 
1067
  st = JOBS[job_id]
1068
  with st["lock"]:
1069
  log = "\n".join(st["log"])
1070
+ prog = int(st["progress"])
1071
  rows = st["rows"]
1072
  return log, prog, rows, job_id
1073
 
1074
  def poll_job(job_id: str):
1075
  st = JOBS.get(job_id)
1076
  if not st:
1077
+ return "Unbekannte oder abgelaufene Job-ID.", 0, []
1078
  with st["lock"]:
1079
  log = "\n".join(st["log"][-500:])
1080
+ prog = int(st["progress"])
1081
  rows = st["rows"]
1082
  return log, prog, rows
1083
 
1084
+ start_btn.click(start_job, inputs=[curl_in, count],
1085
+ outputs=[status, progress, out, job_id_tb])
1086
+ poll_btn.click(poll_job, inputs=[job_id_tb],
1087
+ outputs=[status, progress, out])
 
 
 
 
 
 
 
1088
 
1089
  return demo
1090
 
1091
+ # ============================== MAIN =======================================
1092
+
1093
if __name__ == "__main__":
    # Build the Gradio app and serve it on all interfaces; share=True opens
    # a public tunnel (needed when the host is not directly reachable).
    app = build_ui()
    app.launch(server_name="0.0.0.0", debug=True, share=True)