MichaelWelsch commited on
Commit
b7828ba
·
verified ·
1 Parent(s): 06f02f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +763 -59
app.py CHANGED
@@ -1,70 +1,774 @@
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
 
 
4
 
5
- def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
- max_tokens,
10
- temperature,
11
- top_p,
12
- hf_token: gr.OAuthToken,
 
 
13
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
  """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
-
19
- messages = [{"role": "system", "content": system_message}]
20
-
21
- messages.extend(history)
22
-
23
- messages.append({"role": "user", "content": message})
24
-
25
- response = ""
26
-
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- chatbot = gr.ChatInterface(
47
- respond,
48
- type="messages",
49
- additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
- ],
61
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
 
67
 
 
68
 
69
  if __name__ == "__main__":
70
  demo.launch()
 
1
+ import json
2
+ import math
3
+ import os
4
+ import random
5
+ import re
6
+ import time
7
+ import threading
8
+ import uuid
9
+ from concurrent.futures import ThreadPoolExecutor
10
+ from typing import Any, Dict, List, Optional, Tuple
11
+
12
  import gradio as gr
13
+ import requests
14
+
15
+ LEAD_BASE_URL = "https://wholixleadgenbackend.ngrok.io"
16
+ WHOLIX_BASE_URL = "https://api.wholix.ai"
17
+
18
+ # ============================== HTTP helper ===============================
19
+
20
+ class HTTPError(Exception):
21
+ def __init__(self, status: int, url: str, body: Any):
22
+ body_str = body if isinstance(body, str) else json.dumps(body, ensure_ascii=False)
23
+ super().__init__(f"HTTP {status} on {url}: {body_str[:500]}")
24
+ self.status = status
25
+ self.url = url
26
+ self.body = body
27
 
28
+ def _is_json_ct(ct: str) -> bool:
29
+ return bool(ct and ("application/json" in ct or ct.endswith("+json")))
30
 
31
+ def req(
32
+ url: str,
33
+ method: str = "GET",
34
+ headers: Optional[Dict[str, str]] = None,
35
+ json_body: Any = None,
36
+ data: Any = None,
37
+ timeout: float = 60.0,
38
+ retries: int = 2,
39
+ retry_on: Tuple[int, ...] = (408, 425, 429, 500, 502, 503, 504, 520, 522, 524),
40
+ session: Optional[requests.Session] = None,
41
  ):
42
+ s = session or requests.Session()
43
+ attempt = 0
44
+ while True:
45
+ try:
46
+ r = s.request(
47
+ method=method.upper(),
48
+ url=url,
49
+ headers=headers,
50
+ json=json_body,
51
+ data=data,
52
+ timeout=timeout,
53
+ )
54
+ ct = r.headers.get("content-type", "")
55
+ text = r.text or ""
56
+ if not r.ok:
57
+ body = text
58
+ try:
59
+ if _is_json_ct(ct):
60
+ body = r.json()
61
+ except Exception:
62
+ pass
63
+ raise HTTPError(r.status_code, url, body)
64
+ if text == "":
65
+ return None
66
+ if _is_json_ct(ct) or (not ct and text.strip().startswith(("{", "["))):
67
+ try:
68
+ return r.json()
69
+ except Exception:
70
+ repaired = text.replace("NaN", "null").replace("Infinity", "null").replace("-Infinity", "null")
71
+ return json.loads(repaired)
72
+ return text
73
+ except (requests.Timeout, requests.ConnectionError, HTTPError) as e:
74
+ code = e.status if isinstance(e, HTTPError) else 0
75
+ should = attempt < retries and (code in retry_on or code == 0)
76
+ if not should:
77
+ raise
78
+ attempt += 1
79
+ base = min(2.5, 0.4 * (2 ** attempt)) # full-jitter
80
+ delay = random.random() * base
81
+ time.sleep(delay)
82
+
83
+ # ============================= Curl parsing ===============================
84
+
85
+ CURL_DATA_RE = re.compile(
86
+ r"""--data(?:-raw)?\s+(?P<quote>['"])(?P<body>.*?)(?P=quote)""",
87
+ re.DOTALL
88
+ )
89
+ HDR_XTOKEN_RE = re.compile(r"""-H\s+(?P<q>['"])X-Token-Id:\s*(?P<val>[^'"]+)(?P=q)""", re.IGNORECASE)
90
+ HDR_AUTH_RE = re.compile(r"""-H\s+(?P<q>['"])Authorization:\s*Bearer\s+(?P<val>[^'"]+)(?P=q)""", re.IGNORECASE)
91
+
92
+ def parse_curl(curl_text: str) -> Tuple[str, Dict[str, Any]]:
93
  """
94
+ Extract X-Token-Id and JSON body from a curl command.
95
  """
96
+ token_id = ""
97
+ body_str = ""
98
+
99
+ m = HDR_XTOKEN_RE.search(curl_text)
100
+ if m:
101
+ token_id = m.group("val").strip()
102
+
103
+ # Some users might pass token in env; if absent, try to look for it inline as 'X-Token-Id: ...'
104
+ if not token_id:
105
+ hdr_inline = re.search(r"X-Token-Id:\s*([a-zA-Z0-9\-\._]+)", curl_text, re.IGNORECASE)
106
+ if hdr_inline:
107
+ token_id = hdr_inline.group(1).strip()
108
+
109
+ md = CURL_DATA_RE.search(curl_text)
110
+ if md:
111
+ body_str = md.group("body").strip()
112
+ else:
113
+ # fallback for -d '...'
114
+ md2 = re.search(r"-d\s+(?P<q>['\"])(?P<body>.*?)(?P=q)", curl_text, re.DOTALL)
115
+ if md2:
116
+ body_str = md2.group("body").strip()
117
+
118
+ if not body_str:
119
+ raise ValueError("Konnte den JSON Body aus dem curl nicht finden (erwarte --data-raw '...').")
120
+
121
+ # The curl example uses single quotes around valid JSON → parse directly
122
+ try:
123
+ payload = json.loads(body_str)
124
+ except json.JSONDecodeError as e:
125
+ # attempt to unescape smart single quotes or stray CRs
126
+ candidate = body_str.replace("\r\n", "\n").replace("\r", "\n")
127
+ payload = json.loads(candidate)
128
+
129
+ if not token_id:
130
+ # allow token in the payload via "X-Token-Id" or env var
131
+ env_token = os.getenv("X_TOKEN_ID", "").strip()
132
+ if env_token:
133
+ token_id = env_token
134
+
135
+ if not token_id:
136
+ raise ValueError("Konnte keinen X-Token-Id Header im curl (oder env X_TOKEN_ID) finden.")
137
+
138
+ return token_id, payload
139
+
140
+ # ============================ Wholix helpers ==============================
141
+
142
+ def wholix_login(email: str, password: str) -> str:
143
+ url = f"{WHOLIX_BASE_URL}/api/v1/auth/login"
144
+ res = req(url, method="POST", json_body={"email": email, "password": password}, timeout=30)
145
+ token = (res or {}).get("token")
146
+ if not token:
147
+ raise RuntimeError("Wholix-Login fehlgeschlagen: kein token in der Response.")
148
+ return token
149
+
150
+ def wholix_fetch_excludes(token: str, module: str = "Contacts", per_page: int = 500, max_pages: int = 100) -> List[str]:
151
+ url = f"{WHOLIX_BASE_URL}/api/v1/table-object-data/fetch-paginated-results"
152
+ headers = {"Authorization": f"Bearer {token}"}
153
+ exclude_hashes: List[str] = []
154
+ seen = set()
155
+ last_page = math.inf
156
+ session = requests.Session()
157
+
158
+ for page in range(1, max_pages + 1):
159
+ if page > last_page:
160
+ break
161
+ payload = {"module": module, "action": "search", "page": page, "per_page": per_page}
162
+ res = req(url, method="POST", headers=headers, json_body=payload, timeout=60, session=session) or {}
163
+ data_block = res.get("data") or {}
164
+ rows = data_block.get("data") or data_block.get("results") or []
165
+ last_page = data_block.get("last_page", page)
166
+ if not rows:
167
+ break
168
+ for row in rows:
169
+ h = str(row.get("exclude_hash") or row.get("excludeHash") or row.get("exclude_id") or "").strip()
170
+ if not h or h in seen:
171
+ continue
172
+ seen.add(h)
173
+ exclude_hashes.append(h)
174
+ return exclude_hashes
175
+
176
+ # ======================= Lead + Email generation =========================
177
+
178
+ def _pick_single(payload: Any) -> Dict[str, Any]:
179
+ def unwrap(x):
180
+ if isinstance(x, dict) and x.get("ok") and any(k in x for k in ("result", "data", "content")):
181
+ return x.get("result") or x.get("data") or x.get("content")
182
+ return x
183
+ p = unwrap(payload)
184
+ if isinstance(p, dict) and "items" in p and isinstance(p["items"], list) and p["items"]:
185
+ return p["items"][0]
186
+ if isinstance(p, list) and p:
187
+ return p[0]
188
+ if isinstance(p, dict):
189
+ return p
190
+ raise RuntimeError("Unerwartete Lead-Payload-Form.")
191
+
192
+ def lead_suggest(token_id: str, filters: Dict[str, Any], icp_text: str, exclude_ids: List[str]) -> Dict[str, Any]:
193
+ start_url = f"{LEAD_BASE_URL}/lead/suggest?async=1"
194
+ headers = {"X-Token-Id": token_id, "Prefer": "respond-async"}
195
+ start_body = {"filters": filters, "icp_text": icp_text or "", "exclude_ids": exclude_ids or []}
196
+
197
+ try:
198
+ start = req(start_url, method="POST", headers=headers, json_body=start_body, timeout=60)
199
+ except HTTPError:
200
+ start = None
201
+
202
+ if not start or not start.get("ok") or not start.get("job_id"):
203
+ sync = req(f"{LEAD_BASE_URL}/lead/suggest", method="POST", headers={"X-Token-Id": token_id}, json_body=start_body, timeout=180)
204
+ return _pick_single(sync)
205
+
206
+ job_id = start["job_id"]
207
+ t0 = time.time()
208
+ max_wait = 10 * 60
209
+ attempt = 0
210
+ while True:
211
+ st = req(f"{LEAD_BASE_URL}/lead/suggest/status/{job_id}?t={int(time.time()*1000)}",
212
+ method="GET", headers={"X-Token-Id": token_id}, timeout=60)
213
+ if st and st.get("status") == "done" and st.get("ok"):
214
+ return _pick_single(st)
215
+ if time.time() - t0 > max_wait:
216
+ raise TimeoutError("Timeout beim Warten auf Lead-Ergebnis.")
217
+ attempt += 1
218
+ time.sleep(min(2.4, 0.6 * attempt))
219
+
220
+ def _ci_get(d: Dict[str, Any], key: str) -> Any:
221
+ if key in d and str(d[key]).strip() != "":
222
+ return d[key]
223
+ k = next((k for k in d.keys() if k.lower() == key.lower() and str(d[k]).strip() != ""), None)
224
+ return d.get(k) if k else None
225
+
226
+ def _normalize_draft_result(raw: Any) -> Dict[str, Any]:
227
+ r = raw
228
+ if isinstance(r, str):
229
+ try:
230
+ r = json.loads(r)
231
+ except Exception:
232
+ r = {"Text": r}
233
+
234
+ if isinstance(r, dict) and r.get("ok") and any(k in r for k in ("result", "content", "data")):
235
+ r = r.get("result") or r.get("content") or r.get("data") or r
236
+
237
+ if isinstance(r, dict) and "message" in r and isinstance(r["message"], dict):
238
+ r = r["message"]
239
+ elif isinstance(r, dict) and "results" in r and isinstance(r["results"], list) and r["results"]:
240
+ cand = r["results"][0]
241
+ if isinstance(cand, dict) and "message" in cand and isinstance(cand["message"], dict):
242
+ r = cand["message"]
243
+ else:
244
+ r = cand
245
+
246
+ email_obj = r.get("email") if isinstance(r, dict) else {}
247
+ messages_obj = r.get("messages") if isinstance(r, dict) else {}
248
+
249
+ def pick(*keys):
250
+ for src in (r, email_obj, messages_obj):
251
+ if isinstance(src, dict):
252
+ for k in keys:
253
+ v = _ci_get(src, k)
254
+ if v is not None:
255
+ return v
256
+ return ""
257
+
258
+ subject = (pick("Betreff", "subject", "email_subject") or "Kurzer Austausch").strip()
259
+ body = (pick("Text", "text", "Body", "body", "email_body", "content") or "").strip()
260
+ fu1 = (pick("FollowUp1", "followup1", "LinkedIn", "linkedin", "li") or "").strip()
261
+ fu2 = (pick("FollowUp2", "followup2", "Facebook", "facebook", "fb") or "").strip()
262
+
263
+ if not body:
264
+ body = "Hallo,\n\nkurzer Kontaktaufbau – gerne Austausch, ob wir unterstützen können.\n\nBeste Grüße"
265
+
266
+ to = ""
267
+ if isinstance(email_obj, dict):
268
+ to = str(email_obj.get("to") or "").strip()
269
+
270
+ return {
271
+ "email": {"to": to, "subject": subject, "body": body},
272
+ "followup1": fu1,
273
+ "followup2": fu2,
274
+ }
275
+
276
+ def email_generate(token_id: str, variables: Dict[str, Any], items: List[Dict[str, Any]]) -> Dict[str, Any]:
277
+ if not items:
278
+ raise ValueError("items ist erforderlich und muss mind. 1 Lead enthalten.")
279
+ body = {**variables, "items": items, "item_index": 0}
280
+
281
+ start = None
282
+ try:
283
+ start = req(f"{LEAD_BASE_URL}/email/generate?async=1",
284
+ method="POST",
285
+ headers={"X-Token-Id": token_id, "Prefer": "respond-async"},
286
+ json_body=body, timeout=60)
287
+ except HTTPError:
288
+ pass
289
+
290
+ if not start or not start.get("ok") or not start.get("job_id"):
291
+ res = req(f"{LEAD_BASE_URL}/email/generate",
292
+ method="POST",
293
+ headers={"X-Token-Id": token_id},
294
+ json_body=body, timeout=180)
295
+ return _normalize_draft_result(res)
296
+
297
+ job_id = start["job_id"]
298
+ t0 = time.time()
299
+ max_wait = 10 * 60
300
+ attempt = 0
301
+ while True:
302
+ st = req(f"{LEAD_BASE_URL}/email/generate/status/{job_id}?t={int(time.time()*1000)}",
303
+ method="GET", headers={"X-Token-Id": token_id}, timeout=60)
304
+ if st and st.get("ok") and st.get("status") == "done":
305
+ return _normalize_draft_result(st)
306
+ if time.time() - t0 > max_wait:
307
+ raise TimeoutError("Timeout bei der E-Mail-Generierung.")
308
+ attempt += 1
309
+ time.sleep(min(2.4, 0.6 * attempt))
310
+
311
+ # ======== DROP-IN: JS-kompatible Wholix-Mapping & Store (1:1) ========
312
+
313
+ ALLOWED_FIELDS = {
314
+ "firstname",
315
+ "lastname",
316
+ "email", # Pflichtfeld
317
+ "adress", # (sic) exakt so geschrieben
318
+ "city",
319
+ "postcode",
320
+ "phonenumber",
321
+ "job_title",
322
+ "departments", # Text, KEINE Liste
323
+ "linkedin_url",
324
+ "company_name",
325
+ "company_url",
326
+ "message_mail",
327
+ "message_mail_subject",
328
+ "message_followup1",
329
+ "message_followup2",
330
+ "exclude_hash",
331
+ "status_field", # Multi-Select: { keys:[], values:[] }
332
+ "tags", # Multi-Select: { keys:[], values:[] }
333
+ }
334
+
335
+ def filter_wholix_contact_fields(obj: dict) -> dict:
336
+ """
337
+ 1:1 wie JS filterWholixContactFields:
338
+ - nur erlaubte Felder
339
+ - email immer getrimmt
340
+ - Strings getrimmt; leere Werte raus
341
+ """
342
+ out = {}
343
+ for k, v in (obj or {}).items():
344
+ if k not in ALLOWED_FIELDS:
345
+ continue
346
+ if k == "email":
347
+ out["email"] = str(v or "").strip()
348
+ continue
349
+ if v is None:
350
+ continue
351
+ s = v.strip() if isinstance(v, str) else v
352
+ if isinstance(s, str) and s == "":
353
+ continue
354
+ out[k] = s
355
+ return out
356
+
357
+ def normalize_wholix_dropdown(val):
358
+ """
359
+ 1:1 wie JS normalizeWholixDropdown:
360
+ akzeptiert {keys,values}, Array oder String
361
+ → normalisiert zu {keys:[...], values:[...]} oder None
362
+ """
363
+ if isinstance(val, dict) and ("keys" in val or "values" in val):
364
+ ks = [str(x) for x in (val.get("keys") or []) if str(x).strip()]
365
+ vs = [str(x) for x in (val.get("values") or []) if str(x).strip()]
366
+ if not vs and ks:
367
+ vs = ks[:]
368
+ return {"keys": ks, "values": vs} if (ks or vs) else None
369
+ if isinstance(val, list):
370
+ ks = [str(x) for x in val if str(x).strip()]
371
+ return {"keys": ks, "values": ks} if ks else None
372
+ if isinstance(val, str) and val.strip():
373
+ s = val.strip()
374
+ return {"keys": [s], "values": [s]}
375
+ return None
376
+
377
+ def map_to_wholix_record(lead: dict, draft: dict, tag_text: str = "AI") -> dict:
378
+ """
379
+ JS mapToWholixContactsPayload → Python
380
+ """
381
+ p = (lead or {}).get("person") or {}
382
+ c = (lead or {}).get("company") or {}
383
+ m = (lead or {}).get("messages") or {}
384
+ ctx = (lead or {}).get("context") or {}
385
+
386
+ email = str(p.get("email") or "").strip()
387
+ if not email:
388
+ e = ValueError("E-Mail-Adresse fehlt – Wholix benötigt 'email' als Pflichtfeld.")
389
+ e.name = "ValidationError"
390
+ raise e
391
+
392
+ depts_raw = p.get("departments")
393
+ if isinstance(depts_raw, list):
394
+ departments = ", ".join([str(x) for x in depts_raw if str(x).strip()]) or None
395
+ else:
396
+ departments = (str(depts_raw) if depts_raw is not None else None)
397
+
398
+ company_url = (
399
+ str(c.get("url") or c.get("website") or c.get("domain") or "").strip()
400
+ or str(ctx.get("url") or "").strip()
401
+ or None
402
+ )
403
+
404
+ mail = m.get("email") if isinstance(m, dict) else {}
405
+ mail_body = (mail.get("body") or "").strip() if isinstance(mail, dict) else ""
406
+ mail_subject = (mail.get("subject") or "").strip() if isinstance(mail, dict) else ""
407
+ followup1 = m.get("followup1")
408
+ followup2 = m.get("followup2")
409
+
410
+ street_parts = [c.get("street_name"), c.get("street_number")]
411
+ street = " ".join([str(x).strip() for x in street_parts if str(x or "").strip()]) or ""
412
+ street = street or None
413
+
414
+ payload = {
415
+ "firstname": p.get("first_name") or None,
416
+ "lastname": p.get("last_name") or None,
417
+ "email": email,
418
+ "adress": street, # (sic)
419
+ "city": c.get("city") or None,
420
+ "postcode": c.get("zip_code") or None,
421
+ "phonenumber": p.get("phone") or None,
422
+ "job_title": p.get("job_title") or None,
423
+ "departments": departments,
424
+ "linkedin_url": p.get("linkedin_url") or None,
425
+
426
+ "company_name": c.get("name") or None,
427
+ "company_url": company_url,
428
+
429
+ "message_mail": mail_body or None,
430
+ "message_mail_subject": mail_subject or None,
431
+ "message_followup1": followup1 or None,
432
+ "message_followup2": followup2 or None,
433
+
434
+ "exclude_hash": (lead.get("exclude_hash") or None),
435
+
436
+ "status_field": { "keys": ["Kontakt aufgenommen"], "values": ["Kontakt aufgenommen"] },
437
+ "tags": { "keys": [str(tag_text).strip("[]") or "AI"], "values": [str(tag_text).strip("[]") or "AI"] },
438
+ }
439
+
440
+ normalized = filter_wholix_contact_fields(payload)
441
+
442
+ if "status_field" in normalized:
443
+ fixed = normalize_wholix_dropdown(normalized["status_field"])
444
+ if fixed: normalized["status_field"] = fixed
445
+ else: normalized.pop("status_field", None)
446
+
447
+ if "tags" in normalized:
448
+ fixed = normalize_wholix_dropdown(normalized["tags"])
449
+ if fixed: normalized["tags"] = fixed
450
+ else: normalized.pop("tags", None)
451
+
452
+ return normalized
453
+
454
+ def wholix_store_contact(token: str, record: dict, module: str = "Contacts") -> dict:
455
+ """
456
+ JS wholixStoreContact exakt nachgebildet (mit Fallbacks)
457
+ """
458
+ email = str((record or {}).get("email") or "").strip()
459
+ if not email:
460
+ e = ValueError("Wholix: 'email' ist Pflichtfeld und darf nicht leer sein.")
461
+ e.name = "ValidationError"
462
+ raise e
463
+
464
+ normalized = filter_wholix_contact_fields({ **record, "email": email })
465
+
466
+ if "status_field" in normalized:
467
+ fixed = normalize_wholix_dropdown(normalized["status_field"])
468
+ if fixed: normalized["status_field"] = fixed
469
+ else: normalized.pop("status_field", None)
470
+
471
+ if "tags" in normalized:
472
+ fixed = normalize_wholix_dropdown(normalized["tags"])
473
+ if fixed: normalized["tags"] = fixed
474
+ else: normalized.pop("tags", None)
475
+
476
+ url = f"{WHOLIX_BASE_URL}/api/v1/table-object-data/store-objects"
477
+ headers = {"Authorization": f"Bearer {token}"}
478
+
479
+ body = {"module": module, "action": "store", "data": [normalized]}
480
+ try:
481
+ return req(url, method="POST", headers=headers, json_body=body, timeout=60)
482
+ except Exception as e1:
483
+ from requests import HTTPError as _ReqHTTPError
484
+ status = getattr(e1, "status", None) or (e1.response.status_code if isinstance(e1, _ReqHTTPError) and e1.response else None)
485
+ msg = ""
486
+ try:
487
+ msg = json.dumps(getattr(e1, "body", "") or "").lower()
488
+ except Exception:
489
+ pass
490
+ if status != 422 or not any(k in (msg or "") for k in ("status_field", "tags")):
491
+ raise
492
+
493
+ try:
494
+ legacy = dict(normalized)
495
+ def to_value(v):
496
+ if isinstance(v, dict) and "keys" in v and v["keys"]:
497
+ return v["keys"][0]
498
+ if isinstance(v, dict) and "values" in v and v["values"]:
499
+ return v["values"][0]
500
+ if isinstance(v, list) and v:
501
+ return v[0]
502
+ if isinstance(v, str) and v.strip():
503
+ return v.strip()
504
+ return None
505
+
506
+ if "status_field" in legacy and legacy["status_field"]:
507
+ legacy["status_field"] = {"value": to_value(legacy["status_field"])}
508
+ if "tags" in legacy and legacy["tags"]:
509
+ legacy["tags"] = {"value": to_value(legacy["tags"])}
510
+
511
+ body2 = {"module": module, "action": "store", "data": [legacy]}
512
+ return req(url, method="POST", headers=headers, json_body=body2, timeout=60)
513
+ except Exception as e2:
514
+ from requests import HTTPError as _ReqHTTPError
515
+ status2 = getattr(e2, "status", None) or (e2.response.status_code if isinstance(e2, _ReqHTTPError) and e2.response else None)
516
+ msg2 = ""
517
+ try:
518
+ msg2 = json.dumps(getattr(e2, "body", "") or "").lower()
519
+ except Exception:
520
+ pass
521
+ if status2 != 422 or not any(k in (msg2 or "") for k in ("status_field", "tags")):
522
+ raise
523
+
524
+ stripped = dict(normalized)
525
+ stripped.pop("status_field", None)
526
+ stripped.pop("tags", None)
527
+ body3 = {"module": module, "action": "store", "data": [stripped]}
528
+ return req(url, method="POST", headers=headers, json_body=body3, timeout=60)
529
+ # ======== END DROP-IN =====================================================
530
+
531
+
532
+ # ====================== Background-Jobs (robust UI) =======================
533
+
534
+ LEAD_COUNTS = [1, 2, 3, 5, 10, 20, 50, 100, 200]
535
+
536
+ # In-Memory Job Store
537
+ EXEC = ThreadPoolExecutor(max_workers=16)
538
+ JOBS: Dict[str, Dict[str, Any]] = {} # job_id -> state dict
539
+
540
+ def _job_init(job_id: str):
541
+ JOBS[job_id] = {
542
+ "log": [], # List[str]
543
+ "progress": 0.0, # 0..1
544
+ "rows": [], # Ergebnisse (List[Dict])
545
+ "done": False,
546
+ "error": None, # str | None
547
+ "lock": threading.RLock(),
548
+ }
549
+
550
+ def _job_emit(job_id: str, msg: str = "", progress: Optional[float] = None, rows: Optional[List[Dict[str, Any]]] = None):
551
+ st = JOBS.get(job_id)
552
+ if not st:
553
+ return
554
+ with st["lock"]:
555
+ if msg:
556
+ st["log"].append(msg)
557
+ st["log"] = st["log"][-1000:] # cap
558
+ if progress is not None:
559
+ st["progress"] = max(0.0, min(1.0, progress))
560
+ if rows is not None:
561
+ st["rows"] = rows[-1000:] # cap
562
+
563
+ def _job_finish(job_id: str, error: Optional[str] = None):
564
+ st = JOBS.get(job_id)
565
+ if not st:
566
+ return
567
+ with st["lock"]:
568
+ st["error"] = error
569
+ st["done"] = True
570
+ if error:
571
+ st["log"].append(f"❌ {error}")
572
+ else:
573
+ st["log"].append("✅ Alles erledigt.")
574
+
575
+ def run_pipeline_bg(job_id: str, curl_text: str, n_leads: int):
576
+ """
577
+ 1:1 wie vorherige run_pipeline(), aber ohne yield – stattdessen Status ins JOBS-Store schreiben.
578
+ """
579
+ results: List[Dict[str, Any]] = []
580
+
581
+ def log(msg: str):
582
+ # kleine Bequemlichkeit, auto-emit mit aktuellem Fortschritt
583
+ st = JOBS.get(job_id, {})
584
+ prog = st.get("progress", 0.0)
585
+ _job_emit(job_id, msg=msg, progress=prog, rows=results)
586
+
587
+ # parse curl
588
+ try:
589
+ token_id, payload = parse_curl(curl_text)
590
+ except Exception as e:
591
+ _job_finish(job_id, error=f"Parse-Fehler: {e}")
592
+ return
593
+
594
+ wh_email = payload.get("wholix_email") or payload.get("Wholix_email") or ""
595
+ wh_pass = payload.get("wholix_passwort") or payload.get("wholix_password") or ""
596
+ if not wh_email or not wh_pass:
597
+ _job_finish(job_id, error="In der JSON-Payload fehlen wholix_email / wholix_passwort.")
598
+ return
599
+
600
+ filters = payload.get("filters") or {}
601
+ icp_text = payload.get("Produkt_und_Dienstleistungsbeschreibung") or payload.get("icp_text") or ""
602
+ checklist = payload.get("Checkliste_Landingpage") or ""
603
+ signature = payload.get("Signatur") or ""
604
+ cta = payload.get("CTA") or ""
605
+ homepage_url = payload.get("icp_homepage_url") or ""
606
+ tag_text = payload.get("Wholic_tag") or payload.get("Wholix_tag") or "AI"
607
+
608
+ total_steps = max(1, n_leads) * 4 + 2 # login + excludes + (lead + email + store)*N
609
+ step = 0
610
+
611
+ # 1) Login
612
+ step += 1
613
+ _job_emit(job_id, msg="→ Logge bei Wholix ein …", progress=step/total_steps, rows=results)
614
+ try:
615
+ wh_token = wholix_login(wh_email, wh_pass)
616
+ except Exception as e:
617
+ _job_finish(job_id, error=f"Wholix-Login: {e}")
618
+ return
619
+
620
+ # 2) Excludes
621
+ step += 1
622
+ _job_emit(job_id, msg="→ Lade Exclude-Hashes …", progress=step/total_steps, rows=results)
623
+ try:
624
+ excludes = wholix_fetch_excludes(wh_token)
625
+ _job_emit(job_id, msg=f" {len(excludes)} Exclude-Hashes gefunden.")
626
+ except Exception as e:
627
+ _job_emit(job_id, msg=f"⚠️ Excludes konnten nicht geladen werden: {e} — fahre ohne fort.")
628
+ excludes = []
629
+
630
+ for i in range(1, n_leads + 1):
631
+ # 3) Lead
632
+ step += 1
633
+ _job_emit(job_id, msg=f"→ [{i}/{n_leads}] Fordere Lead an …", progress=step/total_steps, rows=results)
634
+ try:
635
+ lead = lead_suggest(token_id, filters, icp_text, excludes)
636
+ except Exception as e:
637
+ _job_emit(job_id, msg=f"❌ Lead-Fehler: {e}", progress=step/total_steps, rows=results)
638
+ continue
639
+
640
+ person = (lead or {}).get("person") or {}
641
+ company = (lead or {}).get("company") or {}
642
+ _job_emit(job_id, msg=f" Lead: {person.get('first_name','?')} {person.get('last_name','?')} @ {company.get('name') or company.get('company_name','?')}")
643
+
644
+ # 4) Email draft
645
+ step += 1
646
+ _job_emit(job_id, msg=" → Generiere Nachricht …", progress=step/total_steps, rows=results)
647
+ items = [{"combined_id": lead.get("combined_id"), "company": company, "person": person}]
648
+ variables = {
649
+ "Touch_Point": "LinkedIn DM",
650
+ **({"homepage_url": homepage_url} if homepage_url else {}),
651
+ "Produkt_und_Dienstleistungsbeschreibung": icp_text,
652
+ "Checkliste_Landingpage": checklist,
653
+ "CTA": cta,
654
+ "Signatur": signature,
655
+ }
656
+ try:
657
+ draft = email_generate(token_id, variables, items)
658
+ except Exception as e:
659
+ _job_emit(job_id, msg=f"❌ Email-Generate-Fehler: {e}", progress=step/total_steps, rows=results)
660
+ continue
661
+
662
+ # 5) Store
663
+ step += 1
664
+ _job_emit(job_id, msg=" → Speichere Kontakt + Nachricht in Wholix …", progress=step/total_steps, rows=results)
665
+ try:
666
+ record = map_to_wholix_record(lead, draft, tag_text=tag_text)
667
+ store_res = wholix_store_contact(wh_token, record)
668
+ except Exception as e:
669
+ _job_emit(job_id, msg=f"❌ Wholix-Store-Fehler: {e}", progress=step/total_steps, rows=results)
670
+ continue
671
+
672
+ if lead.get("exclude_hash"):
673
+ excludes.append(lead["exclude_hash"])
674
+
675
+ result_row = {
676
+ "person": f"{person.get('first_name','')} {person.get('last_name','')}".strip(),
677
+ "email": (draft.get("email") or {}).get("to") or person.get("email") or "",
678
+ "company": company.get("name") or company.get("company_name") or "",
679
+ "subject": (draft.get("email") or {}).get("subject") or "",
680
+ "stored_ok": bool(store_res),
681
+ }
682
+ results.append(result_row)
683
+
684
+ # 6) small completion step for this iteration
685
+ step += 1
686
+ _job_emit(job_id, msg=" ✓ Fertig für diesen Lead.", progress=step/total_steps, rows=results)
687
+
688
+ _job_finish(job_id, error=None)
689
+
690
+ # ================================ Gradio UI ===============================
691
+
692
+ def build_ui():
693
+ with gr.Blocks(theme=gr.themes.Soft(), css="""
694
+ .logbox textarea { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace; font-size: 12.5px; line-height: 1.35; }
695
+ """) as demo:
696
+ gr.Markdown("## Wholix Lead → Message → Store (robust, Background-Job + Polling)")
697
+ gr.Markdown("Füge unten deinen **gesamten `curl`** ein (inkl. `X-Token-Id` Header und JSON `--data-raw`), wähle die Lead-Anzahl und klicke **Start**. "
698
+ "Die Verarbeitung läuft als Hintergrund-Job weiter – selbst wenn der Browser kurz offline ist. "
699
+ "Mit „Aktualisieren“ holst du den aktuellen Status ab.")
700
+
701
+ with gr.Row():
702
+ curl_in = gr.Textbox(label="curl Befehl", placeholder="curl -sS -N -X POST 'https://.../stream' -H 'X-Token-Id: ...' --data-raw '{...}'", lines=12)
703
+ with gr.Row():
704
+ count = gr.Dropdown(choices=[str(x) for x in LEAD_COUNTS], value="1", label="Anzahl Leads")
705
+
706
+ with gr.Row():
707
+ start_btn = gr.Button("🚀 Start", variant="primary")
708
+ poll_btn = gr.Button("🔄 Aktualisieren")
709
+
710
+ with gr.Row():
711
+ job_id_tb = gr.Textbox(label="Job-ID", interactive=False)
712
+
713
+ with gr.Row():
714
+ status = gr.Textbox(label="Status / Log", lines=18, interactive=False, elem_classes=["logbox"])
715
+ with gr.Row():
716
+ progress = gr.Slider(label="Progress", minimum=0, maximum=100, value=0, interactive=False)
717
+ with gr.Row():
718
+ out = gr.Dataframe(
719
+ headers=["person", "email", "company", "subject", "stored_ok"],
720
+ label="Ergebnisse",
721
+ interactive=False,
722
+ wrap=True,
723
+ row_count=(0, "dynamic"),
724
+ col_count=(5, "fixed"),
725
+ )
726
+
727
+ # ----- Handlers -----
728
+
729
+ def start_job(curl_text: str, n: str):
730
+ try:
731
+ n_int = int(n)
732
+ except Exception:
733
+ n_int = 1
734
+ job_id = str(uuid.uuid4())
735
+ _job_init(job_id)
736
+ _job_emit(job_id, msg=f"Job gestartet: {job_id}")
737
+
738
+ # Start background work
739
+ EXEC.submit(run_pipeline_bg, job_id, curl_text, n_int)
740
+
741
+ # Return initial state
742
+ st = JOBS[job_id]
743
+ with st["lock"]:
744
+ log = "\n".join(st["log"])
745
+ prog = int(round(st["progress"] * 100))
746
+ rows = st["rows"]
747
+ return log, prog, rows, job_id
748
+
749
+ def poll_job(job_id: str):
750
+ st = JOBS.get(job_id)
751
+ if not st:
752
+ return "Unbekannte Job-ID.", 0, []
753
+ with st["lock"]:
754
+ log = "\n".join(st["log"][-500:])
755
+ prog = int(round(st["progress"] * 100))
756
+ rows = st["rows"]
757
+ return log, prog, rows
758
+
759
+ start_btn.click(
760
+ start_job,
761
+ inputs=[curl_in, count],
762
+ outputs=[status, progress, out, job_id_tb]
763
+ )
764
 
765
+ poll_btn.click(
766
+ poll_job,
767
+ inputs=[job_id_tb],
768
+ outputs=[status, progress, out]
769
+ )
770
 
771
+ return demo
772
 
773
  if __name__ == "__main__":
774
  demo.launch()