kidpro2002 commited on
Commit
615e194
·
1 Parent(s): 52833c5

🚀 Fusion: NexusCRM + Lead Gen Pro (100% Cloud Docker)

Browse files
.dockerignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore local development artifacts
2
+ **/__pycache__
3
+ **/*.pyc
4
+ **/venv
5
+ **/*.db
6
+ **/*.bat
7
+ **/*.log
8
+ **/*.zip
9
+ **/node_modules
10
+ .git
11
+ .gitignore
12
+ .dockerignore
Dockerfile ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NexusCRM + Lead Gen Pro: Unified Cloud Dockerfile
# Optimized for Hugging Face Spaces (Docker SDK)

# Using Python 3.10 slim for a balance of speed and stability
FROM python:3.10-slim

# Prevent Python from writing .pyc files and enable unbuffered logging
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    # Hugging Face Spaces port is 7860
    PORT=7860 \
    # Playwright headless requirements
    PLAYWRIGHT_BROWSERS_PATH=/app/pw-browsers

# Install system dependencies required for Chromium and Playwright.
# FIX: the original listed "librandr2", which does not exist in Debian —
# apt-get aborts and the image never builds. The correct package is "libxrandr2".
RUN apt-get update && apt-get install -y --no-install-recommends \
    wget \
    gnupg \
    libnss3 \
    libnspr4 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libcups2 \
    libdrm2 \
    libxkbcommon0 \
    libxcomposite1 \
    libxdamage1 \
    libxext6 \
    libxfixes3 \
    libxrandr2 \
    libgbm1 \
    libpango-1.0-0 \
    libcairo2 \
    libasound2 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies (including apify-client for the scraping engine)
RUN pip install --no-cache-dir \
    flask \
    flask-cors \
    playwright \
    supabase \
    pandas \
    python-dotenv \
    gunicorn \
    apify-client

# Install the Chromium browser; install-deps double-checks the system libraries above
RUN playwright install chromium
RUN playwright install-deps chromium

# Preparation of the working environment
# The entire folder (app.py, static/, lead_gen_pro/) is copied to /app
COPY . .

# Create a data directory for temporary SQLite files if it doesn't exist
RUN mkdir -p /app/lead_gen_pro/data && chmod 777 /app/lead_gen_pro/data

# Expose the HF port
EXPOSE 7860

# Use gunicorn for a production-ready server in the cloud
# We bind to 0.0.0.0 because it's a container
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "8", "--timeout", "0", "app:app"]
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: Nexus CRM
3
- emoji: 📊
4
  colorFrom: blue
5
- colorTo: purple
6
- sdk: static
 
7
  app_port: 7860
8
  ---
9
 
10
- # Nexus CRM
11
 
12
- CRM + Lead Gen SaaS - Gestiona clientes y leads en la nube.
 
1
  ---
2
+ title: NexusCRM
3
+ emoji: 💼
4
  colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: docker
7
+ sdk_version: 4.0.0
8
  app_port: 7860
9
  ---
10
 
11
+ # NexusCRM
12
 
13
+ SaaS CRM System with Supabase backend
app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import subprocess
4
+ import threading
5
+ import queue
6
+ import time
7
+ from flask import Flask, send_from_directory, request, jsonify, Response
8
+ from flask_cors import CORS
9
+ from dotenv import load_dotenv
10
+
11
+ # Load environment variables (from Hugging Face Secrets)
12
+ load_dotenv()
13
+
14
+ # Ensure lead_gen_pro is in the path so we can import its modules
15
+ sys.path.append(os.path.join(os.getcwd(), 'lead_gen_pro'))
16
+
17
+ app = Flask(__name__, static_folder='static')
18
+ CORS(app)
19
+
20
+ # --- Configuration ---
21
+ PORT = int(os.environ.get("PORT", 7860))
22
+ BASE_DIR = os.path.join(os.getcwd(), 'lead_gen_pro')
23
+ # In Docker, we use the local path for the temporary DB
24
+ DB_PATH = os.path.join(BASE_DIR, "leads_doe.db")
25
+ MAIN_PRO = os.path.join(BASE_DIR, "main_pro.py")
26
+
27
+ # Log queue for SSE (Server-Sent Events)
28
+ log_queue = queue.Queue()
29
+ active_process = None
30
+ process_lock = threading.Lock()
31
+
32
+ # --- Unified UI Serving ---
33
+
34
@app.route("/")
def index():
    """Serve the NexusCRM single-page frontend (static/index.html)."""
    return send_from_directory(app.static_folder, "index.html")
38
+
39
@app.route("/<path:path>")
def static_proxy(path):
    """Serve any other asset (JS, CSS, images) from the static folder."""
    return send_from_directory(app.static_folder, path)
43
+
44
+ # --- Backend API Endpoints ---
45
+
46
@app.route("/api/health")
def health():
    """Report server liveness and whether the local SQLite file is present."""
    payload = {
        "status": "online",
        "environment": "Hugging Face Cloud",
        "db_access": os.path.exists(DB_PATH),
    }
    return jsonify(payload)
54
+
55
@app.route("/api/scrape", methods=["POST"])
def scrape():
    """Bridge to the Lead Gen Pro scraping engine.

    Launches main_pro.py as a subprocess and streams its output into
    ``log_queue`` (consumed by the ``/api/stream`` SSE endpoint).
    Returns HTTP 409 if a scraping run is already in progress.
    """
    global active_process

    data = request.get_json(force=True) or {}

    # 'python' is the interpreter inside the Docker container
    cmd = [
        "python", MAIN_PRO,
        "--pipeline",
        "--niche", data.get("niche", "Real Estate"),
        "--limit", str(data.get("limit", 20)),
        "--type", data.get("lead_type", "both")
    ]

    # Geographic settings (optional)
    if data.get("country"): cmd += ["--country", data["country"]]
    if data.get("state"): cmd += ["--state", data["state"]]
    if data.get("city"): cmd += ["--city", data["city"]]

    # FIX: spawn the subprocess while still holding the lock. The original
    # assigned `active_process` from the worker thread *after* releasing the
    # lock, so two concurrent POSTs could both pass the "already running"
    # check and launch duplicate scrapers.
    with process_lock:
        if active_process and active_process.poll() is None:
            return jsonify({"error": "A scraping process is already running in the cloud."}), 409

        log_queue.put("🚀 [CLOUD] Iniciando motor de scraping en Hugging Face...\n")
        proc = subprocess.Popen(
            cmd,
            cwd=BASE_DIR,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,  # line-buffered so logs stream promptly
            env={**os.environ, "PYTHONUNBUFFERED": "1"}
        )
        active_process = proc

    def _stream_worker():
        # Forward subprocess output line by line to the SSE queue.
        for line in proc.stdout:
            log_queue.put(line)

        proc.wait()
        log_queue.put(f"\n✅ [CLOUD] Proceso finalizado (Código: {proc.returncode})\n")

        # Automatic Cloud Sync (Supabase) — best effort; failures are
        # reported through the log stream instead of crashing the worker.
        try:
            log_queue.put("♻️ [CLOUD] Sincronizando resultados con Supabase Cloud...\n")
            # Dynamic import to ensure it uses the cloud environment
            from l3_execution.supabase_sync import sync_all_leads_to_supabase
            stats = sync_all_leads_to_supabase()
            log_queue.put(f"📊 [SYNC] Sincronización exitosa: {stats}\n")
        except Exception as e:
            log_queue.put(f"❌ [SYNC-ERROR] Error en sincronización cloud: {str(e)}\n")

        log_queue.put("[END]")

    # Run in background so the HTTP request returns immediately
    threading.Thread(target=_stream_worker, daemon=True).start()
    return jsonify({"status": "launched", "location": "Hugging Face Space"})
120
+
121
@app.route("/api/stream")
def stream():
    """SSE endpoint that pushes scraping logs to the CRM UI in real time."""
    def generate():
        # Greet the client, then drain the shared log queue until [END].
        yield "data: [CONECTADO] Servidor Hugging Face listo...\n\n"
        while True:
            try:
                entry = log_queue.get(timeout=60)
            except queue.Empty:
                # Keep-alive ping so proxies don't drop the idle connection.
                yield "data: [PING]\n\n"
                continue
            if entry == "[END]":
                yield "data: [END]\n\n"
                return
            yield f"data: {entry.rstrip()}\n\n"

    return Response(generate(), mimetype="text/event-stream")
137
+
138
@app.route("/api/sync", methods=["POST"])
def manual_sync():
    """Trigger an on-demand push of all local leads to Supabase."""
    try:
        from l3_execution.supabase_sync import sync_all_leads_to_supabase
        result = sync_all_leads_to_supabase()
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
    return jsonify({"status": "success", "data": result})
147
+
148
@app.route("/api/stats")
def stats():
    """Get lead generation statistics from the local container DB.

    Returns zeroed counters when the SQLite file does not exist yet
    (i.e. before the first scraping run). Any other failure is reported
    as a 500 with the error message.
    """
    import sqlite3
    try:
        if not os.path.exists(DB_PATH):
            return jsonify({
                "total": 0,
                "leads_30d": 0,
                "pending_whatsapp": 0,
                "by_niche": {}
            })

        conn = sqlite3.connect(DB_PATH)
        conn.row_factory = sqlite3.Row
        # FIX: close the connection even when a query raises — the original
        # leaked the handle on any SQL error.
        try:
            cur = conn.cursor()

            # Total leads ever stored
            cur.execute("SELECT COUNT(*) FROM leads")
            total = cur.fetchone()[0]

            # Leads created in the last 30 days
            cur.execute("SELECT COUNT(*) FROM leads WHERE created_at > date('now', '-30 days')")
            total_30d = cur.fetchone()[0]

            # Leads with a phone number but no WhatsApp message sent yet
            cur.execute("SELECT COUNT(*) FROM leads WHERE (whatsapp_enviado IS NULL OR whatsapp_enviado = 0) AND (telefono IS NOT NULL OR telefono_formateado IS NOT NULL)")
            pending_wa = cur.fetchone()[0]

            # Top 5 niches by lead count
            cur.execute("SELECT nicho, COUNT(*) as count FROM leads GROUP BY nicho ORDER BY count DESC LIMIT 5")
            by_niche = {row['nicho']: row['count'] for row in cur.fetchall() if row['nicho']}
        finally:
            conn.close()

        return jsonify({
            "total": total,
            "leads_30d": total_30d,
            "pending_whatsapp": pending_wa,
            "by_niche": by_niche
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
191
+
192
if __name__ == "__main__":
    # Direct execution is only a local-testing fallback; the Docker image
    # serves the app through gunicorn instead (see the Dockerfile CMD).
    app.run(host="0.0.0.0", port=PORT)
lead_gen_pro/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "apify_token": "YOUR_APIFY_TOKEN",
3
+ "serpapi_key": "YOUR_SERPAPI_KEY",
4
+ "sender_name": "NexusCRM Butler",
5
+ "niches": {
6
+ "real_estate": {
7
+ "search_queries": [
8
+ "real estate agent in {city}",
9
+ "realtor in {city}",
10
+ "luxury homes in {city}",
11
+ "property management {city}"
12
+ ],
13
+ "templates": {
14
+ "email": {
15
+ "subject": "Colaboración para {name} en {city}",
16
+ "body": "Hola {name},\n\nVi tu excelente perfil inmobiliario en {city}. Estoy ayudando a realtors a automatizar su captación de leads.\n\n¿Te interesaría una breve llamada?\n\nSaludos,\n{sender_name}"
17
+ },
18
+ "whatsapp": "¡Hola {name}! Vi tu trabajo en {city}. Tengo una propuesta para automatizar tu prospección inmobiliaria. ¿Hablamos?"
19
+ }
20
+ },
21
+ "insurance": {
22
+ "search_queries": [
23
+ "insurance agent in {city}",
24
+ "seguros en {city}",
25
+ "broker de seguros {city}"
26
+ ],
27
+ "templates": {
28
+ "email": {
29
+ "subject": "Idea for your insurance business in {city}",
30
+ "body": "Hi {name},\n\nI noticed your insurance agency in {city}. We help brokers get more leads. Are you interested?\n\nBest,\n{sender_name}"
31
+ },
32
+ "whatsapp": "¡Hola {name}! Noté tu éxito en el sector seguros en {city}. Tengo una propuesta para automatizar tu prospección. ¿Hablamos?"
33
+ }
34
+ }
35
+ },
36
+ "automation_rules": [
37
+ {
38
+ "if": {"field": "quality_score", "operator": ">=", "value": 8},
39
+ "then": {"type": "mark_priority", "value": 3}
40
+ },
41
+ {
42
+ "if": {"field": "niche", "operator": "==", "value": "real_estate"},
43
+ "then": {"type": "notify", "message": "New Real Estate lead detected!"}
44
+ }
45
+ ]
46
+ }
lead_gen_pro/database.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Lead Generation Database Module
3
+ ================================
4
+ Manages SQLite database for storing and tracking leads.
5
+ """
6
+
7
+ import sqlite3
8
+ import os
9
+ from datetime import datetime
10
+ from typing import Optional, List, Dict, Any
11
+
12
+ DB_PATH = os.path.join(os.path.dirname(__file__), "leads.db")
13
+
14
+
15
def get_connection() -> sqlite3.Connection:
    """Open a connection to the leads DB; rows behave like dicts via sqlite3.Row."""
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
20
+
21
+
22
def init_db() -> None:
    """Create the leads/outreach_log tables and their indexes if missing.

    Safe to call repeatedly: every statement uses IF NOT EXISTS.
    """
    schema_statements = (
        """
        CREATE TABLE IF NOT EXISTS leads (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL,
            phone TEXT,
            email TEXT,
            address TEXT,
            website TEXT,
            rating REAL,
            reviews_count INTEGER,
            source TEXT DEFAULT 'google_maps',
            niche TEXT NOT NULL,
            country TEXT NOT NULL,
            city TEXT,
            status TEXT DEFAULT 'new',
            whatsapp_sent BOOLEAN DEFAULT 0,
            instagram_sent BOOLEAN DEFAULT 0,
            replied BOOLEAN DEFAULT 0,
            notes TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS outreach_log (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            lead_id INTEGER NOT NULL,
            channel TEXT NOT NULL,
            message_template TEXT,
            sent_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            response TEXT,
            FOREIGN KEY (lead_id) REFERENCES leads(id)
        )
        """,
        # Indexes for the filters used by get_leads / get_pending_outreach.
        "CREATE INDEX IF NOT EXISTS idx_leads_status ON leads(status)",
        "CREATE INDEX IF NOT EXISTS idx_leads_niche ON leads(niche)",
        "CREATE INDEX IF NOT EXISTS idx_leads_country ON leads(country)",
    )

    conn = get_connection()
    cursor = conn.cursor()
    for statement in schema_statements:
        cursor.execute(statement)
    conn.commit()
    conn.close()
    print(f"[OK] Base de datos inicializada en: {DB_PATH}")
76
+
77
+
78
def add_lead(lead_data: Dict[str, Any]) -> int:
    """Insert one lead row and return its new primary key.

    Args:
        lead_data: Mapping with the lead's fields; missing keys become NULL
            (except ``source``, which defaults to 'google_maps').

    Returns:
        The autoincrement id of the inserted row.
    """
    columns = ("name", "phone", "email", "address", "website", "rating",
               "reviews_count", "source", "niche", "country", "city", "notes")

    values = []
    for col in columns:
        if col == "source":
            values.append(lead_data.get("source", "google_maps"))
        else:
            values.append(lead_data.get(col))

    conn = get_connection()
    cursor = conn.cursor()
    cursor.execute(
        """
        INSERT INTO leads (name, phone, email, address, website, rating,
                           reviews_count, source, niche, country, city, notes)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        values,
    )
    new_id = cursor.lastrowid
    conn.commit()
    conn.close()
    return new_id
114
+
115
+
116
def get_leads(
    niche: Optional[str] = None,
    country: Optional[str] = None,
    status: Optional[str] = None,
    limit: int = 100
) -> List[Dict[str, Any]]:
    """Fetch leads, newest first, with optional equality filters.

    Args:
        niche: Restrict to one niche (e.g. real_estate, insurance).
        country: Restrict to one country (e.g. usa, venezuela).
        status: Restrict to one status (new, contacted, replied, converted).
        limit: Maximum number of rows returned.

    Returns:
        A list of plain dicts, one per lead row.
    """
    clauses = []
    params: List[Any] = []
    # Build the WHERE clause only from the filters actually supplied.
    for column, value in (("niche", niche), ("country", country), ("status", status)):
        if value:
            clauses.append(f"AND {column} = ?")
            params.append(value)

    sql = " ".join(["SELECT * FROM leads WHERE 1=1", *clauses,
                    "ORDER BY created_at DESC LIMIT ?"])
    params.append(limit)

    conn = get_connection()
    cursor = conn.cursor()
    cursor.execute(sql, params)
    leads = [dict(row) for row in cursor.fetchall()]
    conn.close()
    return leads
158
+
159
+
160
def update_lead_status(lead_id: int, status: str, notes: Optional[str] = None) -> None:
    """Set a lead's status (and optionally its notes), bumping updated_at."""
    if notes:
        sql = ("UPDATE leads SET status = ?, notes = ?, "
               "updated_at = CURRENT_TIMESTAMP WHERE id = ?")
        params = (status, notes, lead_id)
    else:
        # Leave existing notes untouched when none are provided.
        sql = ("UPDATE leads SET status = ?, "
               "updated_at = CURRENT_TIMESTAMP WHERE id = ?")
        params = (status, lead_id)

    conn = get_connection()
    conn.cursor().execute(sql, params)
    conn.commit()
    conn.close()
180
+
181
+
182
def mark_outreach_sent(lead_id: int, channel: str, template: str) -> None:
    """Flag a lead as contacted via *channel* and append an outreach_log row."""
    flag_sql = {
        "whatsapp": "UPDATE leads SET whatsapp_sent = 1, status = 'contacted' WHERE id = ?",
        "instagram": "UPDATE leads SET instagram_sent = 1, status = 'contacted' WHERE id = ?",
    }

    conn = get_connection()
    cursor = conn.cursor()

    # Channels other than whatsapp/instagram only get logged, not flagged.
    if channel in flag_sql:
        cursor.execute(flag_sql[channel], (lead_id,))

    cursor.execute(
        """
        INSERT INTO outreach_log (lead_id, channel, message_template)
        VALUES (?, ?, ?)
        """,
        (lead_id, channel, template),
    )
    conn.commit()
    conn.close()
201
+
202
+
203
def get_pending_outreach(channel: str, niche: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]:
    """Return up to *limit* 'new' leads not yet contacted via *channel*."""
    sql_parts = ["SELECT * FROM leads WHERE status = 'new'"]
    params: List[Any] = []

    if channel == "whatsapp":
        # WhatsApp outreach requires a phone number.
        sql_parts.append("AND whatsapp_sent = 0 AND phone IS NOT NULL")
    elif channel == "instagram":
        sql_parts.append("AND instagram_sent = 0")

    if niche:
        sql_parts.append("AND niche = ?")
        params.append(niche)

    sql_parts.append("LIMIT ?")
    params.append(limit)

    conn = get_connection()
    cursor = conn.cursor()
    cursor.execute(" ".join(sql_parts), params)
    pending = [dict(row) for row in cursor.fetchall()]
    conn.close()
    return pending
228
+
229
+
230
def get_stats() -> Dict[str, Any]:
    """Aggregate counters over the leads table: totals, breakdowns, outreach."""
    conn = get_connection()
    cursor = conn.cursor()

    def scalar(sql: str) -> int:
        # Run a single-value COUNT query.
        cursor.execute(sql)
        return cursor.fetchone()[0]

    def grouped(sql: str) -> Dict[str, int]:
        # Run a GROUP BY query and return {key: count}.
        cursor.execute(sql)
        return dict(cursor.fetchall())

    stats: Dict[str, Any] = {}
    stats["total_leads"] = scalar("SELECT COUNT(*) FROM leads")
    stats["by_niche"] = grouped("SELECT niche, COUNT(*) FROM leads GROUP BY niche")
    stats["by_country"] = grouped("SELECT country, COUNT(*) FROM leads GROUP BY country")
    stats["by_status"] = grouped("SELECT status, COUNT(*) FROM leads GROUP BY status")
    stats["whatsapp_sent"] = scalar("SELECT COUNT(*) FROM leads WHERE whatsapp_sent = 1")
    stats["instagram_sent"] = scalar("SELECT COUNT(*) FROM leads WHERE instagram_sent = 1")

    conn.close()
    return stats
257
+
258
+
259
if __name__ == "__main__":
    # Running this module directly bootstraps the schema and prints a summary.
    init_db()

    stats = get_stats()
    print(f"\n[ESTADISTICAS] Estadisticas de la Base de Datos:")
    print(f" Total leads: {stats['total_leads']}")
    print(f" Por nicho: {stats['by_niche']}")
    print(f" Por país: {stats['by_country']}")
lead_gen_pro/l1_directive/outreach_sop.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # L1 Directive: Outreach SOP
2
+ # ===========================
3
+ # Este documento define las reglas para mensajes de WhatsApp y Email.
4
+
5
+ ## Objetivo
6
+ Contactar leads de manera profesional y personalizada para generar oportunidades de negocio.
7
+
8
+ ## Canales
9
+ 1. **WhatsApp Business** - Para contacto directo e inmediato
10
+ 2. **Email (Instantly.ai)** - Para seguimiento profesional
11
+
12
+ ## Reglas Generales
13
+ - Nunca enviar más de 20 mensajes por hora
14
+ - Esperar mínimo 30 segundos entre cada mensaje
15
+ - Horario de envío: 9am-6pm zona del destinatario
16
+ - Personalizar SIEMPRE con nombre y ciudad
17
+
18
+ ---
19
+
20
+ ## Templates de WhatsApp
21
+
22
+ ### Real Estate - USA
23
+ ```
24
+ Hi {name}! I came across your profile while researching top agents in {city}.
25
+ I help real estate professionals automate their lead generation.
26
+ Would you be interested in a quick 5-min chat?
27
+ ```
28
+
29
+ ### Real Estate - Venezuela
30
+ ```
31
+ ¡Hola {name}! Vi tu perfil mientras buscaba los mejores agentes en {city}.
32
+ Ayudo a profesionales inmobiliarios a automatizar su captación de clientes.
33
+ ¿Te interesaría una llamada rápida de 5 minutos?
34
+ ```
35
+
36
+ ### Insurance - USA
37
+ ```
38
+ Hi {name}! I noticed your insurance agency in {city} has great reviews.
39
+ I work with insurance professionals to help them get more qualified leads.
40
+ Would you be open to a brief conversation?
41
+ ```
42
+
43
+ ### Insurance - Venezuela
44
+ ```
45
+ ¡Hola {name}! Noté que tu agencia de seguros en {city} tiene excelentes reseñas.
46
+ Trabajo con profesionales de seguros para ayudarles a conseguir más clientes.
47
+ ¿Estarías abierto a una breve conversación?
48
+ ```
49
+
50
+ ---
51
+
52
+ ## Templates de Email
53
+
54
+ ### Subject Lines
55
+ - "Quick question about {city} real estate"
56
+ - "Idea for your {niche} business"
57
+ - "Saw your great reviews in {city}"
58
+
59
+ ### Email Body (Real Estate)
60
+ ```
61
+ Hi {name},
62
+
63
+ I came across your profile while researching top real estate professionals in {city}.
64
+
65
+ I help agents like yourself automate their lead generation process,
66
+ often resulting in 2-3x more qualified leads without extra effort.
67
+
68
+ Would you be open to a 10-minute call this week to explore if this could work for you?
69
+
70
+ Best regards,
71
+ [Your Name]
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Seguimiento
77
+ - Si no hay respuesta en 48h, enviar follow-up por el otro canal
78
+ - Máximo 3 intentos de contacto por lead
79
+ - Marcar lead como "no interesado" después de 3 intentos sin respuesta
lead_gen_pro/l1_directive/scraping_sop.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # L1 Directive: Scraping SOP
2
+ # ===========================
3
+ # Este documento define las reglas para el agente de scraping.
4
+
5
+ ## Objetivo
6
+ Extraer leads de negocios (bienes raíces y seguros) de Google Maps y LinkedIn usando Apify.
7
+
8
+ ## Parámetros de Entrada
9
+ - **niche**: `real_estate` | `insurance`
10
+ - **country**: `usa` | `venezuela`
11
+ - **cities**: Lista de ciudades objetivo
12
+ - **limit**: Máximo de leads por ciudad (default: 20)
13
+
14
+ ## Queries Predefinidas
15
+
16
+ ### Real Estate
17
+ | País | Queries |
18
+ |------|---------|
19
+ | USA | "real estate agent", "realtor", "property broker" |
20
+ | Venezuela | "inmobiliaria", "agente inmobiliario", "bienes raices" |
21
+
22
+ ### Insurance
23
+ | País | Queries |
24
+ |------|---------|
25
+ | USA | "insurance agent", "insurance broker", "life insurance" |
26
+ | Venezuela | "corredor de seguros", "agente de seguros" |
27
+
28
+ ## Reglas de Ejecución
29
+ 1. Ejecutar scraping en horario laboral (9am-6pm zona objetivo)
30
+ 2. Máximo 100 leads por ejecución para evitar rate limits
31
+ 3. Esperar 5 segundos entre cada request a Apify
32
+ 4. Guardar todos los leads en database antes de siguiente paso
33
+
34
+ ## Campos Requeridos por Lead
35
+ - `name` (obligatorio)
36
+ - `phone` (obligatorio para WhatsApp)
37
+ - `email` (deseable para Instantly)
38
+ - `address`
39
+ - `website`
40
+ - `rating`
41
+ - `reviews_count`
42
+
43
+ ## Errores y Retry
44
+ - Si Apify retorna error, esperar 60 segundos y reintentar
45
+ - Máximo 3 reintentos por query
46
+ - Loggear todos los errores en `logs/scraping.log`
lead_gen_pro/l2_orchestration/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # __init__.py for l2_orchestration module
lead_gen_pro/l2_orchestration/orchestrator.py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ L2 Orchestration: Main Workflow Orchestrator
3
+ =============================================
4
+ Orquestador principal que coordina todo el flujo de lead generation.
5
+ Implementa la capa L2 del framework DOE (lógica sin código de ejecución).
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ import json
11
+ import time
12
+ from datetime import datetime
13
+ from typing import Optional, List, Dict, Any
14
+
15
+ # Add parent to path
16
+ sys.path.insert(0, os.path.dirname(__file__))
17
+
18
+ # Import L3 Execution modules
19
+ from l3_execution.database_doe import init_db, add_lead, get_leads, get_pending_outreach, mark_outreach, get_stats
20
+ from l3_execution.apify_scraper import scrape_leads
21
+ from l3_execution.enrichment import enrich_leads
22
+ from l3_execution.instantly_sender import batch_send_emails, demo_email_preview
23
+
24
+ # Import old WhatsApp module for compatibility
25
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
26
+ from whatsapp_sender import batch_generate_links
27
+
28
+
29
def load_config() -> Dict[str, Any]:
    """Read config.json sitting next to this file; return {} when absent."""
    path = os.path.join(os.path.dirname(__file__), "config.json")
    if not os.path.exists(path):
        return {}
    with open(path, "r") as fh:
        return json.load(fh)
36
+
37
+
38
def print_header(title: str):
    """Print *title* framed between '=' rules for readable console sections."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(f" {title}")
    print(f"{rule}\n")
43
+
44
+
45
def run_full_pipeline(
    niche: str = "real_estate",
    country: str = "usa",
    location: Optional[str] = None,
    limit_per_city: int = 10,
    demo: bool = False
) -> Dict[str, Any]:
    """
    Run the four-stage lead generation pipeline end to end:
    scrape -> enrich -> persist -> prepare outreach.

    Args:
        niche: 'real_estate' or 'insurance'
        country: 'usa' or 'venezuela'
        location: Optional specific city/state/ZIP override.
        limit_per_city: Leads to scrape per city.
        demo: Demo-mode flag (accepted for CLI compatibility).

    Returns:
        Dict with a 'steps' list (per-stage counters) and final 'stats'.
    """
    config = load_config()
    results: Dict[str, Any] = {"steps": [], "stats": {}}

    print_header(f"🚀 PIPELINE DOE: {niche.upper()} / {country.upper()}")

    # --- Stage 1: scrape raw leads from Google Maps ---
    print("📍 PASO 1: Scraping de Google Maps")
    print("-" * 40)

    leads = scrape_leads(
        niche=niche,
        country=country,
        location=location,
        limit_per_city=limit_per_city
    )
    results["steps"].append({"step": "scraping", "leads_found": len(leads)})

    if not leads:
        print("⚠️ No se encontraron leads. Verifica tu APIFY_TOKEN.")
        return results

    # --- Stage 2: enrich (emails, extra data) ---
    print("\n📍 PASO 2: Enriquecimiento de datos")
    print("-" * 40)

    enriched_leads = enrich_leads(leads, delay=0.5)
    results["steps"].append({
        "step": "enrichment",
        "enriched_count": sum(1 for lead in enriched_leads if lead.get("email"))
    })

    # --- Stage 3: persist to SQLite ---
    print("\n📍 PASO 3: Guardando en base de datos")
    print("-" * 40)

    added_count = 0
    for lead in enriched_leads:
        try:
            lead_id = add_lead(lead)
            added_count += 1
            print(f" ✅ ID:{lead_id} - {lead.get('name')}")
        except Exception as e:
            print(f" ⚠️ Error: {e}")

    results["steps"].append({"step": "database", "added_count": added_count})

    # --- Stage 4: queue outreach ---
    print("\n📍 PASO 4: Preparando outreach")
    print("-" * 40)

    pending_wa = get_pending_outreach("whatsapp", limit=10)
    pending_email = get_pending_outreach("email", limit=10)

    print(f" 📱 WhatsApp pendientes: {len(pending_wa)}")
    print(f" 📧 Email pendientes: {len(pending_email)}")

    if pending_wa:
        wa_links = batch_generate_links(pending_wa)
        results["steps"].append({"step": "whatsapp_links", "count": len(wa_links)})

    # --- Summary ---
    print_header("📊 RESUMEN DEL PIPELINE")

    stats = get_stats()
    results["stats"] = stats

    print(f" Total leads en DB: {stats.get('total_leads', 0)}")
    print(f" Con email: {stats.get('with_email', 0)}")
    print(f" Enriquecidos: {stats.get('enriched', 0)}")
    print(f" WhatsApp enviados: {stats.get('whatsapp_sent', 0)}")
    print(f" Emails enviados: {stats.get('email_sent', 0)}")

    return results
154
+
155
+
156
def run_scraping_only(
    niche: str = "real_estate",
    country: str = "usa",
    location: Optional[str] = None,
    limit: int = 10
) -> int:
    """Scrape and enrich leads, then store them — no outreach step.

    Args:
        niche: 'real_estate' or 'insurance'
        country: 'usa' or 'venezuela'
        location: Optional specific city/state/ZIP.
        limit: Leads per city.

    Returns:
        Number of leads actually inserted into the database.
    """
    print_header(f"🔍 SCRAPING: {niche.upper()} / {country.upper()} ({location or 'default cities'})")

    leads = scrape_leads(niche=niche, country=country, location=location, limit_per_city=limit)

    if not leads:
        print("⚠️ No se encontraron leads")
        return 0

    # Enrich
    enriched = enrich_leads(leads)

    # Save to DB
    added = 0
    for lead in enriched:
        try:
            add_lead(lead)
            added += 1
        except Exception as e:
            # FIX: the original swallowed insertion errors silently
            # ("except ... pass"); report them like run_full_pipeline does.
            print(f" ⚠️ Error guardando lead {lead.get('name')}: {e}")

    print(f"\n✅ {added} leads agregados a la base de datos")
    return added
185
+
186
+
187
def run_whatsapp_outreach(limit: int = 10) -> List[Dict]:
    """Build wa.me links for pending leads and mark them as contacted."""
    print_header("📱 OUTREACH: WhatsApp")

    pending = get_pending_outreach("whatsapp", limit=limit)
    if not pending:
        print("✅ No hay leads pendientes de WhatsApp")
        return []

    print(f"📋 {len(pending)} leads pendientes\n")

    results = batch_generate_links(pending)

    for idx, outcome in enumerate(results):
        if not outcome.get("success"):
            continue
        lead = pending[idx]
        # Persist the generated link so the lead is not re-queued.
        mark_outreach(lead["id"], "whatsapp", outcome["link"])

        print(f"\n📱 {lead.get('name')}")
        print(f" 📞 {outcome.get('phone')}")
        print(f" 🔗 {outcome.get('link')[:60]}...")

    return results
212
+
213
+
214
def run_email_outreach(limit: int = 10, preview_only: bool = True) -> List[Dict]:
    """Preview (default) or actually send Instantly.ai emails to pending leads."""
    print_header("📧 OUTREACH: Email (Instantly.ai)")

    pending = get_pending_outreach("email", limit=limit)
    if not pending:
        print("✅ No hay leads pendientes de email")
        return []

    print(f"📋 {len(pending)} leads pendientes\n")

    sender_name = load_config().get("sender", {}).get("name", "Your Name")

    if not preview_only:
        return batch_send_emails(pending, sender_name=sender_name)

    # Preview mode: show the first few rendered emails without sending.
    print("🔍 MODO PREVIEW (no se envían emails)\n")
    for lead in pending[:3]:
        demo_email_preview(lead, sender_name)
    return []
236
+
237
+
238
def show_dashboard():
    """Render a plain-text summary of leads, outreach and opportunities."""
    print_header("📊 DASHBOARD")

    stats = get_stats()

    print(f"📈 Total Leads: {stats.get('total_leads', 0)}")
    print(f"📧 Con Email: {stats.get('with_email', 0)}")
    print(f"✨ Enriquecidos: {stats.get('enriched', 0)}")

    print(f"\n📤 Outreach:")
    print(f" WhatsApp: {stats.get('whatsapp_sent', 0)}")
    print(f" Email: {stats.get('email_sent', 0)}")
    print(f" Respuestas: {stats.get('replied', 0)}")

    print(f"\n🏷️ Por Nicho:")
    for niche_name, total in stats.get("by_niche", {}).items():
        print(f" {niche_name}: {total}")

    print(f"\n🌎 Por País:")
    for country_name, total in stats.get("by_country", {}).items():
        print(f" {country_name}: {total}")

    opportunities = stats.get("opportunities", {})
    if opportunities.get("count", 0) > 0:
        print(f"\n💰 Oportunidades: {opportunities['count']} (${opportunities.get('total_value', 0):,.2f})")
264
+
265
+
266
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Lead Generation DOE Orchestrator")
    # Action flags — the first matching flag below wins.
    parser.add_argument("--pipeline", action="store_true", help="Run full pipeline")
    parser.add_argument("--scrape", action="store_true", help="Run scraping only")
    parser.add_argument("--outreach-wa", action="store_true", help="Generate WhatsApp links")
    parser.add_argument("--outreach-email", action="store_true", help="Send/preview emails")
    parser.add_argument("--dashboard", action="store_true", help="Show dashboard")
    parser.add_argument("--streamlit", action="store_true", help="Launch Streamlit dashboard")
    # Shared options
    parser.add_argument("--niche", choices=["real_estate", "insurance"], default="real_estate")
    parser.add_argument("--country", choices=["usa", "venezuela"], default="usa")
    parser.add_argument("--location", type=str, help="Specific City, State or ZIP")
    parser.add_argument("--limit", type=int, default=10)
    parser.add_argument("--demo", action="store_true", help="Demo mode")

    args = parser.parse_args()

    # Schema must exist before any action runs.
    init_db()

    if args.pipeline:
        run_full_pipeline(
            niche=args.niche,
            country=args.country,
            location=args.location,
            limit_per_city=args.limit,
            demo=args.demo
        )
    elif args.scrape:
        run_scraping_only(
            niche=args.niche,
            country=args.country,
            location=args.location,
            limit=args.limit
        )
    elif args.outreach_wa:
        run_whatsapp_outreach(limit=args.limit)
    elif args.outreach_email:
        run_email_outreach(limit=args.limit, preview_only=True)
    elif args.dashboard:
        show_dashboard()
    elif args.streamlit:
        import subprocess
        dashboard_path = os.path.join(os.path.dirname(__file__), "dashboard", "app.py")
        subprocess.run(["streamlit", "run", dashboard_path])
    else:
        print("Comandos disponibles:")
        print(" --pipeline Ejecutar pipeline completo")
        print(" --scrape Solo scraping")
        print(" --outreach-wa Generar links WhatsApp")
        print(" --outreach-email Preview emails")
        print(" --dashboard Dashboard texto")
        print(" --streamlit Dashboard web")
lead_gen_pro/l2_orchestration/orchestrator_pro.py ADDED
@@ -0,0 +1,486 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ L2 Orchestration: Pro Workflow Orchestrator - COMPLETAMENTE COMENTADO
3
+ ======================================================================
4
+
5
+ Este es el orquestador avanzado (Capa L2) que coordina todo el flujo de trabajo
6
+ del sistema de generación de leads. Implementa el pipeline completo con múltiples
7
+ etapas de procesamiento.
8
+
9
+ AUTOR: Lead Generation PRO System
10
+ FECHA: 2025
11
+ VERSION: 2.0
12
+
13
+ ARQUITECTURA:
14
+ - Nivel: L2 (Orquestación)
15
+ - Patrón: Pipeline de procesamiento
16
+ - Integra: Scrapers L3, Scoring L3, Database L3
17
+
18
+ FLUJO DEL PIPELINE:
19
+ 1. Scraping B2B (Empresas) → LinkedIn, Google Maps, YouTube, etc.
20
+ 2. Scraping B2C (Consumidores) → Twitter, Reddit, YouTube, Facebook
21
+ 3. Higienización → Deduplicación de leads
22
+ 4. Scoring → Calificación de leads (MQL/SQL)
23
+ 5. Almacenamiento → SQLite database
24
+ 6. Webhooks → Notificación a sistemas externos (Make.com/n8n)
25
+
26
+ DEPENDENCIAS:
27
+ - l3_execution.apify_pro_scraper: Para scraping B2B
28
+ - l3_execution.consumer_intent_scraper: Para scraping B2C
29
+ - l3_execution.lead_scoring: Para calificación de leads
30
+ - l3_execution.database_doe: Para persistencia de datos
31
+ """
32
+
33
+ # =============================================================================
34
+ # IMPORTS
35
+ # =============================================================================
36
+
37
+ import os # Operaciones con sistema de archivos
38
+ import sys # Manipulación del path de Python
39
+ import json # Lectura/escritura de JSON (configuración)
40
+ import requests # Para enviar webhooks HTTP
41
+ from datetime import datetime # Manejo de fechas y timestamps
42
+ from typing import Optional, List, Dict, Any # Type hints para claridad
43
+
44
+ # =============================================================================
45
+ # CONFIGURACIÓN DE PATHS
46
+ # =============================================================================
47
+
48
+ # Añadir paths para módulos pro
49
+ # Esto permite importar módulos desde directorios hermanos
50
+ sys.path.insert(0, os.path.dirname(__file__)) # Directorio actual
51
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) # Directorio padre
52
+
53
+ # =============================================================================
54
+ # IMPORTS DE MÓDULOS L3 (Capa de Ejecución)
55
+ # =============================================================================
56
+
57
+ # Scraper B2B: Extrae información de empresas desde múltiples plataformas
58
+ from l3_execution.apify_pro_scraper import ProScraper
59
+
60
+ # Scraper B2C: Detecta intención de compra en redes sociales
61
+ from l3_execution.consumer_intent_scraper import scrape_b2c_leads
62
+
63
+ # Scoring: Califica leads según múltiples criterios
64
+ from l3_execution.lead_scoring import LeadScorer
65
+
66
+ # Database: Persistencia de leads en SQLite
67
+ from l3_execution.database_doe import add_lead, init_db
68
+
69
+ # =============================================================================
70
+ # CLASE PRINCIPAL: ProOrchestrator
71
+ # =============================================================================
72
+
73
class ProOrchestrator:
    """L2 orchestrator: runs the full lead-generation pipeline.

    Coordinates scraping (B2B + B2C), deduplication, scoring, SQLite
    persistence and webhook notification. Collaborators:
      - ProScraper  : multi-platform B2B scraping (Apify actors)
      - LeadScorer  : MQL/SQL qualification
      - config.json : API tokens, webhook URLs, niche settings
    """

    def __init__(self):
        """Create the scraper/scorer helpers and load the config file."""
        self.scraper = ProScraper()       # B2B sources (LinkedIn, Maps, ...)
        self.scorer = LeadScorer()        # lead qualification engine
        self.config = self.load_config()  # project-level settings

    def load_config(self):
        """Read config.json from the project root; return {} when absent."""
        cfg_file = os.path.join(os.path.dirname(__file__), "..", "config.json")
        if not os.path.exists(cfg_file):
            return {}
        with open(cfg_file, "r") as fh:
            return json.load(fh)

    def send_webhook(self, lead: Dict[str, Any]):
        """POST a qualified lead to the external webhook (Make.com/n8n).

        The URL comes from config["webhooks"]["new_lead"]; when it is not
        configured this is a no-op. Delivery errors are logged and swallowed
        so a dead webhook never aborts the pipeline.
        """
        url = self.config.get("webhooks", {}).get("new_lead")
        if not url:
            return
        try:
            # Short timeout: fire-and-forget notification.
            requests.post(url, json=lead, timeout=5)
        except Exception as e:
            print(f"⚠️ Error enviando webhook: {e}")

    def run_pro_pipeline(
        self,
        niche: str,
        region: str,
        location_data: Dict[str, str] = None,
        lead_type: str = "both",
        b2b_platforms: List[str] = None,
        b2c_platforms: List[str] = None,
        limit: int = 20
    ):
        """Run the whole pipeline: scrape → dedupe → score → store → notify.

        Args:
            niche: industry to target (e.g. "Insurance", "Real Estate").
            region: legacy free-form region string (e.g. "CA Bay Area").
            location_data: optional structured location ('city'/'state'/'country').
            lead_type: 'b2b', 'b2c' or 'both'.
            b2b_platforms: B2B sources to scan (defaults to LinkedIn + Maps).
            b2c_platforms: B2C sources to scan (defaults to YouTube/Reddit/Twitter).
            limit: per-platform result budget.

        Returns:
            The list of scored leads that were processed.
        """
        # Human-readable location: prefer structured location_data parts,
        # falling back to the legacy region string.
        display_location = region
        if location_data:
            filled = [part for part in location_data.values() if part]
            if filled:
                display_location = ", ".join(filled)

        print(f"🚀 INICIANDO PIPELINE PRO: {niche} en {display_location}")
        print(f"   Tipo de búsqueda: {lead_type.upper()}")

        all_leads = []

        # ---- B2B: companies / professionals -----------------------------
        if lead_type in ["b2b", "both"]:
            print("\n--- B2B SCRAPING (Empresas) ---")
            b2b_platforms = b2b_platforms or ["linkedin", "google_maps"]
            print(f"   Plataformas B2B: {', '.join(b2b_platforms)}")

            for platform in b2b_platforms:
                try:
                    raw_data = self._scrape_b2b_platform(
                        platform=platform,
                        niche=niche,
                        location=display_location,
                        location_data=location_data,
                        limit=limit
                    )
                    # Normalize every scraped item into the common lead shape.
                    for source, items in raw_data.items():
                        for item in items:
                            all_leads.append({
                                "name": item.get("name") or item.get("username") or item.get("title"),
                                "email": item.get("email"),
                                "phone": item.get("phone") or item.get("phoneNumber"),
                                "linkedin_url": item.get("url") if source == "linkedin" else item.get("linkedin_url"),
                                "company": item.get("company") or item.get("companyName"),
                                "location": display_location,
                                "city": location_data.get("city") if location_data else region.split(",")[0].strip(),
                                "source_platform": source,
                                "niche": niche,
                                "type": "B2B",
                                "scraped_at": datetime.now().isoformat()
                            })
                    print(f"✅ {platform}: {len(raw_data.get(platform, []))} leads encontrados")
                except Exception as e:
                    # One broken platform must not sink the others.
                    print(f"⚠️ Error en {platform}: {e}")
                    continue

        # ---- B2C: consumers with purchase intent ------------------------
        if lead_type in ["b2c", "both"]:
            print("\n--- B2C SCRAPING (Consumidores) ---")
            b2c_platforms = b2c_platforms or ["youtube", "reddit", "twitter"]
            print(f"   Plataformas B2C: {', '.join(b2c_platforms)}")

            # Niche-specific intent keywords (Spanish phrases for insurance).
            lowered_niche = niche.lower()
            if "insurance" in lowered_niche or "seguro" in lowered_niche:
                keywords = ["necesito seguro", "busco seguro"]
            else:
                keywords = [f"looking for {niche}", f"need {niche}"]

            # When running both B2B and B2C, each side gets half the budget.
            b2c_leads = scrape_b2c_leads(
                platforms=b2c_platforms,
                keywords=keywords,
                location=location_data,
                limit_per_platform=limit // 2 if lead_type == "both" else limit
            )

            for item in b2c_leads:
                all_leads.append({
                    "name": item.get("username"),
                    "email": None,   # consumers rarely expose a public email
                    "phone": None,
                    "linkedin_url": item.get("url"),  # the post URL, kept as reference
                    "company": "Consumer (B2C)",
                    "location": display_location,
                    "city": location_data.get("city") if location_data else "Unknown",
                    "source_platform": item.get("platform"),
                    "niche": niche,
                    "type": "B2C",
                    "intent_score": item.get("intent_score"),
                    "content_snippet": item.get("content"),
                    "scraped_at": datetime.now().isoformat()
                })

            print(f"✅ B2C: {len(b2c_leads)} leads encontrados")

        # ---- Dedupe: one lead per email (name used when email missing) ---
        deduped = {}
        for candidate in all_leads:
            dedupe_key = candidate.get("email") or candidate.get("name")
            if dedupe_key:
                deduped[dedupe_key] = candidate
        unique_leads = deduped.values()
        print(f"\n🧹 Leads después de deduplicar: {len(unique_leads)}")

        # ---- Score every unique lead ------------------------------------
        scored_leads = self.scorer.process_batch(list(unique_leads))

        # ---- Persist, and notify the webhook for hot leads --------------
        for lead in scored_leads:
            try:
                add_lead(lead)  # upsert into SQLite
                if lead.get("tier") == "SQL (Hot)":
                    print(f"🔥 LEAD CALIFICADO: {lead['name']} ({lead['score']} pts)")
                    self.send_webhook(lead)
            except Exception as e:
                # Keep going: a single bad row must not abort the batch.
                print(f"⚠️ Error guardando lead: {e}")

        print(f"\n✅ Pipeline Pro completado. {len(scored_leads)} leads procesados.")
        return scored_leads

    def _scrape_b2b_platform(
        self,
        platform: str,
        niche: str,
        location: str,
        location_data: Dict[str, str],
        limit: int
    ) -> Dict[str, List[Dict]]:
        """Dispatch one B2B platform scrape; results are keyed by source.

        Each branch adapts the query format to that platform's Apify actor.
        An unknown platform yields an empty dict.
        """
        out = {}

        if platform == "linkedin":
            # Owners and directors of the requested niche.
            out["linkedin"] = self.scraper.scrape_linkedin(
                keywords=[f"{niche} owner", f"{niche} director"],
                location=location,
                limit=limit
            )
        elif platform == "google_maps":
            # Local businesses matching the niche in this location.
            out["google_maps"] = self.scraper.scrape_google_maps(
                search_terms=[f"{niche} in {location}"],
                max_crawled_places=limit
            )
        elif platform == "youtube":
            # Channels/videos about the niche; lighter budget for video.
            out["youtube"] = self.scraper.scrape_youtube(
                search_queries=[f"best {niche} in {location}", f"{niche} tips {location}"],
                max_results=limit // 2
            )
        elif platform == "tiktok":
            # Hashtag-driven discovery: niche tag + city tag.
            out["tiktok"] = self.scraper.scrape_tiktok(
                hashtags=[niche.replace(" ", ""), location.split(",")[0].lower()],
                limit=limit // 2
            )
        elif platform == "instagram":
            # The TikTok actor is reused for Instagram hashtag scraping.
            out["instagram"] = self.scraper.scrape_tiktok(
                hashtags=[niche.replace(" ", ""), location.split(",")[0].lower()],
                limit=limit // 2
            )

        return out
454
+
455
+ # =============================================================================
456
+ # PUNTO DE ENTRADA PARA PRUEBAS
457
+ # =============================================================================
458
+
459
if __name__ == "__main__":
    # Manual smoke test: run the full pipeline against a sample niche/region.
    # Only executes when the module is run directly, never on import.
    init_db()

    runner = ProOrchestrator()

    print("\n" + "="*70)
    print("EJECUCIÓN DE PRUEBA DEL PIPELINE")
    print("="*70)

    runner.run_pro_pipeline(
        niche="Real Estate",
        region="CA Bay Area",
        lead_type="both",
        limit=10
    )
483
+
484
+ # =============================================================================
485
+ # FIN DEL ARCHIVO
486
+ # =============================================================================
lead_gen_pro/l3_execution/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # __init__.py for l3_execution module
lead_gen_pro/l3_execution/apify_pro_scraper.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ L3 Execution: Lead Gen Pro Scraper (Multi-platform)
3
+ ==================================================
4
+ Scraper avanzado que integra LinkedIn, YouTube, TikTok e Instagram vía Apify API.
5
+ """
6
+
7
+ import os
8
+ import json
9
+ import time
10
+ from datetime import datetime
11
+ from typing import Optional, List, Dict, Any
12
+ from apify_client import ApifyClient
13
+
14
# Actor IDs updated per specification: one Apify store actor per platform.
# NOTE(review): these are third-party public actors — their availability and
# input schemas can change on the Apify store; verify before relying on them.
PRO_ACTORS = {
    "linkedin": "trudax/linkedin-people-search-scraper",  # REPLACED: Valid Public Actor
    "youtube": "clank/youtube-scraper",
    "tiktok": "clockworks/tiktok-scraper",
    "instagram": "jaroslavsemanko/instagram-scraper",
    "google_maps": "compass/crawler-google-places"
}
22
+
23
def get_apify_token():
    """Return the Apify API token, or None when none is configured.

    Looks in <project root>/config.json first (key "apify_token") and falls
    back to the APIFY_TOKEN environment variable. Previously a config.json
    that existed but lacked the key returned None and silently ignored the
    environment variable; now the env var is always the fallback, matching
    apify_scraper.get_apify_token's behavior.
    """
    config_path = os.path.join(os.path.dirname(__file__), "..", "config.json")
    if os.path.exists(config_path):
        with open(config_path, "r") as f:
            token = json.load(f).get("apify_token")
        if token:
            return token
    return os.environ.get("APIFY_TOKEN")
29
+
30
class ProScraper:
    """Thin wrapper around Apify actors for multi-platform B2B scraping.

    Each scrape_* method runs one Apify actor and returns the resulting
    dataset items as a list of dicts. When no API token is configured the
    client stays None and every method short-circuits to [], so callers
    never crash in unconfigured environments.

    Fix over the original: scrape_youtube and scrape_tiktok previously had
    no error handling, so a single failed actor run raised and aborted
    unified_scrape; all actor runs now share the same guarded helper.
    """

    def __init__(self):
        self.token = get_apify_token()
        # Without a token there is nothing to call; keep client as None.
        self.client = ApifyClient(self.token) if self.token else None

    def _run_actor(self, actor_key: str, run_input: Dict[str, Any], label: str) -> List[Dict[str, Any]]:
        """Run one actor and collect its dataset items; [] on any failure."""
        try:
            run = self.client.actor(PRO_ACTORS[actor_key]).call(run_input=run_input)
            return list(self.client.dataset(run["defaultDatasetId"]).iterate_items())
        except Exception as e:
            print(f"⚠️ Error en {label}: {e}")
            return []

    def scrape_linkedin(self, keywords: List[str], location: str, limit: int = 50):
        """Search LinkedIn people for each keyword combined with the location."""
        if not self.client: return []
        print(f"🔗 Scraping LinkedIn para: {keywords} en {location}...")
        # Trudax scraper expects a 'searchTerms' list; combining keyword +
        # location gives usable results without needing LinkedIn geo URNs.
        combined_queries = [f"{k} {location}" for k in keywords]
        run_input = {
            "searchTerms": combined_queries,
            "limit": limit,
        }
        return self._run_actor("linkedin", run_input, "LinkedIn Scraper")

    def scrape_youtube(self, search_queries: List[str], max_results: int = 20):
        """Search YouTube for the given queries (no subtitle export)."""
        if not self.client: return []
        print(f"🎬 Scraping YouTube para: {search_queries}...")
        run_input = {
            "searchQueries": search_queries,
            "maxResults": max_results,
            "exportSubtitles": False
        }
        return self._run_actor("youtube", run_input, "YouTube Scraper")

    def scrape_tiktok(self, hashtags: List[str], limit: int = 20):
        """Scrape TikTok posts by hashtag (metadata only, no video download)."""
        if not self.client: return []
        print(f"🎵 Scraping TikTok para hashtags: {hashtags}...")
        run_input = {
            "hashtags": hashtags,
            "resultsPerPage": limit,
            "shouldDownloadVideo": False
        }
        return self._run_actor("tiktok", run_input, "TikTok Scraper")

    def scrape_google_maps(self, search_terms: List[str], max_crawled_places: int = 20):
        """Crawl Google Maps places for each search term."""
        if not self.client: return []
        print(f"🗺️ Scraping Google Maps para: {search_terms}...")
        run_input = {
            "searchStrings": search_terms,
            "maxCrawledPlacesPerSearch": max_crawled_places,
            "onlyDataFromTopResult": False,
        }
        return self._run_actor("google_maps", run_input, "Google Maps Scraper")

    def unified_scrape(self, niche: str, region: str, limit: int = 50):
        """Unified scraping pipeline across all supported platforms.

        Returns a dict keyed by platform; each failed platform contributes
        an empty list instead of aborting the whole run.
        """
        results = {
            "linkedin": self.scrape_linkedin([f"{niche} owner", f"{niche} director"], region, limit),
            "youtube": self.scrape_youtube([f"best {niche} in {region}", f"{niche} tips {region}"], limit // 2),
            "tiktok": self.scrape_tiktok([niche.replace(" ", ""), region.split(",")[0].lower()], limit // 2),
            "google_maps": self.scrape_google_maps([f"{niche} in {region}"], limit)
        }
        return results
101
+
102
if __name__ == "__main__":
    # Smoke test: confirm a token was picked up from config.json / env.
    pro_scraper = ProScraper()
    if pro_scraper.token:
        print("✅ Token detectado. Scraper Pro listo.")
    else:
        print("❌ Token no detectado. Revisa config.json.")
lead_gen_pro/l3_execution/apify_scraper.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ L3 Execution: Apify Google Maps Scraper
3
+ =========================================
4
+ Script determinista para extraer leads de Google Maps usando Apify.
5
+ Implementa la capa L3 del framework DOE.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import time
11
+ from datetime import datetime
12
+ from typing import Optional, List, Dict, Any
13
+
14
+ # Apify client - will be installed in venv
15
+ try:
16
+ from apify_client import ApifyClient
17
+ APIFY_AVAILABLE = True
18
+ except ImportError:
19
+ APIFY_AVAILABLE = False
20
+ print("⚠️ apify-client no instalado. Ejecuta: pip install apify-client")
21
+
22
+
23
# Apify Actor IDs (Free/Low-cost options)
# Keys are the friendly aliases accepted by scrape_google_maps_apify(actor_id=...).
APIFY_ACTORS = {
    "google_maps_basic": "compass/crawler-google-places",
    "google_maps_extensive": "nwua9Gu5YrADL7ZDj",  # Free Google Maps Scraper
    "google_maps_lead": "compass/crawler-google-places",
}

# Default cities for each country
# Used by scrape_leads() when no explicit location is given (first 3 only).
DEFAULT_CITIES = {
    "usa": ["Miami, FL", "Houston, TX", "Los Angeles, CA", "New York, NY", "Chicago, IL"],
    "venezuela": ["Caracas", "Maracaibo", "Valencia", "Barquisimeto", "Maracay"],
}

# Search queries by niche
# Hardcoded fallback used when config.json defines no queries for the
# requested niche/country (see get_queries_for_niche).
SEARCH_QUERIES = {
    "real_estate": {
        "usa": ["real estate agent", "realtor", "property broker"],
        "venezuela": ["inmobiliaria", "agente inmobiliario", "bienes raices"],
    },
    "insurance": {
        "usa": ["insurance agent", "insurance broker", "life insurance agent"],
        "venezuela": ["corredor de seguros", "agente de seguros", "aseguradora"],
    }
}
47
+
48
+
49
def get_config() -> Dict[str, Any]:
    """Load the project-level config.json; {} when the file is missing."""
    cfg = os.path.join(os.path.dirname(__file__), "..", "config.json")
    if not os.path.exists(cfg):
        return {}
    with open(cfg, "r") as fh:
        return json.load(fh)
56
+
57
def get_queries_for_niche(niche: str, country: str) -> List[str]:
    """Resolve search queries for a niche/country: config first, then defaults."""
    configured = (
        get_config()
        .get("niches", {})
        .get(niche, {})
        .get("queries", {})
        .get(country)
    )
    if configured:
        return configured
    # Fall back to the hardcoded SEARCH_QUERIES table.
    return SEARCH_QUERIES.get(niche, {}).get(country, [])
67
+
68
def get_apify_token() -> Optional[str]:
    """Apify token from the APIFY_TOKEN env var, else from config.json."""
    env_token = os.environ.get("APIFY_TOKEN")
    if env_token:
        return env_token

    cfg = os.path.join(os.path.dirname(__file__), "..", "config.json")
    if os.path.exists(cfg):
        with open(cfg, "r") as fh:
            return json.load(fh).get("apify_token")

    return None
81
+
82
+
83
def scrape_google_maps_apify(
    query: str,
    location: str,
    max_results: int = 20,
    actor_id: str = "google_maps_lead"
) -> List[Dict[str, Any]]:
    """
    Scrape Google Maps using an Apify actor.

    Falls back to demo_results() whenever the token or the apify-client
    package is missing, or the actor run raises — callers always get a
    list, never an exception.

    Args:
        query: Search query (e.g., "real estate agent")
        location: Location string (e.g., "Miami, FL")
        max_results: Maximum results to return
        actor_id: Key into APIFY_ACTORS, or a raw Apify actor ID

    Returns:
        List of business dicts normalized to a common key set.
    """
    token = get_apify_token()
    if not token:
        print("❌ APIFY_TOKEN no configurado. Usando modo demo.")
        return demo_results(query, location)

    if not APIFY_AVAILABLE:
        print("❌ apify-client no disponible.")
        return demo_results(query, location)

    try:
        client = ApifyClient(token)

        # Resolve the friendly key to a full actor ID; an unknown key is
        # passed through untouched, so callers may supply a raw actor ID.
        full_actor_id = APIFY_ACTORS.get(actor_id, actor_id)

        # Prepare input - format for compass/crawler-google-places
        run_input = {
            "searchStringsArray": [f"{query}"],
            "locationQuery": location,
            "maxCrawledPlacesPerSearch": max_results,
            "language": "en",
            "deeperCityScrape": False,
        }

        print(f"🔍 Buscando: '{query}' en {location}...")

        # Blocks until the actor run finishes.
        run = client.actor(full_actor_id).call(run_input=run_input)

        # Normalize each dataset item; the alternate key names cover the
        # slightly different schemas the different actors emit.
        results = []
        for item in client.dataset(run["defaultDatasetId"]).iterate_items():
            result = {
                "name": item.get("title") or item.get("name"),
                "phone": item.get("phone") or item.get("phoneNumber"),
                "email": item.get("email"),
                "address": item.get("address") or item.get("fullAddress"),
                "website": item.get("website") or item.get("url"),
                "rating": item.get("rating") or item.get("totalScore"),
                "reviews_count": item.get("reviewsCount") or item.get("reviews"),
                "place_id": item.get("placeId"),
                "category": item.get("category") or item.get("type"),
            }
            results.append(result)

        print(f"✅ {len(results)} resultados encontrados")
        return results

    except Exception as e:
        # Any Apify failure degrades to demo data rather than crashing.
        print(f"❌ Error en Apify: {e}")
        return demo_results(query, location)
152
+
153
+
154
def demo_results(query: str, location: str) -> List[Dict[str, Any]]:
    """Return two canned businesses so the pipeline can run without an API."""
    fixtures = [
        ("Demo Business 1", "+1-555-0101", "demo1@example.com",
         "123 Main St", "https://demo1.example.com", 4.8, 150),
        ("Demo Business 2", "+1-555-0102", "demo2@example.com",
         "456 Oak Ave", "https://demo2.example.com", 4.5, 89),
    ]
    demo_data = [
        {
            "name": f"{label} - {location}",
            "phone": phone,
            "email": email,
            "address": f"{street}, {location}",
            "website": site,
            "rating": rating,
            "reviews_count": reviews,
            "category": query,
        }
        for label, phone, email, street, site, rating, reviews in fixtures
    ]
    print(f"🎭 Modo demo: {len(demo_data)} resultados generados")
    return demo_data
180
+
181
+
182
def scrape_leads(
    niche: str,
    country: str,
    location: Optional[str] = None,
    limit_per_city: int = 20
) -> List[Dict[str, Any]]:
    """
    Scrape leads for a specific niche and country/location.

    Args:
        niche: 'real_estate' or 'insurance'
        country: 'usa' or 'venezuela'
        location: Specific city, state or ZIP (optional)
        limit_per_city: Max leads per location/query

    Returns:
        List of lead dictionaries, deduplicated by phone number.

    NOTE(review): results without a phone number are dropped entirely by
    the dedup filter below — confirm phone-less leads are meant to be
    discarded rather than kept.
    """
    # An explicit location wins; otherwise sample the first 3 default cities.
    if location:
        cities = [location]
    else:
        cities = DEFAULT_CITIES.get(country, [])[:3]

    queries = get_queries_for_niche(niche, country)
    if not queries:
        print(f"❌ No hay queries para {niche}/{country}")
        return []

    all_leads = []
    seen_phones = set()  # phone number acts as the uniqueness key

    for city in cities:
        for query in queries[:2]:  # Use first 2 queries
            results = scrape_google_maps_apify(
                query=query,
                location=city,
                max_results=limit_per_city
            )

            for result in results:
                phone = result.get("phone")
                # Avoid duplicates (also skips phone-less rows — see NOTE above)
                if phone and phone not in seen_phones:
                    seen_phones.add(phone)

                    # Enrich the raw scrape with pipeline metadata.
                    lead = {
                        **result,
                        "source": "apify_google_maps",
                        "niche": niche,
                        "country": country,
                        "city": city,
                        "scraped_at": datetime.now().isoformat(),
                    }
                    all_leads.append(lead)

            # Rate limiting between actor runs
            time.sleep(2)

    print(f"\n📊 Total leads únicos: {len(all_leads)}")
    return all_leads
242
+
243
+
244
if __name__ == "__main__":
    import argparse

    # CLI entry point for ad-hoc scraping runs.
    cli = argparse.ArgumentParser(description="Apify Google Maps Scraper")
    cli.add_argument("--niche", choices=["real_estate", "insurance"], default="real_estate")
    cli.add_argument("--country", choices=["usa", "venezuela"], default="usa")
    cli.add_argument("--limit", type=int, default=10)
    cli.add_argument("--demo", action="store_true", help="Use demo mode")
    args = cli.parse_args()

    if args.demo:
        print("🎭 Ejecutando en modo demo...")

    leads = scrape_leads(
        niche=args.niche,
        country=args.country,
        limit_per_city=args.limit
    )

    print(f"\n📋 Leads encontrados:")
    for lead in leads[:5]:
        print(f"  - {lead.get('name')} | {lead.get('phone')} | {lead.get('city')}")
lead_gen_pro/l3_execution/automation_engine.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from datetime import datetime
4
+
5
class AutomationEngine:
    """
    Handles 'If-Then' automation rules for leads.

    Rules are defined in config.json under the "automation_rules" key. Each
    rule is a dict with an "if" condition ({"field", "operator", "value"})
    and a "then" action ({"type", ...}).
    """

    def __init__(self, config_path=None):
        # Default to the package-level config.json one directory above this module.
        if config_path is None:
            config_path = os.path.join(os.path.dirname(__file__), "..", "config.json")
        self.config_path = config_path
        self.config = self._load_config()

    def _load_config(self):
        """Load the JSON config file; return {} when missing or unreadable."""
        try:
            if os.path.exists(self.config_path):
                with open(self.config_path, "r") as f:
                    return json.load(f)
        except Exception as e:
            print(f"[Automation] Error loading config: {e}")
        return {}

    def process_new_lead(self, lead_data):
        """
        Evaluates automation rules for a newly added lead.

        When no rules are configured, applies a built-in default: log leads
        whose quality_score is 7 or above.
        """
        rules = self.config.get("automation_rules", [])
        if not rules:
            # Default rule if none defined: if quality > 7, log it
            if lead_data.get("quality_score", 0) >= 7:
                print(f"[Automation] High quality lead detected: {lead_data.get('name')}")
            return

        for rule in rules:
            action = rule.get("then")
            # Fix: skip malformed rules with no "then" action — previously
            # _execute_action crashed with AttributeError on None.
            if action and self._evaluate_condition(rule.get("if"), lead_data):
                self._execute_action(action, lead_data)

    def _evaluate_condition(self, condition, data):
        """Simple condition evaluator: 'field', 'operator', 'value'."""
        if not condition:
            return False

        field = condition.get("field")
        op = condition.get("operator")
        val = condition.get("value")

        data_val = data.get(field)

        try:
            if op == "==":
                return data_val == val
            if op == ">=":
                return data_val >= val
            if op == "<=":
                return data_val <= val
            if op == "contains":
                return val in str(data_val)
        except TypeError:
            # Fix: a missing field yields data_val=None; ordered comparison
            # against a number raised TypeError before. Treat as "no match".
            return False

        return False

    def _execute_action(self, action, data):
        """Executes the 'then' part of a rule."""
        action_type = action.get("type")

        if action_type == "notify":
            print(f"[NOTIFY] Automation triggered for {data.get('name')}: {action.get('message')}")

        elif action_type == "mark_priority":
            # This would typically update the DB, but since we are called inside add_lead,
            # we should modify the data object before it's finalized (if possible)
            data["priority"] = action.get("value", 1)
            print(f"[Automation] Lead {data.get('name')} marked as priority {data['priority']}")

        elif action_type == "trigger_outreach":
            channel = action.get("channel", "whatsapp")
            print(f"[Automation] Triggering {channel} outreach for {data.get('name')}")
            # Actual outreach logic would go here or be queued
76
if __name__ == "__main__":
    # Smoke test: run the default high-quality-lead rule against a sample lead.
    demo_lead = {"name": "Test Automation", "quality_score": 9, "niche": "real_estate"}
    AutomationEngine().process_new_lead(demo_lead)
lead_gen_pro/l3_execution/consumer_intent_scraper.py ADDED
@@ -0,0 +1,729 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ L3 Execution: B2C Consumer Intent Scraper
3
+ ==========================================
4
+ Script para detectar consumidores buscando activamente seguros en redes sociales.
5
+ Implementa scraping de intención de compra en YouTube, Reddit y Twitter/X.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import time
11
+ import re
12
+ from datetime import datetime
13
+ from typing import Optional, List, Dict, Any
14
+ from urllib.parse import urlencode
15
+
16
+ # Apify client - will be installed in venv
17
+ try:
18
+ from apify_client import ApifyClient
19
+ APIFY_AVAILABLE = True
20
+ except ImportError:
21
+ APIFY_AVAILABLE = False
22
+ print("⚠️ apify-client no instalado. Ejecuta: pip install apify-client")
23
+
24
+
25
# Apify Actor IDs for social media scraping.
# These are actor slugs on the Apify platform, resolved via
# ApifyClient.actor(...) in B2CScraper.
APIFY_ACTORS = {
    "youtube_comments": "streamers/youtube-scraper",  # For comments
    "reddit_posts": "trudax/reddit-scraper",  # For posts
    "twitter_posts": "apidojo/tweet-scraper",  # For tweets
    "facebook_posts": "apify/facebook-posts-scraper",  # For public posts and groups
}

# Keywords that indicate purchase intent (insurance-related), keyed by
# language code. Matched as substrings against lower-cased content; each
# hit adds to the intent score.
INTENT_KEYWORDS = {
    "es": [
        "necesito seguro", "busco seguro", "recomiendan seguro",
        "seguro barato", "mejor seguro", "seguro de auto",
        "seguro de vida", "seguro dental", "seguro médico",
        "cotización seguro", "precio seguro", "cuánto cuesta seguro",
        "me cancelaron el seguro", "mi seguro es caro",
        "cambiar de aseguradora", "alternativa a seguro"
    ],
    "en": [
        "need insurance", "looking for insurance", "recommend insurance",
        "cheap insurance", "best insurance", "car insurance",
        "life insurance", "dental insurance", "health insurance",
        "insurance quote", "insurance price", "how much is insurance",
        "my insurance cancelled", "my insurance is expensive",
        "switch insurance", "insurance alternatives"
    ]
}

# Keywords that indicate the user might be an agent/agency (to filter out).
# A hit in either the content or the username marks the item "promotional"
# with intent score 0.
PROMOTIONAL_KEYWORDS = [
    "agent", "agency", "broker", "vendo", "venta de seguros",
    "corredor", "asesor", "consultor", "insurance agent",
    "insurance broker", "sell insurance", "insurance sales"
]

# Complaint/negative sentiment indicators. Regex patterns applied with
# re.search against lower-cased content; any match flags the item as a
# complaint and boosts its intent score.
COMPLAINT_PATTERNS = [
    r"too expensive", r"muy caro", r"overpriced",
    r"cancelled", r"cancelaron", r"cancelaron mi",
    r"bad experience", r"mala experiencia", r"terrible",
    r"worst", r"peor", r"horrible",
    r"problem", r"problema", r"issue", r"issues"
]
68
+
69
+
70
def get_apify_token():
    """Return the Apify API token.

    Resolution order:
      1. The "apify_token" key in the package-level config.json.
      2. The APIFY_TOKEN environment variable.

    Returns None when neither source provides a token.
    """
    config_path = os.path.join(os.path.dirname(__file__), "..", "config.json")
    if os.path.exists(config_path):
        try:
            with open(config_path, "r") as f:
                token = json.load(f).get("apify_token")
            # Fix: previously a config.json without the key short-circuited
            # the env-var fallback; now fall through when missing/empty.
            if token:
                return token
        except (OSError, json.JSONDecodeError) as e:
            # Fix: a corrupt/unreadable config.json no longer crashes callers.
            print(f"[Apify] Error leyendo config.json: {e}")
    return os.environ.get("APIFY_TOKEN")
77
+
78
+
79
class B2CScraper:
    """
    Scraper that detects consumer purchase intent on social networks.

    Wraps several Apify actors (YouTube, Reddit, Twitter/X, Facebook).
    When no Apify client/token is configured, every scrape falls back to
    canned demo results so the rest of the pipeline can be exercised offline.
    """

    def __init__(self):
        # Token resolution (config.json or APIFY_TOKEN env) is delegated
        # to the module-level get_apify_token().
        self.apify_token = get_apify_token()
        self.client = None

        if APIFY_AVAILABLE and self.apify_token:
            self.client = ApifyClient(self.apify_token)
        else:
            print("⚠️ Apify no configurado. Usando modo demo.")

    def scrape_intent(
        self,
        platform: str,
        keywords: List[str],
        limit: int = 20,
        language: str = "en"
    ) -> List[Dict[str, Any]]:
        """
        Orchestrator that dispatches to the platform-specific scraper.

        Args:
            platform: 'youtube', 'reddit', 'twitter'/'x', or 'facebook'
                (case-insensitive).
            keywords: Key phrases to search for.
            limit: Maximum number of results.
            language: 'es' or 'en'.

        Returns:
            List of dicts describing detected B2C leads; empty list for an
            unsupported platform.
        """
        print(f"🔍 Buscando intención de compra en {platform}...")
        print(f" Keywords: {keywords[:3]}...")

        if platform.lower() == "youtube":
            return self._scrape_youtube(keywords, limit, language)
        elif platform.lower() == "reddit":
            return self._scrape_reddit(keywords, limit, language)
        elif platform.lower() in ["twitter", "x"]:
            return self._scrape_twitter(keywords, limit, language)
        elif platform.lower() == "facebook":
            return self._scrape_facebook(keywords, limit, language)
        else:
            print(f"❌ Plataforma no soportada: {platform}")
            return []

    def _calculate_intent_score(self, content: str, username: str = "") -> tuple:
        """
        Compute the purchase-intent score and sentiment for a piece of content.

        Scoring: base 50; +15 per intent-keyword hit (any language);
        +10 per complaint-pattern hit; +10 if the content contains a
        question mark; capped at 100. Promotional content (agent/broker
        wording in content or username) is scored 0 so callers filter it out.

        Returns:
            Tuple of (intent_score: int, sentiment: str)
        """
        content_lower = content.lower()
        username_lower = username.lower()
        score = 50  # Base score

        # Check for promotional content (filter out)
        for prom_keyword in PROMOTIONAL_KEYWORDS:
            if prom_keyword in content_lower or prom_keyword in username_lower:
                return (0, "promotional")  # Filter out

        # Check for intent keywords
        for lang_keywords in INTENT_KEYWORDS.values():
            for keyword in lang_keywords:
                if keyword in content_lower:
                    score += 15

        # Check for complaints (indicates need)
        has_complaint = False
        for pattern in COMPLAINT_PATTERNS:
            if re.search(pattern, content_lower):
                has_complaint = True
                score += 10

        # Questions indicate high intent
        if "?" in content:
            score += 10

        # Cap score at 100
        score = min(score, 100)

        # Determine sentiment: any complaint wins over a high score.
        if has_complaint:
            sentiment = "negative"
        elif score > 70:
            sentiment = "positive"
        else:
            sentiment = "neutral"

        return (score, sentiment)

    def _scrape_youtube(
        self,
        keywords: List[str],
        limit: int,
        language: str
    ) -> List[Dict[str, Any]]:
        """
        Scrape YouTube comments looking for purchase intent.

        Searches for insurance-related videos via the configured Apify actor
        and extracts recent comments. Falls back to demo results when no
        client is configured or the whole run fails.
        """
        if not self.client:
            return self._demo_results("youtube", keywords, limit)

        results = []

        try:
            # Search for insurance-related videos
            search_queries = keywords[:3] if keywords else ["insurance review", "best insurance 2024"]

            for query in search_queries:
                print(f" Buscando videos: '{query}'...")

                # Use YouTube scraper actor
                # NOTE(review): input keys assume the streamers/youtube-scraper
                # actor schema — confirm against the actor's documentation.
                run_input = {
                    "searchKeywords": query,
                    "maxResults": min(limit * 2, 20),
                    "includeComments": True,
                    "maxComments": 50,
                }

                try:
                    run = self.client.actor(APIFY_ACTORS["youtube_comments"]).call(run_input=run_input)

                    for item in self.client.dataset(run["defaultDatasetId"]).iterate_items():
                        # Process comments if available
                        comments = item.get("comments", [])
                        video_url = item.get("url", "")
                        video_title = item.get("title", "")

                        for comment in comments[:10]:  # Limit comments per video
                            content = comment.get("text", "")
                            username = comment.get("author", "")
                            comment_url = comment.get("url", video_url)

                            # Calculate intent
                            intent_score, sentiment = self._calculate_intent_score(content, username)

                            if intent_score >= 40:  # Only include if meaningful intent
                                result = {
                                    "platform": "youtube",
                                    "username": username,
                                    "content": content[:500],  # Limit content length
                                    "url": comment_url,
                                    "scraped_at": datetime.now().isoformat(),
                                    "sentiment": sentiment,
                                    "intent_score": intent_score,
                                    "context": f"Video: {video_title[:100]}"
                                }
                                results.append(result)

                    time.sleep(2)  # Rate limiting

                except Exception as e:
                    # Per-query failures are logged and skipped; other queries continue.
                    print(f" ⚠️ Error procesando query '{query}': {e}")
                    continue

                if len(results) >= limit:
                    break

        except Exception as e:
            print(f"❌ Error en YouTube scraper: {e}")
            return self._demo_results("youtube", keywords, limit)

        print(f"✅ {len(results)} leads encontrados en YouTube")
        return results[:limit]

    def _scrape_reddit(
        self,
        keywords: List[str],
        limit: int,
        language: str
    ) -> List[Dict[str, Any]]:
        """
        Scrape Reddit posts looking for purchase intent.

        Searches a fixed list of insurance-related subreddits
        (r/Insurance, r/personalfinance, etc.).
        """
        if not self.client:
            return self._demo_results("reddit", keywords, limit)

        results = []

        try:
            # Key subreddits for insurance discussions
            subreddits = ["Insurance", "personalfinance", "askcarsales", "LifeInsurance"]

            for subreddit in subreddits:
                if len(results) >= limit:
                    break

                print(f" Buscando en r/{subreddit}...")

                # NOTE(review): input keys assume the trudax/reddit-scraper
                # actor schema — confirm against the actor's documentation.
                run_input = {
                    "subreddits": [subreddit],
                    "searchTerms": keywords[:3] if keywords else ["insurance"],
                    "maxResults": min(limit * 2, 25),
                    "sort": "new",
                }

                try:
                    run = self.client.actor(APIFY_ACTORS["reddit_posts"]).call(run_input=run_input)

                    for item in self.client.dataset(run["defaultDatasetId"]).iterate_items():
                        # Prefer the post body; fall back to the title when empty.
                        content = item.get("body", "") or item.get("title", "")
                        username = item.get("author", "")
                        post_url = item.get("url", "")

                        # Calculate intent
                        intent_score, sentiment = self._calculate_intent_score(content, username)

                        if intent_score >= 40:
                            result = {
                                "platform": "reddit",
                                "username": username,
                                "content": content[:500],
                                "url": post_url,
                                "scraped_at": datetime.now().isoformat(),
                                "sentiment": sentiment,
                                "intent_score": intent_score,
                                "context": f"r/{subreddit}"
                            }
                            results.append(result)

                    time.sleep(2)  # Rate limiting

                except Exception as e:
                    # Per-subreddit failures are logged and skipped.
                    print(f" ⚠️ Error en r/{subreddit}: {e}")
                    continue

        except Exception as e:
            print(f"❌ Error en Reddit scraper: {e}")
            return self._demo_results("reddit", keywords, limit)

        print(f"✅ {len(results)} leads encontrados en Reddit")
        return results[:limit]

    def _scrape_twitter(
        self,
        keywords: List[str],
        limit: int,
        language: str
    ) -> List[Dict[str, Any]]:
        """
        Scrape tweets looking for purchase intent.
        """
        if not self.client:
            return self._demo_results("twitter", keywords, limit)

        results = []

        try:
            # Build search query
            search_terms = keywords[:3] if keywords else ["need insurance", "looking for insurance"]

            for term in search_terms:
                if len(results) >= limit:
                    break

                print(f" Buscando tweets: '{term}'...")

                # NOTE(review): input keys assume the apidojo/tweet-scraper
                # actor schema — confirm against the actor's documentation.
                run_input = {
                    "searchTerms": [term],
                    "maxTweets": min(limit * 2, 30),
                    "includeReplies": False,
                    "language": language,
                }

                try:
                    run = self.client.actor(APIFY_ACTORS["twitter_posts"]).call(run_input=run_input)

                    for item in self.client.dataset(run["defaultDatasetId"]).iterate_items():
                        content = item.get("text", "")
                        username = item.get("username", "").replace("@", "")
                        # Synthesize a status URL when the actor omits one.
                        tweet_url = item.get("url", f"https://twitter.com/{username}/status/{item.get('id', '')}")

                        # Calculate intent
                        intent_score, sentiment = self._calculate_intent_score(content, username)

                        if intent_score >= 40:
                            result = {
                                "platform": "twitter",
                                "username": username,
                                "content": content[:500],
                                "url": tweet_url,
                                "scraped_at": datetime.now().isoformat(),
                                "sentiment": sentiment,
                                "intent_score": intent_score,
                                "context": None
                            }
                            results.append(result)

                    time.sleep(2)  # Rate limiting

                except Exception as e:
                    # Per-term failures are logged and skipped.
                    print(f" ⚠️ Error buscando '{term}': {e}")
                    continue

        except Exception as e:
            print(f"❌ Error en Twitter scraper: {e}")
            return self._demo_results("twitter", keywords, limit)

        print(f"✅ {len(results)} leads encontrados en Twitter")
        return results[:limit]

    def _scrape_facebook(
        self,
        keywords: List[str],
        limit: int,
        language: str
    ) -> List[Dict[str, Any]]:
        """
        Scrape public Facebook posts looking for purchase intent.

        Searches public groups and posts related to insurance. The default
        search terms target Florida/USA B2C leads.
        """
        if not self.client:
            return self._demo_results("facebook", keywords, limit)

        results = []

        try:
            # Insurance-specific search terms (Florida-focused defaults).
            search_terms = keywords[:3] if keywords else [
                "need insurance Florida",
                "looking for insurance Miami",
                "cheap insurance Orlando",
                "car insurance Tampa",
                "health insurance Florida"
            ]

            for term in search_terms:
                if len(results) >= limit:
                    break

                print(f" Buscando en Facebook: '{term}'...")

                # NOTE(review): input keys assume the apify/facebook-posts-scraper
                # actor schema — confirm against the actor's documentation.
                run_input = {
                    "searchTerms": [term],
                    "maxResults": min(limit * 2, 30),
                    "onlyPosts": True,
                    "includeComments": False,
                }

                try:
                    run = self.client.actor(APIFY_ACTORS["facebook_posts"]).call(run_input=run_input)

                    for item in self.client.dataset(run["defaultDatasetId"]).iterate_items():
                        content = item.get("text", "")
                        username = item.get("user", {}).get("name", "Unknown")
                        user_url = item.get("user", {}).get("url", "")
                        post_url = item.get("url", "")
                        group_name = item.get("group", {}).get("name", "")

                        # Calculate intent
                        intent_score, sentiment = self._calculate_intent_score(content, username)

                        if intent_score >= 40:
                            result = {
                                "platform": "facebook",
                                "username": username,
                                "content": content[:500],
                                "url": post_url or user_url,
                                "scraped_at": datetime.now().isoformat(),
                                "sentiment": sentiment,
                                "intent_score": intent_score,
                                "context": f"Group: {group_name}" if group_name else "Public Post"
                            }
                            results.append(result)

                    time.sleep(3)  # Rate limiting (Facebook is stricter)

                except Exception as e:
                    # Per-term failures are logged and skipped.
                    print(f" ⚠️ Error buscando '{term}': {e}")
                    continue

        except Exception as e:
            print(f"❌ Error en Facebook scraper: {e}")
            return self._demo_results("facebook", keywords, limit)

        print(f"✅ {len(results)} leads encontrados en Facebook")
        return results[:limit]

    def _demo_results(
        self,
        platform: str,
        keywords: List[str],
        limit: int
    ) -> List[Dict[str, Any]]:
        """Generate canned demo results for testing without API access.

        Items are copied before stamping scraped_at so the template data
        stays pristine across calls. `keywords` is accepted for signature
        parity with the real scrapers but is not used here.
        """
        demo_data = {
            "youtube": [
                {
                    "platform": "youtube",
                    "username": "consumer_john_2024",
                    "content": "Necesito un seguro de auto barato en Miami. Alguien tiene recomendaciones? Mi seguro actual es muy caro.",
                    "url": "https://youtube.com/watch?v=demo1",
                    "sentiment": "negative",
                    "intent_score": 85,
                    "context": "Video: Best Car Insurance 2024"
                },
                {
                    "platform": "youtube",
                    "username": "sarah_smith",
                    "content": "Looking for dental insurance recommendations. My current plan doesn't cover what I need. Help!",
                    "url": "https://youtube.com/watch?v=demo2",
                    "sentiment": "negative",
                    "intent_score": 78,
                    "context": "Video: Dental Insurance Guide"
                },
            ],
            "reddit": [
                {
                    "platform": "reddit",
                    "username": "insurance_seeker_22",
                    "content": "Just got my insurance cancelled. Need a new provider ASAP. Any recommendations for affordable life insurance?",
                    "url": "https://reddit.com/r/Insurance/demo1",
                    "sentiment": "negative",
                    "intent_score": 92,
                    "context": "r/Insurance"
                },
                {
                    "platform": "reddit",
                    "username": "budget_mom",
                    "content": "What's the best health insurance for a family of 4? Currently paying $800/month and it's killing us.",
                    "url": "https://reddit.com/r/personalfinance/demo2",
                    "sentiment": "negative",
                    "intent_score": 88,
                    "context": "r/personalfinance"
                },
            ],
            "twitter": [
                {
                    "platform": "twitter",
                    "username": "mike_looking",
                    "content": "Need car insurance quotes ASAP! My current provider raised rates by 40%. Anyone have good experiences with Geico or Progressive?",
                    "url": "https://twitter.com/mike_looking/status/demo1",
                    "sentiment": "negative",
                    "intent_score": 90,
                    "context": None
                },
                {
                    "platform": "twitter",
                    "username": "young_professional",
                    "content": "First time buying life insurance. What should I look for? Term vs whole life? #insurance #help",
                    "url": "https://twitter.com/young_professional/status/demo2",
                    "sentiment": "neutral",
                    "intent_score": 75,
                    "context": None
                },
            ],
            "facebook": [
                {
                    "platform": "facebook",
                    "username": "Sarah Johnson",
                    "content": "Hi everyone! Just moved to Florida and need car insurance recommendations. My previous insurer doesn't cover this state. Any suggestions for affordable options in Miami?",
                    "url": "https://facebook.com/groups/florida-insurance/posts/demo1",
                    "sentiment": "neutral",
                    "intent_score": 82,
                    "context": "Group: Florida Insurance Help"
                },
                {
                    "platform": "facebook",
                    "username": "Carlos Martinez",
                    "content": "Desperate for dental insurance in Orlando! My dentist says I need a root canal ASAP but my current plan won't cover it. Anyone know of good dental insurance that covers major procedures?",
                    "url": "https://facebook.com/groups/orlando-community/posts/demo2",
                    "sentiment": "negative",
                    "intent_score": 95,
                    "context": "Group: Orlando Community"
                },
                {
                    "platform": "facebook",
                    "username": "Jennifer Williams",
                    "content": "Looking for health insurance for my family of 5 in Tampa. Self-employed so no employer plan. What are my best options? Marketplace or private?",
                    "url": "https://facebook.com/groups/tampa-moms/posts/demo3",
                    "sentiment": "neutral",
                    "intent_score": 78,
                    "context": "Group: Tampa Moms Group"
                },
            ]
        }

        platform_data = demo_data.get(platform, [])
        results = []

        for item in platform_data[:limit]:
            item_copy = item.copy()
            item_copy["scraped_at"] = datetime.now().isoformat()
            results.append(item_copy)

        print(f"🎭 Modo demo: {len(results)} resultados generados para {platform}")
        return results
573
+
574
+
575
def scrape_b2c_leads(
    platforms: List[str] = None,
    keywords: List[str] = None,
    location: Dict[str, str] = None,
    limit_per_platform: int = 10,
    language: str = "en"
) -> List[Dict[str, Any]]:
    """
    Main entry point: scrape B2C leads from multiple social platforms.
    Supports granular geographic segmentation.

    Args:
        platforms: Platform names ('youtube', 'reddit', 'twitter', 'facebook').
            Defaults to youtube/reddit/twitter.
        keywords: Base keywords (optional; defaults to INTENT_KEYWORDS for
            the given language).
        location: Dict with optional 'city', 'state', 'country' keys.
        limit_per_platform: Maximum results per platform.
        language: 'es' or 'en'.

    Returns:
        Leads sorted by intent_score (descending), de-duplicated by
        (platform, username, url) so the reported total is truly unique.
    """
    if platforms is None:
        platforms = ["youtube", "reddit", "twitter"]

    # Base keywords if not provided
    if keywords is None:
        keywords = INTENT_KEYWORDS.get(language, INTENT_KEYWORDS["en"])

    # Generate location-specific keywords
    targeted_keywords = keywords.copy()
    if location:
        city = location.get("city")
        state = location.get("state")
        country = location.get("country")

        location_terms = [term for term in [city, state, country] if term]

        if location_terms:
            print(f"📍 Aplicando segmentación geográfica: {', '.join(location_terms)}")
            geo_keywords = []
            for kw in keywords:
                for term in location_terms:
                    geo_keywords.append(f"{kw} {term}")
                    geo_keywords.append(f"{kw} in {term}" if language == "en" else f"{kw} en {term}")

            # Add specific geo keywords to the top of the list
            targeted_keywords = geo_keywords + keywords

    scraper = B2CScraper()
    all_leads = []
    seen = set()  # (platform, username, url) triples already collected

    for idx, platform in enumerate(platforms):
        try:
            leads = scraper.scrape_intent(
                platform=platform,
                keywords=targeted_keywords,
                limit=limit_per_platform,
                language=language
            )
            for lead in leads:
                # Fix: the summary claims unique leads but nothing de-duplicated
                # them before; drop repeats across (platform, username, url).
                key = (lead.get("platform"), lead.get("username"), lead.get("url"))
                if key in seen:
                    continue
                seen.add(key)
                all_leads.append(lead)

            # Rate limiting between platforms. Fix: compare by position, not
            # by value — value comparison misfired with duplicate entries.
            if idx < len(platforms) - 1:
                time.sleep(3)
        except Exception as e:
            print(f"⚠️ Error en plataforma {platform}: {e}")

    # Sort by intent score (highest first)
    all_leads.sort(key=lambda x: x.get("intent_score", 0), reverse=True)

    print(f"\n📊 Total leads B2C únicos: {len(all_leads)}")
    return all_leads
644
+
645
+
646
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="B2C Consumer Intent Scraper")
    parser.add_argument(
        "--platform",
        # Fix: 'facebook' was missing even though B2CScraper._scrape_facebook
        # (and its demo data) fully support it, so it was unreachable from the CLI.
        choices=["youtube", "reddit", "twitter", "facebook", "all"],
        default="all",
        help="Plataforma a scrapear"
    )
    parser.add_argument(
        "--keywords",
        nargs="+",
        default=None,
        help="Keywords a buscar (separados por espacio)"
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=10,
        help="Máximo de resultados por plataforma"
    )
    parser.add_argument(
        "--language",
        choices=["es", "en"],
        default="en",
        help="Idioma de búsqueda"
    )
    parser.add_argument(
        "--demo",
        action="store_true",
        help="Usar modo demo (sin APIs)"
    )
    # Location args
    parser.add_argument("--city", type=str, help="Ciudad")
    parser.add_argument("--state", type=str, help="Estado")
    parser.add_argument("--country", type=str, help="País")

    args = parser.parse_args()

    # Setup platforms ('all' now includes facebook to match scraper support)
    if args.platform == "all":
        platforms = ["youtube", "reddit", "twitter", "facebook"]
    else:
        platforms = [args.platform]

    # Setup location (only keys actually provided are passed through)
    location = {}
    if args.city: location["city"] = args.city
    if args.state: location["state"] = args.state
    if args.country: location["country"] = args.country

    print("🎯 B2C Consumer Intent Scraper")
    print("=" * 50)
    print(f"Plataformas: {', '.join(platforms)}")
    print(f"Keywords: {args.keywords or 'Default'}")
    print(f"Ubicación: {location if location else 'Global'}")
    print(f"Límite por plataforma: {args.limit}")
    print(f"Idioma: {args.language}")
    print("=" * 50)

    # Run scraper
    leads = scrape_b2c_leads(
        platforms=platforms,
        keywords=args.keywords,
        location=location,
        limit_per_platform=args.limit,
        language=args.language
    )

    # Display results
    print(f"\n📋 Top Leads B2C Detectados:")
    print("-" * 80)
    for i, lead in enumerate(leads[:10], 1):
        print(f"\n{i}. [{lead['platform'].upper()}] Score: {lead['intent_score']}/100")
        print(f" Usuario: @{lead['username']}")
        print(f" Contenido: {lead['content'][:150]}...")
        print(f" Sentimiento: {lead['sentiment']}")
        print(f" URL: {lead['url']}")
        if lead.get('context'):
            print(f" Contexto: {lead['context']}")

    print("\n" + "=" * 80)
    print(f"✅ Proceso completado. {len(leads)} leads B2C detectados.")
lead_gen_pro/l3_execution/database_doe.py ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ L3 Execution: Enhanced Database Module
3
+ ========================================
4
+ Base de datos SQLite expandida con soporte para contactos,
5
+ oportunidades e historial de outreach.
6
+ Implementa la capa L3 del framework DOE.
7
+ """
8
+
9
+ import sqlite3
10
+ import os
11
+ import json
12
+ from datetime import datetime
13
+ from typing import Optional, List, Dict, Any
14
+
15
# NexusCRM Fase 2 Cloud Sync.
# Import cloud-sync and automation helpers, tolerating both package-style
# (l3_execution.*) and flat-script execution. When neither import resolves,
# degrade to no-op stubs so the local SQLite flow keeps working offline.
try:
    from l3_execution.supabase_sync import sync_lead_to_supabase
    from l3_execution.automation_engine import AutomationEngine
except ImportError:
    try:
        from supabase_sync import sync_lead_to_supabase
        from automation_engine import AutomationEngine
    except ImportError:
        # Offline/degraded mode: same call sites, no behavior.
        def sync_lead_to_supabase(data): pass
        class AutomationEngine:
            def process_new_lead(self, data): pass

# Module-level engine instance used to run automation rules on new leads.
automation = AutomationEngine()

# SQLite database file lives at the package root, one level above this module.
DB_PATH = os.path.join(os.path.dirname(__file__), "..", "leads_doe.db")
31
+
32
+
33
def get_connection(timeout: int = 30) -> sqlite3.Connection:
    """Open a connection to the DOE SQLite database.

    Args:
        timeout: Seconds to wait for a database lock (default: 30).

    Returns:
        sqlite3.Connection configured with sqlite3.Row as its row factory,
        so query results support name-based column access.
    """
    connection = sqlite3.connect(DB_PATH, timeout=timeout)
    connection.row_factory = sqlite3.Row
    return connection
45
+
46
+
47
def init_db() -> None:
    """Create all DOE tables and indexes if they do not already exist.

    Idempotent: every statement uses IF NOT EXISTS, so it is safe to call
    on every startup. Creates the leads, outreach_log, opportunities and
    metrics tables plus their lookup indexes, then prints the DB path.
    """
    conn = get_connection()
    cursor = conn.cursor()

    # Main leads table: scraped contact data, enrichment, outreach status
    # and opportunity flags all live on one row per lead.
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS leads (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL,
            phone TEXT,
            phone_formatted TEXT,
            email TEXT,
            address TEXT,
            website TEXT,
            rating REAL,
            reviews_count INTEGER,
            category TEXT,
            source TEXT DEFAULT 'apify',
            niche TEXT NOT NULL,
            country TEXT NOT NULL,
            city TEXT,

            -- Enrichment data
            enriched BOOLEAN DEFAULT 0,
            facebook_url TEXT,
            instagram_url TEXT,
            linkedin_url TEXT,
            twitter_url TEXT,

            -- Status tracking
            status TEXT DEFAULT 'new',
            priority INTEGER DEFAULT 0,

            -- Outreach status
            whatsapp_sent BOOLEAN DEFAULT 0,
            whatsapp_sent_at TIMESTAMP,
            whatsapp_link TEXT,
            email_sent BOOLEAN DEFAULT 0,
            email_sent_at TIMESTAMP,
            replied BOOLEAN DEFAULT 0,
            replied_at TIMESTAMP,

            -- Opportunity tracking
            is_opportunity BOOLEAN DEFAULT 0,
            opportunity_value REAL,
            opportunity_notes TEXT,

            -- Metadata
            scraped_at TIMESTAMP,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)

    # Outreach history log: one row per message attempt, linked to a lead.
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS outreach_log (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            lead_id INTEGER NOT NULL,
            channel TEXT NOT NULL,
            message_template TEXT,
            message_sent TEXT,
            status TEXT DEFAULT 'sent',
            response TEXT,
            sent_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (lead_id) REFERENCES leads(id)
        )
    """)

    # Opportunities (CRM-like): pipeline stage, value and next action per lead.
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS opportunities (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            lead_id INTEGER NOT NULL,
            stage TEXT DEFAULT 'qualified',
            value REAL,
            probability INTEGER DEFAULT 50,
            notes TEXT,
            next_action TEXT,
            next_action_date DATE,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (lead_id) REFERENCES leads(id)
        )
    """)

    # Dashboard metrics cache: precomputed values stored as name/value pairs.
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS metrics (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            metric_name TEXT NOT NULL,
            metric_value TEXT,
            calculated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)

    # Create indexes on the columns used for filtering and joins.
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_leads_status ON leads(status)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_leads_niche ON leads(niche)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_leads_country ON leads(country)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_leads_email ON leads(email)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_outreach_lead ON outreach_log(lead_id)")

    conn.commit()
    conn.close()

    print(f"[OK] Base de datos DOE inicializada: {DB_PATH}")
155
+
156
+
157
def add_lead(lead_data: Dict[str, Any], upsert: bool = True) -> int:
    """
    Add a new lead to the database.
    If upsert is True, it updates existing lead based on phone or email.

    Args:
        lead_data: Lead dictionary (name, phone, email, website, niche, ...).
        upsert: When True, merge into an existing row matched by phone/email;
            when False, return the existing row's id untouched.

    Returns:
        The lead's row id (existing or newly inserted).

    NOTE(review): both statements below write a ``quality_score`` column that
    init_db's CREATE TABLE does not define — confirm a migration adds it.
    """
    conn = get_connection()
    cursor = conn.cursor()

    phone = lead_data.get("phone")
    email = lead_data.get("email")

    # Check for existing lead (Deduplication): phone first, then email.
    existing_id = None
    if phone:
        cursor.execute("SELECT id FROM leads WHERE phone = ? OR phone_formatted = ?", (phone, phone))
        row = cursor.fetchone()
        if row: existing_id = row[0]

    if not existing_id and email:
        cursor.execute("SELECT id FROM leads WHERE email = ?", (email,))
        row = cursor.fetchone()
        if row: existing_id = row[0]

    if existing_id:
        if not upsert:
            conn.close()
            return existing_id

        # Update existing lead (Upsert logic)
        # Extract social profiles if present
        social = lead_data.get("social_profiles", {})

        # COALESCE keeps the stored value whenever the incoming field is
        # NULL, so a sparse payload never erases previously captured data.
        cursor.execute("""
            UPDATE leads
            SET name = COALESCE(?, name),
                email = COALESCE(?, email),
                address = COALESCE(?, address),
                website = COALESCE(?, website),
                rating = COALESCE(?, rating),
                whatsapp_link = COALESCE(?, whatsapp_link),
                facebook_url = COALESCE(?, facebook_url),
                instagram_url = COALESCE(?, instagram_url),
                linkedin_url = COALESCE(?, linkedin_url),
                twitter_url = COALESCE(?, twitter_url),
                quality_score = COALESCE(?, quality_score),
                updated_at = CURRENT_TIMESTAMP
            WHERE id = ?
        """, (
            lead_data.get("name"),
            lead_data.get("email"),
            lead_data.get("address"),
            lead_data.get("website"),
            lead_data.get("rating"),
            lead_data.get("whatsapp_link"),
            social.get("facebook") if isinstance(social, dict) else None,
            social.get("instagram") if isinstance(social, dict) else None,
            social.get("linkedin") if isinstance(social, dict) else None,
            social.get("twitter") if isinstance(social, dict) else None,
            lead_data.get("quality_score"),
            existing_id
        ))
        lead_id = existing_id
    else:
        # Extract social profiles if present
        social = lead_data.get("social_profiles", {})

        cursor.execute("""
            INSERT INTO leads (
                name, phone, phone_formatted, email, address, website,
                rating, reviews_count, category, source, niche, country, city,
                enriched, facebook_url, instagram_url, linkedin_url, twitter_url,
                whatsapp_link, quality_score, scraped_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            lead_data.get("name"),
            lead_data.get("phone"),
            lead_data.get("phone_formatted"),
            lead_data.get("email"),
            lead_data.get("address"),
            lead_data.get("website"),
            lead_data.get("rating"),
            lead_data.get("reviews_count"),
            lead_data.get("category"),
            lead_data.get("source", "apify"),
            lead_data.get("niche"),
            lead_data.get("country"),
            lead_data.get("city"),
            lead_data.get("enriched", False),
            social.get("facebook") if isinstance(social, dict) else None,
            social.get("instagram") if isinstance(social, dict) else None,
            social.get("linkedin") if isinstance(social, dict) else None,
            social.get("twitter") if isinstance(social, dict) else None,
            lead_data.get("whatsapp_link"),
            lead_data.get("quality_score"),
            lead_data.get("scraped_at")
        ))
        lead_id = cursor.lastrowid

    conn.commit()
    conn.close()

    # Proactive Sync to NexusCRM Supabase Cloud (Fase 2).
    # Best-effort: cloud failures must never block local inserts.
    try:
        sync_lead_to_supabase(lead_data)
    except Exception as e:
        print(f"[NexusCRM] Warning: Auto-sync to cloud failed: {e}")

    # Process Automation Rules (Fase 2) — also best-effort.
    try:
        automation.process_new_lead(lead_data)
    except Exception as e:
        print(f"[Automation] Warning: Rule processing failed: {e}")

    return lead_id
271
+
272
+
273
def get_leads(
    niche: Optional[str] = None,
    country: Optional[str] = None,
    status: Optional[str] = None,
    with_email: bool = False,
    with_phone: bool = False,
    limit: int = 100
) -> List[Dict[str, Any]]:
    """Fetch leads, newest first, applying any of the optional filters.

    Args:
        niche: Restrict to a single niche.
        country: Restrict to a single country.
        status: Restrict to a single status value.
        with_email: Only leads with a non-empty email.
        with_phone: Only leads with a non-empty phone.
        limit: Maximum number of rows returned.

    Returns:
        List of lead dicts ordered by created_at descending.
    """
    clauses = ["1=1"]
    params: List[Any] = []

    if niche:
        clauses.append("niche = ?")
        params.append(niche)
    if country:
        clauses.append("country = ?")
        params.append(country)
    if status:
        clauses.append("status = ?")
        params.append(status)
    if with_email:
        clauses.append("email IS NOT NULL AND email != ''")
    if with_phone:
        clauses.append("phone IS NOT NULL AND phone != ''")

    params.append(limit)
    sql = (
        "SELECT * FROM leads WHERE "
        + " AND ".join(clauses)
        + " ORDER BY created_at DESC LIMIT ?"
    )

    conn = get_connection()
    try:
        rows = conn.execute(sql, params).fetchall()
    finally:
        conn.close()

    return [dict(row) for row in rows]
310
+
311
+
312
def get_pending_outreach(
    channel: str,
    niche: Optional[str] = None,
    limit: int = 20
) -> List[Dict[str, Any]]:
    """Return 'new' leads that have not yet been contacted on *channel*.

    Ordered by priority, then rating, so the best leads go out first.

    Args:
        channel: "whatsapp" or "email" (anything else applies no channel filter).
        niche: Optional niche restriction.
        limit: Maximum number of rows returned.
    """
    filters = ["status = 'new'"]
    params: List[Any] = []

    # Channel-specific "not yet contacted, and reachable" conditions.
    if channel == "whatsapp":
        filters.append("whatsapp_sent = 0 AND phone IS NOT NULL")
    elif channel == "email":
        filters.append("email_sent = 0 AND email IS NOT NULL")

    if niche:
        filters.append("niche = ?")
        params.append(niche)

    params.append(limit)
    sql = (
        "SELECT * FROM leads WHERE "
        + " AND ".join(filters)
        + " ORDER BY priority DESC, rating DESC LIMIT ?"
    )

    conn = get_connection()
    try:
        rows = conn.execute(sql, params).fetchall()
    finally:
        conn.close()

    return [dict(row) for row in rows]
341
+
342
+
343
def mark_outreach(
    lead_id: int,
    channel: str,
    message: str,
    status: str = "sent"
) -> None:
    """Record an outreach touch: flag the lead and append to outreach_log.

    Args:
        lead_id: Target lead's primary key.
        channel: "whatsapp" or "email".
        message: Message text (also stored as whatsapp_link for WhatsApp).
        status: Log entry status (default "sent").
    """
    conn = get_connection()
    cursor = conn.cursor()

    timestamp = datetime.now().isoformat()

    # Per-channel lead flags; both channels move the lead to 'contacted'.
    if channel == "whatsapp":
        cursor.execute(
            """
            UPDATE leads
            SET whatsapp_sent = 1, whatsapp_sent_at = ?, whatsapp_link = ?, status = 'contacted'
            WHERE id = ?
            """,
            (timestamp, message, lead_id),
        )
    elif channel == "email":
        cursor.execute(
            """
            UPDATE leads
            SET email_sent = 1, email_sent_at = ?, status = 'contacted'
            WHERE id = ?
            """,
            (timestamp, lead_id),
        )

    # Append-only audit trail.
    cursor.execute(
        "INSERT INTO outreach_log (lead_id, channel, message_sent, status) VALUES (?, ?, ?, ?)",
        (lead_id, channel, message, status),
    )

    conn.commit()
    conn.close()
377
+
378
+
379
def create_opportunity(
    lead_id: int,
    value: float,
    notes: str = "",
    stage: str = "qualified"
) -> int:
    """Promote a lead into an opportunity.

    Args:
        lead_id: Primary key of the source lead.
        value: Estimated deal value.
        notes: Free-form notes.
        stage: Initial pipeline stage (default "qualified").

    Returns:
        Row id of the new opportunities record.
    """
    conn = get_connection()
    cursor = conn.cursor()

    # Flag the lead itself so list views can surface it without a join.
    cursor.execute(
        "UPDATE leads SET is_opportunity = 1, opportunity_value = ?, opportunity_notes = ? WHERE id = ?",
        (value, notes, lead_id),
    )

    # Dedicated CRM record for pipeline tracking.
    cursor.execute(
        "INSERT INTO opportunities (lead_id, stage, value, notes) VALUES (?, ?, ?, ?)",
        (lead_id, stage, value, notes),
    )
    opp_id = cursor.lastrowid

    conn.commit()
    conn.close()

    return opp_id
407
+
408
+
409
def get_stats() -> Dict[str, Any]:
    """Get comprehensive statistics.

    Returns a dict with totals, per-status/niche/country breakdowns,
    enrichment and outreach counters, and opportunity aggregates.

    Fix: "with_email" previously counted every non-NULL email, including
    empty strings, while get_leads(with_email=True) excludes them — the two
    now use the same predicate so dashboard numbers match list views.
    """
    conn = get_connection()
    cursor = conn.cursor()

    stats = {}

    # Total leads
    cursor.execute("SELECT COUNT(*) FROM leads")
    stats["total_leads"] = cursor.fetchone()[0]

    # By status
    cursor.execute("SELECT status, COUNT(*) FROM leads GROUP BY status")
    stats["by_status"] = dict(cursor.fetchall())

    # By niche
    cursor.execute("SELECT niche, COUNT(*) FROM leads GROUP BY niche")
    stats["by_niche"] = dict(cursor.fetchall())

    # By country
    cursor.execute("SELECT country, COUNT(*) FROM leads GROUP BY country")
    stats["by_country"] = dict(cursor.fetchall())

    # Enrichment stats
    cursor.execute("SELECT COUNT(*) FROM leads WHERE enriched = 1")
    stats["enriched"] = cursor.fetchone()[0]

    # Same predicate as get_leads(with_email=True): exclude empty strings.
    cursor.execute("SELECT COUNT(*) FROM leads WHERE email IS NOT NULL AND email != ''")
    stats["with_email"] = cursor.fetchone()[0]

    # Outreach stats
    cursor.execute("SELECT COUNT(*) FROM leads WHERE whatsapp_sent = 1")
    stats["whatsapp_sent"] = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(*) FROM leads WHERE email_sent = 1")
    stats["email_sent"] = cursor.fetchone()[0]

    cursor.execute("SELECT COUNT(*) FROM leads WHERE replied = 1")
    stats["replied"] = cursor.fetchone()[0]

    # Opportunity stats (COALESCE so an empty table yields 0, not NULL)
    cursor.execute("SELECT COUNT(*), COALESCE(SUM(value), 0) FROM opportunities")
    row = cursor.fetchone()
    stats["opportunities"] = {"count": row[0], "total_value": row[1]}

    conn.close()
    return stats
456
+
457
+
458
def get_leads_count_last_30_days() -> int:
    """Count leads whose scraped_at timestamp falls within the last 30 days."""
    sql = (
        "SELECT COUNT(*) FROM leads "
        "WHERE scraped_at >= datetime('now', '-30 days')"
    )
    conn = get_connection()
    try:
        (count,) = conn.execute(sql).fetchone()
    finally:
        conn.close()
    return count
469
+
470
+
471
if __name__ == "__main__":
    # Smoke test: build the schema, then print a summary of current data.
    init_db()
    stats = get_stats()

    print(f"\n[ESTADISTICAS] Estadísticas DOE:")
    print(f"   Total leads: {stats['total_leads']}")
    print(f"   Con email: {stats['with_email']}")
    print(f"   Enriquecidos: {stats['enriched']}")
    print(f"   WhatsApp enviados: {stats['whatsapp_sent']}")
    print(f"   Emails enviados: {stats['email_sent']}")
lead_gen_pro/l3_execution/enrichment.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ L3 Execution: Data Enrichment Module
3
+ =====================================
4
+ Enriquece leads con emails, teléfonos adicionales y perfiles sociales.
5
+ Implementa la capa L3 del framework DOE.
6
+ """
7
+
8
+ import os
9
+ import re
10
+ import json
11
+ import time
12
+ from typing import Optional, List, Dict, Any
13
+ from urllib.parse import urlparse, quote
14
+
15
+ # Optional: requests for web scraping
16
+ try:
17
+ import requests
18
+ REQUESTS_AVAILABLE = True
19
+ except ImportError:
20
+ REQUESTS_AVAILABLE = False
21
+
22
+
23
def extract_email_from_website(url: str, timeout: int = 10) -> Optional[str]:
    """
    Extract the first plausible business email from a website homepage.

    Args:
        url: Website URL (scheme optional; https is assumed).
        timeout: Request timeout in seconds.

    Returns:
        First acceptable email (lower-cased) or None.

    Fix: the regex also matches retina-image asset names embedded in HTML
    (e.g. "logo@2x.png"); those false positives are now filtered out along
    with the placeholder domains.
    """
    if not REQUESTS_AVAILABLE or not url:
        return None

    try:
        # Clean URL
        if not url.startswith("http"):
            url = f"https://{url}"

        response = requests.get(url, timeout=timeout, headers={
            "User-Agent": "Mozilla/5.0 (compatible; LeadGen/1.0)"
        })

        if response.status_code == 200:
            # Email regex pattern
            email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
            emails = re.findall(email_pattern, response.text)

            # Filter out common non-business emails and image-asset matches.
            excluded = {"example.com", "domain.com", "email.com", "test.com"}
            asset_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp")
            for email in emails:
                domain = email.split("@")[1].lower()
                if domain in excluded:
                    continue
                if domain.endswith(asset_suffixes):
                    continue  # e.g. "logo@2x.png" is not an email
                return email.lower()

        return None

    except Exception as e:
        print(f"   ⚠️ Error extrayendo email de {url}: {e}")
        return None
63
+
64
+
65
def extract_social_profiles(url: str, timeout: int = 10) -> Dict[str, Optional[str]]:
    """
    Scan a website's homepage HTML for social media profile links.

    Args:
        url: Website URL (scheme optional; https is assumed).
        timeout: Request timeout in seconds.

    Returns:
        Dict with keys facebook/instagram/linkedin/twitter; each value is
        the first matching profile URL, or None when absent.
    """
    # One regex per profile slot; searched in order against the raw HTML.
    patterns = {
        "facebook": r'https?://(?:www\.)?facebook\.com/[a-zA-Z0-9._-]+',
        "instagram": r'https?://(?:www\.)?instagram\.com/[a-zA-Z0-9._-]+',
        "linkedin": r'https?://(?:www\.)?linkedin\.com/(?:in|company)/[a-zA-Z0-9._-]+',
        "twitter": r'https?://(?:www\.)?(?:twitter|x)\.com/[a-zA-Z0-9._-]+',
    }
    profiles: Dict[str, Optional[str]] = {key: None for key in patterns}

    if not REQUESTS_AVAILABLE or not url:
        return profiles

    try:
        if not url.startswith("http"):
            url = f"https://{url}"

        response = requests.get(url, timeout=timeout, headers={
            "User-Agent": "Mozilla/5.0 (compatible; LeadGen/1.0)"
        })

        if response.status_code == 200:
            page = response.text
            for key, pattern in patterns.items():
                match = re.search(pattern, page)
                if match:
                    profiles[key] = match.group()

    except Exception as e:
        print(f"   ⚠️ Error extrayendo perfiles de {url}: {e}")

    return profiles
121
+
122
+
123
def format_phone(phone: str, country: str = "usa") -> str:
    """
    Normalize a phone number toward international (+CC...) format.

    Numbers already carrying a leading '+' are returned as-is (cleaned);
    10-digit local numbers get the country code for known countries.
    """
    if not phone:
        return ""

    # Keep only digits and '+' signs.
    cleaned = re.sub(r'[^\d+]', '', phone)

    if cleaned.startswith("+"):
        return cleaned

    # Known dialing prefixes; applied only to 10-digit local numbers.
    prefixes = {"usa": "+1", "venezuela": "+58"}
    prefix = prefixes.get(country)
    if prefix and len(cleaned) == 10:
        return f"{prefix}{cleaned}"

    return cleaned
143
+
144
+
145
def generate_whatsapp_link_raw(phone: str, message: str) -> str:
    """Build a wa.me click-to-chat URL without importing the WA module."""
    digits = "".join(ch for ch in phone if ch.isdigit())
    # The international "00" dial prefix is not valid inside wa.me links.
    if digits.startswith("00"):
        digits = digits[2:]
    return f"https://wa.me/{digits}?text={quote(message)}"
150
+
151
def get_default_message(name: str, city: str, niche: str) -> str:
    """Fallback outreach message used when no niche template is configured."""
    # niche is accepted for signature compatibility but not used in the text.
    return "Hola {}, vi tu negocio en {}. Me gustaría hablar contigo.".format(name, city)
154
+
155
def clean_text(text: Optional[str]) -> Optional[str]:
    """Collapse whitespace and Title-Case the text; falsy values pass through."""
    if not text:
        # Preserve None / "" exactly as received.
        return text
    return " ".join(text.split()).title()
164
+
165
+
166
def calculate_quality_score(lead: Dict[str, Any]) -> int:
    """
    Calculate a quality score from 0 to 100.

    Weights: email 30, phone 20, website 15, social profiles up to 15
    (pro-rated over the first 3 networks), rating 10, address 10.

    Fix: a lead whose "social_profiles" key exists with value None used to
    crash (dict.get's default only applies to *missing* keys); it is now
    treated as "no profiles".
    """
    score = 0
    weights = {
        "email": 30,
        "phone": 20,
        "website": 15,
        "social_profiles": 15,
        "rating": 10,
        "address": 10
    }

    if lead.get("email"): score += weights["email"]
    if lead.get("phone"): score += weights["phone"]
    if lead.get("website"): score += weights["website"]

    # "or {}" also covers an explicit None value, not just a missing key.
    social = lead.get("social_profiles") or {}
    social_count = sum(1 for v in social.values() if v)
    if social_count > 0:
        # Cap at 3 networks; partial credit for fewer.
        score += weights["social_profiles"] * (min(social_count, 3) / 3)

    if lead.get("rating"): score += weights["rating"]
    if lead.get("address"): score += weights["address"]

    return int(score)
193
+
194
+
195
def enrich_lead(lead: Dict[str, Any]) -> Dict[str, Any]:
    """
    Enrich a single lead with additional data and hygiene.

    Steps: text hygiene, phone formatting, email/social discovery from the
    website, WhatsApp link generation and a 0-100 quality score.

    Args:
        lead: Raw lead dictionary (not mutated; an enriched copy is returned).

    Returns:
        Enriched copy of the lead with enriched=True.

    Fix: the config lookup / template rendering used to run unguarded, so a
    missing l3_execution.apify_scraper module or a template with unexpected
    placeholders (KeyError from str.format) aborted the whole batch; it now
    falls back to the built-in default message.
    """
    enriched = lead.copy()

    # 1. Hygiene: Normalize names and addresses
    enriched["name"] = clean_text(lead.get("name"))
    enriched["address"] = clean_text(lead.get("address"))

    print(f"   🔄 Enriqueciendo: {enriched.get('name', 'Unknown')}")

    # 2. Format phone
    if lead.get("phone"):
        enriched["phone_formatted"] = format_phone(
            lead["phone"],
            lead.get("country", "usa")
        )

    # 3. Extract email from website if not present
    if not lead.get("email") and lead.get("website"):
        email = extract_email_from_website(lead["website"])
        if email:
            enriched["email"] = email
            print(f"   ✅ Email encontrado: {email}")

    # 4. Extract social profiles
    if lead.get("website"):
        profiles = extract_social_profiles(lead["website"])
        enriched["social_profiles"] = profiles

        found = [k for k, v in profiles.items() if v]
        if found:
            print(f"   ✅ Perfiles sociales: {', '.join(found)}")

    # 5. WhatsApp Link Auto-generation
    if enriched.get("phone_formatted"):
        name = enriched.get("name", "there")
        city = enriched.get("city", "your area")
        niche = lead.get("niche", "real_estate")
        try:
            # Try to get the per-niche template from config.
            from l3_execution.apify_scraper import get_config
            config = get_config()
            niche_cfg = config.get("niches", {}).get(niche, {})
            template = niche_cfg.get("templates", {}).get("whatsapp", "Hola {name}")
            msg = template.format(name=name, city=city)
        except Exception:
            # Config unavailable or template malformed: use the default text.
            msg = get_default_message(name, city, niche)
        enriched["whatsapp_link"] = generate_whatsapp_link_raw(enriched["phone_formatted"], msg)

    # 6. Quality Score
    enriched["quality_score"] = calculate_quality_score(enriched)

    # Mark as enriched
    enriched["enriched"] = True

    return enriched
251
+
252
+
253
def enrich_leads(leads: List[Dict[str, Any]], delay: float = 1.0) -> List[Dict[str, Any]]:
    """
    Enrich a batch of leads, pausing between items.

    Args:
        leads: List of lead dictionaries.
        delay: Seconds to wait between consecutive enrichments (politeness).

    Returns:
        New list of enriched leads, input order preserved.
    """
    total = len(leads)
    print(f"\n🔍 Enriqueciendo {total} leads...\n")

    enriched_leads: List[Dict[str, Any]] = []
    for index, lead in enumerate(leads):
        enriched_leads.append(enrich_lead(lead))
        # No pause needed after the final lead.
        if index < total - 1:
            time.sleep(delay)

    # Batch summary.
    with_email = sum(1 for item in enriched_leads if item.get("email"))
    with_social = sum(
        1 for item in enriched_leads if any(item.get("social_profiles", {}).values())
    )

    print(f"\n📊 Resultados del enriquecimiento:")
    print(f"   - Con email: {with_email}/{len(enriched_leads)}")
    print(f"   - Con perfiles sociales: {with_social}/{len(enriched_leads)}")

    return enriched_leads
284
+
285
+
286
if __name__ == "__main__":
    # Demo leads for testing — note this performs live HTTP requests
    # against the listed websites when requests is installed.
    demo_leads = [
        {
            "name": "Miami Luxury Realty",
            "phone": "786-555-0101",
            "website": "https://www.rei.com",  # Using real site for demo
            "country": "usa",
        },
        {
            "name": "Test Business",
            "phone": "212-555-0202",
            "website": "https://example.com",
            "country": "venezuela",
        },
    ]

    enriched = enrich_leads(demo_leads)

    print("\n📋 Leads enriquecidos:")
    for lead in enriched:
        print(f"   - {lead.get('name')}")
        print(f"     📧 Email: {lead.get('email', 'N/A')}")
        print(f"     📱 Phone: {lead.get('phone_formatted', lead.get('phone'))}")
lead_gen_pro/l3_execution/enrichment_pro.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ L3 Execution: Pro Enrichment Pipeline
3
+ ====================================
4
+ Integración con Apollo, Clearbit y Lusha (Simulada vía API calls).
5
+ """
6
+
7
+ from typing import Dict, Any, List
8
+
9
class ProEnrichment:
    """Pro enrichment pipeline: simulated Apollo/Clearbit/Lusha providers.

    All provider calls are stand-ins; the real HTTP integrations are noted
    inline where they would go.
    """

    def __init__(self, config: Dict[str, Any]):
        # Only the "enrichment" sub-section of the config drives this pipeline.
        self.config = config.get("enrichment", {})

    def enrich_with_apollo(self, lead: Dict[str, Any]) -> Dict[str, Any]:
        """Simulated Apollo enrichment: verified email + direct phone."""
        if self.config.get("apollo_enabled") and lead.get("email"):
            # Real call would be: requests.post("https://api.apollo.io/v1/...", ...)
            lead.update(
                enrichment_source="Apollo",
                verified_email=True,
                direct_phone=lead.get("phone"),  # simulation
            )
        return lead

    def enrich_with_clearbit(self, lead: Dict[str, Any]) -> Dict[str, Any]:
        """Simulated Clearbit enrichment: firmographic company data."""
        if self.config.get("clearbit_enabled") and lead.get("company"):
            lead.update(
                company_size="50-200",
                company_revenue="$10M+",
                industry_nicho=lead.get("niche"),
            )
        return lead

    def full_enrichment_flow(self, leads: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Run every provider, Apollo first, over each lead (mutated in place)."""
        for lead in leads:
            self.enrich_with_clearbit(self.enrich_with_apollo(lead))
        return leads
lead_gen_pro/l3_execution/instantly_sender.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ L3 Execution: Instantly.ai Email Sender
3
+ =========================================
4
+ Envía emails profesionales usando la API de Instantly.ai.
5
+ Implementa la capa L3 del framework DOE.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import time
11
+ from datetime import datetime
12
+ from typing import Optional, List, Dict, Any
13
+
14
+ try:
15
+ import requests
16
+ REQUESTS_AVAILABLE = True
17
+ except ImportError:
18
+ REQUESTS_AVAILABLE = False
19
+
20
+
21
# Instantly API base URL (v1 endpoints)
INSTANTLY_API_BASE = "https://api.instantly.ai/api/v1"

# Built-in email templates by niche. A template found in config.json under
# "niches.<niche>.templates.email" overrides these at runtime — see
# format_email_template(). Placeholders: {name}, {city}, {sender_name}.
DEFAULT_TEMPLATES = {
    "real_estate": {
        "subject": "Quick question about {city} real estate",
        "body": "Hi {name},\n\nI saw your work in {city}.\n\nBest,\n{sender_name}"
    }
}
32
+
33
def get_config():
    """Load ../config.json (relative to this module); {} when absent."""
    config_path = os.path.join(os.path.dirname(__file__), "..", "config.json")
    if not os.path.exists(config_path):
        return {}
    with open(config_path, "r") as f:
        return json.load(f)
39
+
40
+
41
def get_instantly_api_key() -> Optional[str]:
    """Get Instantly API key from environment or config.

    The INSTANTLY_API_KEY environment variable wins; otherwise fall back to
    the "instantly_api_key" field of the shared config file.

    Fix: this function duplicated the config.json path building and JSON
    parsing that get_config() already implements; it now delegates so both
    stay in sync.
    """
    key = os.environ.get("INSTANTLY_API_KEY")
    if key:
        return key

    # get_config() returns {} when the file is missing, so this yields None
    # exactly as the hand-rolled version did.
    return get_config().get("instantly_api_key")
54
+
55
+
56
def format_email_template(
    template_type: str,
    lead: Dict[str, Any],
    sender_name: str = "Your Name"
) -> Dict[str, str]:
    """
    Render the subject/body email template for a lead.

    Resolution order: config.json niche template, then DEFAULT_TEMPLATES
    for the niche, then the built-in real_estate template.

    Args:
        template_type: 'real_estate' or 'insurance'
        lead: Lead dictionary
        sender_name: Name to sign emails with (config's sender name wins)

    Returns:
        Dictionary with 'subject' and 'body'.
    """
    config = get_config()
    template = (
        config.get("niches", {})
        .get(template_type, {})
        .get("templates", {})
        .get("email")
    )
    if not template:
        # Fall back to the built-in defaults.
        template = DEFAULT_TEMPLATES.get(template_type, DEFAULT_TEMPLATES["real_estate"])

    # The configured sender name overrides the argument when present.
    final_sender_name = config.get("sender", {}).get("name", sender_name)

    lead_name = lead.get("name", "there")
    lead_city = lead.get("city", "your area")

    subject = template.get("subject", "Hello").format(name=lead_name, city=lead_city)
    body = template.get("body", "Hi").format(
        name=lead_name,
        city=lead_city,
        sender_name=final_sender_name,
    )
    return {"subject": subject, "body": body}
94
+
95
+
96
def send_email_instantly(
    to_email: str,
    subject: str,
    body: str,
    from_email: Optional[str] = None,
    campaign_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Send a single email through the Instantly.ai HTTP API.

    Args:
        to_email: Recipient email address
        subject: Email subject
        body: Email body (plain text)
        from_email: Sender email (optional)
        campaign_id: Instantly campaign ID (optional)

    Returns:
        Dict with "success" plus either the API response or an error message.
    """
    api_key = get_instantly_api_key()
    if not api_key:
        print("❌ INSTANTLY_API_KEY no configurado")
        return {"success": False, "error": "API key missing", "mode": "demo"}

    if not REQUESTS_AVAILABLE:
        return {"success": False, "error": "requests not available"}

    # Instantly uses campaign-based sending; for direct send we need to add
    # to a campaign or use the transactional endpoint.
    payload: Dict[str, Any] = {
        "api_key": api_key,
        "to": to_email,
        "subject": subject,
        "body": body,
    }
    if from_email:
        payload["from"] = from_email
    if campaign_id:
        payload["campaign_id"] = campaign_id

    try:
        response = requests.post(
            f"{INSTANTLY_API_BASE}/email/send", json=payload, timeout=30
        )
        if response.status_code == 200:
            print(f"✅ Email enviado a {to_email}")
            return {"success": True, "response": response.json()}

        print(f"❌ Error enviando email: {response.status_code}")
        return {"success": False, "error": response.text}

    except Exception as e:
        print(f"❌ Error: {e}")
        return {"success": False, "error": str(e)}
155
+
156
+
157
def send_email_to_lead(
    lead: Dict[str, Any],
    sender_name: str = "Your Name"
) -> Dict[str, Any]:
    """
    Render the niche template for a lead and send it via Instantly.

    Args:
        lead: Lead dictionary with email
        sender_name: Name to sign email with

    Returns:
        Result dictionary (always includes lead_id; to_email when sent).
    """
    recipient = lead.get("email")
    if not recipient:
        return {"success": False, "error": "No email address", "lead_id": lead.get("id")}

    rendered = format_email_template(lead.get("niche", "real_estate"), lead, sender_name)

    result = send_email_instantly(
        to_email=recipient,
        subject=rendered["subject"],
        body=rendered["body"]
    )
    result.update(lead_id=lead.get("id"), to_email=recipient)
    return result
188
+
189
+
190
def batch_send_emails(
    leads: List[Dict[str, Any]],
    sender_name: str = "Your Name",
    delay_seconds: float = 30.0,
    max_per_hour: int = 20
) -> List[Dict[str, Any]]:
    """
    Send emails to a batch of leads with rate limiting.

    Args:
        leads: List of leads with emails
        sender_name: Name to sign emails
        delay_seconds: Delay between emails
        max_per_hour: Maximum emails per hour

    Returns:
        List of per-lead result dictionaries.

    Fix: the inter-email delay used to compare the loop index against the
    full lead list instead of the max_per_hour-truncated batch, so a capped
    run slept delay_seconds after its final email for nothing; the progress
    totals were reported against the wrong denominator as well.
    """
    # Filter leads with emails
    leads_with_email = [l for l in leads if l.get("email")]

    if not leads_with_email:
        print("❌ No hay leads con email")
        return []

    # Apply the hourly cap up front so the loop and delay logic stay simple.
    batch = leads_with_email[:max_per_hour]

    print(f"\n📧 Enviando {len(batch)} emails...")

    results = []
    sent_count = 0

    for i, lead in enumerate(batch):
        result = send_email_to_lead(lead, sender_name)
        results.append(result)

        if result.get("success"):
            sent_count += 1

        # Rate limiting — no sleep needed after the final email of the batch.
        if i < len(batch) - 1:
            print(f"   ⏳ Esperando {delay_seconds}s...")
            time.sleep(delay_seconds)

    print(f"\n📊 Emails enviados: {sent_count}/{len(batch)}")
    return results
234
+
235
+
236
def demo_email_preview(lead: Dict[str, Any], sender_name: str = "Your Name") -> None:
    """Print a rendered email preview to stdout without sending anything."""
    rendered = format_email_template(lead.get("niche", "real_estate"), lead, sender_name)

    divider = "=" * 50
    print(f"\n{divider}")
    print(f"📧 PREVIEW - Email para: {lead.get('email', 'N/A')}")
    print(f"{divider}")
    print(f"Subject: {rendered['subject']}")
    print(f"\n{rendered['body']}")
    print(f"{divider}\n")
248
+
249
if __name__ == "__main__":
    # Demo: render an email preview without touching the Instantly API.
    demo_lead = {
        "id": 1,
        "name": "Miami Luxury Realty",
        "email": "info@example.com",
        "city": "Miami, FL",
        "niche": "real_estate",
    }

    print("🎭 Modo demo - Preview de email:")
    demo_email_preview(demo_lead, sender_name="Test User")
lead_gen_pro/l3_execution/lead_scoring.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ L3 Execution: Lead Scoring & Qualification (MQL/SQL)
3
+ ===================================================
4
+ Modelo de scoring avanzado para calificar leads basados en datos firmográficos y geográficos.
5
+ """
6
+
7
+ from typing import Dict, Any, List
8
+
9
class LeadScorer:
    """Score and tier leads (0-100) from firmographic and geographic data."""

    def __init__(self):
        # Key USA regions/cities treated as high priority for scoring.
        self.hot_regions = ["CA", "NY", "TX", "FL", "IL"]
        self.priority_cities = ["San Francisco", "New York", "Austin", "Miami", "Chicago"]

    def calculate_score(self, lead: Dict[str, Any]) -> Dict[str, Any]:
        """Compute the score (0-100) for a single lead.

        Breakdown: geography (max 30 pts), contactability (max 40 pts),
        relevance (max 30 pts). Missing or None fields contribute 0.

        Returns:
            Dict with "score" (int), "tier" (str) and "score_details" (str).
        """
        score = 0
        details = []

        # 1. Geographic scoring (max 30 pts). "or" guards handle keys that
        # are present but set to None.
        location = (lead.get("location") or "").upper()
        city = lead.get("city") or ""

        # NOTE(review): plain substring matching means e.g. "CHICAGO" also
        # contains "CA"; kept as-is to preserve the existing scoring behavior.
        if any(region in location for region in self.hot_regions):
            score += 20
            details.append("Region de alta prioridad (+20)")
        if city and any(p_city in city for p_city in self.priority_cities):
            score += 10
            details.append("Ciudad estratégica (+10)")

        # 2. Contactability scoring (max 40 pts)
        if lead.get("email"):
            score += 25
            details.append("Email disponible (+25)")
        if lead.get("phone"):
            score += 15
            details.append("Teléfono disponible (+15)")

        # 3. Relevance scoring (max 30 pts)
        # BUG FIX: lead.get("niche", "") still returns None when the key is
        # present with a None value, crashing .lower(); the "or" guard matches
        # the None handling already applied to location/city above.
        niche = (lead.get("niche") or "").lower()
        if niche in ["real_estate", "healthcare", "saas"]:
            score += 20
            details.append("Nicho premium (+20)")

        if lead.get("linkedin_url"):
            score += 10
            details.append("Enlace LinkedIn (+10)")

        # Map the numeric score onto a qualification tier.
        if score >= 80:
            tier = "SQL (Hot)"
        elif score >= 40:
            tier = "MQL (Warm)"
        else:
            tier = "Lead (Cold)"

        return {
            "score": score,
            "tier": tier,
            "score_details": ", ".join(details)
        }

    def process_batch(self, leads: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Score every lead in place (merging the score fields) and return the list."""
        for lead in leads:
            scoring_data = self.calculate_score(lead)
            lead.update(scoring_data)
        return leads
lead_gen_pro/l3_execution/supabase_sync.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import requests
import json
from datetime import datetime

# Supabase configuration.
# SECURITY(review): a service-role key was hard-coded here and is now in the
# repository history — it should be rotated. Environment variables take
# precedence; the embedded values remain only as a backward-compatible
# fallback so existing deployments keep working.
SUPABASE_URL = os.environ.get("SUPABASE_URL", "https://nvssvykqxaurtlgwxwwy.supabase.co")
SERVICE_ROLE_KEY = os.environ.get(
    "SUPABASE_SERVICE_ROLE_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im52c3N2eWtxeGF1cnRsZ3d4d3d5Iiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTc3Mjg5MjE2NSwiZXhwIjoyMDg4NDY4MTY1fQ.AceH8GESr9xKNmJBMHpM56HZASLRIjHc5dfxyLKBH_o"
)

# Shared REST headers; "Prefer: resolution=merge-duplicates" turns POSTs into upserts.
HEADERS = {
    "apikey": SERVICE_ROLE_KEY,
    "Authorization": f"Bearer {SERVICE_ROLE_KEY}",
    "Content-Type": "application/json",
    "Prefer": "resolution=merge-duplicates"
}
15
+
16
def sync_lead_to_supabase(lead_data):
    """Upsert a single lead into the Supabase 'leads' table.

    Args:
        lead_data: Dict with English internal field names (name, phone, ...).

    Returns:
        True when Supabase answers 200/201/204, False on any error
        (best-effort: network failures are logged, never raised).
    """
    # Map English internal field names onto the Spanish Postgres columns.
    payload = {
        "nombre": lead_data.get("name") or "Sin nombre",
        "telefono": lead_data.get("phone"),
        "telefono_formateado": lead_data.get("phone_formatted"),
        "correo": lead_data.get("email"),
        "direccion": lead_data.get("address"),
        "website": lead_data.get("website"),
        "rating": lead_data.get("rating"),
        "reviews_count": lead_data.get("reviews_count"),
        "categoria": lead_data.get("category") or lead_data.get("niche"),
        "fuente": lead_data.get("source") or "lead_gen_pro",
        "nicho": lead_data.get("niche"),
        "pais": lead_data.get("country"),
        "ciudad": lead_data.get("city"),
        "enriquecido": lead_data.get("enriched", False),
        "estado": lead_data.get("status") or "nuevo",
        "prioridad": lead_data.get("priority", 0),
        "whatsapp_enviado": lead_data.get("whatsapp_sent", False),
        "whatsapp_link": lead_data.get("whatsapp_link"),
        "correo_enviado": lead_data.get("email_sent", False),
        "es_oportunidad": lead_data.get("is_opportunity", False),
        "valor_oportunidad": lead_data.get("opportunity_value"),
        "notas_oportunidad": lead_data.get("opportunity_notes"),
        "quality_score": lead_data.get("quality_score", 0),
        "creado_en": lead_data.get("created_at") or datetime.now().isoformat(),
        "actualizado_en": datetime.now().isoformat()
    }

    try:
        # FIX: requests.post without a timeout can hang forever; cap it.
        response = requests.post(
            f"{SUPABASE_URL}/rest/v1/leads",
            headers=HEADERS,
            json=payload,
            timeout=30,
        )
        return response.status_code in [200, 201, 204]
    except Exception as e:
        # Best-effort sync: log and signal failure instead of raising.
        print(f"[ERROR] Lead Sync Failed: {e}")
        return False
55
+
56
+
57
def sync_all_leads_to_supabase(db_path: str = None, limit: int = 500) -> dict:
    """Read the most recent leads from leads_doe.db and sync them to Supabase.

    Useful for bulk synchronization after a large scraping run.

    Args:
        db_path: SQLite file path; defaults to ../leads_doe.db relative to this module.
        limit: Maximum number of (most recent) rows to sync.

    Returns:
        Dict with "synced"/"errors" counters (plus "total" on success,
        "message" on failure).
    """
    import sqlite3
    import os

    if db_path is None:
        db_path = os.path.join(os.path.dirname(__file__), "..", "leads_doe.db")

    if not os.path.exists(db_path):
        return {"synced": 0, "errors": 0, "message": "DB not found"}

    try:
        conn = sqlite3.connect(db_path, timeout=10)
    except Exception as e:
        return {"synced": 0, "errors": 0, "message": str(e)}

    try:
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()
        # FIX: pass LIMIT as a bound parameter instead of f-string
        # interpolation, and coerce to int defensively.
        cur.execute(
            "SELECT * FROM leads ORDER BY created_at DESC LIMIT ?",
            (int(limit),),
        )
        rows = cur.fetchall()
    except Exception as e:
        return {"synced": 0, "errors": 0, "message": str(e)}
    finally:
        # FIX: close the connection even when the query raises (the old code
        # leaked it on error).
        conn.close()

    synced, errors = 0, 0
    for row in rows:
        lead_data = dict(row)
        if sync_lead_to_supabase(lead_data):
            synced += 1
        else:
            errors += 1

    return {"synced": synced, "errors": errors, "total": len(rows)}
lead_gen_pro/main.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main Entry Point for Lead Generation System
3
+ ============================================
4
+ Wrapper script that runs the DOE orchestrator.
5
+ """
6
+
7
import os
import sys

# Re-exec the script under the project's virtualenv interpreter when it was
# started with a different Python; os.execv replaces the current process.
# NOTE(review): assumes a POSIX venv layout ("venv/bin/python"); on Windows
# the interpreter lives at "venv\\Scripts\\python.exe" — confirm if needed.
venv_path = os.path.join(os.path.dirname(__file__), "venv", "bin", "python")
if os.path.exists(venv_path) and sys.executable != venv_path:
    os.execv(venv_path, [venv_path] + sys.argv)

# Make sibling packages (l2_orchestration, l3_execution) importable even when
# the script is launched from another working directory.
sys.path.insert(0, os.path.dirname(__file__))

# Orchestration entry points (L2 layer).
from l2_orchestration.orchestrator import (
    run_full_pipeline,
    run_scraping_only,
    run_whatsapp_outreach,
    run_email_outreach,
    show_dashboard
)
# SQLite schema initializer (L3 layer).
from l3_execution.database_doe import init_db
26
+
27
+
28
if __name__ == "__main__":
    import argparse

    # CLI definition; RawDescriptionHelpFormatter keeps the example commands
    # in the epilog formatted as written.
    parser = argparse.ArgumentParser(
        description="🚀 Lead Generation DOE System",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Ejemplos:
  python main.py --pipeline --niche real_estate --country usa --location "Miami, FL"
  python main.py --scrape --niche insurance --country venezuela --location "Caracas"
  python main.py --outreach-wa --limit 10
  python main.py --dashboard
  python main.py --streamlit
"""
    )

    # Actions (one runs per invocation; precedence follows the dispatch below)
    parser.add_argument("--pipeline", action="store_true", help="Ejecutar pipeline completo")
    parser.add_argument("--scrape", action="store_true", help="Solo scraping de leads")
    parser.add_argument("--outreach-wa", action="store_true", help="Generar links de WhatsApp")
    parser.add_argument("--outreach-email", action="store_true", help="Preview/enviar emails")
    parser.add_argument("--dashboard", action="store_true", help="Dashboard en texto")
    parser.add_argument("--streamlit", action="store_true", help="Dashboard web Streamlit")

    # Options shared by the actions above
    parser.add_argument("--niche", choices=["real_estate", "insurance"], default="real_estate",
                        help="Nicho objetivo (default: real_estate)")
    parser.add_argument("--country", choices=["usa", "venezuela"], default="usa",
                        help="País objetivo (default: usa)")
    parser.add_argument("--limit", type=int, default=10, help="Límite de leads (default: 10)")
    parser.add_argument("--location", type=str, help="Ubicación específica (Ciudad, Estado o ZIP)")
    parser.add_argument("--demo", action="store_true", help="Modo demo sin API")

    args = parser.parse_args()

    # Ensure the SQLite schema exists before any action touches the DB.
    init_db()

    # Dispatch: first matching action flag wins.
    if args.pipeline:
        run_full_pipeline(
            niche=args.niche,
            country=args.country,
            location=args.location,
            limit_per_city=args.limit,
            demo=args.demo
        )
    elif args.scrape:
        run_scraping_only(
            niche=args.niche,
            country=args.country,
            location=args.location,
            limit=args.limit
        )
    elif args.outreach_wa:
        run_whatsapp_outreach(limit=args.limit)
    elif args.outreach_email:
        # preview_only=True: the CLI never sends emails without explicit opt-in.
        run_email_outreach(limit=args.limit, preview_only=True)
    elif args.dashboard:
        show_dashboard()
    elif args.streamlit:
        import subprocess
        # Launch the Streamlit dashboard using the venv's own streamlit binary.
        # NOTE(review): POSIX venv layout assumed ("venv/bin") — see bootstrap above.
        dashboard_path = os.path.join(os.path.dirname(__file__), "dashboard", "app.py")
        venv_streamlit = os.path.join(os.path.dirname(__file__), "venv", "bin", "streamlit")
        subprocess.run([venv_streamlit, "run", dashboard_path, "--server.headless", "true"])
    else:
        # No action flag given: show help plus a quick status snapshot.
        parser.print_help()
        print("\n📊 Estado actual:")
        show_dashboard()
lead_gen_pro/main_pro.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main Entry Point for Lead Generation PRO - COMPLETAMENTE COMENTADO
3
+ ===================================================================
4
+
5
+ Este es el punto de entrada principal del sistema Lead Generation PRO.
6
+ Funciona como una CLI (Command Line Interface) que permite ejecutar el
7
+ pipeline completo de generación de leads mediante argumentos de línea de comandos.
8
+
9
+ AUTOR: Lead Generation PRO System
10
+ FECHA: 2025
11
+ VERSION: 2.0
12
+
13
+ USO DESDE LÍNEA DE COMANDOS:
14
+ python3 main_pro.py --pipeline --niche "Insurance" --city "Miami" --state "FL" --country "USA"
15
+
16
+ COMPONENTES PRINCIPALES:
17
+ - Parser de argumentos CLI (argparse)
18
+ - Inicialización de base de datos
19
+ - Delegación al ProOrchestrator para ejecución del pipeline
20
+
21
+ FLUJO DE EJECUCIÓN:
22
+ 1. Parsear argumentos de línea de comandos
23
+ 2. Inicializar base de datos SQLite
24
+ 3. Construir objeto de configuración con datos de ubicación
25
+ 4. Llamar a ProOrchestrator.run_pro_pipeline()
26
+ 5. Ejecutar scraping según tipo (B2B, B2C o ambos)
27
+ """
28
+
29
+ # =============================================================================
30
+ # IMPORTS
31
+ # =============================================================================
32
+
33
+ import os # Para operaciones con sistema de archivos y variables de entorno
34
+ import argparse # Para parsear argumentos de línea de comandos
35
+
36
+ # Importar el orquestador principal desde la capa L2
37
+ # El orquestador coordina todo el flujo de trabajo
38
+ from l2_orchestration.orchestrator_pro import ProOrchestrator
39
+
40
+ # Importar función de inicialización de base de datos
41
+ # Esta función crea las tablas necesarias si no existen
42
+ from l3_execution.database_doe import init_db
43
+
44
+ # =============================================================================
45
+ # FUNCIÓN PRINCIPAL
46
+ # =============================================================================
47
+
48
def main():
    """Build the CLI, initialize the database, and run the PRO pipeline.

    Flow: parse arguments -> init_db() -> when --pipeline is set, assemble the
    granular location dict and delegate all scraping/scoring/storage work to
    ProOrchestrator.run_pro_pipeline(); otherwise print the help text.
    """
    parser = argparse.ArgumentParser(
        description="Lead Generation PRO DOE System - Pipeline de generacion de leads B2B y B2C",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Ejemplos de uso:
  # Buscar leads B2B de seguros en Miami
  python3 main_pro.py --pipeline --niche "Insurance" --city "Miami" --state "FL" --type b2b

  # Buscar leads B2C en redes sociales
  python3 main_pro.py --pipeline --niche "Real Estate" --city "Austin" --type b2c --b2c-platforms twitter facebook

  # Buscar ambos tipos con todas las plataformas
  python3 main_pro.py --pipeline --niche "Healthcare" --country "USA" --type both --limit 50
"""
    )

    # Required trigger flag plus core targeting options.
    parser.add_argument(
        "--pipeline",
        action="store_true",
        help="Ejecutar Pipeline Pro (requerido para iniciar el scraping)"
    )
    parser.add_argument(
        "--niche",
        type=str,
        default="Real Estate",
        help="Nicho de mercado a buscar (ej: Real Estate, Insurance, Healthcare)"
    )
    parser.add_argument(
        "--region",
        type=str,
        default="CA Bay Area",
        help="Región legacy (opcional si se usa city/state/country)"
    )

    # Granular geo-targeting (all optional).
    parser.add_argument(
        "--city",
        type=str,
        help="Ciudad objetivo (ej: Miami, Austin, New York)"
    )
    parser.add_argument(
        "--state",
        type=str,
        help="Estado/Provincia (ej: FL, CA, TX)"
    )
    parser.add_argument(
        "--country",
        type=str,
        help="País (ej: USA, Canada, Mexico)"
    )

    # Lead type: businesses, consumers, or both.
    parser.add_argument(
        "--type",
        choices=["b2b", "b2c", "both"],
        default="both",
        help="""
Tipo de leads a buscar:
- b2b: Solo empresas y negocios (LinkedIn, Google Maps)
- b2c: Solo consumidores (Twitter, Reddit, YouTube, Facebook)
- both: Ambos tipos (por defecto)
"""
    )

    # Per-channel platform selection.
    parser.add_argument(
        "--b2b-platforms",
        nargs="+",
        choices=["linkedin", "youtube", "tiktok", "instagram", "google_maps"],
        default=["linkedin", "google_maps"],
        help="""
Plataformas B2B a escanear:
- linkedin: Perfiles profesionales
- google_maps: Negocios locales
- youtube: Canales de YouTube
- tiktok: Cuentas por hashtags
- instagram: Perfiles por hashtags
"""
    )
    parser.add_argument(
        "--b2c-platforms",
        nargs="+",
        choices=["youtube", "reddit", "twitter", "facebook"],
        default=["youtube", "reddit", "twitter"],
        help="""
Plataformas B2C a escanear:
- youtube: Comentarios en videos
- reddit: Posts en subreddits
- twitter: Tweets públicos
- facebook: Posts públicos y grupos públicos
"""
    )

    parser.add_argument(
        "--limit",
        type=int,
        default=20,
        help="Límite de leads a obtener por cada plataforma (default: 20)"
    )

    args = parser.parse_args()

    # Ensure the SQLite schema (leads, outreach_log, opportunities, metrics)
    # exists before the pipeline writes anything.
    init_db()

    if not args.pipeline:
        # Without the --pipeline trigger there is nothing to run.
        parser.print_help()
        return

    # Forward only the geo fields the user actually provided.
    location_data = {
        key: value
        for key, value in (("city", args.city), ("state", args.state), ("country", args.country))
        if value
    }

    # The orchestrator does the heavy lifting: scraping, scoring,
    # deduplication and storage.
    ProOrchestrator().run_pro_pipeline(
        niche=args.niche,
        region=args.region,
        location_data=location_data,
        lead_type=args.type,
        b2b_platforms=args.b2b_platforms,
        b2c_platforms=args.b2c_platforms,
        limit=args.limit
    )
248
+
249
+ # =============================================================================
250
+ # PUNTO DE ENTRADA
251
+ # =============================================================================
252
+
253
if __name__ == "__main__":
    # Standard entry point: runs only when this file is executed directly,
    # not when it is imported as a module.
    main()
lead_gen_pro/requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lead Generation System - Requirements
2
+ # =====================================
3
+
4
+ # Core
5
+ requests>=2.31.0
6
+ streamlit>=1.30.0
7
+ pandas>=2.0.0
8
+ plotly>=5.18.0
9
+ apify-client>=1.6.0
10
+
11
+ # WhatsApp & Automation
12
+ playwright>=1.40.0
13
+
14
+ # Utilities
15
+ python-dotenv>=1.0.0
16
+ schedule>=1.2.0
lead_gen_pro/scraper.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Google Maps Lead Scraper Module
3
+ ================================
4
+ Uses SerpApi to search for business leads on Google Maps.
5
+ Supports real estate agents and insurance brokers in USA and Venezuela.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import time
11
+ from typing import Optional, List, Dict, Any
12
+ from dataclasses import dataclass
13
+
14
+ # You'll need to install: pip install google-search-results
15
+ try:
16
+ from serpapi import GoogleSearch
17
+ SERPAPI_AVAILABLE = True
18
+ except ImportError:
19
+ SERPAPI_AVAILABLE = False
20
+ print("⚠️ SerpApi no está instalado. Ejecuta: pip install google-search-results")
21
+
22
+
23
@dataclass
class SearchConfig:
    """Configuration for a single lead-search run."""
    query: str       # search term sent to Google Maps, e.g. "real estate agent"
    location: str    # human-readable location, e.g. "Miami, FL"
    country: str     # 'usa' or 'venezuela'
    niche: str       # 'real_estate' or 'insurance'
    limit: int = 20  # maximum results to collect
31
+
32
+
33
# Predefined search queries for each niche, keyed by niche then country.
# Spanish variants are used for Venezuela searches.
SEARCH_QUERIES = {
    "real_estate": {
        "usa": [
            "real estate agent",
            "realtor",
            "real estate broker",
            "property agent",
        ],
        "venezuela": [
            "inmobiliaria",
            "agente inmobiliario",
            "bienes raices",
            "corredor inmobiliario",
        ]
    },
    "insurance": {
        "usa": [
            "insurance agent",
            "insurance broker",
            "insurance agency",
            "life insurance agent",
        ],
        "venezuela": [
            "corredor de seguros",
            "agente de seguros",
            "aseguradora",
            "seguros de vida",
        ]
    }
}

# Major cities per country; scrape_leads() defaults to the first three.
CITIES = {
    "usa": [
        "Miami, FL",
        "Houston, TX",
        "Los Angeles, CA",
        "New York, NY",
        "Chicago, IL",
        "Dallas, TX",
        "Phoenix, AZ",
        "Orlando, FL",
    ],
    "venezuela": [
        "Caracas",
        "Maracaibo",
        "Valencia",
        "Barquisimeto",
        "Maracay",
        "Puerto La Cruz",
        "San Cristobal",
    ]
}
87
+
88
+
89
def get_serpapi_key() -> Optional[str]:
    """Look up the SerpApi key: environment variable first, then config.json."""
    env_key = os.environ.get("SERPAPI_KEY")
    if env_key:
        return env_key

    # Fall back to a config.json sitting next to this module.
    config_path = os.path.join(os.path.dirname(__file__), "config.json")
    if not os.path.exists(config_path):
        return None
    with open(config_path, "r") as f:
        return json.load(f).get("serpapi_key")
104
+
105
+
106
def search_google_maps(
    query: str,
    location: str,
    api_key: str,
    limit: int = 20
) -> List[Dict[str, Any]]:
    """
    Search Google Maps via SerpApi.

    Args:
        query: Search query (e.g., "real estate agent")
        location: Location string (e.g., "Miami, FL")
        api_key: SerpApi API key
        limit: Maximum number of results

    Returns:
        List of business dicts with contact info; empty list when the SDK is
        missing or the request fails.
    """
    if not SERPAPI_AVAILABLE:
        # FIX: the previous message claimed it was "installing" — it was not.
        print("❌ SerpApi no disponible. Ejecuta: pip install google-search-results")
        return []

    results: List[Dict[str, Any]] = []

    # FIX: build the params dict once. The old code assigned "q" twice and
    # sent "ll": None, which serializes as a bogus coordinate parameter.
    # Location is folded into the query string instead.
    params = {
        "engine": "google_maps",
        "q": f"{query} in {location}",
        "type": "search",
        "api_key": api_key,
    }

    try:
        search = GoogleSearch(params)
        data = search.get_dict()

        for place in data.get("local_results", [])[:limit]:
            results.append({
                "name": place.get("title"),
                "address": place.get("address"),
                "phone": place.get("phone"),
                "website": place.get("website"),
                "rating": place.get("rating"),
                "reviews_count": place.get("reviews"),
                "place_id": place.get("place_id"),
                "thumbnail": place.get("thumbnail"),
            })

    except Exception as e:
        # Best-effort: report and return whatever was collected so far.
        print(f"❌ Error en búsqueda: {e}")

    return results
165
+
166
+
167
def scrape_leads(
    niche: str,
    country: str,
    cities: Optional[List[str]] = None,
    limit_per_city: int = 10
) -> List[Dict[str, Any]]:
    """Scrape leads for a niche/country combination via Google Maps.

    Args:
        niche: 'real_estate' or 'insurance'
        country: 'usa' or 'venezuela'
        cities: Cities to search; defaults to the first 3 known cities.
        limit_per_city: Max leads per city.

    Returns:
        Phone-deduplicated lead dicts ready for database insertion.
    """
    api_key = get_serpapi_key()
    if not api_key:
        print("❌ No se encontró SERPAPI_KEY. Configura la variable de entorno o config.json")
        return []

    target_cities = cities if cities is not None else CITIES.get(country, [])[:3]

    niche_queries = SEARCH_QUERIES.get(niche, {}).get(country, [])
    if not niche_queries:
        print(f"❌ No hay queries definidas para {niche}/{country}")
        return []

    collected: List[Dict[str, Any]] = []
    seen_phones = set()  # phone-based dedup across cities and queries

    for city in target_cities:
        # Only the first two query variants per city, to limit API spend.
        for search_term in niche_queries[:2]:
            print(f"🔍 Buscando: '{search_term}' en {city}...")

            hits = search_google_maps(
                query=search_term,
                location=city,
                api_key=api_key,
                limit=limit_per_city
            )

            for hit in hits:
                phone = hit.get("phone")
                if not phone or phone in seen_phones:
                    continue
                seen_phones.add(phone)
                collected.append({
                    "name": hit.get("name"),
                    "phone": phone,
                    "email": None,  # Usually not available in Maps
                    "address": hit.get("address"),
                    "website": hit.get("website"),
                    "rating": hit.get("rating"),
                    "reviews_count": hit.get("reviews_count"),
                    "source": "google_maps",
                    "niche": niche,
                    "country": country,
                    "city": city,
                })

            # Gentle rate limiting between API calls.
            time.sleep(1)

    print(f"✅ Total leads encontrados: {len(collected)}")
    return collected
237
+
238
+
239
def demo_mode() -> List[Dict[str, Any]]:
    """Return three hard-coded sample leads for offline pipeline testing.

    No API calls are made; the data mirrors the shape produced by
    scrape_leads() (one USA real-estate, one Venezuela insurance, one
    USA real-estate lead).
    """
    miami_realty = {
        "name": "Miami Luxury Realty",
        "phone": "+1-786-555-0101",
        "email": None,
        "address": "1234 Brickell Ave, Miami, FL",
        "website": "https://example-realty.com",
        "rating": 4.8,
        "reviews_count": 127,
        "source": "demo",
        "niche": "real_estate",
        "country": "usa",
        "city": "Miami, FL",
    }
    caracas_seguros = {
        "name": "Seguros Caracas Plus",
        "phone": "+58-212-555-0202",
        "email": None,
        "address": "Av. Francisco de Miranda, Caracas",
        "website": "https://example-seguros.com",
        "rating": 4.5,
        "reviews_count": 89,
        "source": "demo",
        "niche": "insurance",
        "country": "venezuela",
        "city": "Caracas",
    }
    houston_homes = {
        "name": "Texas Home Experts",
        "phone": "+1-713-555-0303",
        "email": None,
        "address": "5678 Westheimer Rd, Houston, TX",
        "website": "https://example-homes.com",
        "rating": 4.9,
        "reviews_count": 215,
        "source": "demo",
        "niche": "real_estate",
        "country": "usa",
        "city": "Houston, TX",
    }
    return [miami_realty, caracas_seguros, houston_homes]
286
+
287
+
288
if __name__ == "__main__":
    import argparse

    # Standalone CLI for quick manual runs of this scraper module.
    parser = argparse.ArgumentParser(description="Google Maps Lead Scraper")
    parser.add_argument("--niche", choices=["real_estate", "insurance"], default="real_estate")
    parser.add_argument("--country", choices=["usa", "venezuela"], default="usa")
    parser.add_argument("--demo", action="store_true", help="Use demo data instead of API")
    parser.add_argument("--limit", type=int, default=5, help="Leads per city")

    args = parser.parse_args()

    if args.demo:
        # Offline mode: canned sample leads, no SerpApi key required.
        print("🎭 Modo demo activado")
        leads = demo_mode()
    else:
        # Live mode: queries SerpApi for the selected niche/country.
        leads = scrape_leads(
            niche=args.niche,
            country=args.country,
            limit_per_city=args.limit
        )

    # Print a short summary (first five leads only).
    print(f"\n📋 Leads encontrados: {len(leads)}")
    for lead in leads[:5]:
        print(f" - {lead['name']} | {lead['phone']} | {lead['city']}")
lead_gen_pro/whatsapp_sender.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
WhatsApp Web Sender Module
==========================
Automates WhatsApp Web using Playwright for lead outreach.
Includes rate limiting and human-like delays to avoid detection.
"""

import os
import time
import random
import json
from datetime import datetime
from typing import Optional, List, Dict, Any

# Playwright is optional - will work in semi-manual mode without it.
# PLAYWRIGHT_AVAILABLE gates the automated path (WhatsAppWebAutomation.start).
try:
    from playwright.sync_api import sync_playwright, Browser, Page
    PLAYWRIGHT_AVAILABLE = True
except ImportError:
    PLAYWRIGHT_AVAILABLE = False
    print("⚠️ Playwright no está instalado. Modo semi-automático disponible.")


# Message templates for each niche.
# Default templates (will be overridden by config.json when it provides
# niches.<niche>.templates.whatsapp — see get_message_for_lead).
# Keyed by niche, then by country; {name} and {city} are filled per lead.
DEFAULT_TEMPLATES = {
    "real_estate": {
        "usa": "Hi {name}! I saw your work in {city}. I have a proposal for you!",
        "venezuela": "¡Hola {name}! Vi tu trabajo en {city}. ¡Tengo una propuesta!"
    }
}
32
+
33
def get_config():
    """Load config.json sitting next to this module; empty dict when absent."""
    cfg_file = os.path.join(os.path.dirname(__file__), "config.json")
    if not os.path.exists(cfg_file):
        return {}
    with open(cfg_file, "r") as fh:
        return json.load(fh)
39
+
40
+
41
def get_random_delay(min_seconds: int = 5, max_seconds: int = 15) -> float:
    """Pick a uniformly distributed pause length to mimic a human operator."""
    span = max_seconds - min_seconds
    return min_seconds + span * random.random()
44
+
45
+
46
def format_phone_for_whatsapp(phone: str) -> str:
    """
    Format a phone number for a WhatsApp click-to-chat (wa.me) URL.

    wa.me expects the full international number with digits only — no '+',
    spaces, dashes or parentheses — so every non-digit character is removed.

    Bug fix: the previous version kept '+' characters and only stripped a
    *leading* one, so a stray interior '+' (e.g. "58-212+555") survived into
    the URL. Keeping digits only handles both cases.

    Args:
        phone: Raw phone string, e.g. "+1-786-555-0101".

    Returns:
        Digits-only string, e.g. "17865550101".
    """
    return ''.join(ch for ch in phone if ch.isdigit())
59
+
60
+
61
def get_message_for_lead(lead: Dict[str, Any]) -> str:
    """
    Build a personalized outreach message for a lead.

    Template lookup order: config.json (niches.<niche>.templates.whatsapp),
    then DEFAULT_TEMPLATES[niche][country], then a generic greeting.
    A list-valued template means "pick one at random".
    """
    niche = lead.get("niche", "real_estate")
    country = lead.get("country", "usa")

    cfg_templates = get_config().get("niches", {}).get(niche, {}).get("templates", {})
    template = cfg_templates.get("whatsapp")
    if not template:
        template = DEFAULT_TEMPLATES.get(niche, {}).get(country, "Hello {name}")

    if isinstance(template, list):
        template = random.choice(template)

    return template.format(
        name=lead.get("name", "there"),
        city=lead.get("city", "your area"),
    )
82
+
83
+
84
def generate_whatsapp_link(phone: str, message: str) -> str:
    """
    Build a wa.me click-to-chat URL carrying a pre-filled message.
    The link works both for manual sending and for browser automation.
    """
    from urllib.parse import quote

    number = format_phone_for_whatsapp(phone)
    text = quote(message)
    return "https://wa.me/{}?text={}".format(number, text)
95
+
96
+
97
def send_whatsapp_message_manual(lead: Dict[str, Any]) -> Dict[str, Any]:
    """
    Prepare a WhatsApp link for manual sending.

    Returns a result dict with the link and personalized message, or a
    {"success": False, ...} dict when the lead has no phone number.
    """
    phone = lead.get("phone")
    if not phone:
        return {"success": False, "error": "No phone number"}

    message = get_message_for_lead(lead)

    return {
        "success": True,
        "mode": "manual",
        "phone": phone,
        "message": message,
        "link": generate_whatsapp_link(phone, message),
        "lead_id": lead.get("id"),
    }
117
+
118
+
119
class WhatsAppWebAutomation:
    """
    Automates WhatsApp Web using Playwright.
    Requires a logged-in WhatsApp Web session.

    Session state is kept in a persistent browser profile under
    ``.whatsapp_session`` next to this module, so the QR-code login
    survives restarts.
    """

    def __init__(self, headless: bool = False):
        # headless=False by default: the first run needs a visible window
        # so the user can scan the login QR code.
        self.headless = headless
        self.browser: Optional[Browser] = None
        self.page: Optional[Page] = None
        self.session_dir = os.path.join(os.path.dirname(__file__), ".whatsapp_session")

    def start(self) -> bool:
        """Start the browser, open WhatsApp Web, and wait for login.

        Returns True once the chat list is visible (user is logged in);
        False if Playwright is missing or startup fails.
        """
        if not PLAYWRIGHT_AVAILABLE:
            print("❌ Playwright no disponible. Usa el modo manual.")
            return False

        try:
            self.playwright = sync_playwright().start()

            # Use persistent context to keep login.
            # NOTE(review): launch_persistent_context returns a BrowserContext
            # rather than a Browser, but it exposes the pages/new_page/close
            # API used below — the Optional[Browser] annotation is nominal.
            self.browser = self.playwright.chromium.launch_persistent_context(
                self.session_dir,
                headless=self.headless,
                args=["--start-maximized"]
            )

            self.page = self.browser.pages[0] if self.browser.pages else self.browser.new_page()
            self.page.goto("https://web.whatsapp.com")

            print("🟢 WhatsApp Web abierto. Esperando login...")

            # Wait for the main interface to load (indicates successful login).
            # The 120 s timeout leaves time for a manual QR scan on first run.
            self.page.wait_for_selector('[data-testid="chat-list"]', timeout=120000)

            print("✅ WhatsApp Web listo!")
            return True

        except Exception as e:
            print(f"❌ Error iniciando WhatsApp: {e}")
            return False

    def send_message(self, phone: str, message: str) -> bool:
        """
        Send a message to a phone number via the wa.me pre-filled-chat URL.
        Returns True if the send button was found and clicked.
        """
        if not self.page:
            print("❌ Navegador no iniciado")
            return False

        try:
            formatted_phone = format_phone_for_whatsapp(phone)
            link = generate_whatsapp_link(formatted_phone, message)

            self.page.goto(link)

            # Wait for the send button; the random delay both lets the chat
            # render and mimics human pacing.
            time.sleep(get_random_delay(3, 6))

            # Try to find and click the send button
            send_button = self.page.query_selector('[data-testid="send"]')
            if send_button:
                time.sleep(get_random_delay(1, 3))
                send_button.click()
                print(f"✅ Mensaje enviado a {phone}")
                return True
            else:
                print(f"⚠️ No se encontró botón de enviar para {phone}")
                return False

        except Exception as e:
            print(f"❌ Error enviando mensaje a {phone}: {e}")
            return False

    def stop(self):
        """Close the browser/context and shut down Playwright."""
        if self.browser:
            self.browser.close()
        # self.playwright only exists if start() got far enough to create it.
        if hasattr(self, 'playwright'):
            self.playwright.stop()
201
+
202
+
203
def batch_generate_links(leads: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Generate WhatsApp links for a batch of leads.
    Returns one result dict per lead, in input order, for manual sending.
    """
    return [send_whatsapp_message_manual(lead) for lead in leads]
215
+
216
+
217
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description="WhatsApp Web Sender")
    cli.add_argument("--demo", action="store_true", help="Generate demo links")
    cli.add_argument("--auto", action="store_true", help="Use automated sending (requires Playwright)")
    opts = cli.parse_args()

    # Demo leads for testing
    demo_leads = [
        {
            "id": 1,
            "name": "Miami Luxury Realty",
            "phone": "+1-786-555-0101",
            "niche": "real_estate",
            "country": "usa",
            "city": "Miami, FL",
        },
        {
            "id": 2,
            "name": "Seguros Caracas",
            "phone": "+58-212-555-0202",
            "niche": "insurance",
            "country": "venezuela",
            "city": "Caracas",
        },
    ]

    if opts.demo:
        # Semi-manual mode: print a wa.me link per lead for the user to open.
        print("🔗 Generando links de WhatsApp en modo demo...\n")
        for item in batch_generate_links(demo_leads):
            if item["success"]:
                print(f"📱 {item['phone']}")
                print(f"   Mensaje: {item['message'][:60]}...")
                print(f"   Link: {item['link'][:80]}...\n")

    elif opts.auto:
        # Fully automated mode: drive WhatsApp Web through Playwright.
        print("🤖 Modo automático (requiere Playwright instalado)")
        wa = WhatsAppWebAutomation(headless=False)
        if wa.start():
            for lead in demo_leads:
                wa.send_message(lead["phone"], get_message_for_lead(lead))
                time.sleep(get_random_delay(20, 40))  # Long delay between messages
            wa.stop()

    else:
        print("Usa --demo para generar links o --auto para envío automático")
lead_gen_pro/workflow.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Lead Generation Workflow Orchestrator
3
+ ======================================
4
+ Main entry point that orchestrates the entire lead generation pipeline:
5
+ 1. Scrape leads from Google Maps
6
+ 2. Store leads in database
7
+ 3. Generate outreach messages
8
+ 4. Track outreach status
9
+ """
10
+
11
+ import os
12
+ import sys
13
+ import time
14
+ import json
15
+ from datetime import datetime
16
+ from typing import Optional, List, Dict, Any
17
+
18
+ # Import local modules
19
+ from database import init_db, add_lead, get_leads, get_pending_outreach, mark_outreach_sent, get_stats
20
+ from scraper import scrape_leads, demo_mode as scraper_demo
21
+ from whatsapp_sender import batch_generate_links, send_whatsapp_message_manual
22
+
23
+
24
def load_config() -> Dict[str, Any]:
    """Load configuration from config.json beside this module; {} when missing."""
    cfg_file = os.path.join(os.path.dirname(__file__), "config.json")
    if not os.path.exists(cfg_file):
        return {}
    with open(cfg_file, "r") as fh:
        return json.load(fh)
31
+
32
+
33
def run_scraping_pipeline(
    niche: str,
    country: str,
    limit_per_city: int = 5,
    demo: bool = False
) -> int:
    """
    Run the scraping pipeline for a specific niche and country.

    Args:
        niche: 'real_estate' or 'insurance'
        country: 'usa' or 'venezuela'
        limit_per_city: Max leads per city
        demo: Use demo data instead of API

    Returns:
        Number of leads added to database.
    """
    rule = "=" * 50
    print(f"\n{rule}")
    print(f"🔍 SCRAPING: {niche.upper()} en {country.upper()}")
    print(f"{rule}\n")

    if demo:
        # Demo data covers every niche/country combo; keep only the requested one.
        found = [
            item for item in scraper_demo()
            if item.get("niche") == niche and item.get("country") == country
        ]
    else:
        found = scrape_leads(
            niche=niche,
            country=country,
            limit_per_city=limit_per_city
        )

    # Persist each lead, counting only successful inserts.
    added = 0
    for lead in found:
        try:
            new_id = add_lead(lead)
        except Exception as exc:
            print(f"   ⚠️ Error agregando lead: {exc}")
        else:
            added += 1
            print(f"   ✅ Agregado: {lead['name']} (ID: {new_id})")

    print(f"\n📊 Total leads agregados: {added}")
    return added
78
+
79
+
80
def run_outreach_pipeline(
    niche: Optional[str] = None,
    channel: str = "whatsapp",
    limit: int = 10,
    auto_send: bool = False
) -> List[Dict[str, Any]]:
    """
    Run the outreach pipeline for pending leads.

    Args:
        niche: Filter by niche (optional)
        channel: 'whatsapp' or 'instagram'
        limit: Max leads to process
        auto_send: If True, attempt automatic sending (requires Playwright)

    Returns:
        List of outreach results.
    """
    rule = "=" * 50
    print(f"\n{rule}")
    print(f"📤 OUTREACH: {channel.upper()} ({limit} leads)")
    print(f"{rule}\n")

    # Fetch leads not yet contacted on this channel.
    pending = get_pending_outreach(channel=channel, niche=niche, limit=limit)
    if not pending:
        print("✅ No hay leads pendientes de contactar.")
        return []

    print(f"📋 {len(pending)} leads pendientes encontrados\n")

    results: List[Dict[str, Any]] = []

    if channel == "whatsapp":
        # Generate one wa.me link per pending lead.
        for idx, outcome in enumerate(batch_generate_links(pending)):
            if outcome["success"]:
                lead = pending[idx]
                print(f"📱 Lead #{lead.get('id', idx)}: {lead.get('name', 'Unknown')}")
                print(f"   Teléfono: {outcome['phone']}")
                print(f"   Mensaje: {outcome['message'][:50]}...")
                print(f"   🔗 Link: {outcome['link'][:60]}...\n")

                # Mark as sent if in auto mode
                if auto_send:
                    # TODO: Implement actual sending with Playwright
                    pass

            results.append(outcome)

    return results
133
+
134
+
135
def print_dashboard():
    """Print a dashboard with current stats."""
    stats = get_stats()
    rule = "=" * 50

    print(f"\n{rule}")
    print(f"📊 DASHBOARD - Lead Generation System")
    print(f"{rule}")
    print(f"\n📈 Total de Leads: {stats['total_leads']}")

    # Breakdown sections are only printed when non-empty.
    breakdowns = (
        ("\n🏷️ Por Nicho:", stats['by_niche']),
        ("\n🌎 Por País:", stats['by_country']),
        ("\n📌 Por Estado:", stats['by_status']),
    )
    for heading, counts in breakdowns:
        if counts:
            print(heading)
            for label, count in counts.items():
                print(f"   - {label}: {count}")

    print(f"\n📤 Outreach Enviado:")
    print(f"   - WhatsApp: {stats['whatsapp_sent']}")
    print(f"   - Instagram: {stats['instagram_sent']}")
    print(f"\n{rule}\n")
163
+
164
+
165
def full_pipeline_demo():
    """Run a full demo of the entire pipeline: init DB, scrape, report, outreach."""
    print("\n🚀 INICIANDO PIPELINE COMPLETO EN MODO DEMO\n")

    # Initialize database
    print("1️⃣ Inicializando base de datos...")
    init_db()

    # Scrape demo leads for every niche/country combination.
    print("\n2️⃣ Scraping de leads (modo demo)...")
    for demo_niche in ("real_estate", "insurance"):
        for demo_country in ("usa", "venezuela"):
            run_scraping_pipeline(niche=demo_niche, country=demo_country, demo=True)

    # Show dashboard
    print("\n3️⃣ Estado actual del sistema:")
    print_dashboard()

    # Generate outreach links
    print("\n4️⃣ Generando links de WhatsApp para outreach...")
    outreach_results = run_outreach_pipeline(channel="whatsapp", limit=5)

    print(f"\n✅ Pipeline demo completado. {len(outreach_results)} links generados.")
    return outreach_results
195
+
196
+
197
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description="Lead Generation Workflow")
    cli.add_argument("--demo", action="store_true", help="Run full demo pipeline")
    cli.add_argument("--scrape", action="store_true", help="Run scraping only")
    cli.add_argument("--outreach", action="store_true", help="Run outreach only")
    cli.add_argument("--stats", action="store_true", help="Show dashboard only")
    cli.add_argument("--niche", choices=["real_estate", "insurance"], default="real_estate")
    cli.add_argument("--country", choices=["usa", "venezuela"], default="usa")
    cli.add_argument("--limit", type=int, default=10)
    opts = cli.parse_args()

    if opts.demo:
        full_pipeline_demo()
    elif opts.scrape:
        init_db()
        run_scraping_pipeline(
            niche=opts.niche,
            country=opts.country,
            limit_per_city=opts.limit,
            demo=True  # Change to False when SerpApi is configured
        )
    elif opts.outreach:
        run_outreach_pipeline(niche=opts.niche, limit=opts.limit)
    elif opts.stats:
        init_db()
        print_dashboard()
    else:
        # No mode selected: show usage hints.
        print("Usa --demo para ejecutar el pipeline completo en modo demo")
        print("Usa --scrape para ejecutar solo el scraping")
        print("Usa --outreach para ejecutar solo el outreach")
        print("Usa --stats para ver las estadísticas")
index.html → static/index.html RENAMED
@@ -1,216 +1,595 @@
1
  <!DOCTYPE html>
2
  <html lang="es">
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
  <title>NexusCRM - CRM + Lead Gen</title>
7
  <style>
8
- * { margin: 0; padding: 0; box-sizing: border-box; }
9
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  body {
11
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
12
- background: #f5f5f5;
 
 
 
 
13
  min-height: 100vh;
 
14
  }
15
-
16
  .header {
17
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
 
 
18
  color: white;
19
- padding: 1rem 2rem;
20
- box-shadow: 0 2px 10px rgba(0,0,0,0.1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  }
22
-
23
- .header h1 { font-size: 1.5rem; }
24
- .header .subtitle { font-size: 0.875rem; opacity: 0.9; }
25
-
26
  .nav {
27
- background: white;
28
- padding: 0.5rem 2rem;
29
- border-bottom: 1px solid #e0e0e0;
30
  display: flex;
31
- gap: 0.5rem;
32
  flex-wrap: wrap;
33
  }
34
-
35
  .nav button {
36
- padding: 0.5rem 1rem;
37
- border: none;
38
- background: transparent;
 
 
39
  cursor: pointer;
40
  font-size: 0.9rem;
41
- border-radius: 4px;
42
- transition: all 0.2s;
 
43
  }
44
-
45
- .nav button:hover { background: #f0f0f0; }
46
-
 
 
 
 
47
  .nav button.active {
48
- background: #667eea;
49
- color: white;
 
 
 
 
 
50
  }
51
-
52
- .main { padding: 1.5rem; }
53
-
54
  .stats {
55
  display: grid;
56
- grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
57
- gap: 1rem;
58
- margin-bottom: 1.5rem;
59
  }
60
-
61
  .stat-card {
62
- background: white;
63
- padding: 1.25rem;
64
- border-radius: 8px;
65
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
66
- }
67
-
68
- .stat-card h3 { color: #666; font-size: 0.8rem; margin-bottom: 0.5rem; text-transform: uppercase; }
69
- .stat-card .value { font-size: 1.75rem; font-weight: bold; color: #667eea; }
70
- .stat-card .value.green { color: #4caf50; }
71
- .stat-card .value.orange { color: #f57c00; }
72
- .stat-card .value.red { color: #f44336; }
73
-
74
- .card {
75
- background: white;
76
  padding: 1.5rem;
77
- border-radius: 8px;
78
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
79
- margin-bottom: 1rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  }
81
-
82
- .card h2 { margin-bottom: 1rem; color: #333; font-size: 1.25rem; }
83
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  .filters {
85
  display: flex;
86
  gap: 1rem;
87
- margin-bottom: 1rem;
88
  flex-wrap: wrap;
89
  }
90
-
91
- .filters select, .filters input {
92
- padding: 0.5rem;
93
- border: 1px solid #e0e0e0;
94
- border-radius: 4px;
 
 
 
95
  font-size: 0.9rem;
 
 
96
  }
97
-
98
- .filters input[type="text"] { flex: 1; min-width: 200px; }
99
-
 
 
 
 
 
 
 
 
 
 
 
 
100
  table {
101
  width: 100%;
102
- border-collapse: collapse;
 
103
  }
104
-
105
- th, td {
106
- padding: 0.75rem;
107
  text-align: left;
108
- border-bottom: 1px solid #e0e0e0;
109
- font-size: 0.9rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  }
111
-
112
- th { background: #f9f9f9; font-weight: 600; color: #555; }
113
- tr:hover { background: #f5f5f5; }
114
-
 
 
 
 
 
 
115
  .badge {
116
- display: inline-block;
117
- padding: 0.2rem 0.5rem;
118
- border-radius: 4px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  font-size: 0.75rem;
120
- font-weight: 600;
 
 
 
 
 
 
 
 
 
 
 
 
121
  }
122
-
123
- .badge-lead { background: #e3f2fd; color: #1976d2; }
124
- .badge-cliente { background: #e8f5e9; color: #388e3c; }
125
- .badge-nuevo { background: #fff3e0; color: #f57c00; }
126
- .badge-contactado { background: #e1f5fe; color: #0288d1; }
127
- .badge-calificado { background: #f3e5f5; color: #7b1fa2; }
128
- .badge-ganado { background: #e8f5e9; color: #2e7d32; }
129
- .badge-perdido { background: #ffebee; color: #c62828; }
130
-
131
  .btn {
132
- padding: 0.4rem 0.75rem;
133
- border: none;
134
- border-radius: 4px;
135
  cursor: pointer;
136
- font-size: 0.8rem;
137
  transition: all 0.2s;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  }
139
-
140
- .btn-primary { background: #667eea; color: white; }
141
- .btn-primary:hover { background: #5a6fd6; }
142
- .btn-success { background: #4caf50; color: white; }
143
- .btn-success:hover { background: #43a047; }
144
- .btn-whatsapp { background: #25d366; color: white; }
145
- .btn-whatsapp:hover { background: #20b655; }
146
-
147
  .pipeline-stages {
148
  display: flex;
149
- gap: 1rem;
150
  overflow-x: auto;
151
  padding-bottom: 1rem;
152
  }
153
-
154
  .pipeline-stage {
155
- min-width: 250px;
156
- background: #f9f9f9;
157
- border-radius: 8px;
158
- padding: 1rem;
 
159
  }
160
-
161
  .pipeline-stage h4 {
162
- color: #555;
163
- margin-bottom: 0.75rem;
164
- padding-bottom: 0.5rem;
165
- border-bottom: 2px solid #ddd;
166
  }
167
-
168
  .pipeline-item {
169
- background: white;
170
- padding: 0.75rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  border-radius: 4px;
172
- margin-bottom: 0.5rem;
173
- box-shadow: 0 1px 2px rgba(0,0,0,0.1);
174
- }
175
-
176
- .pipeline-item .name { font-weight: 600; font-size: 0.9rem; }
177
- .pipeline-item .value { color: #4caf50; font-weight: 600; }
178
- .pipeline-item .meta { font-size: 0.75rem; color: #888; }
179
-
180
- .form-group { margin-bottom: 1rem; }
181
- .form-group label { display: block; margin-bottom: 0.5rem; font-weight: 500; font-size: 0.9rem; }
182
- .form-group input, .form-group select, .form-group textarea {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  width: 100%;
184
- padding: 0.6rem;
185
- border: 1px solid #e0e0e0;
186
- border-radius: 4px;
 
 
187
  font-size: 0.9rem;
 
 
188
  }
189
-
190
- .grid-2 { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; }
191
-
192
- .whatsapp-card {
193
- background: #f0fdf4;
194
- border: 1px solid #bbf7d0;
195
- padding: 1rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  border-radius: 8px;
197
- margin-bottom: 1rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  }
199
-
200
- .whatsapp-card .phone { font-size: 1.1rem; font-weight: 600; color: #166534; }
201
- .whatsapp-card .name { color: #166534; }
202
-
203
- .loading { text-align: center; padding: 2rem; color: #666; }
204
-
205
- .action-buttons { display: flex; gap: 0.5rem; }
206
  </style>
207
  </head>
 
208
  <body>
209
  <div class="header">
210
  <h1>NexusCRM</h1>
211
  <div class="subtitle">CRM + Lead Gen Pro</div>
212
  </div>
213
-
214
  <div class="nav">
215
  <button class="active" onclick="showTab('dashboard')">Dashboard</button>
216
  <button onclick="showTab('leads')">Leads</button>
@@ -218,8 +597,9 @@
218
  <button onclick="showTab('outreach')">WhatsApp</button>
219
  <button onclick="showTab('customers')">Clientes</button>
220
  <button onclick="showTab('nuevo')">+ Nuevo</button>
 
221
  </div>
222
-
223
  <div class="main">
224
  <!-- Dashboard Tab -->
225
  <div id="dashboard" class="tab-content">
@@ -249,7 +629,7 @@
249
  <div class="value" id="stat-enriquecidos">0</div>
250
  </div>
251
  </div>
252
-
253
  <div class="grid-2">
254
  <div class="card">
255
  <h2>Leads por Estado</h2>
@@ -260,13 +640,13 @@
260
  <div id="leads-by-niche"></div>
261
  </div>
262
  </div>
263
-
264
  <div class="card">
265
  <h2>Leads Recientes</h2>
266
  <div id="recent-leads"></div>
267
  </div>
268
  </div>
269
-
270
  <!-- Leads Tab -->
271
  <div id="leads" class="tab-content" style="display: none;">
272
  <div class="card">
@@ -287,7 +667,7 @@
287
  <div id="leads-list"></div>
288
  </div>
289
  </div>
290
-
291
  <!-- Pipeline Tab -->
292
  <div id="pipeline" class="tab-content" style="display: none;">
293
  <div class="card">
@@ -295,7 +675,7 @@
295
  <div class="pipeline-stages" id="pipeline-view"></div>
296
  </div>
297
  </div>
298
-
299
  <!-- Outreach Tab -->
300
  <div id="outreach" class="tab-content" style="display: none;">
301
  <div class="card">
@@ -311,16 +691,18 @@
311
  <div id="whatsapp-list"></div>
312
  </div>
313
  </div>
314
-
315
  <!-- Customers Tab -->
316
  <div id="customers" class="tab-content" style="display: none;">
317
  <div class="card">
318
  <h2>Clientes</h2>
319
- <input type="text" class="search-box" placeholder="Buscar clientes..." onkeyup="filterCustomers(this.value)" style="width: 100%; padding: 0.75rem; border: 1px solid #e0e0e0; border-radius: 4px; margin-bottom: 1rem;">
 
 
320
  <div id="customers-list"></div>
321
  </div>
322
  </div>
323
-
324
  <!-- Nuevo Tab -->
325
  <div id="nuevo" class="tab-content" style="display: none;">
326
  <div class="card">
@@ -390,18 +772,18 @@
390
  <script>
391
  const SUPABASE_URL = 'https://nvssvykqxaurtlgwxwwy.supabase.co';
392
  const SUPABASE_KEY = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im52c3N2eWtxeGF1cnRsZ3d4d3d5Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3NzI4OTIxNjUsImV4cCI6MjA4ODQ2ODE2NX0.onzmNQGBy6jDxWO7TcjZuyvgvId9HiGNzUNs1HmOAMk';
393
-
394
  let customers = [];
395
  let leads = [];
396
  let opportunities = [];
397
-
398
  async function apiGet(table) {
399
  const response = await fetch(`${SUPABASE_URL}/rest/v1/${table}?select=*`, {
400
  headers: { 'apikey': SUPABASE_KEY, 'Authorization': `Bearer ${SUPABASE_KEY}` }
401
  });
402
  return response.json();
403
  }
404
-
405
  async function apiPost(table, data) {
406
  const response = await fetch(`${SUPABASE_URL}/rest/v1/${table}`, {
407
  method: 'POST',
@@ -415,7 +797,7 @@
415
  });
416
  return response;
417
  }
418
-
419
  async function apiPatch(table, id, data) {
420
  const response = await fetch(`${SUPABASE_URL}/rest/v1/${table}?id=eq.${id}`, {
421
  method: 'PATCH',
@@ -429,13 +811,13 @@
429
  });
430
  return response;
431
  }
432
-
433
  async function loadData() {
434
  try {
435
  customers = await apiGet('customers');
436
  leads = await apiGet('leads');
437
  opportunities = await apiGet('opportunities');
438
-
439
  populateNichoFilter();
440
  updateStats();
441
  renderRecentLeads();
@@ -447,7 +829,7 @@
447
  console.error('Error:', error);
448
  }
449
  }
450
-
451
  function populateNichoFilter() {
452
  const niches = [...new Set(leads.map(l => l.nicho).filter(n => n))];
453
  const select = document.getElementById('lead-nicho-filter');
@@ -458,7 +840,7 @@
458
  select.appendChild(opt);
459
  });
460
  }
461
-
462
  function updateStats() {
463
  document.getElementById('stat-leads').textContent = leads.length;
464
  document.getElementById('stat-customers').textContent = customers.length;
@@ -466,15 +848,15 @@
466
  document.getElementById('stat-contactados').textContent = leads.filter(l => l.estado === 'contactado').length;
467
  document.getElementById('stat-con-telefono').textContent = leads.filter(l => l.telefono || l.telefono_formateado).length;
468
  document.getElementById('stat-enriquecidos').textContent = leads.filter(l => l.enriquecido).length;
469
-
470
  renderLeadsByStatus();
471
  renderLeadsByNiche();
472
  }
473
-
474
  function renderLeadsByStatus() {
475
  const counts = {};
476
  leads.forEach(l => { counts[l.estado || 'nuevo'] = (counts[l.estado || 'nuevo'] || 0) + 1; });
477
-
478
  let html = '<table><thead><tr><th>Estado</th><th>Cantidad</th></tr></thead><tbody>';
479
  Object.entries(counts).forEach(([estado, count]) => {
480
  const badge = getBadgeClass(estado);
@@ -483,11 +865,11 @@
483
  html += '</tbody></table>';
484
  document.getElementById('leads-by-status').innerHTML = html;
485
  }
486
-
487
  function renderLeadsByNiche() {
488
  const counts = {};
489
- leads.forEach(l => { if(l.nicho) { counts[l.nicho] = (counts[l.nicho] || 0) + 1; }});
490
-
491
  let html = '<table><thead><tr><th>Nicho</th><th>Cantidad</th></tr></thead><tbody>';
492
  Object.entries(counts).slice(0, 10).forEach(([nicho, count]) => {
493
  html += `<tr><td>${nicho}</td><td>${count}</td></tr>`;
@@ -495,32 +877,45 @@
495
  html += '</tbody></table>';
496
  document.getElementById('leads-by-niche').innerHTML = html;
497
  }
498
-
499
  function getBadgeClass(estado) {
500
  const classes = { 'nuevo': 'badge-nuevo', 'contactado': 'badge-contactado', 'calificado': 'badge-calificado', 'cliente': 'badge-cliente', 'convertido': 'badge-ganado' };
501
  return classes[estado] || 'badge-lead';
502
  }
503
-
 
 
 
 
 
 
504
  function renderRecentLeads() {
505
  const container = document.getElementById('recent-leads');
506
- const recent = leads.slice(0, 5);
507
-
 
508
  if (!recent.length) {
509
  container.innerHTML = '<p class="loading">No hay leads</p>';
510
  return;
511
  }
512
-
513
- let html = '<table><thead><tr><th>Nombre</th><th>Nicho</th><th>Ciudad</th><th>Estado</th><th>Acciones</th></tr></thead><tbody>';
514
  recent.forEach(lead => {
515
  const badge = getBadgeClass(lead.estado);
 
 
516
  html += `
517
  <tr>
518
- <td>${lead.nombre || 'Sin nombre'}</td>
 
519
  <td>${lead.nicho || '-'}</td>
520
  <td>${lead.ciudad || '-'}</td>
521
  <td><span class="badge ${badge}">${lead.estado || 'nuevo'}</span></td>
522
  <td>
523
- ${lead.telefono || lead.telefono_formateado ? `<a href="${generateWhatsAppLink(lead)}" target="_blank" class="btn btn-whatsapp">WhatsApp</a>` : ''}
 
 
 
524
  </td>
525
  </tr>
526
  `;
@@ -528,27 +923,27 @@
528
  html += '</tbody></table>';
529
  container.innerHTML = html;
530
  }
531
-
532
  function generateWhatsAppLink(lead) {
533
  const phone = lead.telefono_formateado || lead.telefono || '';
534
  const cleanPhone = phone.replace(/\D/g, '');
535
  const message = encodeURIComponent(`Hola ${lead.nombre || ''}, te contactamos de DonNadie Apps. ¿Cómo podemos ayudarte?`);
536
  return `https://wa.me/${cleanPhone}?text=${message}`;
537
  }
538
-
539
  function renderCustomers(filter = '') {
540
  const container = document.getElementById('customers-list');
541
- const filtered = customers.filter(c =>
542
  c.nombre?.toLowerCase().includes(filter.toLowerCase()) ||
543
  c.cedula?.includes(filter) ||
544
  c.correo?.toLowerCase().includes(filter.toLowerCase())
545
  );
546
-
547
  if (!filtered.length) {
548
  container.innerHTML = '<p class="loading">No hay clientes</p>';
549
  return;
550
  }
551
-
552
  let html = '<table><thead><tr><th>Nombre</th><th>Cédula</th><th>Teléfono</th><th>WhatsApp</th><th>Correo</th><th>Estado</th></tr></thead><tbody>';
553
  filtered.forEach(c => {
554
  html += `
@@ -565,18 +960,18 @@
565
  html += '</tbody></table>';
566
  container.innerHTML = html;
567
  }
568
-
569
  function filterCustomers(value) {
570
  renderCustomers(value);
571
  }
572
-
573
  function renderLeads() {
574
  const search = document.getElementById('lead-search').value.toLowerCase();
575
  const estado = document.getElementById('lead-estado-filter').value;
576
  const nicho = document.getElementById('lead-nicho-filter').value;
577
-
578
  const filtered = leads.filter(l => {
579
- const matchSearch = !search ||
580
  l.nombre?.toLowerCase().includes(search) ||
581
  l.nicho?.toLowerCase().includes(search) ||
582
  l.ciudad?.toLowerCase().includes(search);
@@ -584,28 +979,32 @@
584
  const matchNicho = !nicho || l.nicho === nicho;
585
  return matchSearch && matchEstado && matchNicho;
586
  });
587
-
588
  const container = document.getElementById('leads-list');
589
-
590
  if (!filtered.length) {
591
  container.innerHTML = '<p class="loading">No hay leads</p>';
592
  return;
593
  }
594
-
595
- let html = '<table><thead><tr><th>Nombre</th><th>Nicho</th><th>Ciudad</th><th>Teléfono</th><th>Estado</th><th>Acciones</th></tr></thead><tbody>';
596
  filtered.forEach(l => {
597
  const badge = getBadgeClass(l.estado);
 
 
598
  const hasPhone = l.telefono || l.telefono_formateado;
599
  html += `
600
  <tr>
601
- <td>${l.nombre || 'Sin nombre'}</td>
 
 
 
 
602
  <td>${l.nicho || '-'}</td>
603
  <td>${l.ciudad || '-'}</td>
604
- <td>${hasPhone ? 'Sí' : 'No'}</td>
605
- <td><span class="badge ${badge}">${l.estado || 'nuevo'}</span></td>
606
  <td class="action-buttons">
607
- ${hasPhone ? `<a href="${generateWhatsAppLink(l)}" target="_blank" class="btn btn-whatsapp">WhatsApp</a>` : ''}
608
- <button class="btn btn-primary" onclick="updateLeadStatus('${l.id}', 'contactado')">Contactar</button>
609
  </td>
610
  </tr>
611
  `;
@@ -613,11 +1012,17 @@
613
  html += '</tbody></table>';
614
  container.innerHTML = html;
615
  }
616
-
617
  function filterLeads() {
618
  renderLeads();
619
  }
620
-
 
 
 
 
 
 
621
  async function updateLeadStatus(id, estado) {
622
  try {
623
  await apiPatch('leads', id, { estado: estado, actualizado_en: new Date().toISOString() });
@@ -626,11 +1031,11 @@
626
  alert('Error actualizando: ' + error.message);
627
  }
628
  }
629
-
630
  function renderPipeline() {
631
  const stages = ['nuevo', 'contactado', 'calificado', 'convertido', 'perdido'];
632
  const stageNames = { 'nuevo': 'Nuevo', 'contactado': 'Contactado', 'calificado': 'Calificado', 'convertido': 'Ganado', 'perdido': 'Perdido' };
633
-
634
  let html = '';
635
  stages.forEach(stage => {
636
  const stageLeads = leads.filter(l => l.estado === stage);
@@ -648,60 +1053,67 @@
648
  });
649
  document.getElementById('pipeline-view').innerHTML = html;
650
  }
651
-
652
  function renderWhatsApp() {
653
  const search = document.getElementById('wa-search').value.toLowerCase();
654
  const filter = document.getElementById('wa-filter').value;
655
-
656
  const filtered = leads.filter(l => {
657
  const phone = l.telefono || l.telefono_formateado;
658
  if (!phone) return false;
659
-
660
- const matchSearch = !search ||
661
  l.nombre?.toLowerCase().includes(search) ||
662
  l.nicho?.toLowerCase().includes(search);
663
- const matchFilter = filter === 'todos' ||
664
  (filter === 'sin-contactar' && l.estado === 'nuevo') ||
665
  (filter === 'contactados' && l.whatsapp_enviado);
666
  return matchSearch && matchFilter;
667
  });
668
-
669
  const container = document.getElementById('whatsapp-list');
670
-
671
  if (!filtered.length) {
672
  container.innerHTML = '<p class="loading">No hay leads con teléfono</p>';
673
  return;
674
  }
675
-
676
  let html = '';
677
  filtered.forEach(l => {
678
  const phone = l.telefono_formateado || l.telefono;
 
 
679
  html += `
680
- <div class="whatsapp-card">
681
- <div class="name">${l.nombre || 'Sin nombre'}</div>
682
- <div class="phone">${phone}</div>
683
- <div style="margin-top: 0.5rem;">
684
- <span class="badge ${getBadgeClass(l.estado)}">${l.estado || 'nuevo'}</span>
685
- <span style="color: #666; margin-left: 1rem;">${l.nicho || ''} ${l.ciudad ? '- ' + l.ciudad : ''}</span>
 
 
 
 
 
686
  </div>
687
- <div style="margin-top: 1rem;">
688
- <a href="${generateWhatsAppLink(l)}" target="_blank" class="btn btn-whatsapp">Abrir WhatsApp</a>
689
- <button class="btn btn-primary" onclick="markWhatsAppSent('${l.id}')" style="margin-left: 0.5rem;">Marcar como enviado</button>
690
  </div>
691
  </div>
692
  `;
693
  });
694
  container.innerHTML = html;
695
  }
696
-
697
  function filterWhatsApp() {
698
  renderWhatsApp();
699
  }
700
-
701
  async function markWhatsAppSent(id) {
702
  try {
703
- await apiPatch('leads', id, {
704
- whatsapp_enviado: true,
705
  whatsapp_enviado_en: new Date().toISOString(),
706
  estado: 'contactado'
707
  });
@@ -711,20 +1123,25 @@
711
  alert('Error: ' + error.message);
712
  }
713
  }
714
-
715
  function showTab(tabId) {
716
  document.querySelectorAll('.tab-content').forEach(t => t.style.display = 'none');
717
  document.querySelectorAll('.nav button').forEach(b => b.classList.remove('active'));
718
  document.getElementById(tabId).style.display = 'block';
719
  event.target.classList.add('active');
 
 
 
 
 
720
  }
721
-
722
  async function saveNew(e) {
723
  e.preventDefault();
724
-
725
  const tipo = document.getElementById('nuevo-tipo').value;
726
  const isLead = tipo === 'lead';
727
-
728
  const data = {
729
  nombre: document.getElementById('nuevo-nombre').value,
730
  correo: document.getElementById('nuevo-correo').value || null,
@@ -739,9 +1156,9 @@
739
  created_at: new Date().toISOString(),
740
  updated_at: new Date().toISOString()
741
  };
742
-
743
  const table = isLead ? 'leads' : 'customers';
744
-
745
  try {
746
  await apiPost(table, data);
747
  alert('Guardado correctamente');
@@ -752,8 +1169,320 @@
752
  alert('Error: ' + error.message);
753
  }
754
  }
755
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
756
  loadData();
757
  </script>
758
  </body>
759
- </html>
 
 
1
  <!DOCTYPE html>
2
  <html lang="es">
3
+
4
  <head>
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
  <title>NexusCRM - CRM + Lead Gen</title>
8
  <style>
9
+ :root {
10
+ --primary: #6366f1;
11
+ --primary-glow: rgba(99, 102, 241, 0.5);
12
+ --secondary: #a855f7;
13
+ --bg-dark: #0f172a;
14
+ --glass-bg: rgba(255, 255, 255, 0.05);
15
+ --glass-border: rgba(255, 255, 255, 0.1);
16
+ --text-main: #f8fafc;
17
+ --text-dim: #94a3b8;
18
+ --success: #22c55e;
19
+ --warning: #f59e0b;
20
+ --danger: #ef4444;
21
+ }
22
+
23
+ * {
24
+ margin: 0;
25
+ padding: 0;
26
+ box-sizing: border-box;
27
+ }
28
+
29
  body {
30
+ font-family: 'Inter', -apple-system, system-ui, sans-serif;
31
+ background: var(--bg-dark);
32
+ background-image:
33
+ radial-gradient(at 0% 0%, rgba(99, 102, 241, 0.15) 0, transparent 50%),
34
+ radial-gradient(at 100% 100%, rgba(168, 85, 247, 0.15) 0, transparent 50%);
35
+ color: var(--text-main);
36
  min-height: 100vh;
37
+ overflow-x: hidden;
38
  }
39
+
40
  .header {
41
+ background: rgba(15, 23, 42, 0.8);
42
+ backdrop-filter: blur(12px);
43
+ border-bottom: 1px solid var(--glass-border);
44
  color: white;
45
+ padding: 1.5rem 2rem;
46
+ position: sticky;
47
+ top: 0;
48
+ z-index: 100;
49
+ display: flex;
50
+ justify-content: space-between;
51
+ align-items: center;
52
+ }
53
+
54
+ .header h1 {
55
+ font-size: 1.75rem;
56
+ font-weight: 800;
57
+ background: linear-gradient(to right, #818cf8, #c084fc);
58
+ -webkit-background-clip: text;
59
+ background-clip: text;
60
+ -webkit-text-fill-color: transparent;
61
+ letter-spacing: -0.025em;
62
+ }
63
+
64
+ .header .subtitle {
65
+ font-size: 0.875rem;
66
+ color: var(--text-dim);
67
+ font-weight: 500;
68
  }
69
+
 
 
 
70
  .nav {
71
+ padding: 1rem 2rem;
 
 
72
  display: flex;
73
+ gap: 0.75rem;
74
  flex-wrap: wrap;
75
  }
76
+
77
  .nav button {
78
+ padding: 0.6rem 1.25rem;
79
+ border: 1px solid var(--glass-border);
80
+ background: var(--glass-bg);
81
+ backdrop-filter: blur(4px);
82
+ color: var(--text-main);
83
  cursor: pointer;
84
  font-size: 0.9rem;
85
+ font-weight: 600;
86
+ border-radius: 9999px;
87
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
88
  }
89
+
90
+ .nav button:hover {
91
+ background: rgba(255, 255, 255, 0.1);
92
+ border-color: rgba(255, 255, 255, 0.2);
93
+ transform: translateY(-1px);
94
+ }
95
+
96
  .nav button.active {
97
+ background: var(--primary);
98
+ border-color: var(--primary);
99
+ box-shadow: 0 0 20px var(--primary-glow);
100
+ }
101
+
102
+ .main {
103
+ padding: 0 2rem 2rem 2rem;
104
  }
105
+
 
 
106
  .stats {
107
  display: grid;
108
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
109
+ gap: 1.5rem;
110
+ margin-bottom: 2rem;
111
  }
112
+
113
  .stat-card {
114
+ background: var(--glass-bg);
115
+ backdrop-filter: blur(20px);
 
 
 
 
 
 
 
 
 
 
 
 
116
  padding: 1.5rem;
117
+ border-radius: 16px;
118
+ border: 1px solid var(--glass-border);
119
+ transition: transform 0.3s ease;
120
+ }
121
+
122
+ .stat-card:hover {
123
+ transform: translateY(-4px);
124
+ }
125
+
126
+ .stat-card h3 {
127
+ color: var(--text-dim);
128
+ font-size: 0.75rem;
129
+ margin-bottom: 0.75rem;
130
+ text-transform: uppercase;
131
+ letter-spacing: 0.05em;
132
+ }
133
+
134
+ .stat-card .value {
135
+ font-size: 2.25rem;
136
+ font-weight: 800;
137
+ color: white;
138
+ }
139
+
140
+ .stat-card .value.green {
141
+ color: var(--success);
142
  }
143
+
144
+ .stat-card .value.orange {
145
+ color: var(--warning);
146
+ }
147
+
148
+ .card {
149
+ background: var(--glass-bg);
150
+ backdrop-filter: blur(20px);
151
+ padding: 2rem;
152
+ border-radius: 20px;
153
+ border: 1px solid var(--glass-border);
154
+ margin-bottom: 2rem;
155
+ }
156
+
157
+ .card h2 {
158
+ margin-bottom: 1.5rem;
159
+ color: white;
160
+ font-size: 1.5rem;
161
+ font-weight: 700;
162
+ }
163
+
164
  .filters {
165
  display: flex;
166
  gap: 1rem;
167
+ margin-bottom: 1.5rem;
168
  flex-wrap: wrap;
169
  }
170
+
171
+ .filters select,
172
+ .filters input {
173
+ background: rgba(255, 255, 255, 0.05);
174
+ border: 1px solid var(--glass-border);
175
+ padding: 0.75rem 1rem;
176
+ border-radius: 12px;
177
+ color: white;
178
  font-size: 0.9rem;
179
+ outline: none;
180
+ transition: border-color 0.2s;
181
  }
182
+
183
+ .filters select:focus,
184
+ .filters input:focus {
185
+ border-color: var(--primary);
186
+ }
187
+
188
+ .filters input[type="text"] {
189
+ flex: 1;
190
+ min-width: 280px;
191
+ }
192
+
193
+ .filters select option {
194
+ background: var(--bg-dark);
195
+ }
196
+
197
  table {
198
  width: 100%;
199
+ border-collapse: separate;
200
+ border-spacing: 0 0.5rem;
201
  }
202
+
203
+ th {
204
+ padding: 1rem;
205
  text-align: left;
206
+ font-size: 0.75rem;
207
+ text-transform: uppercase;
208
+ color: var(--text-dim);
209
+ letter-spacing: 0.05em;
210
+ font-weight: 600;
211
+ }
212
+
213
+ td {
214
+ padding: 1rem;
215
+ background: rgba(255, 255, 255, 0.02);
216
+ border-top: 1px solid var(--glass-border);
217
+ border-bottom: 1px solid var(--glass-border);
218
+ font-size: 0.95rem;
219
+ }
220
+
221
+ td:first-child {
222
+ border-left: 1px solid var(--glass-border);
223
+ border-radius: 12px 0 0 12px;
224
  }
225
+
226
+ td:last-child {
227
+ border-right: 1px solid var(--glass-border);
228
+ border-radius: 0 12px 12px 0;
229
+ }
230
+
231
+ tr:hover td {
232
+ background: rgba(255, 255, 255, 0.05);
233
+ }
234
+
235
  .badge {
236
+ padding: 0.35rem 0.75rem;
237
+ border-radius: 9999px;
238
+ font-size: 0.7rem;
239
+ font-weight: 700;
240
+ text-transform: uppercase;
241
+ }
242
+
243
+ .badge-lead {
244
+ background: rgba(99, 102, 241, 0.1);
245
+ color: #818cf8;
246
+ border: 1px solid rgba(99, 102, 241, 0.2);
247
+ }
248
+
249
+ .badge-cliente {
250
+ background: rgba(34, 197, 94, 0.1);
251
+ color: #4ade80;
252
+ border: 1px solid rgba(34, 197, 94, 0.2);
253
+ }
254
+
255
+ .badge-nuevo {
256
+ background: rgba(245, 158, 11, 0.1);
257
+ color: #fbbf24;
258
+ border: 1px solid rgba(245, 158, 11, 0.2);
259
+ }
260
+
261
+ .badge-contactado {
262
+ background: rgba(14, 165, 233, 0.1);
263
+ color: #38bdf8;
264
+ border: 1px solid rgba(14, 165, 233, 0.2);
265
+ }
266
+
267
+ .badge-ganado {
268
+ background: rgba(34, 197, 94, 0.1);
269
+ color: #4ade80;
270
+ }
271
+
272
+ .quality-ring {
273
+ display: inline-flex;
274
+ width: 32px;
275
+ height: 32px;
276
+ border-radius: 50%;
277
+ align-items: center;
278
+ justify-content: center;
279
+ font-weight: 800;
280
  font-size: 0.75rem;
281
+ border: 2px solid;
282
+ }
283
+
284
+ .q-high {
285
+ color: var(--success);
286
+ border-color: var(--success);
287
+ background: rgba(34, 197, 94, 0.1);
288
+ }
289
+
290
+ .q-med {
291
+ color: var(--warning);
292
+ border-color: var(--warning);
293
+ background: rgba(245, 158, 11, 0.1);
294
  }
295
+
296
+ .q-low {
297
+ color: var(--danger);
298
+ border-color: var(--danger);
299
+ background: rgba(239, 68, 68, 0.1);
300
+ }
301
+
 
 
302
  .btn {
303
+ padding: 0.6rem 1rem;
304
+ border-radius: 10px;
305
+ font-weight: 600;
306
  cursor: pointer;
 
307
  transition: all 0.2s;
308
+ border: none;
309
+ display: inline-flex;
310
+ align-items: center;
311
+ gap: 0.5rem;
312
+ }
313
+
314
+ .btn-primary {
315
+ background: var(--primary);
316
+ color: white;
317
+ }
318
+
319
+ .btn-whatsapp {
320
+ background: #25d366;
321
+ color: white;
322
+ text-decoration: none;
323
  }
324
+
325
+ .btn:active {
326
+ transform: scale(0.95);
327
+ }
328
+
 
 
 
329
  .pipeline-stages {
330
  display: flex;
331
+ gap: 1.5rem;
332
  overflow-x: auto;
333
  padding-bottom: 1rem;
334
  }
335
+
336
  .pipeline-stage {
337
+ min-width: 280px;
338
+ background: rgba(255, 255, 255, 0.02);
339
+ border-radius: 20px;
340
+ padding: 1.25rem;
341
+ border: 1px solid var(--glass-border);
342
  }
343
+
344
  .pipeline-stage h4 {
345
+ color: white;
346
+ margin-bottom: 1rem;
347
+ display: flex;
348
+ justify-content: space-between;
349
  }
350
+
351
  .pipeline-item {
352
+ background: rgba(255, 255, 255, 0.05);
353
+ padding: 1rem;
354
+ border-radius: 12px;
355
+ margin-bottom: 0.75rem;
356
+ border: 1px solid var(--glass-border);
357
+ cursor: pointer;
358
+ }
359
+
360
+ .pipeline-item:hover {
361
+ border-color: var(--primary);
362
+ }
363
+
364
+ #recent-leads table tr td:last-child {
365
+ text-align: right;
366
+ }
367
+
368
+ ::-webkit-scrollbar {
369
+ width: 8px;
370
+ }
371
+
372
+ ::-webkit-scrollbar-track {
373
+ background: var(--bg-dark);
374
+ }
375
+
376
+ ::-webkit-scrollbar-thumb {
377
+ background: var(--glass-border);
378
  border-radius: 4px;
379
+ }
380
+
381
+ ::-webkit-scrollbar-thumb:hover {
382
+ background: var(--text-dim);
383
+ }
384
+
385
+ /* ─── Lead Gen Tab ─────────────────────────── */
386
+ .leadgen-grid {
387
+ display: grid;
388
+ grid-template-columns: 380px 1fr;
389
+ gap: 1.5rem;
390
+ align-items: start;
391
+ }
392
+
393
+ .leadgen-form-card {
394
+ background: var(--glass-bg);
395
+ border: 1px solid var(--glass-border);
396
+ border-radius: 16px;
397
+ padding: 1.75rem;
398
+ backdrop-filter: blur(8px);
399
+ }
400
+
401
+ .leadgen-form-card h3 {
402
+ font-size: 1rem;
403
+ font-weight: 700;
404
+ margin-bottom: 1.25rem;
405
+ color: #818cf8;
406
+ text-transform: uppercase;
407
+ letter-spacing: 0.08em;
408
+ }
409
+
410
+ .form-row {
411
+ margin-bottom: 0.9rem;
412
+ }
413
+
414
+ .form-row label {
415
+ display: block;
416
+ font-size: 0.75rem;
417
+ font-weight: 600;
418
+ color: var(--text-dim);
419
+ text-transform: uppercase;
420
+ letter-spacing: 0.05em;
421
+ margin-bottom: 0.4rem;
422
+ }
423
+
424
+ .form-row select,
425
+ .form-row input[type=number],
426
+ .form-row input[type=text] {
427
  width: 100%;
428
+ background: rgba(255, 255, 255, 0.05);
429
+ border: 1px solid var(--glass-border);
430
+ border-radius: 10px;
431
+ color: var(--text-main);
432
+ padding: 0.6rem 0.9rem;
433
  font-size: 0.9rem;
434
+ outline: none;
435
+ transition: border-color 0.2s;
436
  }
437
+
438
+ .form-row select:focus,
439
+ .form-row input:focus {
440
+ border-color: var(--primary);
441
+ }
442
+
443
+ .platform-grid {
444
+ display: grid;
445
+ grid-template-columns: 1fr 1fr;
446
+ gap: 0.4rem;
447
+ margin-top: 0.4rem;
448
+ }
449
+
450
+ .platform-check {
451
+ display: flex;
452
+ align-items: center;
453
+ gap: 0.5rem;
454
+ font-size: 0.85rem;
455
+ cursor: pointer;
456
+ padding: 0.4rem 0.5rem;
457
  border-radius: 8px;
458
+ transition: background 0.15s;
459
+ }
460
+
461
+ .platform-check:hover {
462
+ background: rgba(255, 255, 255, 0.06);
463
+ }
464
+
465
+ .platform-check input {
466
+ accent-color: var(--primary);
467
+ }
468
+
469
+ .btn-run {
470
+ width: 100%;
471
+ padding: 0.9rem;
472
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
473
+ border: none;
474
+ border-radius: 12px;
475
+ color: white;
476
+ font-weight: 700;
477
+ font-size: 1rem;
478
+ cursor: pointer;
479
+ margin-top: 1rem;
480
+ transition: opacity 0.2s, transform 0.15s;
481
+ letter-spacing: 0.03em;
482
+ }
483
+
484
+ .btn-run:hover {
485
+ opacity: 0.9;
486
+ transform: translateY(-1px);
487
+ }
488
+
489
+ .btn-run:disabled {
490
+ opacity: 0.5;
491
+ cursor: not-allowed;
492
+ transform: none;
493
+ }
494
+
495
+ .log-terminal {
496
+ background: #0a0f1e;
497
+ border: 1px solid rgba(99, 102, 241, 0.3);
498
+ border-radius: 12px;
499
+ padding: 1rem;
500
+ height: 280px;
501
+ overflow-y: auto;
502
+ font-family: 'Courier New', monospace;
503
+ font-size: 0.78rem;
504
+ color: #4ade80;
505
+ line-height: 1.5;
506
+ white-space: pre-wrap;
507
+ word-break: break-all;
508
+ }
509
+
510
+ .log-terminal .log-err {
511
+ color: #f87171;
512
+ }
513
+
514
+ .log-terminal .log-done {
515
+ color: #818cf8;
516
+ font-weight: 700;
517
+ }
518
+
519
+ .server-banner {
520
+ background: rgba(245, 158, 11, 0.1);
521
+ border: 1px solid rgba(245, 158, 11, 0.3);
522
+ border-radius: 12px;
523
+ padding: 1rem 1.25rem;
524
+ margin-bottom: 1.5rem;
525
+ display: flex;
526
+ align-items: center;
527
+ gap: 0.75rem;
528
+ font-size: 0.88rem;
529
+ }
530
+
531
+ .server-banner.online {
532
+ background: rgba(34, 197, 94, 0.1);
533
+ border-color: rgba(34, 197, 94, 0.3);
534
+ color: #4ade80;
535
+ }
536
+
537
+ .server-banner.offline {
538
+ color: #fbbf24;
539
+ }
540
+
541
+ .lg-stat-row {
542
+ display: grid;
543
+ grid-template-columns: repeat(3, 1fr);
544
+ gap: 1rem;
545
+ margin-bottom: 1.5rem;
546
+ }
547
+
548
+ .lg-stat {
549
+ background: var(--glass-bg);
550
+ border: 1px solid var(--glass-border);
551
+ border-radius: 12px;
552
+ padding: 1rem;
553
+ text-align: center;
554
+ }
555
+
556
+ .lg-stat .val {
557
+ font-size: 1.75rem;
558
+ font-weight: 800;
559
+ color: #818cf8;
560
+ }
561
+
562
+ .lg-stat .lbl {
563
+ font-size: 0.7rem;
564
+ color: var(--text-dim);
565
+ text-transform: uppercase;
566
+ margin-top: 0.2rem;
567
+ }
568
+
569
+ .btn-sync {
570
+ padding: 0.6rem 1.2rem;
571
+ background: rgba(99, 102, 241, 0.15);
572
+ border: 1px solid rgba(99, 102, 241, 0.4);
573
+ border-radius: 10px;
574
+ color: #818cf8;
575
+ font-weight: 600;
576
+ font-size: 0.85rem;
577
+ cursor: pointer;
578
+ transition: background 0.2s;
579
+ }
580
+
581
+ .btn-sync:hover {
582
+ background: rgba(99, 102, 241, 0.3);
583
  }
 
 
 
 
 
 
 
584
  </style>
585
  </head>
586
+
587
  <body>
588
  <div class="header">
589
  <h1>NexusCRM</h1>
590
  <div class="subtitle">CRM + Lead Gen Pro</div>
591
  </div>
592
+
593
  <div class="nav">
594
  <button class="active" onclick="showTab('dashboard')">Dashboard</button>
595
  <button onclick="showTab('leads')">Leads</button>
 
597
  <button onclick="showTab('outreach')">WhatsApp</button>
598
  <button onclick="showTab('customers')">Clientes</button>
599
  <button onclick="showTab('nuevo')">+ Nuevo</button>
600
+ <button onclick="showTab('leadgen')" id="nav-leadgen">🔍 Lead Gen</button>
601
  </div>
602
+
603
  <div class="main">
604
  <!-- Dashboard Tab -->
605
  <div id="dashboard" class="tab-content">
 
629
  <div class="value" id="stat-enriquecidos">0</div>
630
  </div>
631
  </div>
632
+
633
  <div class="grid-2">
634
  <div class="card">
635
  <h2>Leads por Estado</h2>
 
640
  <div id="leads-by-niche"></div>
641
  </div>
642
  </div>
643
+
644
  <div class="card">
645
  <h2>Leads Recientes</h2>
646
  <div id="recent-leads"></div>
647
  </div>
648
  </div>
649
+
650
  <!-- Leads Tab -->
651
  <div id="leads" class="tab-content" style="display: none;">
652
  <div class="card">
 
667
  <div id="leads-list"></div>
668
  </div>
669
  </div>
670
+
671
  <!-- Pipeline Tab -->
672
  <div id="pipeline" class="tab-content" style="display: none;">
673
  <div class="card">
 
675
  <div class="pipeline-stages" id="pipeline-view"></div>
676
  </div>
677
  </div>
678
+
679
  <!-- Outreach Tab -->
680
  <div id="outreach" class="tab-content" style="display: none;">
681
  <div class="card">
 
691
  <div id="whatsapp-list"></div>
692
  </div>
693
  </div>
694
+
695
  <!-- Customers Tab -->
696
  <div id="customers" class="tab-content" style="display: none;">
697
  <div class="card">
698
  <h2>Clientes</h2>
699
+ <input type="text" class="search-box" placeholder="Buscar clientes..."
700
+ onkeyup="filterCustomers(this.value)"
701
+ style="width: 100%; padding: 0.75rem; border: 1px solid #e0e0e0; border-radius: 4px; margin-bottom: 1rem;">
702
  <div id="customers-list"></div>
703
  </div>
704
  </div>
705
+
706
  <!-- Nuevo Tab -->
707
  <div id="nuevo" class="tab-content" style="display: none;">
708
  <div class="card">
 
772
  <script>
773
  const SUPABASE_URL = 'https://nvssvykqxaurtlgwxwwy.supabase.co';
774
  const SUPABASE_KEY = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im52c3N2eWtxeGF1cnRsZ3d4d3d5Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3NzI4OTIxNjUsImV4cCI6MjA4ODQ2ODE2NX0.onzmNQGBy6jDxWO7TcjZuyvgvId9HiGNzUNs1HmOAMk';
775
+
776
  let customers = [];
777
  let leads = [];
778
  let opportunities = [];
779
+
780
  async function apiGet(table) {
781
  const response = await fetch(`${SUPABASE_URL}/rest/v1/${table}?select=*`, {
782
  headers: { 'apikey': SUPABASE_KEY, 'Authorization': `Bearer ${SUPABASE_KEY}` }
783
  });
784
  return response.json();
785
  }
786
+
787
  async function apiPost(table, data) {
788
  const response = await fetch(`${SUPABASE_URL}/rest/v1/${table}`, {
789
  method: 'POST',
 
797
  });
798
  return response;
799
  }
800
+
801
  async function apiPatch(table, id, data) {
802
  const response = await fetch(`${SUPABASE_URL}/rest/v1/${table}?id=eq.${id}`, {
803
  method: 'PATCH',
 
811
  });
812
  return response;
813
  }
814
+
815
  async function loadData() {
816
  try {
817
  customers = await apiGet('customers');
818
  leads = await apiGet('leads');
819
  opportunities = await apiGet('opportunities');
820
+
821
  populateNichoFilter();
822
  updateStats();
823
  renderRecentLeads();
 
829
  console.error('Error:', error);
830
  }
831
  }
832
+
833
  function populateNichoFilter() {
834
  const niches = [...new Set(leads.map(l => l.nicho).filter(n => n))];
835
  const select = document.getElementById('lead-nicho-filter');
 
840
  select.appendChild(opt);
841
  });
842
  }
843
+
844
  function updateStats() {
845
  document.getElementById('stat-leads').textContent = leads.length;
846
  document.getElementById('stat-customers').textContent = customers.length;
 
848
  document.getElementById('stat-contactados').textContent = leads.filter(l => l.estado === 'contactado').length;
849
  document.getElementById('stat-con-telefono').textContent = leads.filter(l => l.telefono || l.telefono_formateado).length;
850
  document.getElementById('stat-enriquecidos').textContent = leads.filter(l => l.enriquecido).length;
851
+
852
  renderLeadsByStatus();
853
  renderLeadsByNiche();
854
  }
855
+
856
  function renderLeadsByStatus() {
857
  const counts = {};
858
  leads.forEach(l => { counts[l.estado || 'nuevo'] = (counts[l.estado || 'nuevo'] || 0) + 1; });
859
+
860
  let html = '<table><thead><tr><th>Estado</th><th>Cantidad</th></tr></thead><tbody>';
861
  Object.entries(counts).forEach(([estado, count]) => {
862
  const badge = getBadgeClass(estado);
 
865
  html += '</tbody></table>';
866
  document.getElementById('leads-by-status').innerHTML = html;
867
  }
868
+
869
  function renderLeadsByNiche() {
870
  const counts = {};
871
+ leads.forEach(l => { if (l.nicho) { counts[l.nicho] = (counts[l.nicho] || 0) + 1; } });
872
+
873
  let html = '<table><thead><tr><th>Nicho</th><th>Cantidad</th></tr></thead><tbody>';
874
  Object.entries(counts).slice(0, 10).forEach(([nicho, count]) => {
875
  html += `<tr><td>${nicho}</td><td>${count}</td></tr>`;
 
877
  html += '</tbody></table>';
878
  document.getElementById('leads-by-niche').innerHTML = html;
879
  }
880
+
881
  function getBadgeClass(estado) {
882
  const classes = { 'nuevo': 'badge-nuevo', 'contactado': 'badge-contactado', 'calificado': 'badge-calificado', 'cliente': 'badge-cliente', 'convertido': 'badge-ganado' };
883
  return classes[estado] || 'badge-lead';
884
  }
885
+
886
+ function getQualityClass(score) {
887
+ if (score >= 8) return 'q-high';
888
+ if (score >= 5) return 'q-med';
889
+ return 'q-low';
890
+ }
891
+
892
  function renderRecentLeads() {
893
  const container = document.getElementById('recent-leads');
894
+ // Sort by creation date descending
895
+ const recent = [...leads].sort((a, b) => new Date(b.created_at) - new Date(a.created_at)).slice(0, 8);
896
+
897
  if (!recent.length) {
898
  container.innerHTML = '<p class="loading">No hay leads</p>';
899
  return;
900
  }
901
+
902
+ let html = '<table><thead><tr><th>Score</th><th>Nombre</th><th>Nicho</th><th>Ciudad</th><th>Estado</th><th>Acciones</th></tr></thead><tbody>';
903
  recent.forEach(lead => {
904
  const badge = getBadgeClass(lead.estado);
905
+ const qScore = lead.quality_score || 0;
906
+ const qClass = getQualityClass(qScore);
907
  html += `
908
  <tr>
909
+ <td width="50"><div class="quality-ring ${qClass}">${qScore}</div></td>
910
+ <td><div style="font-weight:700">${lead.nombre || 'Sin nombre'}</div><div style="font-size: 0.75rem; color: var(--text-dim)">${lead.origen || 'lead_gen'}</div></td>
911
  <td>${lead.nicho || '-'}</td>
912
  <td>${lead.ciudad || '-'}</td>
913
  <td><span class="badge ${badge}">${lead.estado || 'nuevo'}</span></td>
914
  <td>
915
+ <div class="action-buttons">
916
+ ${lead.telefono || lead.telefono_formateado ? `<a href="${generateWhatsAppLink(lead)}" target="_blank" class="btn btn-whatsapp">WA</a>` : ''}
917
+ <button class="btn btn-primary" onclick="showLeadDetails('${lead.id}')">Ver</button>
918
+ </div>
919
  </td>
920
  </tr>
921
  `;
 
923
  html += '</tbody></table>';
924
  container.innerHTML = html;
925
  }
926
+
927
  function generateWhatsAppLink(lead) {
928
  const phone = lead.telefono_formateado || lead.telefono || '';
929
  const cleanPhone = phone.replace(/\D/g, '');
930
  const message = encodeURIComponent(`Hola ${lead.nombre || ''}, te contactamos de DonNadie Apps. ¿Cómo podemos ayudarte?`);
931
  return `https://wa.me/${cleanPhone}?text=${message}`;
932
  }
933
+
934
  function renderCustomers(filter = '') {
935
  const container = document.getElementById('customers-list');
936
+ const filtered = customers.filter(c =>
937
  c.nombre?.toLowerCase().includes(filter.toLowerCase()) ||
938
  c.cedula?.includes(filter) ||
939
  c.correo?.toLowerCase().includes(filter.toLowerCase())
940
  );
941
+
942
  if (!filtered.length) {
943
  container.innerHTML = '<p class="loading">No hay clientes</p>';
944
  return;
945
  }
946
+
947
  let html = '<table><thead><tr><th>Nombre</th><th>Cédula</th><th>Teléfono</th><th>WhatsApp</th><th>Correo</th><th>Estado</th></tr></thead><tbody>';
948
  filtered.forEach(c => {
949
  html += `
 
960
  html += '</tbody></table>';
961
  container.innerHTML = html;
962
  }
963
+
964
  function filterCustomers(value) {
965
  renderCustomers(value);
966
  }
967
+
968
  function renderLeads() {
969
  const search = document.getElementById('lead-search').value.toLowerCase();
970
  const estado = document.getElementById('lead-estado-filter').value;
971
  const nicho = document.getElementById('lead-nicho-filter').value;
972
+
973
  const filtered = leads.filter(l => {
974
+ const matchSearch = !search ||
975
  l.nombre?.toLowerCase().includes(search) ||
976
  l.nicho?.toLowerCase().includes(search) ||
977
  l.ciudad?.toLowerCase().includes(search);
 
979
  const matchNicho = !nicho || l.nicho === nicho;
980
  return matchSearch && matchEstado && matchNicho;
981
  });
982
+
983
  const container = document.getElementById('leads-list');
984
+
985
  if (!filtered.length) {
986
  container.innerHTML = '<p class="loading">No hay leads</p>';
987
  return;
988
  }
989
+
990
+ let html = '<table><thead><tr><th>Score</th><th>Nombre</th><th>Nicho</th><th>Ciudad</th><th>Acciones</th></tr></thead><tbody>';
991
  filtered.forEach(l => {
992
  const badge = getBadgeClass(l.estado);
993
+ const qScore = l.quality_score || 0;
994
+ const qClass = getQualityClass(qScore);
995
  const hasPhone = l.telefono || l.telefono_formateado;
996
  html += `
997
  <tr>
998
+ <td><div class="quality-ring ${qClass}">${qScore}</div></td>
999
+ <td>
1000
+ <div style="font-weight:700">${l.nombre || 'Sin nombre'}</div>
1001
+ <span class="badge ${badge}">${l.estado || 'nuevo'}</span>
1002
+ </td>
1003
  <td>${l.nicho || '-'}</td>
1004
  <td>${l.ciudad || '-'}</td>
 
 
1005
  <td class="action-buttons">
1006
+ ${hasPhone ? `<a href="${generateWhatsAppLink(l)}" target="_blank" class="btn btn-whatsapp">WA</a>` : ''}
1007
+ <button class="btn btn-primary" onclick="showLeadDetails('${l.id}')">Ver</button>
1008
  </td>
1009
  </tr>
1010
  `;
 
1012
  html += '</tbody></table>';
1013
  container.innerHTML = html;
1014
  }
1015
+
1016
  function filterLeads() {
1017
  renderLeads();
1018
  }
1019
+
1020
+ function showLeadDetails(id) {
1021
+ const lead = leads.find(l => l.id === id);
1022
+ if (!lead) return;
1023
+ alert(`Detalles de ${lead.nombre}:\n\nNicho: ${lead.nicho}\nCiudad: ${lead.ciudad}\nEstado: ${lead.estado}\nScore: ${lead.quality_score || 0}\n\nNotas: ${lead.observaciones || 'Sin notas'}`);
1024
+ }
1025
+
1026
  async function updateLeadStatus(id, estado) {
1027
  try {
1028
  await apiPatch('leads', id, { estado: estado, actualizado_en: new Date().toISOString() });
 
1031
  alert('Error actualizando: ' + error.message);
1032
  }
1033
  }
1034
+
1035
  function renderPipeline() {
1036
  const stages = ['nuevo', 'contactado', 'calificado', 'convertido', 'perdido'];
1037
  const stageNames = { 'nuevo': 'Nuevo', 'contactado': 'Contactado', 'calificado': 'Calificado', 'convertido': 'Ganado', 'perdido': 'Perdido' };
1038
+
1039
  let html = '';
1040
  stages.forEach(stage => {
1041
  const stageLeads = leads.filter(l => l.estado === stage);
 
1053
  });
1054
  document.getElementById('pipeline-view').innerHTML = html;
1055
  }
1056
+
1057
/**
 * Render the WhatsApp outreach list.
 * Reads the search box (#wa-search) and status filter (#wa-filter), keeps only
 * leads that have some phone number, and writes the resulting cards into
 * #whatsapp-list. Shows a placeholder paragraph when nothing matches.
 */
function renderWhatsApp() {
    const searchTerm = document.getElementById('wa-search').value.toLowerCase();
    const statusFilter = document.getElementById('wa-filter').value;

    const visible = leads.filter(lead => {
        // Leads without any phone number are not actionable on WhatsApp.
        if (!(lead.telefono || lead.telefono_formateado)) return false;

        const matchesSearch = !searchTerm
            || lead.nombre?.toLowerCase().includes(searchTerm)
            || lead.nicho?.toLowerCase().includes(searchTerm);

        let matchesFilter;
        if (statusFilter === 'todos') {
            matchesFilter = true;
        } else if (statusFilter === 'sin-contactar') {
            matchesFilter = lead.estado === 'nuevo';
        } else if (statusFilter === 'contactados') {
            matchesFilter = lead.whatsapp_enviado;
        } else {
            matchesFilter = false;
        }
        return matchesSearch && matchesFilter;
    });

    const container = document.getElementById('whatsapp-list');
    if (visible.length === 0) {
        container.innerHTML = '<p class="loading">No hay leads con teléfono</p>';
        return;
    }

    // Build one card per lead and write them all in a single DOM update.
    container.innerHTML = visible.map(l => {
        const phone = l.telefono_formateado || l.telefono;
        const qScore = l.quality_score || 0;
        const qClass = getQualityClass(qScore);
        return `
            <div class="whatsapp-card" style="display: flex; justify-content: space-between; align-items: center;">
                <div>
                    <div style="display:flex; align-items:center; gap:10px;">
                        <div class="quality-ring ${qClass}">${qScore}</div>
                        <div class="name" style="font-weight:700; font-size:1.1rem">${l.nombre || 'Sin nombre'}</div>
                    </div>
                    <div class="phone" style="margin-top:5px; opacity:0.8">${phone}</div>
                    <div style="margin-top: 0.5rem;">
                        <span class="badge ${getBadgeClass(l.estado)}">${l.estado || 'nuevo'}</span>
                        <span style="color: var(--text-dim); margin-left: 1rem; font-size:0.8rem">${l.nicho || ''} ${l.ciudad ? '- ' + l.ciudad : ''}</span>
                    </div>
                </div>
                <div class="action-buttons">
                    <a href="${generateWhatsAppLink(l)}" target="_blank" class="btn btn-whatsapp">Abrir Chat</a>
                    <button class="btn btn-primary" onclick="markWhatsAppSent('${l.id}')">Marcar Enviado</button>
                </div>
            </div>
        `;
    }).join('');
}
1108
+
1109
  function filterWhatsApp() {
1110
  renderWhatsApp();
1111
  }
1112
+
1113
  async function markWhatsAppSent(id) {
1114
  try {
1115
+ await apiPatch('leads', id, {
1116
+ whatsapp_enviado: true,
1117
  whatsapp_enviado_en: new Date().toISOString(),
1118
  estado: 'contactado'
1119
  });
 
1123
  alert('Error: ' + error.message);
1124
  }
1125
  }
1126
+
1127
/**
 * Switch the visible tab panel and highlight the clicked nav button.
 *
 * @param {string} tabId - id of the .tab-content element to show.
 * @param {Event|null} [ev] - the triggering click event; defaults to the
 *   implicit global `event` when available. Fix: the original dereferenced
 *   the implicit global `event.target` unconditionally, which throws a
 *   TypeError when showTab() is called programmatically (no click event).
 *   The optional, defaulted parameter keeps every existing
 *   onclick="showTab('...')" caller working unchanged.
 */
function showTab(tabId, ev = (typeof event !== 'undefined' ? event : null)) {
    document.querySelectorAll('.tab-content').forEach(t => t.style.display = 'none');
    document.querySelectorAll('.nav button').forEach(b => b.classList.remove('active'));
    document.getElementById(tabId).style.display = 'block';
    // Only highlight a nav button when an actual click event is available.
    if (ev && ev.target) ev.target.classList.add('active');
    // Lead Gen tab side-effects: refresh backend status and platform toggles.
    // typeof guards keep this safe if the Lead Gen script half failed to load.
    if (tabId === 'leadgen') {
        if (typeof checkServer === 'function') checkServer();
        if (typeof updatePlatforms === 'function') updatePlatforms();
    }
}
1138
+
1139
  async function saveNew(e) {
1140
  e.preventDefault();
1141
+
1142
  const tipo = document.getElementById('nuevo-tipo').value;
1143
  const isLead = tipo === 'lead';
1144
+
1145
  const data = {
1146
  nombre: document.getElementById('nuevo-nombre').value,
1147
  correo: document.getElementById('nuevo-correo').value || null,
 
1156
  created_at: new Date().toISOString(),
1157
  updated_at: new Date().toISOString()
1158
  };
1159
+
1160
  const table = isLead ? 'leads' : 'customers';
1161
+
1162
  try {
1163
  await apiPost(table, data);
1164
  alert('Guardado correctamente');
 
1169
  alert('Error: ' + error.message);
1170
  }
1171
  }
1172
+
1173
// NOTE: a duplicated initial loadData() call was removed here. The script
// already performs the single startup loadData() at the very end of this
// <script> block, so invoking it here as well caused a redundant double
// fetch of all CRM data on every page load.
1174
+
1175
/* ════════════════════════════════════════════════════
   LEAD GEN PRO INTEGRATION
   API bridge: same-origin Flask API (relative URLs via API_BASE below)
   ════════════════════════════════════════════════════ */
1179
+
1180
// 100% CLOUD: the API is served from the same origin/root as the CRM, so all
// fetch() calls below use relative URLs (empty base).
const API_BASE = '';
// Active EventSource for the live scraping log stream (null when idle).
let sseSource = null;
// Re-entrancy guard: true while a scraping pipeline run is in flight.
let scrapeRunning = false;
1184
+
1185
// Insert the Lead Gen tab HTML into the page.
// IIFE that builds the whole #leadgen tab panel (server banner, local stats
// row, scraping configuration form, log terminal, niche breakdown) as one
// template literal and appends it to the .main container. Runs once at
// script load; the tab starts hidden (display:none) until showTab('leadgen').
// NOTE(review): the banner text below still mentions "localhost:5000" even
// though API_BASE is same-origin — confirm whether that copy should be
// updated for the cloud deployment. (Not changed here: template literal
// content is runtime behavior.)
(function () {
    const leadgenHtml = `
    <div id="leadgen" class="tab-content" style="display:none">
        <div id="server-banner" class="server-banner offline">
            <span id="server-icon">⚠️</span>
            <span id="server-msg">Verificando conexión con API Server (localhost:5000)...</span>
            <button class="btn-sync" onclick="checkServer()" style="margin-left:auto">Reconectar</button>
        </div>

        <div class="lg-stat-row">
            <div class="lg-stat">
                <div class="val" id="lg-total">—</div>
                <div class="lbl">Total Local</div>
            </div>
            <div class="lg-stat">
                <div class="val" id="lg-30d">—</div>
                <div class="lbl">Últimos 30d</div>
            </div>
            <div class="lg-stat">
                <div class="val" id="lg-pending-wa">—</div>
                <div class="lbl">Pendiente WA</div>
            </div>
        </div>

        <div class="leadgen-grid">
            <!-- Panel Izquierdo: Configuración de Scraping -->
            <div>
                <div class="leadgen-form-card">
                    <h3>🎯 Configurar Búsqueda</h3>

                    <div class="form-row">
                        <label>Nicho / Industria</label>
                        <input type="text" id="lg-niche" value="Real Estate" placeholder="Real Estate, Insurance, Healthcare...">
                    </div>

                    <div class="form-row">
                        <label>Tipo de Lead</label>
                        <select id="lg-type" onchange="updatePlatforms()">
                            <option value="both">Ambos (B2B + B2C)</option>
                            <option value="b2b">B2B — Empresas</option>
                            <option value="b2c">B2C — Consumidores</option>
                        </select>
                    </div>

                    <div class="form-row">
                        <label>País</label>
                        <select id="lg-country" onchange="updatePlatforms()">
                            <option value="USA">🇺🇸 USA</option>
                            <option value="Venezuela">🇻🇪 Venezuela</option>
                            <option value="Mexico">🇲🇽 México</option>
                            <option value="Colombia">🇨🇴 Colombia</option>
                            <option value="Argentina">🇦🇷 Argentina</option>
                            <option value="Spain">🇪🇸 España</option>
                        </select>
                    </div>

                    <div class="form-row">
                        <label>Estado / Provincia</label>
                        <input type="text" id="lg-state" placeholder="FL, California, Miranda...">
                    </div>

                    <div class="form-row">
                        <label>Ciudad</label>
                        <input type="text" id="lg-city" placeholder="Miami, Caracas, Bogotá...">
                    </div>

                    <div class="form-row">
                        <label>Límite por plataforma</label>
                        <input type="number" id="lg-limit" value="15" min="5" max="50">
                    </div>

                    <div class="form-row" id="b2b-platforms">
                        <label>Plataformas B2B</label>
                        <div class="platform-grid">
                            <label class="platform-check"><input type="checkbox" id="p-linkedin" checked> LinkedIn</label>
                            <label class="platform-check"><input type="checkbox" id="p-gmaps" checked> Google Maps</label>
                            <label class="platform-check"><input type="checkbox" id="p-youtube-b2b"> YouTube</label>
                        </div>
                    </div>

                    <div class="form-row" id="b2c-platforms">
                        <label>Plataformas B2C</label>
                        <div class="platform-grid">
                            <label class="platform-check"><input type="checkbox" id="p-twitter" checked> Twitter/X</label>
                            <label class="platform-check"><input type="checkbox" id="p-reddit" checked> Reddit</label>
                            <label class="platform-check"><input type="checkbox" id="p-youtube-b2c" checked> YouTube</label>
                            <label class="platform-check"><input type="checkbox" id="p-facebook"> Facebook</label>
                        </div>
                    </div>

                    <button class="btn-run" id="btn-run-scrape" onclick="startScraping()">
                        ⚡ Ejecutar Scraping
                    </button>

                    <button class="btn-sync" style="width:100%;margin-top:0.75rem" onclick="syncToCloud()">
                        ☁️ Sincronizar a Cloud (Supabase)
                    </button>
                </div>
            </div>

            <!-- Panel Derecho: Terminal + Info -->
            <div>
                <div class="leadgen-form-card" style="margin-bottom:1.5rem">
                    <h3>🖥 Terminal de Logs</h3>
                    <div class="log-terminal" id="log-terminal">[Esperando comando...]
</div>
                    <div style="display:flex;gap:0.5rem;margin-top:0.75rem">
                        <button class="btn-sync" onclick="clearLogs()">Limpiar</button>
                        <button class="btn-sync" onclick="loadLocalStats()">Actualizar Stats</button>
                        <span id="scrape-status" style="margin-left:auto;font-size:0.8rem;color:var(--text-dim)">Inactivo</span>
                    </div>
                </div>

                <div class="leadgen-form-card">
                    <h3>📊 Top Nichos (Local)</h3>
                    <div id="lg-nicho-breakdown" style="color:var(--text-dim);font-size:0.85rem">Cargando...</div>
                </div>
            </div>
        </div>
    </div>`;
    // Append as the last child of .main so the existing CRM tabs are untouched.
    document.querySelector('.main').insertAdjacentHTML('beforeend', leadgenHtml);
})();
1308
+
1309
/**
 * Probe the backend /api/health endpoint and update the status banner.
 *
 * Returns true when the API is reachable (HTTP 2xx), false otherwise.
 * On success it also refreshes the local stats widgets.
 *
 * Fix: the banner messages still referred to a local dev server
 * ("http://localhost:5000" / "start_api_server.bat") even though the app now
 * runs 100% in the cloud with the API served from the same origin
 * (API_BASE = ''). The messages now describe the same-origin API instead.
 */
async function checkServer() {
    const banner = document.getElementById('server-banner');
    const msg = document.getElementById('server-msg');
    const icon = document.getElementById('server-icon');
    try {
        // Abort the probe after 3s so the UI never hangs on a dead backend.
        const res = await fetch(`${API_BASE}/api/health`, { signal: AbortSignal.timeout(3000) });
        if (res.ok) {
            banner.className = 'server-banner online';
            icon.textContent = '🟢';
            msg.textContent = 'API Server en línea';
            loadLocalStats();
            return true;
        }
    } catch { /* network error or timeout — fall through to the offline UI */ }
    banner.className = 'server-banner offline';
    icon.textContent = '⚠️';
    msg.textContent = 'API Server offline. Pulsa "Reconectar" o recarga la página.';
    return false;
}
1329
+
1330
/**
 * Load local DB statistics from the Flask backend (/api/stats) and render
 * them into the stat widgets and the "Top Nichos" table. Best-effort: on
 * failure the widgets are reset to placeholders instead of throwing.
 *
 * Fix: the original catch reset only #lg-total, leaving #lg-30d,
 * #lg-pending-wa and the niche table showing stale data (or the initial
 * "Cargando..." text) after a failed fetch. All widgets are now reset.
 */
async function loadLocalStats() {
    try {
        const res = await fetch(`${API_BASE}/api/stats`);
        const d = await res.json();
        // ?? '—': keep the em-dash placeholder when a field is absent.
        document.getElementById('lg-total').textContent = d.total ?? '—';
        document.getElementById('lg-30d').textContent = d.leads_30d ?? '—';
        document.getElementById('lg-pending-wa').textContent = d.pending_whatsapp ?? '—';

        // Nicho breakdown table (niche name -> lead count).
        const elem = document.getElementById('lg-nicho-breakdown');
        if (d.by_niche && Object.keys(d.by_niche).length) {
            let html = '<table style="width:100%;border-collapse:collapse">';
            for (const [n, cnt] of Object.entries(d.by_niche)) {
                html += `<tr><td style="padding:0.35rem 0;color:var(--text-main)">${n}</td>
                    <td style="text-align:right;color:#818cf8;font-weight:700">${cnt}</td></tr>`;
            }
            html += '</table>';
            elem.innerHTML = html;
        } else {
            elem.textContent = 'No hay datos aún.';
        }
    } catch (e) {
        // Backend unreachable: reset every stat widget so nothing stale remains.
        document.getElementById('lg-total').textContent = '—';
        document.getElementById('lg-30d').textContent = '—';
        document.getElementById('lg-pending-wa').textContent = '—';
        document.getElementById('lg-nicho-breakdown').textContent = 'No hay datos aún.';
    }
}
1356
+
1357
/**
 * Show/hide the B2B / B2C platform checkbox groups so that only the groups
 * relevant to the currently selected lead type (#lg-type) are visible.
 */
function updatePlatforms() {
    const leadType = document.getElementById('lg-type').value;
    // "both" keeps both groups visible; "b2b"/"b2c" hides the opposite group.
    const showB2b = leadType !== 'b2c';
    const showB2c = leadType !== 'b2b';
    document.getElementById('b2b-platforms').style.display = showB2b ? 'block' : 'none';
    document.getElementById('b2c-platforms').style.display = showB2c ? 'block' : 'none';
}
1363
+
1364
/**
 * Append one line to the on-page log terminal (#log-terminal), colorized by
 * content, and keep the terminal scrolled to the newest entry.
 */
function appendLog(line) {
    // Classify the line for styling; the error check deliberately wins over
    // the done/sync check, matching the original precedence.
    let cssClass = '';
    if (line.includes('[ERR]') || line.includes('Error')) {
        cssClass = 'log-err';
    } else if (line.includes('[DONE]') || line.includes('[SYNC]')) {
        cssClass = 'log-done';
    }

    const terminal = document.getElementById('log-terminal');
    const entry = document.createElement('span');
    if (cssClass) entry.className = cssClass;
    entry.textContent = line + '\n';
    terminal.appendChild(entry);
    // Pin the viewport to the bottom so the latest line is always visible.
    terminal.scrollTop = terminal.scrollHeight;
}
1375
+
1376
// Wipe the log terminal, leaving only the "clean" marker line.
function clearLogs() {
    const terminal = document.getElementById('log-terminal');
    terminal.textContent = '[Logs limpios]\n';
}
1379
+
1380
/**
 * Launch the scraping pipeline.
 *
 * Flow: guard against re-entry -> verify the backend is reachable -> collect
 * the form configuration -> POST /api/scrape to start the run -> subscribe to
 * the /api/stream SSE endpoint and mirror backend log lines into the terminal
 * until the server emits the '[END]' sentinel. On any failure path the UI is
 * restored via resetScrapeUI().
 */
async function startScraping() {
    if (scrapeRunning) return;               // one run at a time
    const online = await checkServer();
    if (!online) return;                     // banner already shows the offline state

    const type = document.getElementById('lg-type').value;

    // Collect the checked B2B platforms (skipped entirely for pure-B2C runs).
    const b2b = [];
    if (type !== 'b2c') {
        if (document.getElementById('p-linkedin').checked) b2b.push('linkedin');
        if (document.getElementById('p-gmaps').checked) b2b.push('google_maps');
        if (document.getElementById('p-youtube-b2b').checked) b2b.push('youtube');
    }
    // Collect the checked B2C platforms (skipped entirely for pure-B2B runs).
    const b2c = [];
    if (type !== 'b2b') {
        if (document.getElementById('p-twitter').checked) b2c.push('twitter');
        if (document.getElementById('p-reddit').checked) b2c.push('reddit');
        if (document.getElementById('p-youtube-b2c').checked) b2c.push('youtube');
        if (document.getElementById('p-facebook').checked) b2c.push('facebook');
    }

    // Request body for the backend pipeline; falls back to defaults for the
    // niche ('Real Estate') and the per-platform limit (15).
    const payload = {
        niche: document.getElementById('lg-niche').value || 'Real Estate',
        lead_type: type,
        country: document.getElementById('lg-country').value,
        state: document.getElementById('lg-state').value,
        city: document.getElementById('lg-city').value,
        limit: parseInt(document.getElementById('lg-limit').value) || 15,
        b2b_platforms: b2b,
        b2c_platforms: b2c
    };

    clearLogs();
    appendLog(`[INICIO] ${new Date().toLocaleTimeString()} — Lanzando pipeline...`);
    appendLog(`Nicho: ${payload.niche} | Tipo: ${payload.lead_type} | Ciudad: ${payload.city || '—'}`);

    // Flip the UI into "running" state before the network round-trip.
    scrapeRunning = true;
    document.getElementById('btn-run-scrape').disabled = true;
    document.getElementById('scrape-status').textContent = '⏳ Ejecutando...';

    // POST to start the scrape; any rejection or non-2xx response aborts the
    // run and restores the UI without opening the SSE stream.
    try {
        const res = await fetch(`${API_BASE}/api/scrape`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload)
        });
        const d = await res.json();
        if (!res.ok) {
            appendLog('[ERR] ' + (d.error || 'Error desconocido'));
            resetScrapeUI();
            return;
        }
    } catch (e) {
        appendLog('[ERR] No se pudo conectar: ' + e.message);
        resetScrapeUI();
        return;
    }

    // Subscribe to SSE log stream. Close any stream left over from a previous
    // run first so we never hold two connections.
    if (sseSource) sseSource.close();
    sseSource = new EventSource(`${API_BASE}/api/stream`);
    sseSource.onmessage = (e) => {
        // '[END]' is the server's completion sentinel: tear down the stream,
        // restore the UI and refresh both local stats and cloud leads.
        if (e.data === '[END]') {
            sseSource.close();
            resetScrapeUI();
            loadLocalStats();
            // Refresh cloud leads
            loadData();
            return;
        }
        // '[PING]' keep-alives are dropped; everything else is a log line.
        if (e.data !== '[PING]') appendLog(e.data);
    };
    sseSource.onerror = () => {
        // Stream dropped (server restart, network): stop and restore the UI.
        sseSource.close();
        resetScrapeUI();
    };
}
1459
+
1460
/**
 * Restore the scraping UI to its idle state: clear the re-entry guard,
 * re-enable the run button, show a transient status label, then fall back to
 * 'Inactivo' after 5 seconds.
 *
 * @param {string} [statusLabel='✅ Completado'] - transient status text.
 *   Generalized from the hard-coded string (default preserves the original
 *   behavior for all existing callers); error-path callers may pass a more
 *   accurate label such as '⚠️ Interrumpido'.
 */
function resetScrapeUI(statusLabel = '✅ Completado') {
    scrapeRunning = false;
    document.getElementById('btn-run-scrape').disabled = false;
    document.getElementById('scrape-status').textContent = statusLabel;
    setTimeout(() => {
        document.getElementById('scrape-status').textContent = 'Inactivo';
    }, 5000);
}
1468
+
1469
/**
 * Force a sync of the local SQLite leads up to Supabase via POST /api/sync,
 * logging progress to the terminal and refreshing the CRM views on success.
 */
async function syncToCloud() {
    // Bail out early when the backend is unreachable (banner already updated).
    if (!(await checkServer())) return;

    appendLog('[SYNC] Iniciando sincronización con Supabase...');
    try {
        const response = await fetch(`${API_BASE}/api/sync`, { method: 'POST' });
        const body = await response.json();
        appendLog(`[SYNC] ${body.message} — Sincronizados: ${body.result || ''}`);
        // Reload the CRM data so the freshly synced rows appear immediately.
        loadData();
    } catch (err) {
        appendLog('[ERR] Error sincronizando: ' + err.message);
    }
}
1483
+
1484
  loadData();
1485
  </script>
1486
  </body>
1487
+
1488
+ </html>