Spaces:
Sleeping
Sleeping
Commit ·
615e194
1
Parent(s): 52833c5
🚀 Fusion: NexusCRM + Lead Gen Pro (100% Cloud Docker)
Browse files- .dockerignore +12 -0
- Dockerfile +66 -0
- README.md +7 -6
- app.py +194 -0
- lead_gen_pro/config.json +46 -0
- lead_gen_pro/database.py +268 -0
- lead_gen_pro/l1_directive/outreach_sop.md +79 -0
- lead_gen_pro/l1_directive/scraping_sop.md +46 -0
- lead_gen_pro/l2_orchestration/__init__.py +1 -0
- lead_gen_pro/l2_orchestration/orchestrator.py +319 -0
- lead_gen_pro/l2_orchestration/orchestrator_pro.py +486 -0
- lead_gen_pro/l3_execution/__init__.py +1 -0
- lead_gen_pro/l3_execution/apify_pro_scraper.py +108 -0
- lead_gen_pro/l3_execution/apify_scraper.py +266 -0
- lead_gen_pro/l3_execution/automation_engine.py +80 -0
- lead_gen_pro/l3_execution/consumer_intent_scraper.py +729 -0
- lead_gen_pro/l3_execution/database_doe.py +480 -0
- lead_gen_pro/l3_execution/enrichment.py +309 -0
- lead_gen_pro/l3_execution/enrichment_pro.py +34 -0
- lead_gen_pro/l3_execution/instantly_sender.py +260 -0
- lead_gen_pro/l3_execution/lead_scoring.py +71 -0
- lead_gen_pro/l3_execution/supabase_sync.py +91 -0
- lead_gen_pro/main.py +97 -0
- lead_gen_pro/main_pro.py +264 -0
- lead_gen_pro/requirements.txt +16 -0
- lead_gen_pro/scraper.py +311 -0
- lead_gen_pro/whatsapp_sender.py +267 -0
- lead_gen_pro/workflow.py +230 -0
- index.html → static/index.html +953 -224
.dockerignore
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ignore local development artifacts
|
| 2 |
+
**/__pycache__
|
| 3 |
+
**/*.pyc
|
| 4 |
+
**/venv
|
| 5 |
+
**/*.db
|
| 6 |
+
**/*.bat
|
| 7 |
+
**/*.log
|
| 8 |
+
**/*.zip
|
| 9 |
+
**/node_modules
|
| 10 |
+
.git
|
| 11 |
+
.gitignore
|
| 12 |
+
.dockerignore
|
Dockerfile
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# NexusCRM + Lead Gen Pro: Unified Cloud Dockerfile
# Optimized for Hugging Face Spaces (Docker SDK)

# Using Python 3.10 slim for a balance of speed and stability
FROM python:3.10-slim

# PYTHONUNBUFFERED / PYTHONDONTWRITEBYTECODE: unbuffered logging, no .pyc files.
# PORT: Hugging Face Spaces routes traffic to port 7860.
# PLAYWRIGHT_BROWSERS_PATH: directory where `playwright install` stores browsers.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PORT=7860 \
    PLAYWRIGHT_BROWSERS_PATH=/app/pw-browsers

# Install system dependencies required for Chromium and Playwright.
# NOTE: the X RandR runtime library is packaged as `libxrandr2`; there is no
# `librandr2` package, so the previous spelling made this layer fail.
RUN apt-get update && apt-get install -y --no-install-recommends \
    wget \
    gnupg \
    libnss3 \
    libnspr4 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libcups2 \
    libdrm2 \
    libxkbcommon0 \
    libxcomposite1 \
    libxdamage1 \
    libxext6 \
    libxfixes3 \
    libxrandr2 \
    libgbm1 \
    libpango-1.0-0 \
    libcairo2 \
    libasound2 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies (including apify-client for the scraping engine)
RUN pip install --no-cache-dir \
    flask \
    flask-cors \
    playwright \
    supabase \
    pandas \
    python-dotenv \
    gunicorn \
    apify-client

# Install Playwright and the Chromium browser
RUN playwright install chromium
RUN playwright install-deps chromium

# Preparation of the working environment
# The entire folder (app.py, static/, lead_gen_pro/) is copied to /app
COPY . .

# Create a data directory for temporary SQLite files if it doesn't exist
RUN mkdir -p /app/lead_gen_pro/data && chmod 777 /app/lead_gen_pro/data

# Expose the HF port
EXPOSE 7860

# Use gunicorn for a production-ready server in the cloud.
# We bind to 0.0.0.0 because it's a container; "--timeout 0" disables the
# worker timeout so long-lived log-stream responses are not killed.
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--threads", "8", "--timeout", "0", "app:app"]
|
README.md
CHANGED
|
@@ -1,12 +1,13 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: blue
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk:
|
|
|
|
| 7 |
app_port: 7860
|
| 8 |
---
|
| 9 |
|
| 10 |
-
#
|
| 11 |
|
| 12 |
-
|
|
|
|
| 1 |
---
|
| 2 |
+
title: NexusCRM
|
| 3 |
+
emoji: 💼
|
| 4 |
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
sdk_version: 4.0.0
|
| 8 |
app_port: 7860
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# NexusCRM
|
| 12 |
|
| 13 |
+
SaaS CRM System with Supabase backend
|
app.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import subprocess
|
| 4 |
+
import threading
|
| 5 |
+
import queue
|
| 6 |
+
import time
|
| 7 |
+
from flask import Flask, send_from_directory, request, jsonify, Response
|
| 8 |
+
from flask_cors import CORS
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
|
| 11 |
+
# Load environment variables (from Hugging Face Secrets)
load_dotenv()

# Resolve every path from this file's location instead of the current working
# directory, so the app behaves the same wherever it is launched from. Flask
# resolves ``static_folder`` relative to this module as well.
APP_ROOT = os.path.dirname(os.path.abspath(__file__))

# --- Configuration ---
PORT = int(os.environ.get("PORT", 7860))
BASE_DIR = os.path.join(APP_ROOT, 'lead_gen_pro')
# In Docker, we use the local path for the temporary DB
DB_PATH = os.path.join(BASE_DIR, "leads_doe.db")
MAIN_PRO = os.path.join(BASE_DIR, "main_pro.py")

# Ensure lead_gen_pro is in the path so we can import its modules
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)

app = Flask(__name__, static_folder='static')
# NOTE(review): CORS is enabled for every origin, which lets any website call
# the API endpoints defined on this app - confirm this is intended.
CORS(app)

# Log queue for SSE (Server-Sent Events)
log_queue = queue.Queue()
# Handle of the most recently launched scraping subprocess (None until first run)
active_process = None
# Guards reads/writes of ``active_process``
process_lock = threading.Lock()
|
| 31 |
+
|
| 32 |
+
# --- Unified UI Serving ---
|
| 33 |
+
|
| 34 |
+
@app.route("/")
def index():
    """Return ``index.html`` from the app's static folder (the NexusCRM UI)."""
    static_root = app.static_folder
    return send_from_directory(static_root, "index.html")
|
| 38 |
+
|
| 39 |
+
@app.route("/<path:path>")
def static_proxy(path):
    """Return the file at ``path`` from the app's static folder.

    Args:
        path: URL path captured by the route.
    """
    static_root = app.static_folder
    return send_from_directory(static_root, path)
|
| 43 |
+
|
| 44 |
+
# --- Backend API Endpoints ---
|
| 45 |
+
|
| 46 |
+
@app.route("/api/health")
def health():
    """Report that the server is up and whether the local DB file exists."""
    payload = {
        "status": "online",
        "environment": "Hugging Face Cloud",
        # True only when the SQLite file is present on disk.
        "db_access": os.path.exists(DB_PATH),
    }
    return jsonify(payload)
|
| 54 |
+
|
| 55 |
+
@app.route("/api/scrape", methods=["POST"])
def scrape():
    """Launch the Lead Gen Pro scraping pipeline as a background subprocess.

    The request body must be a JSON object. Recognised keys: ``niche``
    (default "Real Estate"), ``limit`` (default 20), ``lead_type`` (default
    "both") and the optional ``country``, ``state`` and ``city``.

    Returns:
        200 with ``{"status": "launched", ...}`` once the subprocess started,
        400 for a malformed body, 409 when a scrape is still running, and
        500 when the subprocess could not be started.
    """
    global active_process

    data = request.get_json(force=True) or {}
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    try:
        limit = int(data.get("limit", 20))
    except (TypeError, ValueError):
        return jsonify({"error": "'limit' must be an integer."}), 400

    # Run the pipeline with the same interpreter that runs this server.
    # Every argument is coerced to ``str`` because Popen rejects other types.
    cmd = [
        sys.executable or "python", MAIN_PRO,
        "--pipeline",
        "--niche", str(data.get("niche", "Real Estate")),
        "--limit", str(limit),
        "--type", str(data.get("lead_type", "both")),
    ]

    # Geographic settings
    for key in ("country", "state", "city"):
        if data.get(key):
            cmd += [f"--{key}", str(data[key])]

    # The "already running?" check and the process start share one critical
    # section; otherwise two concurrent requests could both pass the check.
    with process_lock:
        if active_process is not None and active_process.poll() is None:
            return jsonify({"error": "A scraping process is already running in the cloud."}), 409

        try:
            proc = subprocess.Popen(
                cmd,
                cwd=BASE_DIR,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1,
                env={**os.environ, "PYTHONUNBUFFERED": "1"},
            )
        except OSError as exc:
            return jsonify({"error": f"Could not start the scraping engine: {exc}"}), 500

        active_process = proc

    def _run_worker(proc):
        """Forward subprocess output to the log queue, then sync to Supabase."""
        try:
            log_queue.put("🚀 [CLOUD] Iniciando motor de scraping en Hugging Face...\n")

            # Stream logs line by line
            for line in proc.stdout:
                log_queue.put(line)

            proc.wait()
            log_queue.put(f"\n✅ [CLOUD] Proceso finalizado (Código: {proc.returncode})\n")

            # Automatic Cloud Sync (Supabase)
            try:
                log_queue.put("♻️ [CLOUD] Sincronizando resultados con Supabase Cloud...\n")
                # Imported lazily, only when a sync is actually performed.
                from l3_execution.supabase_sync import sync_all_leads_to_supabase
                stats = sync_all_leads_to_supabase()
                log_queue.put(f"📊 [SYNC] Sincronización exitosa: {stats}\n")
            except Exception as e:
                log_queue.put(f"❌ [SYNC-ERROR] Error en sincronización cloud: {str(e)}\n")
        except Exception as exc:
            log_queue.put(f"❌ [CLOUD] Error inesperado en el worker: {exc}\n")
        finally:
            # Always emit the end marker so stream consumers are not left
            # waiting forever after a failure.
            log_queue.put("[END]")

    # Stream the output in the background to not block the request
    threading.Thread(target=_run_worker, args=(proc,), daemon=True).start()
    return jsonify({"status": "launched", "location": "Hugging Face Space"})
|
| 120 |
+
|
| 121 |
+
@app.route("/api/stream")
def stream():
    """SSE endpoint that relays queued log messages to the CRM UI.

    Messages are taken from the module-level ``log_queue``; since a queue item
    is delivered to a single consumer, concurrent clients split the messages
    between them. A ``[PING]`` event is sent after 60 seconds of silence and
    the stream ends after relaying the ``[END]`` marker.
    """
    def _as_event(text):
        # An SSE event is one or more "data:" lines followed by a blank line.
        # A raw newline inside the payload (e.g. messages queued with a
        # leading "\n") would terminate the event early, so surrounding line
        # breaks are removed and inner ones become separate "data:" lines.
        parts = text.strip("\r\n").splitlines() or [""]
        return "".join(f"data: {part.rstrip()}\n" for part in parts) + "\n"

    def generate():
        yield "data: [CONECTADO] Servidor Hugging Face listo...\n\n"
        while True:
            try:
                line = log_queue.get(timeout=60)
            except queue.Empty:
                # Keep-alive so idle connections are not dropped.
                yield "data: [PING]\n\n"
                continue

            if line == "[END]":
                yield "data: [END]\n\n"
                break
            yield _as_event(line)

    return Response(
        generate(),
        mimetype="text/event-stream",
        # Ask browsers and reverse proxies not to cache or buffer the stream.
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
|
| 137 |
+
|
| 138 |
+
@app.route("/api/sync", methods=["POST"])
def manual_sync():
    """Run ``sync_all_leads_to_supabase`` on demand and report the outcome.

    Returns a success payload carrying the sync result, or a 500 payload with
    the error message when the import or the sync raises.
    """
    try:
        from l3_execution.supabase_sync import sync_all_leads_to_supabase
        outcome = sync_all_leads_to_supabase()
        payload = {"status": "success", "data": outcome}
        return jsonify(payload)
    except Exception as exc:
        failure = {"status": "error", "message": str(exc)}
        return jsonify(failure), 500
|
| 147 |
+
|
| 148 |
+
@app.route("/api/stats")
def stats():
    """Return lead counters read from the local SQLite file at ``DB_PATH``.

    The response carries ``total``, ``leads_30d``, ``pending_whatsapp`` and
    ``by_niche`` (top 5 niches). All-zero values are returned when the DB file
    does not exist; any failure yields a 500 with the error message.
    """
    import sqlite3
    try:
        if not os.path.exists(DB_PATH):
            return jsonify({
                "total": 0,
                "leads_30d": 0,
                "pending_whatsapp": 0,
                "by_niche": {}
            })

        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()

            # Total
            cur.execute("SELECT COUNT(*) FROM leads")
            total = cur.fetchone()[0]

            # Last 30 days
            cur.execute("SELECT COUNT(*) FROM leads WHERE created_at > date('now', '-30 days')")
            total_30d = cur.fetchone()[0]

            # Pending WhatsApp
            cur.execute("SELECT COUNT(*) FROM leads WHERE (whatsapp_enviado IS NULL OR whatsapp_enviado = 0) AND (telefono IS NOT NULL OR telefono_formateado IS NOT NULL)")
            pending_wa = cur.fetchone()[0]

            # By niche (rows with an empty/NULL niche are skipped)
            cur.execute("SELECT nicho, COUNT(*) as count FROM leads GROUP BY nicho ORDER BY count DESC LIMIT 5")
            by_niche = {row['nicho']: row['count'] for row in cur.fetchall() if row['nicho']}
        finally:
            # Close the connection even when a query fails (e.g. the table is
            # missing); previously the error path left it open.
            conn.close()

        return jsonify({
            "total": total,
            "leads_30d": total_30d,
            "pending_whatsapp": pending_wa,
            "by_niche": by_niche
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
| 191 |
+
|
| 192 |
+
if __name__ == "__main__":
    # Direct execution is for local testing only; the Docker image serves the
    # app through gunicorn instead.
    app.run(port=PORT, host="0.0.0.0")
|
lead_gen_pro/config.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"apify_token": "apify_api_J3amq3BFZwtBTddaYQn3PkJxLjeZno0geYY7",
|
| 3 |
+
"serpapi_key": "YOUR_SERPAPI_KEY",
|
| 4 |
+
"sender_name": "NexusCRM Butler",
|
| 5 |
+
"niches": {
|
| 6 |
+
"real_estate": {
|
| 7 |
+
"search_queries": [
|
| 8 |
+
"real estate agent in {city}",
|
| 9 |
+
"realtor in {city}",
|
| 10 |
+
"luxury homes in {city}",
|
| 11 |
+
"property management {city}"
|
| 12 |
+
],
|
| 13 |
+
"templates": {
|
| 14 |
+
"email": {
|
| 15 |
+
"subject": "Colaboración para {name} en {city}",
|
| 16 |
+
"body": "Hola {name},\n\nVi tu excelente perfil inmobiliario en {city}. Estoy ayudando a realtors a automatizar su captación de leads.\n\n¿Te interesaría una breve llamada?\n\nSaludos,\n{sender_name}"
|
| 17 |
+
},
|
| 18 |
+
"whatsapp": "¡Hola {name}! Vi tu trabajo en {city}. Tengo una propuesta para automatizar tu prospección inmobiliaria. ¿Hablamos?"
|
| 19 |
+
}
|
| 20 |
+
},
|
| 21 |
+
"insurance": {
|
| 22 |
+
"search_queries": [
|
| 23 |
+
"insurance agent in {city}",
|
| 24 |
+
"seguros en {city}",
|
| 25 |
+
"broker de seguros {city}"
|
| 26 |
+
],
|
| 27 |
+
"templates": {
|
| 28 |
+
"email": {
|
| 29 |
+
"subject": "Idea for your insurance business in {city}",
|
| 30 |
+
"body": "Hi {name},\n\nI noticed your insurance agency in {city}. We help brokers get more leads. Are you interested?\n\nBest,\n{sender_name}"
|
| 31 |
+
},
|
| 32 |
+
"whatsapp": "¡Hola {name}! Noté tu éxito en el sector seguros en {city}. Tengo una propuesta para automatizar tu prospección. ¿Hablamos?"
|
| 33 |
+
}
|
| 34 |
+
}
|
| 35 |
+
},
|
| 36 |
+
"automation_rules": [
|
| 37 |
+
{
|
| 38 |
+
"if": {"field": "quality_score", "operator": ">=", "value": 8},
|
| 39 |
+
"then": {"type": "mark_priority", "value": 3}
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"if": {"field": "niche", "operator": "==", "value": "real_estate"},
|
| 43 |
+
"then": {"type": "notify", "message": "New Real Estate lead detected!"}
|
| 44 |
+
}
|
| 45 |
+
]
|
| 46 |
+
}
|
lead_gen_pro/database.py
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Lead Generation Database Module
|
| 3 |
+
================================
|
| 4 |
+
Manages SQLite database for storing and tracking leads.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sqlite3
|
| 8 |
+
import os
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from typing import Optional, List, Dict, Any
|
| 11 |
+
|
| 12 |
+
DB_PATH = os.path.join(os.path.dirname(__file__), "leads.db")
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_connection() -> sqlite3.Connection:
    """Open the SQLite file at ``DB_PATH`` and return the connection.

    The connection uses ``sqlite3.Row`` as row factory, so columns can be read
    by name as well as by index. The caller is responsible for closing it.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def init_db() -> None:
    """Create the ``leads`` and ``outreach_log`` tables and their indexes.

    Every statement uses ``IF NOT EXISTS``, so calling this repeatedly is
    safe. The connection is closed even if a statement fails.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS leads (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                phone TEXT,
                email TEXT,
                address TEXT,
                website TEXT,
                rating REAL,
                reviews_count INTEGER,
                source TEXT DEFAULT 'google_maps',
                niche TEXT NOT NULL,
                country TEXT NOT NULL,
                city TEXT,
                status TEXT DEFAULT 'new',
                whatsapp_sent BOOLEAN DEFAULT 0,
                instagram_sent BOOLEAN DEFAULT 0,
                replied BOOLEAN DEFAULT 0,
                notes TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS outreach_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                lead_id INTEGER NOT NULL,
                channel TEXT NOT NULL,
                message_template TEXT,
                sent_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                response TEXT,
                FOREIGN KEY (lead_id) REFERENCES leads(id)
            )
        """)

        # Indexes for the columns used as filters by the query helpers.
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_leads_status ON leads(status)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_leads_niche ON leads(niche)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_leads_country ON leads(country)
        """)

        conn.commit()
    finally:
        conn.close()
    print(f"[OK] Base de datos inicializada en: {DB_PATH}")
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def add_lead(lead_data: Dict[str, Any]) -> int:
    """
    Add a new lead to the database.

    Args:
        lead_data: Dictionary with lead information. Keys read: name, phone,
            email, address, website, rating, reviews_count, source
            (defaults to "google_maps" when absent), niche, country, city,
            notes. Missing keys are stored as NULL.

    Returns:
        The ID of the inserted lead.

    Raises:
        sqlite3.Error: if the insert is rejected by the database. The
            connection is closed in that case as well.
    """
    columns = (
        "name", "phone", "email", "address", "website", "rating",
        "reviews_count", "source", "niche", "country", "city", "notes",
    )
    values = tuple(
        lead_data.get(col, "google_maps") if col == "source" else lead_data.get(col)
        for col in columns
    )

    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            INSERT INTO leads (name, phone, email, address, website, rating,
                               reviews_count, source, niche, country, city, notes)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, values)
        lead_id = cursor.lastrowid
        conn.commit()
    finally:
        # Previously a failed insert left the connection open.
        conn.close()
    return lead_id
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def get_leads(
    niche: Optional[str] = None,
    country: Optional[str] = None,
    status: Optional[str] = None,
    limit: int = 100
) -> List[Dict[str, Any]]:
    """
    Retrieve leads with optional filters, newest first.

    Args:
        niche: Filter by niche (real_estate, insurance)
        country: Filter by country (usa, venezuela)
        status: Filter by status (new, contacted, replied, converted)
        limit: Maximum number of results

    Returns:
        List of lead dictionaries.
    """
    query = "SELECT * FROM leads WHERE 1=1"
    params: List[Any] = []

    # Only filters with a truthy value are applied.
    for column, value in (("niche", niche), ("country", country), ("status", status)):
        if value:
            query += f" AND {column} = ?"
            params.append(value)

    # ``created_at`` has one-second resolution, so ``id`` breaks ties to keep
    # the order deterministic for leads inserted within the same second.
    query += " ORDER BY created_at DESC, id DESC LIMIT ?"
    params.append(limit)

    conn = get_connection()
    try:
        rows = conn.cursor().execute(query, params).fetchall()
    finally:
        conn.close()

    return [dict(row) for row in rows]
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def update_lead_status(lead_id: int, status: str, notes: Optional[str] = None) -> None:
    """Update the status of a lead and refresh its ``updated_at`` timestamp.

    Args:
        lead_id: ID of the lead to update.
        status: New status value.
        notes: Replacement notes. When omitted or empty, the stored notes are
            left untouched.
    """
    conn = get_connection()
    try:
        # COALESCE keeps the stored notes whenever no new notes are supplied,
        # which replaces the two near-identical UPDATE branches.
        conn.cursor().execute("""
            UPDATE leads
            SET status = ?, notes = COALESCE(?, notes), updated_at = CURRENT_TIMESTAMP
            WHERE id = ?
        """, (status, notes or None, lead_id))
        conn.commit()
    finally:
        conn.close()
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def mark_outreach_sent(lead_id: int, channel: str, template: str) -> None:
    """Mark that outreach was sent to a lead and log it.

    For the "whatsapp" and "instagram" channels the matching ``*_sent`` flag
    is set, the lead becomes 'contacted' and ``updated_at`` is refreshed. Any
    other channel is only recorded in ``outreach_log``.

    Args:
        lead_id: ID of the contacted lead.
        channel: Outreach channel name.
        template: Message template stored in the log entry.
    """
    # ``updated_at`` is refreshed here too, consistent with update_lead_status.
    flag_updates = {
        "whatsapp": "UPDATE leads SET whatsapp_sent = 1, status = 'contacted', "
                    "updated_at = CURRENT_TIMESTAMP WHERE id = ?",
        "instagram": "UPDATE leads SET instagram_sent = 1, status = 'contacted', "
                     "updated_at = CURRENT_TIMESTAMP WHERE id = ?",
    }

    conn = get_connection()
    try:
        cursor = conn.cursor()

        # Update lead flags
        statement = flag_updates.get(channel)
        if statement is not None:
            cursor.execute(statement, (lead_id,))

        # Log the outreach
        cursor.execute("""
            INSERT INTO outreach_log (lead_id, channel, message_template)
            VALUES (?, ?, ?)
        """, (lead_id, channel, template))

        conn.commit()
    finally:
        conn.close()
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def get_pending_outreach(channel: str, niche: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]:
    """Get leads that haven't been contacted yet via a specific channel.

    Only leads whose status is 'new' are considered. Results are returned in
    insertion (``id``) order so repeated calls pick leads deterministically.

    Args:
        channel: "whatsapp" (requires a non-empty phone and ``whatsapp_sent``
            = 0) or "instagram" (requires ``instagram_sent`` = 0). Any other
            value applies no channel-specific filter.
        niche: Optional niche filter.
        limit: Maximum number of results.

    Returns:
        List of lead dictionaries.
    """
    query = "SELECT * FROM leads WHERE status = 'new'"
    params: List[Any] = []

    if channel == "whatsapp":
        # An empty phone string is as unusable as NULL for WhatsApp.
        query += " AND whatsapp_sent = 0 AND phone IS NOT NULL AND TRIM(phone) != ''"
    elif channel == "instagram":
        query += " AND instagram_sent = 0"

    if niche:
        query += " AND niche = ?"
        params.append(niche)

    query += " ORDER BY id LIMIT ?"
    params.append(limit)

    conn = get_connection()
    try:
        rows = conn.cursor().execute(query, params).fetchall()
    finally:
        conn.close()

    return [dict(row) for row in rows]
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def get_stats() -> Dict[str, Any]:
    """Get statistics about the leads database.

    Returns:
        Dictionary with ``total_leads``, the per-value counts ``by_niche``,
        ``by_country`` and ``by_status``, and the number of leads flagged as
        ``whatsapp_sent`` / ``instagram_sent``.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()

        def _scalar(sql: str) -> int:
            # Single-value COUNT query.
            return cursor.execute(sql).fetchone()[0]

        def _grouped(column: str) -> Dict[Any, int]:
            # ``column`` only ever comes from the fixed names used below.
            cursor.execute(f"SELECT {column}, COUNT(*) FROM leads GROUP BY {column}")
            return {row[0]: row[1] for row in cursor.fetchall()}

        stats: Dict[str, Any] = {
            "total_leads": _scalar("SELECT COUNT(*) FROM leads"),
            "by_niche": _grouped("niche"),
            "by_country": _grouped("country"),
            "by_status": _grouped("status"),
            "whatsapp_sent": _scalar("SELECT COUNT(*) FROM leads WHERE whatsapp_sent = 1"),
            "instagram_sent": _scalar("SELECT COUNT(*) FROM leads WHERE instagram_sent = 1"),
        }
    finally:
        # Close the connection even when a query fails.
        conn.close()
    return stats
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
if __name__ == "__main__":
    # Running this module directly creates the schema and prints a summary.
    init_db()

    summary = get_stats()
    print("\n[ESTADISTICAS] Estadisticas de la Base de Datos:")
    print(f" Total leads: {summary['total_leads']}")
    print(f" Por nicho: {summary['by_niche']}")
    print(f" Por país: {summary['by_country']}")
|
lead_gen_pro/l1_directive/outreach_sop.md
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
L1 Directive: Outreach SOP
===========================

Este documento define las reglas para mensajes de WhatsApp y Email.
|
| 4 |
+
|
| 5 |
+
## Objetivo
|
| 6 |
+
Contactar leads de manera profesional y personalizada para generar oportunidades de negocio.
|
| 7 |
+
|
| 8 |
+
## Canales
|
| 9 |
+
1. **WhatsApp Business** - Para contacto directo e inmediato
|
| 10 |
+
2. **Email (Instantly.ai)** - Para seguimiento profesional
|
| 11 |
+
|
| 12 |
+
## Reglas Generales
|
| 13 |
+
- Nunca enviar más de 20 mensajes por hora
|
| 14 |
+
- Esperar mínimo 30 segundos entre cada mensaje
|
| 15 |
+
- Horario de envío: 9am-6pm zona del destinatario
|
| 16 |
+
- Personalizar SIEMPRE con nombre y ciudad
|
| 17 |
+
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
## Templates de WhatsApp
|
| 21 |
+
|
| 22 |
+
### Real Estate - USA
|
| 23 |
+
```
|
| 24 |
+
Hi {name}! I came across your profile while researching top agents in {city}.
|
| 25 |
+
I help real estate professionals automate their lead generation.
|
| 26 |
+
Would you be interested in a quick 5-min chat?
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
### Real Estate - Venezuela
|
| 30 |
+
```
|
| 31 |
+
¡Hola {name}! Vi tu perfil mientras buscaba los mejores agentes en {city}.
|
| 32 |
+
Ayudo a profesionales inmobiliarios a automatizar su captación de clientes.
|
| 33 |
+
¿Te interesaría una llamada rápida de 5 minutos?
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
### Insurance - USA
|
| 37 |
+
```
|
| 38 |
+
Hi {name}! I noticed your insurance agency in {city} has great reviews.
|
| 39 |
+
I work with insurance professionals to help them get more qualified leads.
|
| 40 |
+
Would you be open to a brief conversation?
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
### Insurance - Venezuela
|
| 44 |
+
```
|
| 45 |
+
¡Hola {name}! Noté que tu agencia de seguros en {city} tiene excelentes reseñas.
|
| 46 |
+
Trabajo con profesionales de seguros para ayudarles a conseguir más clientes.
|
| 47 |
+
¿Estarías abierto a una breve conversación?
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
---
|
| 51 |
+
|
| 52 |
+
## Templates de Email
|
| 53 |
+
|
| 54 |
+
### Subject Lines
|
| 55 |
+
- "Quick question about {city} real estate"
|
| 56 |
+
- "Idea for your {niche} business"
|
| 57 |
+
- "Saw your great reviews in {city}"
|
| 58 |
+
|
| 59 |
+
### Email Body (Real Estate)
|
| 60 |
+
```
|
| 61 |
+
Hi {name},
|
| 62 |
+
|
| 63 |
+
I came across your profile while researching top real estate professionals in {city}.
|
| 64 |
+
|
| 65 |
+
I help agents like yourself automate their lead generation process,
|
| 66 |
+
often resulting in 2-3x more qualified leads without extra effort.
|
| 67 |
+
|
| 68 |
+
Would you be open to a 10-minute call this week to explore if this could work for you?
|
| 69 |
+
|
| 70 |
+
Best regards,
|
| 71 |
+
{sender_name}
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
---
|
| 75 |
+
|
| 76 |
+
## Seguimiento
|
| 77 |
+
- Si no hay respuesta en 48h, enviar follow-up por el otro canal
|
| 78 |
+
- Máximo 3 intentos de contacto por lead
|
| 79 |
+
- Marcar lead como "no interesado" después de 3 intentos sin respuesta
|
lead_gen_pro/l1_directive/scraping_sop.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
L1 Directive: Scraping SOP
===========================

Este documento define las reglas para el agente de scraping.
|
| 4 |
+
|
| 5 |
+
## Objetivo
|
| 6 |
+
Extraer leads de negocios (bienes raíces y seguros) de Google Maps y LinkedIn usando Apify.
|
| 7 |
+
|
| 8 |
+
## Parámetros de Entrada
|
| 9 |
+
- **niche**: `real_estate` | `insurance`
|
| 10 |
+
- **country**: `usa` | `venezuela`
|
| 11 |
+
- **cities**: Lista de ciudades objetivo
|
| 12 |
+
- **limit**: Máximo de leads por ciudad (default: 20)
|
| 13 |
+
|
| 14 |
+
## Queries Predefinidas
|
| 15 |
+
|
| 16 |
+
### Real Estate
|
| 17 |
+
| País | Queries |
|
| 18 |
+
|------|---------|
|
| 19 |
+
| USA | "real estate agent", "realtor", "property broker" |
|
| 20 |
+
| Venezuela | "inmobiliaria", "agente inmobiliario", "bienes raices" |
|
| 21 |
+
|
| 22 |
+
### Insurance
|
| 23 |
+
| País | Queries |
|
| 24 |
+
|------|---------|
|
| 25 |
+
| USA | "insurance agent", "insurance broker", "life insurance" |
|
| 26 |
+
| Venezuela | "corredor de seguros", "agente de seguros" |
|
| 27 |
+
|
| 28 |
+
## Reglas de Ejecución
|
| 29 |
+
1. Ejecutar scraping en horario laboral (9am-6pm zona objetivo)
|
| 30 |
+
2. Máximo 100 leads por ejecución para evitar rate limits
|
| 31 |
+
3. Esperar 5 segundos entre cada request a Apify
|
| 32 |
+
4. Guardar todos los leads en database antes de siguiente paso
|
| 33 |
+
|
| 34 |
+
## Campos Requeridos por Lead
|
| 35 |
+
- `name` (obligatorio)
|
| 36 |
+
- `phone` (obligatorio para WhatsApp)
|
| 37 |
+
- `email` (deseable para Instantly)
|
| 38 |
+
- `address`
|
| 39 |
+
- `website`
|
| 40 |
+
- `rating`
|
| 41 |
+
- `reviews_count`
|
| 42 |
+
|
| 43 |
+
## Errores y Retry
|
| 44 |
+
- Si Apify retorna error, esperar 60 segundos y reintentar
|
| 45 |
+
- Máximo 3 reintentos por query
|
| 46 |
+
- Loggear todos los errores en `logs/scraping.log`
|
lead_gen_pro/l2_orchestration/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# __init__.py for l2_orchestration module
|
lead_gen_pro/l2_orchestration/orchestrator.py
ADDED
|
@@ -0,0 +1,319 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
L2 Orchestration: Main Workflow Orchestrator
|
| 3 |
+
=============================================
|
| 4 |
+
Orquestador principal que coordina todo el flujo de lead generation.
|
| 5 |
+
Implementa la capa L2 del framework DOE (lógica sin código de ejecución).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import sys
|
| 10 |
+
import json
|
| 11 |
+
import time
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
from typing import Optional, List, Dict, Any
|
| 14 |
+
|
| 15 |
+
# Add this module's own directory to the import path
|
| 16 |
+
sys.path.insert(0, os.path.dirname(__file__))
|
| 17 |
+
|
| 18 |
+
# Import L3 Execution modules
|
| 19 |
+
from l3_execution.database_doe import init_db, add_lead, get_leads, get_pending_outreach, mark_outreach, get_stats
|
| 20 |
+
from l3_execution.apify_scraper import scrape_leads
|
| 21 |
+
from l3_execution.enrichment import enrich_leads
|
| 22 |
+
from l3_execution.instantly_sender import batch_send_emails, demo_email_preview
|
| 23 |
+
|
| 24 |
+
# Import old WhatsApp module for compatibility
|
| 25 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
|
| 26 |
+
from whatsapp_sender import batch_generate_links
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def load_config() -> Dict[str, Any]:
    """Load the project configuration from ``config.json``.

    The file is looked up next to this module first (the original
    behaviour) and then one directory up, which is where the sibling
    ``orchestrator_pro`` module reads its ``config.json`` from.

    Returns:
        The parsed JSON document, or an empty dict when ``config.json``
        is found in neither location.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    candidates = (
        os.path.join(module_dir, "config.json"),
        os.path.join(module_dir, os.pardir, "config.json"),
    )
    for config_path in candidates:
        if os.path.isfile(config_path):
            # Decode explicitly as UTF-8 so non-ASCII values do not depend
            # on the platform's default locale encoding.
            with open(config_path, "r", encoding="utf-8") as f:
                return json.load(f)
    return {}
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def print_header(title: str):
    """Write a banner to stdout.

    The banner is a blank line, a 60-character ``=`` rule, the title
    prefixed by one space, the same rule again, and a trailing blank line.

    Args:
        title: Text shown between the two rules.
    """
    rule = "=" * 60
    # Leading and trailing empty entries produce the surrounding blank lines.
    banner = "\n".join(["", rule, f" {title}", rule, ""])
    print(banner)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def run_full_pipeline(
    niche: str = "real_estate",
    country: str = "usa",
    location: Optional[str] = None,
    limit_per_city: int = 10,
    demo: bool = False
) -> Dict[str, Any]:
    """
    Execute the complete lead generation pipeline:
    1. Scrape leads
    2. Enrich data
    3. Store in database
    4. Generate outreach

    Args:
        niche: 'real_estate' or 'insurance'
        country: 'usa' or 'venezuela'
        location: Optional location string forwarded unchanged to
            ``scrape_leads``.
        limit_per_city: Leads per city
        demo: Accepted for interface compatibility; this function does
            not currently read it.

    Returns:
        Pipeline results dictionary with two keys: ``"steps"`` (one entry
        per completed step, in execution order) and ``"stats"`` (the value
        of ``get_stats()``; left empty when scraping yields no leads).
    """
    # The configuration is not needed here, so it is deliberately not
    # loaded: a broken config.json must not abort a run that never uses it.
    results = {"steps": [], "stats": {}}

    print_header(f"🚀 PIPELINE DOE: {niche.upper()} / {country.upper()}")

    # Step 1: Scraping
    print("📍 PASO 1: Scraping de Google Maps")
    print("-" * 40)

    leads = scrape_leads(
        niche=niche,
        country=country,
        location=location,
        limit_per_city=limit_per_city
    )

    results["steps"].append({
        "step": "scraping",
        "leads_found": len(leads)
    })

    if not leads:
        # Nothing to enrich or store; return the partial result early.
        print("⚠️ No se encontraron leads. Verifica tu APIFY_TOKEN.")
        return results

    # Step 2: Enrichment
    print("\n📍 PASO 2: Enriquecimiento de datos")
    print("-" * 40)

    enriched_leads = enrich_leads(leads, delay=0.5)

    results["steps"].append({
        "step": "enrichment",
        # Counts leads that have a truthy "email" after enrichment.
        "enriched_count": sum(1 for lead in enriched_leads if lead.get("email"))
    })

    # Step 3: Store in database
    print("\n📍 PASO 3: Guardando en base de datos")
    print("-" * 40)

    added_count = 0
    for lead in enriched_leads:
        try:
            lead_id = add_lead(lead)
            added_count += 1
            print(f" ✅ ID:{lead_id} - {lead.get('name')}")
        except Exception as e:
            # Best effort: report the failure and keep storing the rest.
            print(f" ⚠️ Error: {e}")

    results["steps"].append({
        "step": "database",
        "added_count": added_count
    })

    # Step 4: Generate outreach
    print("\n📍 PASO 4: Preparando outreach")
    print("-" * 40)

    # Get pending leads
    pending_wa = get_pending_outreach("whatsapp", limit=10)
    pending_email = get_pending_outreach("email", limit=10)

    print(f" 📱 WhatsApp pendientes: {len(pending_wa)}")
    print(f" 📧 Email pendientes: {len(pending_email)}")

    # Generate WhatsApp links (only counted here, not written back)
    if pending_wa:
        wa_links = batch_generate_links(pending_wa)
        results["steps"].append({
            "step": "whatsapp_links",
            "count": len(wa_links)
        })

    # Show summary
    print_header("📊 RESUMEN DEL PIPELINE")

    stats = get_stats()
    results["stats"] = stats

    print(f" Total leads en DB: {stats.get('total_leads', 0)}")
    print(f" Con email: {stats.get('with_email', 0)}")
    print(f" Enriquecidos: {stats.get('enriched', 0)}")
    print(f" WhatsApp enviados: {stats.get('whatsapp_sent', 0)}")
    print(f" Emails enviados: {stats.get('email_sent', 0)}")

    return results
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def run_scraping_only(
    niche: str = "real_estate",
    country: str = "usa",
    location: Optional[str] = None,
    limit: int = 10
) -> int:
    """Scrape, enrich and store leads without generating any outreach.

    Args:
        niche: Niche forwarded to ``scrape_leads``.
        country: Country forwarded to ``scrape_leads``.
        location: Optional location forwarded to ``scrape_leads``; the
            header shows 'default cities' when it is not given.
        limit: Forwarded to ``scrape_leads`` as ``limit_per_city``.

    Returns:
        Number of leads for which ``add_lead`` completed without raising
        (0 when scraping returned nothing).
    """
    print_header(f"🔍 SCRAPING: {niche.upper()} / {country.upper()} ({location or 'default cities'})")

    leads = scrape_leads(niche=niche, country=country, location=location, limit_per_city=limit)

    if not leads:
        print("⚠️ No se encontraron leads")
        return 0

    # Enrich
    enriched = enrich_leads(leads)

    # Save to DB
    added = 0
    for lead in enriched:
        try:
            add_lead(lead)
            added += 1
        except Exception as e:
            # Still best effort (one bad lead must not stop the batch), but
            # report the failure instead of discarding it silently.
            print(f" ⚠️ Error: {e}")

    print(f"\n✅ {added} leads agregados a la base de datos")
    return added
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def run_whatsapp_outreach(limit: int = 10) -> List[Dict]:
    """Build WhatsApp links for leads still awaiting WhatsApp outreach.

    Args:
        limit: Maximum number of pending leads requested from
            ``get_pending_outreach``.

    Returns:
        The list returned by ``batch_generate_links`` for the pending
        leads, or an empty list when nothing is pending.
    """
    print_header("📱 OUTREACH: WhatsApp")

    queue = get_pending_outreach("whatsapp", limit=limit)
    if not queue:
        print("✅ No hay leads pendientes de WhatsApp")
        return []

    print(f"📋 {len(queue)} leads pendientes\n")

    outcomes = batch_generate_links(queue)

    for position, outcome in enumerate(outcomes):
        if not outcome.get("success"):
            continue

        # Results are matched to leads by position in the batch.
        lead = queue[position]
        lead_id = lead["id"]
        link = outcome["link"]

        # Persist the generated link against the lead.
        mark_outreach(lead_id, "whatsapp", link)

        print(f"\n📱 {lead.get('name')}")
        print(f" 📞 {outcome.get('phone')}")
        print(f" 🔗 {link[:60]}...")

    return outcomes
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def run_email_outreach(limit: int = 10, preview_only: bool = True) -> List[Dict]:
    """Preview or send e-mails for leads still awaiting e-mail outreach.

    Args:
        limit: Maximum number of pending leads requested from
            ``get_pending_outreach``.
        preview_only: When true, only the first three pending leads are
            passed to ``demo_email_preview`` and nothing is sent.

    Returns:
        An empty list when nothing is pending or in preview mode;
        otherwise whatever ``batch_send_emails`` returns.
    """
    print_header("📧 OUTREACH: Email (Instantly.ai)")

    queue = get_pending_outreach("email", limit=limit)
    if not queue:
        print("✅ No hay leads pendientes de email")
        return []

    print(f"📋 {len(queue)} leads pendientes\n")

    # Sender name comes from config["sender"]["name"], with a placeholder fallback.
    sender_section = load_config().get("sender", {})
    sender_name = sender_section.get("name", "Your Name")

    if not preview_only:
        return batch_send_emails(queue, sender_name=sender_name)

    print("🔍 MODO PREVIEW (no se envían emails)\n")
    for lead in queue[:3]:
        demo_email_preview(lead, sender_name)
    return []
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def show_dashboard():
    """Print a plain-text summary of the values returned by ``get_stats()``.

    Shows lead totals, outreach counters, the per-niche and per-country
    breakdowns and, when the opportunity count is positive, the number of
    opportunities with their total value. Missing counters display as 0.
    """
    print_header("📊 DASHBOARD")

    stats = get_stats()

    def metric(key):
        # Counters absent from the stats mapping are shown as zero.
        return stats.get(key, 0)

    print(f"📈 Total Leads: {metric('total_leads')}")
    print(f"📧 Con Email: {metric('with_email')}")
    print(f"✨ Enriquecidos: {metric('enriched')}")

    print(f"\n📤 Outreach:")
    print(f" WhatsApp: {metric('whatsapp_sent')}")
    print(f" Email: {metric('email_sent')}")
    print(f" Respuestas: {metric('replied')}")

    # Both breakdown sections share one layout: heading, then "label: count".
    for heading, section in (
        (f"\n🏷️ Por Nicho:", "by_niche"),
        (f"\n🌎 Por País:", "by_country"),
    ):
        print(heading)
        for label, count in stats.get(section, {}).items():
            print(f" {label}: {count}")

    opp = stats.get("opportunities", {})
    if opp.get("count", 0) > 0:
        print(f"\n💰 Oportunidades: {opp['count']} (${opp.get('total_value', 0):,.2f})")
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
if __name__ == "__main__":
    # Command-line entry point: parse the flags, initialise the database,
    # then run the first action whose switch was given.
    import argparse

    parser = argparse.ArgumentParser(description="Lead Generation DOE Orchestrator")

    # Boolean action switches, registered in the order they appear in --help.
    for flag, description in (
        ("--pipeline", "Run full pipeline"),
        ("--scrape", "Run scraping only"),
        ("--outreach-wa", "Generate WhatsApp links"),
        ("--outreach-email", "Send/preview emails"),
        ("--dashboard", "Show dashboard"),
        ("--streamlit", "Launch Streamlit dashboard"),
    ):
        parser.add_argument(flag, action="store_true", help=description)

    # Options shared by the scraping-related actions.
    parser.add_argument("--niche", choices=["real_estate", "insurance"], default="real_estate")
    parser.add_argument("--country", choices=["usa", "venezuela"], default="usa")
    parser.add_argument("--location", type=str, help="Specific City, State or ZIP")
    parser.add_argument("--limit", type=int, default=10)
    parser.add_argument("--demo", action="store_true", help="Demo mode")

    args = parser.parse_args()

    # The database is initialised before any action, including the usage text.
    init_db()

    if args.pipeline:
        run_full_pipeline(
            niche=args.niche,
            country=args.country,
            location=args.location,
            limit_per_city=args.limit,
            demo=args.demo,
        )
    elif args.scrape:
        run_scraping_only(
            niche=args.niche,
            country=args.country,
            location=args.location,
            limit=args.limit,
        )
    elif args.outreach_wa:
        run_whatsapp_outreach(limit=args.limit)
    elif args.outreach_email:
        # The CLI only ever previews e-mails; it never sends them.
        run_email_outreach(limit=args.limit, preview_only=True)
    elif args.dashboard:
        show_dashboard()
    elif args.streamlit:
        import subprocess
        dashboard_path = os.path.join(os.path.dirname(__file__), "dashboard", "app.py")
        subprocess.run(["streamlit", "run", dashboard_path])
    else:
        # No action switch given: list the available commands.
        for usage_line in (
            "Comandos disponibles:",
            " --pipeline Ejecutar pipeline completo",
            " --scrape Solo scraping",
            " --outreach-wa Generar links WhatsApp",
            " --outreach-email Preview emails",
            " --dashboard Dashboard texto",
            " --streamlit Dashboard web",
        ):
            print(usage_line)
|
lead_gen_pro/l2_orchestration/orchestrator_pro.py
ADDED
|
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
L2 Orchestration: Pro Workflow Orchestrator - COMPLETAMENTE COMENTADO
|
| 3 |
+
======================================================================
|
| 4 |
+
|
| 5 |
+
Este es el orquestador avanzado (Capa L2) que coordina todo el flujo de trabajo
|
| 6 |
+
del sistema de generación de leads. Implementa el pipeline completo con múltiples
|
| 7 |
+
etapas de procesamiento.
|
| 8 |
+
|
| 9 |
+
AUTOR: Lead Generation PRO System
|
| 10 |
+
FECHA: 2025
|
| 11 |
+
VERSION: 2.0
|
| 12 |
+
|
| 13 |
+
ARQUITECTURA:
|
| 14 |
+
- Nivel: L2 (Orquestación)
|
| 15 |
+
- Patrón: Pipeline de procesamiento
|
| 16 |
+
- Integra: Scrapers L3, Scoring L3, Database L3
|
| 17 |
+
|
| 18 |
+
FLUJO DEL PIPELINE:
|
| 19 |
+
1. Scraping B2B (Empresas) → LinkedIn, Google Maps, YouTube, etc.
|
| 20 |
+
2. Scraping B2C (Consumidores) → Twitter, Reddit, YouTube, Facebook
|
| 21 |
+
3. Higienización → Deduplicación de leads
|
| 22 |
+
4. Scoring → Calificación de leads (MQL/SQL)
|
| 23 |
+
5. Almacenamiento → SQLite database
|
| 24 |
+
6. Webhooks → Notificación a sistemas externos (Make.com/n8n)
|
| 25 |
+
|
| 26 |
+
DEPENDENCIAS:
|
| 27 |
+
- l3_execution.apify_pro_scraper: Para scraping B2B
|
| 28 |
+
- l3_execution.consumer_intent_scraper: Para scraping B2C
|
| 29 |
+
- l3_execution.lead_scoring: Para calificación de leads
|
| 30 |
+
- l3_execution.database_doe: Para persistencia de datos
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
# =============================================================================
|
| 34 |
+
# IMPORTS
|
| 35 |
+
# =============================================================================
|
| 36 |
+
|
| 37 |
+
import os # Operaciones con sistema de archivos
|
| 38 |
+
import sys # Manipulación del path de Python
|
| 39 |
+
import json # Lectura/escritura de JSON (configuración)
|
| 40 |
+
import requests # Para enviar webhooks HTTP
|
| 41 |
+
from datetime import datetime # Manejo de fechas y timestamps
|
| 42 |
+
from typing import Optional, List, Dict, Any # Type hints para claridad
|
| 43 |
+
|
| 44 |
+
# =============================================================================
|
| 45 |
+
# CONFIGURACIÓN DE PATHS
|
| 46 |
+
# =============================================================================
|
| 47 |
+
|
| 48 |
+
# Añadir paths para módulos pro
|
| 49 |
+
# Esto permite importar módulos desde directorios hermanos
|
| 50 |
+
sys.path.insert(0, os.path.dirname(__file__)) # Directorio actual
|
| 51 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) # Directorio padre
|
| 52 |
+
|
| 53 |
+
# =============================================================================
|
| 54 |
+
# IMPORTS DE MÓDULOS L3 (Capa de Ejecución)
|
| 55 |
+
# =============================================================================
|
| 56 |
+
|
| 57 |
+
# Scraper B2B: Extrae información de empresas desde múltiples plataformas
|
| 58 |
+
from l3_execution.apify_pro_scraper import ProScraper
|
| 59 |
+
|
| 60 |
+
# Scraper B2C: Detecta intención de compra en redes sociales
|
| 61 |
+
from l3_execution.consumer_intent_scraper import scrape_b2c_leads
|
| 62 |
+
|
| 63 |
+
# Scoring: Califica leads según múltiples criterios
|
| 64 |
+
from l3_execution.lead_scoring import LeadScorer
|
| 65 |
+
|
| 66 |
+
# Database: Persistencia de leads en SQLite
|
| 67 |
+
from l3_execution.database_doe import add_lead, init_db
|
| 68 |
+
|
| 69 |
+
# =============================================================================
|
| 70 |
+
# CLASE PRINCIPAL: ProOrchestrator
|
| 71 |
+
# =============================================================================
|
| 72 |
+
|
| 73 |
+
class ProOrchestrator:
|
| 74 |
+
"""
|
| 75 |
+
Orquestador principal del pipeline de generación de leads.
|
| 76 |
+
|
| 77 |
+
Esta clase coordina todas las operaciones del sistema:
|
| 78 |
+
- Scraping de múltiples fuentes (B2B y B2C)
|
| 79 |
+
- Procesamiento y calificación de datos
|
| 80 |
+
- Almacenamiento persistente
|
| 81 |
+
- Notificaciones a sistemas externos
|
| 82 |
+
|
| 83 |
+
ATRIBUTOS:
|
| 84 |
+
scraper (ProScraper): Instancia del scraper B2B
|
| 85 |
+
scorer (LeadScorer): Instancia del sistema de scoring
|
| 86 |
+
config (dict): Configuración cargada desde config.json
|
| 87 |
+
"""
|
| 88 |
+
|
| 89 |
+
def __init__(self):
    """
    Build the collaborators used by the pipeline.

    Sets, in this order:
        1. ``self.scraper`` - a ``ProScraper`` instance
        2. ``self.scorer``  - a ``LeadScorer`` instance
        3. ``self.config``  - the result of ``self.load_config()``
    """
    # B2B scraper (class imported from l3_execution.apify_pro_scraper)
    self.scraper = ProScraper()

    # Lead scoring component (class imported from l3_execution.lead_scoring)
    self.scorer = LeadScorer()

    # Configuration parsed from the JSON config file
    self.config = self.load_config()
|
| 106 |
+
|
| 107 |
+
def load_config(self):
    """
    Load the system configuration from ``config.json``.

    The file is expected one directory above this module.

    Returns:
        dict: The parsed configuration, or an empty dict when the file
        does not exist.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
    """
    # config.json lives in the parent directory of this module.
    path = os.path.join(os.path.dirname(__file__), "..", "config.json")

    if os.path.exists(path):
        # Decode explicitly as UTF-8 so non-ASCII values do not depend on
        # the platform's default locale encoding.
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    # No configuration file: fall back to an empty configuration.
    return {}
|
| 129 |
+
|
| 130 |
+
def send_webhook(self, lead: Dict[str, Any]):
    """
    POST a lead as JSON to the configured external webhook.

    The target URL is read from ``self.config["webhooks"]["new_lead"]``.
    When no URL is configured the call is a no-op. Delivery is best
    effort: any failure, including an HTTP 4xx/5xx response, is printed
    and never raised, so it cannot stop the calling pipeline.

    Args:
        lead (dict): Lead data sent as the JSON request body.
    """
    webhook_url = self.config.get("webhooks", {}).get("new_lead")

    # Nothing to do when no webhook is configured.
    if not webhook_url:
        return

    try:
        # The 5-second timeout keeps a slow endpoint from blocking the pipeline.
        response = requests.post(webhook_url, json=lead, timeout=5)
        # An HTTP error status is also a failed delivery; raise it so that
        # it is reported below instead of passing as a success.
        response.raise_for_status()
    except Exception as e:
        # Report the problem but do not stop the pipeline.
        print(f"⚠️ Error enviando webhook: {e}")
|
| 156 |
+
|
| 157 |
+
def run_pro_pipeline(
    self,
    niche: str,
    region: str,
    location_data: Dict[str, str] = None,
    lead_type: str = "both",
    b2b_platforms: List[str] = None,
    b2c_platforms: List[str] = None,
    limit: int = 20
):
    """
    Run the whole lead pipeline: scrape, deduplicate, score, store, notify.

    Args:
        niche (str): Industry to search for (e.g. "Insurance").
        region (str): Legacy region string; used for display and as the
            B2B city fallback when ``location_data`` is not given.
        location_data (dict): Optional mapping with 'city', 'state',
            'country'. Its non-empty values, joined by ", ", replace
            ``region`` as the displayed location.
        lead_type (str): 'b2b', 'b2c' or 'both'.
        b2b_platforms (list): B2B platforms to scan; defaults to
            ["linkedin", "google_maps"].
        b2c_platforms (list): B2C platforms to scan; defaults to
            ["youtube", "reddit", "twitter"].
        limit (int): Result limit per platform. In 'both' mode the B2C
            scraper receives half of it.

    Returns:
        The value returned by ``self.scorer.process_batch`` for the
        deduplicated leads.
    """

    # ---- Step 1: resolve the location label used in logs and records ----
    display_location = region
    if location_data:
        filled = [value for value in location_data.values() if value]
        if filled:
            display_location = ", ".join(filled)

    print(f"🚀 INICIANDO PIPELINE PRO: {niche} en {display_location}")
    print(f" Tipo de búsqueda: {lead_type.upper()}")

    def build_b2b_lead(item, source):
        # Normalise one raw B2B record into the common lead shape.
        return {
            "name": item.get("name") or item.get("username") or item.get("title"),
            "email": item.get("email"),
            "phone": item.get("phone") or item.get("phoneNumber"),
            "linkedin_url": item.get("url") if source == "linkedin" else item.get("linkedin_url"),
            "company": item.get("company") or item.get("companyName"),
            "location": display_location,
            "city": location_data.get("city") if location_data else region.split(",")[0].strip(),
            "source_platform": source,
            "niche": niche,
            "type": "B2B",
            "scraped_at": datetime.now().isoformat()
        }

    def build_b2c_lead(item):
        # Normalise one raw B2C record; e-mail and phone are never filled
        # and the post URL is stored in the "linkedin_url" field.
        return {
            "name": item.get("username"),
            "email": None,
            "phone": None,
            "linkedin_url": item.get("url"),
            "company": "Consumer (B2C)",
            "location": display_location,
            "city": location_data.get("city") if location_data else "Unknown",
            "source_platform": item.get("platform"),
            "niche": niche,
            "type": "B2C",
            "intent_score": item.get("intent_score"),
            "content_snippet": item.get("content"),
            "scraped_at": datetime.now().isoformat()
        }

    collected = []

    # ---- Step 2: B2B scraping (businesses) ----
    if lead_type in ("b2b", "both"):
        print("\n--- B2B SCRAPING (Empresas) ---")

        b2b_platforms = b2b_platforms or ["linkedin", "google_maps"]
        print(f" Plataformas B2B: {', '.join(b2b_platforms)}")

        for platform in b2b_platforms:
            try:
                raw_data = self._scrape_b2b_platform(
                    platform=platform,
                    niche=niche,
                    location=display_location,
                    location_data=location_data,
                    limit=limit
                )

                for source, items in raw_data.items():
                    for item in items:
                        collected.append(build_b2b_lead(item, source))

                print(f"✅ {platform}: {len(raw_data.get(platform, []))} leads encontrados")
            except Exception as e:
                # One failing platform must not prevent the others from running.
                print(f"⚠️ Error en {platform}: {e}")

    # ---- Step 3: B2C scraping (consumers / purchase intent) ----
    if lead_type in ("b2c", "both"):
        print("\n--- B2C SCRAPING (Consumidores) ---")

        b2c_platforms = b2c_platforms or ["youtube", "reddit", "twitter"]
        print(f" Plataformas B2C: {', '.join(b2c_platforms)}")

        # Insurance niches use fixed Spanish phrases; others use generic ones.
        niche_lower = niche.lower()
        if "insurance" in niche_lower or "seguro" in niche_lower:
            keywords = ["necesito seguro", "busco seguro"]
        else:
            keywords = [f"looking for {niche}", f"need {niche}"]

        # In 'both' mode the B2C side gets half of the per-platform limit.
        per_platform = limit//2 if lead_type == "both" else limit

        b2c_leads = scrape_b2c_leads(
            platforms=b2c_platforms,
            keywords=keywords,
            location=location_data,
            limit_per_platform=per_platform
        )

        for item in b2c_leads:
            collected.append(build_b2c_lead(item))

        print(f"✅ B2C: {len(b2c_leads)} leads encontrados")

    # ---- Step 4: deduplication ----
    # Key is the e-mail when present, otherwise the name; leads with neither
    # are dropped. A later lead with the same key replaces the earlier one
    # but keeps the earlier one's position.
    by_key = {}
    for lead in collected:
        key = lead.get("email") or lead.get("name")
        if key:
            by_key[key] = lead
    unique_leads = list(by_key.values())
    print(f"\n🧹 Leads después de deduplicar: {len(unique_leads)}")

    # ---- Step 5: scoring ----
    scored_leads = self.scorer.process_batch(unique_leads)

    # ---- Step 6: storage and notification ----
    for lead in scored_leads:
        try:
            add_lead(lead)

            # Only leads in the "SQL (Hot)" tier are pushed to the webhook.
            if lead.get("tier") == "SQL (Hot)":
                print(f"🔥 LEAD CALIFICADO: {lead['name']} ({lead['score']} pts)")
                self.send_webhook(lead)
        except Exception as e:
            # Report the problem and continue with the remaining leads.
            print(f"⚠️ Error guardando lead: {e}")

    # ---- Step 7: final summary ----
    print(f"\n✅ Pipeline Pro completado. {len(scored_leads)} leads procesados.")
    return scored_leads
|
| 364 |
+
|
| 365 |
+
def _scrape_b2b_platform(
|
| 366 |
+
self,
|
| 367 |
+
platform: str,
|
| 368 |
+
niche: str,
|
| 369 |
+
location: str,
|
| 370 |
+
location_data: Dict[str, str],
|
| 371 |
+
limit: int
|
| 372 |
+
) -> Dict[str, List[Dict]]:
|
| 373 |
+
"""
|
| 374 |
+
MÉTODO PRIVADO: Scrapea una plataforma B2B específica.
|
| 375 |
+
|
| 376 |
+
Este método actúa como dispatcher que llama al método apropiado
|
| 377 |
+
del ProScraper según la plataforma seleccionada.
|
| 378 |
+
|
| 379 |
+
PARÁMETROS:
|
| 380 |
+
platform (str): Nombre de la plataforma (linkedin, google_maps, etc.)
|
| 381 |
+
niche (str): Nicho/industria a buscar
|
| 382 |
+
location (str): Ubicación formateada para mostrar
|
| 383 |
+
location_data (dict): Datos de ubicación estructurados
|
| 384 |
+
limit (int): Límite de resultados
|
| 385 |
+
|
| 386 |
+
RETORNA:
|
| 387 |
+
dict: Diccionario con los resultados organizados por fuente
|
| 388 |
+
|
| 389 |
+
NOTA:
|
| 390 |
+
Cada plataforma tiene diferentes parámetros y formatos de búsqueda.
|
| 391 |
+
Este método normaliza las llamadas al scraper.
|
| 392 |
+
"""
|
| 393 |
+
|
| 394 |
+
# Inicializar diccionario de resultados
|
| 395 |
+
results = {}
|
| 396 |
+
|
| 397 |
+
# -----------------------------------------------------------------
|
| 398 |
+
# LINKEDIN: Buscar profesionales
|
| 399 |
+
# -----------------------------------------------------------------
|
| 400 |
+
if platform == "linkedin":
|
| 401 |
+
# Buscar dueños y directores del nicho especificado
|
| 402 |
+
items = self.scraper.scrape_linkedin(
|
| 403 |
+
keywords=[f"{niche} owner", f"{niche} director"],
|
| 404 |
+
location=location,
|
| 405 |
+
limit=limit
|
| 406 |
+
)
|
| 407 |
+
results["linkedin"] = items
|
| 408 |
+
|
| 409 |
+
# -----------------------------------------------------------------
|
| 410 |
+
# GOOGLE MAPS: Buscar negocios locales
|
| 411 |
+
# -----------------------------------------------------------------
|
| 412 |
+
elif platform == "google_maps":
|
| 413 |
+
# Buscar negocios del nicho en la ubicación especificada
|
| 414 |
+
items = self.scraper.scrape_google_maps(
|
| 415 |
+
search_terms=[f"{niche} in {location}"],
|
| 416 |
+
max_crawled_places=limit
|
| 417 |
+
)
|
| 418 |
+
results["google_maps"] = items
|
| 419 |
+
|
| 420 |
+
# -----------------------------------------------------------------
|
| 421 |
+
# YOUTUBE: Buscar canales relacionados
|
| 422 |
+
# -----------------------------------------------------------------
|
| 423 |
+
elif platform == "youtube":
|
| 424 |
+
# Buscar videos relacionados con el nicho en la ubicación
|
| 425 |
+
items = self.scraper.scrape_youtube(
|
| 426 |
+
search_queries=[f"best {niche} in {location}", f"{niche} tips {location}"],
|
| 427 |
+
max_results=limit // 2 # Menor límite para YouTube
|
| 428 |
+
)
|
| 429 |
+
results["youtube"] = items
|
| 430 |
+
|
| 431 |
+
# -----------------------------------------------------------------
|
| 432 |
+
# TIKTOK: Buscar por hashtags
|
| 433 |
+
# -----------------------------------------------------------------
|
| 434 |
+
elif platform == "tiktok":
|
| 435 |
+
# Scrapear por hashtags del nicho y ubicación
|
| 436 |
+
items = self.scraper.scrape_tiktok(
|
| 437 |
+
hashtags=[niche.replace(" ", ""), location.split(",")[0].lower()],
|
| 438 |
+
limit=limit // 2
|
| 439 |
+
)
|
| 440 |
+
results["tiktok"] = items
|
| 441 |
+
|
| 442 |
+
# -----------------------------------------------------------------
|
| 443 |
+
# INSTAGRAM: Similar a TikTok (usa hashtags)
|
| 444 |
+
# -----------------------------------------------------------------
|
| 445 |
+
elif platform == "instagram":
|
| 446 |
+
# Instagram scraper usa hashtags similar a TikTok
|
| 447 |
+
items = self.scraper.scrape_tiktok(
|
| 448 |
+
hashtags=[niche.replace(" ", ""), location.split(",")[0].lower()],
|
| 449 |
+
limit=limit // 2
|
| 450 |
+
)
|
| 451 |
+
results["instagram"] = items
|
| 452 |
+
|
| 453 |
+
return results
|
| 454 |
+
|
| 455 |
+
# =============================================================================
|
| 456 |
+
# PUNTO DE ENTRADA PARA PRUEBAS
|
| 457 |
+
# =============================================================================
|
| 458 |
+
|
| 459 |
+
if __name__ == "__main__":
    # Direct-execution smoke test of the orchestrator. Runs only when this
    # file is executed as a script, never when it is imported as a module.

    # The database must be initialised before the orchestrator is built.
    init_db()
    pipeline_runner = ProOrchestrator()

    banner = "=" * 70
    print("\n" + banner)
    print("EJECUCIÓN DE PRUEBA DEL PIPELINE")
    print(banner)

    # Sample run with small, fixed parameters.
    sample_run = {
        "niche": "Real Estate",
        "region": "CA Bay Area",
        "lead_type": "both",
        "limit": 10,
    }
    pipeline_runner.run_pro_pipeline(**sample_run)
|
| 483 |
+
|
| 484 |
+
# =============================================================================
|
| 485 |
+
# FIN DEL ARCHIVO
|
| 486 |
+
# =============================================================================
|
lead_gen_pro/l3_execution/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# __init__.py for l3_execution module
|
lead_gen_pro/l3_execution/apify_pro_scraper.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
L3 Execution: Lead Gen Pro Scraper (Multi-platform)
|
| 3 |
+
==================================================
|
| 4 |
+
Scraper avanzado que integra LinkedIn, YouTube, TikTok e Instagram vía Apify API.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import json
|
| 9 |
+
import time
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
from typing import Optional, List, Dict, Any
|
| 12 |
+
from apify_client import ApifyClient
|
| 13 |
+
|
| 14 |
+
# Apify actor identifiers, keyed by the platform each actor scrapes.
PRO_ACTORS = dict(
    linkedin="trudax/linkedin-people-search-scraper",  # replaced with a valid public actor
    youtube="clank/youtube-scraper",
    tiktok="clockworks/tiktok-scraper",
    instagram="jaroslavsemanko/instagram-scraper",
    google_maps="compass/crawler-google-places",
)
|
| 22 |
+
|
| 23 |
+
def get_apify_token():
    """
    Return the Apify API token, or None when none is configured.

    Lookup order (unchanged): the ``apify_token`` key of ``../config.json``
    relative to this file, then the ``APIFY_TOKEN`` environment variable.

    The environment variable is also used when config.json exists but has no
    token or cannot be read/parsed, so a container that ships a token-less
    config file can still be configured through its environment.
    """
    config_path = os.path.join(os.path.dirname(__file__), "..", "config.json")
    if os.path.exists(config_path):
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                token = json.load(f).get("apify_token")
        except (OSError, ValueError, AttributeError) as e:
            # ValueError covers malformed JSON; AttributeError a non-object root.
            print(f"⚠️ No se pudo leer config.json: {e}")
            token = None
        if token:
            return token
    return os.environ.get("APIFY_TOKEN")
|
| 29 |
+
|
| 30 |
+
class ProScraper:
    """
    Multi-platform lead scraper that runs Apify actors listed in PRO_ACTORS.

    Every ``scrape_*`` method returns a list of raw dataset items. It returns
    an empty list when no client is available (no token) or when the actor run
    fails, so one failing platform does not abort a multi-platform run.
    """

    def __init__(self):
        # Without a token no client is built and every scrape returns [].
        self.token = get_apify_token()
        self.client = ApifyClient(self.token) if self.token else None

    def _run_actor(self, platform: str, label: str, run_input: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run the actor registered for ``platform`` and collect its dataset items.

        Any exception is reported with ``label`` and turned into an empty list.
        """
        try:
            run = self.client.actor(PRO_ACTORS[platform]).call(run_input=run_input)
            return list(self.client.dataset(run["defaultDatasetId"]).iterate_items())
        except Exception as e:
            print(f"⚠️ Error en {label} Scraper: {e}")
            return []

    def scrape_linkedin(self, keywords: List[str], location: str, limit: int = 50):
        """Search LinkedIn for each keyword combined with ``location``."""
        if not self.client:
            return []
        print(f"🔗 Scraping LinkedIn para: {keywords} en {location}...")

        # Keyword and location are merged into one search term so that no
        # LinkedIn geo URN is needed (per the original author's note, the
        # actor takes a 'searchTerms' list).
        combined_queries = [f"{k} {location}" for k in keywords]
        return self._run_actor("linkedin", "LinkedIn", {
            "searchTerms": combined_queries,
            "limit": limit,
        })

    def scrape_youtube(self, search_queries: List[str], max_results: int = 20):
        """Run the YouTube actor for the given search queries."""
        if not self.client:
            return []
        print(f"🎬 Scraping YouTube para: {search_queries}...")
        return self._run_actor("youtube", "YouTube", {
            "searchQueries": search_queries,
            "maxResults": max_results,
            "exportSubtitles": False,
        })

    def scrape_tiktok(self, hashtags: List[str], limit: int = 20):
        """Run the TikTok actor for the given hashtags.

        Hashtags are normalised first: whitespace and a leading '#' are
        removed and empty entries are dropped. With nothing left to search
        for, no actor run is started and [] is returned.
        """
        if not self.client:
            return []
        cleaned = ["".join(str(tag).split()).lstrip("#") for tag in hashtags]
        cleaned = [tag for tag in cleaned if tag]
        if not cleaned:
            return []
        print(f"🎵 Scraping TikTok para hashtags: {cleaned}...")
        return self._run_actor("tiktok", "TikTok", {
            "hashtags": cleaned,
            "resultsPerPage": limit,
            "shouldDownloadVideo": False,
        })

    def scrape_google_maps(self, search_terms: List[str], max_crawled_places: int = 20):
        """Run the Google Maps places actor for the given search terms."""
        if not self.client:
            return []
        print(f"🗺️ Scraping Google Maps para: {search_terms}...")
        return self._run_actor("google_maps", "Google Maps", {
            # Same input key that apify_scraper.py uses for this actor.
            "searchStringsArray": search_terms,
            "maxCrawledPlacesPerSearch": max_crawled_places,
            "onlyDataFromTopResult": False,
        })

    def unified_scrape(self, niche: str, region: str, limit: int = 50):
        """Scrape LinkedIn, YouTube, TikTok and Google Maps for one niche/region.

        Returns a dict keyed by platform name. YouTube and TikTok are asked
        for half of ``limit``.
        """
        results = {
            "linkedin": self.scrape_linkedin([f"{niche} owner", f"{niche} director"], region, limit),
            "youtube": self.scrape_youtube([f"best {niche} in {region}", f"{niche} tips {region}"], limit // 2),
            "tiktok": self.scrape_tiktok([niche.replace(" ", ""), region.split(",")[0].lower()], limit // 2),
            "google_maps": self.scrape_google_maps([f"{niche} in {region}"], limit)
        }
        return results
|
| 101 |
+
|
| 102 |
+
if __name__ == "__main__":
    # Quick manual check: report whether a token could be found.
    scraper = ProScraper()
    status_message = (
        "✅ Token detectado. Scraper Pro listo."
        if scraper.token
        else "❌ Token no detectado. Revisa config.json."
    )
    print(status_message)
|
lead_gen_pro/l3_execution/apify_scraper.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
L3 Execution: Apify Google Maps Scraper
|
| 3 |
+
=========================================
|
| 4 |
+
Script determinista para extraer leads de Google Maps usando Apify.
|
| 5 |
+
Implementa la capa L3 del framework DOE.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import json
|
| 10 |
+
import time
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
from typing import Optional, List, Dict, Any
|
| 13 |
+
|
| 14 |
+
# Apify client - will be installed in venv
|
| 15 |
+
try:
|
| 16 |
+
from apify_client import ApifyClient
|
| 17 |
+
APIFY_AVAILABLE = True
|
| 18 |
+
except ImportError:
|
| 19 |
+
APIFY_AVAILABLE = False
|
| 20 |
+
print("⚠️ apify-client no instalado. Ejecuta: pip install apify-client")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Apify actor identifiers (free / low-cost options), keyed by a short alias.
APIFY_ACTORS = dict(
    google_maps_basic="compass/crawler-google-places",
    google_maps_extensive="nwua9Gu5YrADL7ZDj",  # free Google Maps scraper
    google_maps_lead="compass/crawler-google-places",
)
|
| 29 |
+
|
| 30 |
+
# Cities searched per country when the caller gives no explicit location.
DEFAULT_CITIES = {
    "usa": [
        "Miami, FL",
        "Houston, TX",
        "Los Angeles, CA",
        "New York, NY",
        "Chicago, IL",
    ],
    "venezuela": [
        "Caracas",
        "Maracaibo",
        "Valencia",
        "Barquisimeto",
        "Maracay",
    ],
}
|
| 35 |
+
|
| 36 |
+
# Built-in search queries: niche -> country -> list of query strings.
SEARCH_QUERIES = {
    "real_estate": {
        "usa": [
            "real estate agent",
            "realtor",
            "property broker",
        ],
        "venezuela": [
            "inmobiliaria",
            "agente inmobiliario",
            "bienes raices",
        ],
    },
    "insurance": {
        "usa": [
            "insurance agent",
            "insurance broker",
            "life insurance agent",
        ],
        "venezuela": [
            "corredor de seguros",
            "agente de seguros",
            "aseguradora",
        ],
    },
}
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def get_config() -> Dict[str, Any]:
    """
    Load ``../config.json`` (relative to this file) and return its content.

    Returns:
        The parsed JSON document, or an empty dict when the file is missing.

    Raises:
        ValueError: if the file exists but is not valid JSON
            (json.JSONDecodeError is a ValueError subclass).
    """
    config_path = os.path.join(os.path.dirname(__file__), "..", "config.json")
    if not os.path.exists(config_path):
        return {}
    # Read as UTF-8 explicitly: this module's own query lists are Spanish, and
    # the platform default encoding would corrupt accented characters on
    # systems where it is not UTF-8.
    with open(config_path, "r", encoding="utf-8") as f:
        return json.load(f)
|
| 56 |
+
|
| 57 |
+
def get_queries_for_niche(niche: str, country: str) -> List[str]:
    """
    Return the search queries for a niche/country pair.

    Queries found in the config under ``niches.<niche>.queries.<country>``
    take precedence; when that entry is missing or empty the built-in
    SEARCH_QUERIES table is used, and an empty list if it has no entry either.
    """
    niche_settings = get_config().get("niches", {}).get(niche, {})
    configured = niche_settings.get("queries", {}).get(country)
    if configured:
        return configured

    # Nothing usable in the config: fall back to the hard-coded defaults.
    return SEARCH_QUERIES.get(niche, {}).get(country, [])
|
| 67 |
+
|
| 68 |
+
def get_apify_token() -> Optional[str]:
    """
    Resolve the Apify token.

    The ``APIFY_TOKEN`` environment variable wins when it is set and
    non-empty; otherwise the ``apify_token`` key of ``../config.json``
    (relative to this file) is returned. None when neither source exists.
    """
    env_token = os.environ.get("APIFY_TOKEN")
    if env_token:
        return env_token

    config_path = os.path.join(os.path.dirname(__file__), "..", "config.json")
    if not os.path.exists(config_path):
        return None

    with open(config_path, "r") as f:
        return json.load(f).get("apify_token")
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def scrape_google_maps_apify(
    query: str,
    location: str,
    max_results: int = 20,
    actor_id: str = "google_maps_lead"
) -> List[Dict[str, Any]]:
    """
    Search Google Maps through an Apify actor and return normalised results.

    Args:
        query: Search query (e.g. "real estate agent").
        location: Location string (e.g. "Miami, FL").
        max_results: Maximum number of places requested per search.
        actor_id: Alias from APIFY_ACTORS, or a full actor ID used as-is.

    Returns:
        A list of dicts with the keys name, phone, email, address, website,
        rating, reviews_count, place_id and category. When no token is
        configured, the client library is missing, or the run raises, the
        output of ``demo_results(query, location)`` is returned instead.
    """
    token = get_apify_token()

    # Without credentials or client library only demo data can be produced.
    if not token:
        print("❌ APIFY_TOKEN no configurado. Usando modo demo.")
        return demo_results(query, location)
    if not APIFY_AVAILABLE:
        print("❌ apify-client no disponible.")
        return demo_results(query, location)

    # Output field -> raw item keys, tried in order until one is truthy.
    field_sources = (
        ("name", ("title", "name")),
        ("phone", ("phone", "phoneNumber")),
        ("email", ("email",)),
        ("address", ("address", "fullAddress")),
        ("website", ("website", "url")),
        ("rating", ("rating", "totalScore")),
        ("reviews_count", ("reviewsCount", "reviews")),
        ("place_id", ("placeId",)),
        ("category", ("category", "type")),
    )

    def pick(item, keys):
        # Same semantics as chaining ``item.get(a) or item.get(b)``: the first
        # truthy value wins, otherwise the value of the last key is returned.
        value = None
        for key in keys:
            value = item.get(key)
            if value:
                break
        return value

    try:
        client = ApifyClient(token)

        # Aliases are resolved through APIFY_ACTORS; unknown values pass through.
        resolved_actor = APIFY_ACTORS.get(actor_id, actor_id)

        # Input shaped for compass/crawler-google-places.
        payload = {
            "searchStringsArray": [f"{query}"],
            "locationQuery": location,
            "maxCrawledPlacesPerSearch": max_results,
            "language": "en",
            "deeperCityScrape": False,
        }

        print(f"🔍 Buscando: '{query}' en {location}...")

        run = client.actor(resolved_actor).call(run_input=payload)

        results = [
            {field: pick(item, keys) for field, keys in field_sources}
            for item in client.dataset(run["defaultDatasetId"]).iterate_items()
        ]

        print(f"✅ {len(results)} resultados encontrados")
        return results

    except Exception as e:
        print(f"❌ Error en Apify: {e}")
        return demo_results(query, location)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def demo_results(query: str, location: str) -> List[Dict[str, Any]]:
    """
    Build two placeholder businesses for running without the API.

    ``location`` is embedded in each name and address, and ``query`` is used
    as the category of both entries.
    """
    # (index, phone, email, street, website, rating, reviews_count)
    seeds = (
        (1, "+1-555-0101", "demo1@example.com", "123 Main St", "https://demo1.example.com", 4.8, 150),
        (2, "+1-555-0102", "demo2@example.com", "456 Oak Ave", "https://demo2.example.com", 4.5, 89),
    )

    demo_data = []
    for index, phone, email, street, website, rating, reviews in seeds:
        demo_data.append({
            "name": f"Demo Business {index} - {location}",
            "phone": phone,
            "email": email,
            "address": f"{street}, {location}",
            "website": website,
            "rating": rating,
            "reviews_count": reviews,
            "category": query,
        })

    print(f"🎭 Modo demo: {len(demo_data)} resultados generados")
    return demo_data
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def scrape_leads(
    niche: str,
    country: str,
    location: Optional[str] = None,
    limit_per_city: int = 20
) -> List[Dict[str, Any]]:
    """
    Collect leads for a niche in one location or in a country's default cities.

    Args:
        niche: 'real_estate' or 'insurance'.
        country: 'usa' or 'venezuela'.
        location: Specific city, state or ZIP; when omitted, the first three
            DEFAULT_CITIES of ``country`` are searched.
        limit_per_city: Maximum results requested per location/query pair.

    Returns:
        Lead dicts (scraper result plus source, niche, country, city and
        scraped_at). Only results with a phone number are kept, and each
        phone number appears once. Empty when the niche/country pair has no
        queries.
    """
    cities = [location] if location else DEFAULT_CITIES.get(country, [])[:3]

    queries = get_queries_for_niche(niche, country)
    if not queries:
        print(f"❌ No hay queries para {niche}/{country}")
        return []

    collected = []
    known_phones = set()

    for city in cities:
        # Only the first two queries are used per city.
        for query in queries[:2]:
            batch = scrape_google_maps_apify(
                query=query,
                location=city,
                max_results=limit_per_city
            )

            for entry in batch:
                phone = entry.get("phone")
                # The phone number is the dedup key; entries without one are
                # skipped together with the repeats.
                if not phone or phone in known_phones:
                    continue
                known_phones.add(phone)
                collected.append({
                    **entry,
                    "source": "apify_google_maps",
                    "niche": niche,
                    "country": country,
                    "city": city,
                    "scraped_at": datetime.now().isoformat(),
                })

            # Pause between searches (rate limiting).
            time.sleep(2)

    print(f"\n📊 Total leads únicos: {len(collected)}")
    return collected
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
if __name__ == "__main__":
    # Command-line entry point: scrape with the chosen niche/country and print
    # up to five of the leads found.
    import argparse

    cli = argparse.ArgumentParser(description="Apify Google Maps Scraper")
    cli.add_argument("--niche", choices=["real_estate", "insurance"], default="real_estate")
    cli.add_argument("--country", choices=["usa", "venezuela"], default="usa")
    cli.add_argument("--limit", type=int, default=10)
    cli.add_argument("--demo", action="store_true", help="Use demo mode")
    options = cli.parse_args()

    # NOTE(review): --demo only prints this message here; scrape_leads is
    # called the same way either way. Confirm whether it should force demo data.
    if options.demo:
        print("🎭 Ejecutando en modo demo...")

    leads = scrape_leads(
        niche=options.niche,
        country=options.country,
        limit_per_city=options.limit
    )

    print(f"\n📋 Leads encontrados:")
    for lead in leads[:5]:
        print(f" - {lead.get('name')} | {lead.get('phone')} | {lead.get('city')}")
|
lead_gen_pro/l3_execution/automation_engine.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
|
| 5 |
+
class AutomationEngine:
    """
    Evaluates 'if-then' automation rules for leads.

    Rules are read from the ``automation_rules`` list of config.json. Each
    rule is a dict with an ``if`` condition (``field``, ``operator``,
    ``value``) and a ``then`` action (``type`` plus action-specific keys).
    Malformed rules and conditions are treated as "no match" rather than
    raising, so one bad rule cannot break lead processing.
    """

    def __init__(self, config_path=None):
        # Default to the config.json one directory above this module.
        if config_path is None:
            config_path = os.path.join(os.path.dirname(__file__), "..", "config.json")
        self.config_path = config_path
        self.config = self._load_config()

    def _load_config(self):
        """Return the parsed config dict, or {} when missing, unreadable or not a JSON object."""
        try:
            if os.path.exists(self.config_path):
                with open(self.config_path, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
                if isinstance(loaded, dict):
                    return loaded
                print("[Automation] Error loading config: root element is not an object")
        except Exception as e:
            print(f"[Automation] Error loading config: {e}")
        return {}

    def process_new_lead(self, lead_data):
        """
        Evaluate the automation rules for a newly added lead.

        With no rules configured, a built-in default applies: leads whose
        numeric ``quality_score`` is at least 7 are logged.
        """
        rules = self.config.get("automation_rules", [])
        if not rules:
            score = lead_data.get("quality_score", 0)
            # A missing/None/non-numeric score simply does not qualify.
            if isinstance(score, (int, float)) and score >= 7:
                print(f"[Automation] High quality lead detected: {lead_data.get('name')}")
            return

        for rule in rules:
            if not isinstance(rule, dict):
                continue
            if self._evaluate_condition(rule.get("if"), lead_data):
                self._execute_action(rule.get("then"), lead_data)

    def _evaluate_condition(self, condition, data):
        """
        Evaluate a ``{"field", "operator", "value"}`` condition against ``data``.

        Supported operators: ``==``, ``>=``, ``<=`` and ``contains``
        (substring match on the string form of the field). Returns False for
        an empty condition, an unknown operator, a missing field under
        ``contains``, or values that cannot be ordered against each other.
        """
        if not condition:
            return False

        field = condition.get("field")
        op = condition.get("operator")
        val = condition.get("value")

        data_val = data.get(field)

        if op == "==":
            return data_val == val
        if op in (">=", "<="):
            try:
                return data_val >= val if op == ">=" else data_val <= val
            except TypeError:
                # e.g. the field is missing (None) or holds an unorderable type
                return False
        if op == "contains":
            # A missing field must not match via the literal text "None".
            if data_val is None:
                return False
            return str(val) in str(data_val)

        return False

    def _execute_action(self, action, data):
        """
        Execute the 'then' part of a rule; a missing action is a no-op.

        ``mark_priority`` writes ``data["priority"]`` in place so the caller
        sees the change; ``notify`` and ``trigger_outreach`` only log.
        """
        if not action:
            return

        action_type = action.get("type")

        if action_type == "notify":
            print(f"[NOTIFY] Automation triggered for {data.get('name')}: {action.get('message')}")

        elif action_type == "mark_priority":
            # Per the original author's note this runs inside add_lead, so the
            # lead dict is modified before it is finalised.
            data["priority"] = action.get("value", 1)
            print(f"[Automation] Lead {data.get('name')} marked as priority {data['priority']}")

        elif action_type == "trigger_outreach":
            channel = action.get("channel", "whatsapp")
            print(f"[Automation] Triggering {channel} outreach for {data.get('name')}")
            # Actual outreach logic would go here or be queued
|
| 75 |
+
|
| 76 |
+
if __name__ == "__main__":
    # Manual smoke test: run one high-quality sample lead through the engine.
    engine = AutomationEngine()
    engine.process_new_lead(
        {"name": "Test Automation", "quality_score": 9, "niche": "real_estate"}
    )
|
lead_gen_pro/l3_execution/consumer_intent_scraper.py
ADDED
|
@@ -0,0 +1,729 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
L3 Execution: B2C Consumer Intent Scraper
|
| 3 |
+
==========================================
|
| 4 |
+
Script para detectar consumidores buscando activamente seguros en redes sociales.
|
| 5 |
+
Implementa scraping de intención de compra en YouTube, Reddit y Twitter/X.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import json
|
| 10 |
+
import time
|
| 11 |
+
import re
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
from typing import Optional, List, Dict, Any
|
| 14 |
+
from urllib.parse import urlencode
|
| 15 |
+
|
| 16 |
+
# Apify client - will be installed in venv
|
| 17 |
+
try:
|
| 18 |
+
from apify_client import ApifyClient
|
| 19 |
+
APIFY_AVAILABLE = True
|
| 20 |
+
except ImportError:
|
| 21 |
+
APIFY_AVAILABLE = False
|
| 22 |
+
print("⚠️ apify-client no instalado. Ejecuta: pip install apify-client")
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# Apify actor IDs used for social media scraping, keyed by the scraper that runs them.
APIFY_ACTORS = dict(
    youtube_comments="streamers/youtube-scraper",  # For comments
    reddit_posts="trudax/reddit-scraper",  # For posts
    twitter_posts="apidojo/tweet-scraper",  # For tweets
    facebook_posts="apify/facebook-posts-scraper",  # For public posts and groups
)

# Phrases that indicate purchase intent (insurance-related), grouped by language code.
INTENT_KEYWORDS = {
    "es": [
        "necesito seguro",
        "busco seguro",
        "recomiendan seguro",
        "seguro barato",
        "mejor seguro",
        "seguro de auto",
        "seguro de vida",
        "seguro dental",
        "seguro médico",
        "cotización seguro",
        "precio seguro",
        "cuánto cuesta seguro",
        "me cancelaron el seguro",
        "mi seguro es caro",
        "cambiar de aseguradora",
        "alternativa a seguro",
    ],
    "en": [
        "need insurance",
        "looking for insurance",
        "recommend insurance",
        "cheap insurance",
        "best insurance",
        "car insurance",
        "life insurance",
        "dental insurance",
        "health insurance",
        "insurance quote",
        "insurance price",
        "how much is insurance",
        "my insurance cancelled",
        "my insurance is expensive",
        "switch insurance",
        "insurance alternatives",
    ],
}

# Substrings suggesting the author is an agent/agency rather than a consumer;
# any match causes the item to be filtered out.
PROMOTIONAL_KEYWORDS = [
    "agent",
    "agency",
    "broker",
    "vendo",
    "venta de seguros",
    "corredor",
    "asesor",
    "consultor",
    "insurance agent",
    "insurance broker",
    "sell insurance",
    "insurance sales",
]

# Regular expressions signalling a complaint / negative sentiment.
COMPLAINT_PATTERNS = [
    r"too expensive",
    r"muy caro",
    r"overpriced",
    r"cancelled",
    r"cancelaron",
    r"cancelaron mi",
    r"bad experience",
    r"mala experiencia",
    r"terrible",
    r"worst",
    r"peor",
    r"horrible",
    r"problem",
    r"problema",
    r"issue",
    r"issues",
]
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def get_apify_token():
    """Return the Apify API token from config.json or the environment.

    Looks for ``config.json`` one directory above this module and reads its
    ``apify_token`` entry. If the file is missing, unreadable, not valid JSON,
    or has no non-empty token, the ``APIFY_TOKEN`` environment variable is
    used instead.

    Returns:
        The token string, or None when neither source provides one.
    """
    config_path = os.path.join(os.path.dirname(__file__), "..", "config.json")
    if os.path.exists(config_path):
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                config = json.load(f)
        except (OSError, ValueError) as e:
            # A broken config file must not prevent the env-var fallback
            # (json.JSONDecodeError and UnicodeDecodeError are ValueErrors).
            print(f"⚠️ No se pudo leer config.json: {e}")
            config = None

        token = config.get("apify_token") if isinstance(config, dict) else None
        if token:
            return token

    return os.environ.get("APIFY_TOKEN")
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class B2CScraper:
    """
    Scraper that detects consumer purchase intent on social networks.

    Each platform scraper runs an Apify actor, scores every returned item with
    ``_calculate_intent_score`` and keeps the ones that reach
    ``MIN_INTENT_SCORE``. When no Apify client could be created, canned demo
    leads are returned instead.
    """

    # Items scoring below this are dropped (promotional content scores 0).
    MIN_INTENT_SCORE = 40
    # Lead content is truncated to this many characters.
    MAX_CONTENT_LENGTH = 500

    def __init__(self):
        self.apify_token = get_apify_token()
        self.client = None

        if APIFY_AVAILABLE and self.apify_token:
            self.client = ApifyClient(self.apify_token)
        else:
            print("⚠️ Apify no configurado. Usando modo demo.")

    def scrape_intent(
        self,
        platform: str,
        keywords: List[str],
        limit: int = 20,
        language: str = "en"
    ) -> List[Dict[str, Any]]:
        """
        Dispatch to the scraper that handles ``platform``.

        Args:
            platform: 'youtube', 'reddit', 'twitter' (or 'x'), or 'facebook';
                matched case-insensitively.
            keywords: Key phrases to search for. May be empty or None, in
                which case each scraper uses its own defaults.
            limit: Maximum number of results.
            language: 'es' or 'en'.

        Returns:
            List of lead dictionaries; empty for an unsupported platform.
        """
        print(f"🔍 Buscando intención de compra en {platform}...")
        # `keywords` may be None; the platform scrapers tolerate that, so the
        # log line must too.
        print(f" Keywords: {(keywords or [])[:3]}...")

        handlers = {
            "youtube": self._scrape_youtube,
            "reddit": self._scrape_reddit,
            "twitter": self._scrape_twitter,
            "x": self._scrape_twitter,
            "facebook": self._scrape_facebook,
        }
        handler = handlers.get(platform.lower())
        if handler is None:
            print(f"❌ Plataforma no soportada: {platform}")
            return []
        return handler(keywords, limit, language)

    @staticmethod
    def _as_text(value) -> str:
        """Coerce a scraped field to str, mapping None (JSON null) to ''."""
        if value is None:
            return ""
        return value if isinstance(value, str) else str(value)

    def _calculate_intent_score(self, content: str, username: str = "") -> tuple:
        """
        Compute the purchase-intent score and sentiment of one item.

        Scoring: base 50, +15 per matching intent keyword (all languages),
        +10 per matching complaint pattern, +10 if the text contains '?',
        capped at 100. Any promotional keyword in the content or username
        short-circuits to ``(0, "promotional")``.

        Args:
            content: Text of the post/comment. None is treated as ''.
            username: Author handle. None is treated as ''.

        Returns:
            Tuple of (intent_score: int, sentiment: str) where sentiment is
            'promotional', 'negative', 'positive' or 'neutral'.
        """
        content = self._as_text(content)
        content_lower = content.lower()
        username_lower = self._as_text(username).lower()

        # Promotional authors (agents, brokers...) are filtered out entirely.
        for prom_keyword in PROMOTIONAL_KEYWORDS:
            if prom_keyword in content_lower or prom_keyword in username_lower:
                return (0, "promotional")

        score = 50  # Base score

        for lang_keywords in INTENT_KEYWORDS.values():
            for keyword in lang_keywords:
                if keyword in content_lower:
                    score += 15

        # Complaints indicate an unmet need, so they raise the score.
        has_complaint = False
        for pattern in COMPLAINT_PATTERNS:
            if re.search(pattern, content_lower):
                has_complaint = True
                score += 10

        # Questions indicate high intent.
        if "?" in content:
            score += 10

        score = min(score, 100)

        if has_complaint:
            sentiment = "negative"
        elif score > 70:
            sentiment = "positive"
        else:
            sentiment = "neutral"

        return (score, sentiment)

    def _run_actor(self, actor_key: str, run_input: Dict[str, Any]):
        """Run the Apify actor registered under ``actor_key`` and return an
        iterator over the items of its default dataset."""
        run = self.client.actor(APIFY_ACTORS[actor_key]).call(run_input=run_input)
        return self.client.dataset(run["defaultDatasetId"]).iterate_items()

    def _build_lead(self, platform, username, content, url, context) -> Optional[Dict[str, Any]]:
        """Score one scraped item and return its lead dict, or None when the
        score is below ``MIN_INTENT_SCORE``."""
        username = self._as_text(username)
        content = self._as_text(content)
        intent_score, sentiment = self._calculate_intent_score(content, username)
        if intent_score < self.MIN_INTENT_SCORE:
            return None
        return {
            "platform": platform,
            "username": username,
            "content": content[: self.MAX_CONTENT_LENGTH],
            "url": url,
            "scraped_at": datetime.now().isoformat(),
            "sentiment": sentiment,
            "intent_score": intent_score,
            "context": context,
        }

    def _scrape_youtube(
        self,
        keywords: List[str],
        limit: int,
        language: str
    ) -> List[Dict[str, Any]]:
        """
        Scrape YouTube comments looking for purchase intent.

        Searches videos for up to three keywords and scores at most ten
        comments per video. ``language`` is accepted for interface parity but
        is not sent to the actor.
        """
        if not self.client:
            return self._demo_results("youtube", keywords, limit)

        results = []

        try:
            search_queries = keywords[:3] if keywords else ["insurance review", "best insurance 2024"]

            for query in search_queries:
                # Checked before each run so that no further actor run is
                # started once enough leads exist, even after a failed query.
                if len(results) >= limit:
                    break

                print(f" Buscando videos: '{query}'...")

                run_input = {
                    "searchKeywords": query,
                    "maxResults": min(limit * 2, 20),
                    "includeComments": True,
                    "maxComments": 50,
                }

                try:
                    for item in self._run_actor("youtube_comments", run_input):
                        video_url = self._as_text(item.get("url"))
                        video_title = self._as_text(item.get("title"))

                        # `comments` can be null in the dataset; cap per video.
                        for comment in (item.get("comments") or [])[:10]:
                            lead = self._build_lead(
                                platform="youtube",
                                username=comment.get("author"),
                                content=comment.get("text"),
                                url=comment.get("url") or video_url,
                                context=f"Video: {video_title[:100]}",
                            )
                            if lead is not None:
                                results.append(lead)

                    time.sleep(2)  # Rate limiting

                except Exception as e:
                    print(f" ⚠️ Error procesando query '{query}': {e}")
                    continue

        except Exception as e:
            print(f"❌ Error en YouTube scraper: {e}")
            return self._demo_results("youtube", keywords, limit)

        print(f"✅ {len(results)} leads encontrados en YouTube")
        return results[:limit]

    def _scrape_reddit(
        self,
        keywords: List[str],
        limit: int,
        language: str
    ) -> List[Dict[str, Any]]:
        """
        Scrape Reddit posts looking for purchase intent.

        Runs one actor call per subreddit in a fixed list, using up to three
        keywords as search terms. ``language`` is accepted for interface
        parity but is not sent to the actor.
        """
        if not self.client:
            return self._demo_results("reddit", keywords, limit)

        results = []

        try:
            # Key subreddits for insurance discussions
            subreddits = ["Insurance", "personalfinance", "askcarsales", "LifeInsurance"]

            for subreddit in subreddits:
                if len(results) >= limit:
                    break

                print(f" Buscando en r/{subreddit}...")

                run_input = {
                    "subreddits": [subreddit],
                    "searchTerms": keywords[:3] if keywords else ["insurance"],
                    "maxResults": min(limit * 2, 25),
                    "sort": "new",
                }

                try:
                    for item in self._run_actor("reddit_posts", run_input):
                        lead = self._build_lead(
                            platform="reddit",
                            username=item.get("author"),
                            # Posts without a body fall back to the title.
                            content=item.get("body") or item.get("title"),
                            url=self._as_text(item.get("url")),
                            context=f"r/{subreddit}",
                        )
                        if lead is not None:
                            results.append(lead)

                    time.sleep(2)  # Rate limiting

                except Exception as e:
                    print(f" ⚠️ Error en r/{subreddit}: {e}")
                    continue

        except Exception as e:
            print(f"❌ Error en Reddit scraper: {e}")
            return self._demo_results("reddit", keywords, limit)

        print(f"✅ {len(results)} leads encontrados en Reddit")
        return results[:limit]

    def _scrape_twitter(
        self,
        keywords: List[str],
        limit: int,
        language: str
    ) -> List[Dict[str, Any]]:
        """
        Scrape tweets looking for purchase intent.

        Runs one actor call per search term (up to three), passing
        ``language`` through to the actor input.
        """
        if not self.client:
            return self._demo_results("twitter", keywords, limit)

        results = []

        try:
            search_terms = keywords[:3] if keywords else ["need insurance", "looking for insurance"]

            for term in search_terms:
                if len(results) >= limit:
                    break

                print(f" Buscando tweets: '{term}'...")

                run_input = {
                    "searchTerms": [term],
                    "maxTweets": min(limit * 2, 30),
                    "includeReplies": False,
                    "language": language,
                }

                try:
                    for item in self._run_actor("twitter_posts", run_input):
                        username = self._as_text(item.get("username")).replace("@", "")
                        # Build a status URL when the item carries none.
                        tweet_url = item.get("url") or (
                            f"https://twitter.com/{username}/status/{item.get('id') or ''}"
                        )
                        lead = self._build_lead(
                            platform="twitter",
                            username=username,
                            content=item.get("text"),
                            url=tweet_url,
                            context=None,
                        )
                        if lead is not None:
                            results.append(lead)

                    time.sleep(2)  # Rate limiting

                except Exception as e:
                    print(f" ⚠️ Error buscando '{term}': {e}")
                    continue

        except Exception as e:
            print(f"❌ Error en Twitter scraper: {e}")
            return self._demo_results("twitter", keywords, limit)

        print(f"✅ {len(results)} leads encontrados en Twitter")
        return results[:limit]

    def _scrape_facebook(
        self,
        keywords: List[str],
        limit: int,
        language: str
    ) -> List[Dict[str, Any]]:
        """
        Scrape public Facebook posts looking for purchase intent.

        Runs one actor call per search term; without keywords it uses a
        Florida-focused default list. ``language`` is accepted for interface
        parity but is not sent to the actor.
        """
        if not self.client:
            return self._demo_results("facebook", keywords, limit)

        results = []

        try:
            # Insurance-specific search terms
            search_terms = keywords[:3] if keywords else [
                "need insurance Florida",
                "looking for insurance Miami",
                "cheap insurance Orlando",
                "car insurance Tampa",
                "health insurance Florida"
            ]

            for term in search_terms:
                if len(results) >= limit:
                    break

                print(f" Buscando en Facebook: '{term}'...")

                run_input = {
                    "searchTerms": [term],
                    "maxResults": min(limit * 2, 30),
                    "onlyPosts": True,
                    "includeComments": False,
                }

                try:
                    for item in self._run_actor("facebook_posts", run_input):
                        # `user` / `group` may be absent or null on an item.
                        user = item.get("user") or {}
                        group = item.get("group") or {}
                        group_name = group.get("name")

                        lead = self._build_lead(
                            platform="facebook",
                            username=user.get("name") or "Unknown",
                            content=item.get("text"),
                            url=item.get("url") or user.get("url") or "",
                            context=f"Group: {group_name}" if group_name else "Public Post",
                        )
                        if lead is not None:
                            results.append(lead)

                    time.sleep(3)  # Rate limiting (Facebook is stricter)

                except Exception as e:
                    print(f" ⚠️ Error buscando '{term}': {e}")
                    continue

        except Exception as e:
            print(f"❌ Error en Facebook scraper: {e}")
            return self._demo_results("facebook", keywords, limit)

        print(f"✅ {len(results)} leads encontrados en Facebook")
        return results[:limit]

    def _demo_results(
        self,
        platform: str,
        keywords: List[str],
        limit: int
    ) -> List[Dict[str, Any]]:
        """Return up to ``limit`` canned leads for ``platform`` (for running
        without the API). ``keywords`` is ignored; each lead gets a fresh
        ``scraped_at`` timestamp."""
        demo_data = {
            "youtube": [
                {
                    "platform": "youtube",
                    "username": "consumer_john_2024",
                    "content": "Necesito un seguro de auto barato en Miami. Alguien tiene recomendaciones? Mi seguro actual es muy caro.",
                    "url": "https://youtube.com/watch?v=demo1",
                    "sentiment": "negative",
                    "intent_score": 85,
                    "context": "Video: Best Car Insurance 2024"
                },
                {
                    "platform": "youtube",
                    "username": "sarah_smith",
                    "content": "Looking for dental insurance recommendations. My current plan doesn't cover what I need. Help!",
                    "url": "https://youtube.com/watch?v=demo2",
                    "sentiment": "negative",
                    "intent_score": 78,
                    "context": "Video: Dental Insurance Guide"
                },
            ],
            "reddit": [
                {
                    "platform": "reddit",
                    "username": "insurance_seeker_22",
                    "content": "Just got my insurance cancelled. Need a new provider ASAP. Any recommendations for affordable life insurance?",
                    "url": "https://reddit.com/r/Insurance/demo1",
                    "sentiment": "negative",
                    "intent_score": 92,
                    "context": "r/Insurance"
                },
                {
                    "platform": "reddit",
                    "username": "budget_mom",
                    "content": "What's the best health insurance for a family of 4? Currently paying $800/month and it's killing us.",
                    "url": "https://reddit.com/r/personalfinance/demo2",
                    "sentiment": "negative",
                    "intent_score": 88,
                    "context": "r/personalfinance"
                },
            ],
            "twitter": [
                {
                    "platform": "twitter",
                    "username": "mike_looking",
                    "content": "Need car insurance quotes ASAP! My current provider raised rates by 40%. Anyone have good experiences with Geico or Progressive?",
                    "url": "https://twitter.com/mike_looking/status/demo1",
                    "sentiment": "negative",
                    "intent_score": 90,
                    "context": None
                },
                {
                    "platform": "twitter",
                    "username": "young_professional",
                    "content": "First time buying life insurance. What should I look for? Term vs whole life? #insurance #help",
                    "url": "https://twitter.com/young_professional/status/demo2",
                    "sentiment": "neutral",
                    "intent_score": 75,
                    "context": None
                },
            ],
            "facebook": [
                {
                    "platform": "facebook",
                    "username": "Sarah Johnson",
                    "content": "Hi everyone! Just moved to Florida and need car insurance recommendations. My previous insurer doesn't cover this state. Any suggestions for affordable options in Miami?",
                    "url": "https://facebook.com/groups/florida-insurance/posts/demo1",
                    "sentiment": "neutral",
                    "intent_score": 82,
                    "context": "Group: Florida Insurance Help"
                },
                {
                    "platform": "facebook",
                    "username": "Carlos Martinez",
                    "content": "Desperate for dental insurance in Orlando! My dentist says I need a root canal ASAP but my current plan won't cover it. Anyone know of good dental insurance that covers major procedures?",
                    "url": "https://facebook.com/groups/orlando-community/posts/demo2",
                    "sentiment": "negative",
                    "intent_score": 95,
                    "context": "Group: Orlando Community"
                },
                {
                    "platform": "facebook",
                    "username": "Jennifer Williams",
                    "content": "Looking for health insurance for my family of 5 in Tampa. Self-employed so no employer plan. What are my best options? Marketplace or private?",
                    "url": "https://facebook.com/groups/tampa-moms/posts/demo3",
                    "sentiment": "neutral",
                    "intent_score": 78,
                    "context": "Group: Tampa Moms Group"
                },
            ]
        }

        results = []
        for item in demo_data.get(platform, [])[:limit]:
            lead = dict(item)  # copy so the timestamp is per call
            lead["scraped_at"] = datetime.now().isoformat()
            results.append(lead)

        print(f"🎭 Modo demo: {len(results)} resultados generados para {platform}")
        return results
|
| 573 |
+
|
| 574 |
+
|
| 575 |
+
def scrape_b2c_leads(
    platforms: List[str] = None,
    keywords: List[str] = None,
    location: Dict[str, str] = None,
    limit_per_platform: int = 10,
    language: str = "en"
) -> List[Dict[str, Any]]:
    """
    Scrape B2C leads from several platforms and merge them into one list.

    When a location is given, each base keyword is expanded into
    "<kw> <place>" and "<kw> in/en <place>" variants, which are placed ahead
    of the base keywords.

    Args:
        platforms: Platform names understood by ``B2CScraper.scrape_intent``.
            Defaults to ['youtube', 'reddit', 'twitter'].
        keywords: Base keywords (any sequence of str). Defaults to the
            ``INTENT_KEYWORDS`` entry for ``language`` (falling back to 'en').
        location: Dict with optional 'city', 'state', 'country' entries.
        limit_per_platform: Maximum results requested per platform.
        language: 'es' or 'en'.

    Returns:
        Leads with exact duplicates removed, sorted by ``intent_score``
        descending. A platform that raises is logged and skipped.
    """
    if platforms is None:
        platforms = ["youtube", "reddit", "twitter"]

    if keywords is None:
        keywords = INTENT_KEYWORDS.get(language, INTENT_KEYWORDS["en"])
    # Work on a private list: accepts tuples and never aliases the module
    # constant into what is handed to the scrapers.
    keywords = list(keywords)

    targeted_keywords = list(keywords)
    if location:
        location_terms = [
            term
            for term in (location.get("city"), location.get("state"), location.get("country"))
            if term
        ]

        if location_terms:
            print(f"📍 Aplicando segmentación geográfica: {', '.join(location_terms)}")
            preposition = "in" if language == "en" else "en"
            geo_keywords = []
            for kw in keywords:
                for term in location_terms:
                    geo_keywords.append(f"{kw} {term}")
                    geo_keywords.append(f"{kw} {preposition} {term}")

            # Location-specific keywords go first because the platform
            # scrapers only use the head of the list.
            targeted_keywords = geo_keywords + keywords

    scraper = B2CScraper()
    all_leads = []
    last_index = len(platforms) - 1

    for index, platform in enumerate(platforms):
        try:
            leads = scraper.scrape_intent(
                platform=platform,
                keywords=targeted_keywords,
                limit=limit_per_platform,
                language=language
            )
            all_leads.extend(leads)

            # Pause between platforms; compared by position so a repeated
            # platform name does not skip the pause.
            if index != last_index:
                time.sleep(3)
        except Exception as e:
            print(f"⚠️ Error en plataforma {platform}: {e}")

    # Overlapping search terms can return the same post more than once;
    # drop exact repeats so the reported total really counts unique leads.
    seen = set()
    unique_leads = []
    for lead in all_leads:
        identity = (
            lead.get("platform"),
            lead.get("url"),
            lead.get("username"),
            lead.get("content"),
        )
        if identity in seen:
            continue
        seen.add(identity)
        unique_leads.append(lead)
    all_leads = unique_leads

    # Highest intent first (stable, so scrape order breaks ties).
    all_leads.sort(key=lambda x: x.get("intent_score", 0), reverse=True)

    print(f"\n📊 Total leads B2C únicos: {len(all_leads)}")
    return all_leads
|
| 644 |
+
|
| 645 |
+
|
| 646 |
+
if __name__ == "__main__":
    # Command-line entry point: parse options, run the scrapers and print
    # the ten highest-scoring leads.
    import argparse

    parser = argparse.ArgumentParser(description="B2C Consumer Intent Scraper")
    parser.add_argument(
        "--platform",
        # 'facebook' is supported by B2CScraper.scrape_intent, so it is
        # selectable here; 'all' keeps its original three-platform meaning.
        choices=["youtube", "reddit", "twitter", "facebook", "all"],
        default="all",
        help="Plataforma a scrapear"
    )
    parser.add_argument(
        "--keywords",
        nargs="+",
        default=None,
        help="Keywords a buscar (separados por espacio)"
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=10,
        help="Máximo de resultados por plataforma"
    )
    parser.add_argument(
        "--language",
        choices=["es", "en"],
        default="en",
        help="Idioma de búsqueda"
    )
    parser.add_argument(
        "--demo",
        action="store_true",
        help="Usar modo demo (sin APIs)"
    )
    # Location args
    parser.add_argument("--city", type=str, help="Ciudad")
    parser.add_argument("--state", type=str, help="Estado")
    parser.add_argument("--country", type=str, help="País")

    args = parser.parse_args()

    if args.demo:
        # B2CScraper only creates an Apify client while this module flag is
        # truthy; clearing it makes every scraper return demo results.
        APIFY_AVAILABLE = False

    # Setup platforms
    if args.platform == "all":
        platforms = ["youtube", "reddit", "twitter"]
    else:
        platforms = [args.platform]

    # Setup location from whichever parts were supplied
    location = {}
    if args.city:
        location["city"] = args.city
    if args.state:
        location["state"] = args.state
    if args.country:
        location["country"] = args.country

    print("🎯 B2C Consumer Intent Scraper")
    print("=" * 50)
    print(f"Plataformas: {', '.join(platforms)}")
    print(f"Keywords: {args.keywords or 'Default'}")
    print(f"Ubicación: {location if location else 'Global'}")
    print(f"Límite por plataforma: {args.limit}")
    print(f"Idioma: {args.language}")
    print("=" * 50)

    # Run scraper
    leads = scrape_b2c_leads(
        platforms=platforms,
        keywords=args.keywords,
        location=location,
        limit_per_platform=args.limit,
        language=args.language
    )

    # Display results
    print("\n📋 Top Leads B2C Detectados:")
    print("-" * 80)
    for i, lead in enumerate(leads[:10], 1):
        print(f"\n{i}. [{lead['platform'].upper()}] Score: {lead['intent_score']}/100")
        print(f" Usuario: @{lead['username']}")
        print(f" Contenido: {lead['content'][:150]}...")
        print(f" Sentimiento: {lead['sentiment']}")
        print(f" URL: {lead['url']}")
        if lead.get('context'):
            print(f" Contexto: {lead['context']}")

    print("\n" + "=" * 80)
    print(f"✅ Proceso completado. {len(leads)} leads B2C detectados.")
|
lead_gen_pro/l3_execution/database_doe.py
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
L3 Execution: Enhanced Database Module
|
| 3 |
+
========================================
|
| 4 |
+
Base de datos SQLite expandida con soporte para contactos,
|
| 5 |
+
oportunidades e historial de outreach.
|
| 6 |
+
Implementa la capa L3 del framework DOE.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import sqlite3
|
| 10 |
+
import os
|
| 11 |
+
import json
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
from typing import Optional, List, Dict, Any
|
| 14 |
+
|
| 15 |
+
# NexusCRM Phase 2 cloud sync.
# The helpers are looked up first as part of the `l3_execution` package, then
# as sibling modules; if neither import works, inert stand-ins are defined so
# the rest of this module can still call them.
try:
    from l3_execution.supabase_sync import sync_lead_to_supabase
    from l3_execution.automation_engine import AutomationEngine
except ImportError:
    try:
        from supabase_sync import sync_lead_to_supabase
        from automation_engine import AutomationEngine
    except ImportError:

        def sync_lead_to_supabase(data):
            """Stand-in used when the Supabase sync module is unavailable."""
            return None

        class AutomationEngine:
            """Stand-in used when the automation engine module is unavailable."""

            def process_new_lead(self, data):
                return None


# Shared engine instance for this module.
automation = AutomationEngine()

# SQLite file located one directory above this module.
DB_PATH = os.path.join(os.path.dirname(__file__), "..", "leads_doe.db")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def get_connection(timeout: int = 30) -> sqlite3.Connection:
    """Open the database at ``DB_PATH`` with name-addressable rows.

    Args:
        timeout: Value passed to ``sqlite3.connect`` as its ``timeout``
            (seconds to wait on a locked database; default 30).

    Returns:
        A new ``sqlite3.Connection`` whose ``row_factory`` is ``sqlite3.Row``.
    """
    connection = sqlite3.connect(DB_PATH, timeout=timeout)
    connection.row_factory = sqlite3.Row
    return connection
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def init_db() -> None:
    """Create the DOE schema (tables and indexes) if it does not exist yet.

    Tables created: ``leads``, ``outreach_log``, ``opportunities`` and
    ``metrics``. The ``leads`` table includes a ``quality_score`` column,
    which ``add_lead`` writes on both insert and update; databases created
    before that column existed are migrated in place.

    The connection is always closed, even if a statement fails.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()

        # Main leads table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS leads (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                phone TEXT,
                phone_formatted TEXT,
                email TEXT,
                address TEXT,
                website TEXT,
                rating REAL,
                reviews_count INTEGER,
                category TEXT,
                source TEXT DEFAULT 'apify',
                niche TEXT NOT NULL,
                country TEXT NOT NULL,
                city TEXT,

                -- Enrichment data
                enriched BOOLEAN DEFAULT 0,
                facebook_url TEXT,
                instagram_url TEXT,
                linkedin_url TEXT,
                twitter_url TEXT,
                quality_score INTEGER,

                -- Status tracking
                status TEXT DEFAULT 'new',
                priority INTEGER DEFAULT 0,

                -- Outreach status
                whatsapp_sent BOOLEAN DEFAULT 0,
                whatsapp_sent_at TIMESTAMP,
                whatsapp_link TEXT,
                email_sent BOOLEAN DEFAULT 0,
                email_sent_at TIMESTAMP,
                replied BOOLEAN DEFAULT 0,
                replied_at TIMESTAMP,

                -- Opportunity tracking
                is_opportunity BOOLEAN DEFAULT 0,
                opportunity_value REAL,
                opportunity_notes TEXT,

                -- Metadata
                scraped_at TIMESTAMP,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        # Migration: CREATE TABLE IF NOT EXISTS does not alter an existing
        # table, so add quality_score to databases created without it.
        cursor.execute("PRAGMA table_info(leads)")
        lead_columns = {row[1] for row in cursor.fetchall()}
        if "quality_score" not in lead_columns:
            cursor.execute("ALTER TABLE leads ADD COLUMN quality_score INTEGER")

        # Outreach history log
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS outreach_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                lead_id INTEGER NOT NULL,
                channel TEXT NOT NULL,
                message_template TEXT,
                message_sent TEXT,
                status TEXT DEFAULT 'sent',
                response TEXT,
                sent_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (lead_id) REFERENCES leads(id)
            )
        """)

        # Opportunities (CRM-like)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS opportunities (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                lead_id INTEGER NOT NULL,
                stage TEXT DEFAULT 'qualified',
                value REAL,
                probability INTEGER DEFAULT 50,
                notes TEXT,
                next_action TEXT,
                next_action_date DATE,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (lead_id) REFERENCES leads(id)
            )
        """)

        # Dashboard metrics cache
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS metrics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                metric_name TEXT NOT NULL,
                metric_value TEXT,
                calculated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        # Create indexes
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_leads_status ON leads(status)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_leads_niche ON leads(niche)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_leads_country ON leads(country)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_leads_email ON leads(email)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_outreach_lead ON outreach_log(lead_id)")

        conn.commit()
    finally:
        conn.close()

    print(f"[OK] Base de datos DOE inicializada: {DB_PATH}")
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def add_lead(lead_data: Dict[str, Any], upsert: bool = True) -> int:
    """Insert a lead, or update the matching existing one.

    An existing lead is looked up first by phone (against both ``phone`` and
    ``phone_formatted``) and, if that finds nothing, by email.

    Args:
        lead_data: Lead fields. ``social_profiles`` may be a dict with
            ``facebook`` / ``instagram`` / ``linkedin`` / ``twitter`` keys;
            any other value is treated as "no social profiles".
        upsert: When True (default) a matching lead is updated; only non-NULL
            incoming values overwrite stored ones (COALESCE). When False a
            matching lead is left untouched and its id is returned
            immediately, without cloud sync or automation.

    Returns:
        The id of the inserted or matched lead row.

    After a successful write the lead is passed to ``sync_lead_to_supabase``
    and ``automation.process_new_lead``; failures there are logged and do not
    affect the return value.
    """
    phone = lead_data.get("phone")
    email = lead_data.get("email")

    # Normalise social profiles once; both branches store the same four URLs.
    social = lead_data.get("social_profiles")
    if not isinstance(social, dict):
        social = {}
    social_urls = (
        social.get("facebook"),
        social.get("instagram"),
        social.get("linkedin"),
        social.get("twitter"),
    )

    conn = get_connection()
    try:
        cursor = conn.cursor()

        # Check for existing lead (deduplication)
        existing_id = None
        if phone:
            cursor.execute(
                "SELECT id FROM leads WHERE phone = ? OR phone_formatted = ?",
                (phone, phone),
            )
            row = cursor.fetchone()
            if row:
                existing_id = row[0]

        if not existing_id and email:
            cursor.execute("SELECT id FROM leads WHERE email = ?", (email,))
            row = cursor.fetchone()
            if row:
                existing_id = row[0]

        if existing_id:
            if not upsert:
                # The finally clause below closes the connection.
                return existing_id

            # Update existing lead (upsert logic)
            cursor.execute("""
                UPDATE leads
                SET name = COALESCE(?, name),
                    email = COALESCE(?, email),
                    address = COALESCE(?, address),
                    website = COALESCE(?, website),
                    rating = COALESCE(?, rating),
                    whatsapp_link = COALESCE(?, whatsapp_link),
                    facebook_url = COALESCE(?, facebook_url),
                    instagram_url = COALESCE(?, instagram_url),
                    linkedin_url = COALESCE(?, linkedin_url),
                    twitter_url = COALESCE(?, twitter_url),
                    quality_score = COALESCE(?, quality_score),
                    updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
            """, (
                lead_data.get("name"),
                lead_data.get("email"),
                lead_data.get("address"),
                lead_data.get("website"),
                lead_data.get("rating"),
                lead_data.get("whatsapp_link"),
                *social_urls,
                lead_data.get("quality_score"),
                existing_id,
            ))
            lead_id = existing_id
        else:
            cursor.execute("""
                INSERT INTO leads (
                    name, phone, phone_formatted, email, address, website,
                    rating, reviews_count, category, source, niche, country, city,
                    enriched, facebook_url, instagram_url, linkedin_url, twitter_url,
                    whatsapp_link, quality_score, scraped_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                lead_data.get("name"),
                lead_data.get("phone"),
                lead_data.get("phone_formatted"),
                lead_data.get("email"),
                lead_data.get("address"),
                lead_data.get("website"),
                lead_data.get("rating"),
                lead_data.get("reviews_count"),
                lead_data.get("category"),
                lead_data.get("source", "apify"),
                lead_data.get("niche"),
                lead_data.get("country"),
                lead_data.get("city"),
                lead_data.get("enriched", False),
                *social_urls,
                lead_data.get("whatsapp_link"),
                lead_data.get("quality_score"),
                lead_data.get("scraped_at"),
            ))
            lead_id = cursor.lastrowid

        conn.commit()
    finally:
        # Release the handle even when a statement raises (e.g. a NOT NULL
        # violation); previously the connection leaked on any error.
        conn.close()

    # Proactive sync to NexusCRM Supabase cloud (phase 2); best effort.
    try:
        sync_lead_to_supabase(lead_data)
    except Exception as e:
        print(f"[NexusCRM] Warning: Auto-sync to cloud failed: {e}")

    # Process automation rules (phase 2); best effort.
    try:
        automation.process_new_lead(lead_data)
    except Exception as e:
        print(f"[Automation] Warning: Rule processing failed: {e}")

    return lead_id
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def get_leads(
    niche: Optional[str] = None,
    country: Optional[str] = None,
    status: Optional[str] = None,
    with_email: bool = False,
    with_phone: bool = False,
    limit: int = 100
) -> List[Dict[str, Any]]:
    """Return leads matching the given filters, newest first.

    Args:
        niche: Exact niche to match; ignored when falsy.
        country: Exact country to match; ignored when falsy.
        status: Exact status to match; ignored when falsy.
        with_email: Keep only leads whose email is neither NULL nor ''.
        with_phone: Keep only leads whose phone is neither NULL nor ''.
        limit: Maximum number of rows returned.

    Returns:
        A list of plain dicts, one per lead row, ordered by ``created_at``
        descending.
    """
    query = "SELECT * FROM leads WHERE 1=1"
    params = []

    if niche:
        query += " AND niche = ?"
        params.append(niche)
    if country:
        query += " AND country = ?"
        params.append(country)
    if status:
        query += " AND status = ?"
        params.append(status)
    if with_email:
        query += " AND email IS NOT NULL AND email != ''"
    if with_phone:
        query += " AND phone IS NOT NULL AND phone != ''"

    query += " ORDER BY created_at DESC LIMIT ?"
    params.append(limit)

    conn = get_connection()
    try:
        rows = conn.cursor().execute(query, params).fetchall()
    finally:
        # Close the handle even if the query raises.
        conn.close()

    return [dict(row) for row in rows]
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def get_pending_outreach(
    channel: str,
    niche: Optional[str] = None,
    limit: int = 20
) -> List[Dict[str, Any]]:
    """Return 'new' leads that have not yet been contacted on ``channel``.

    Args:
        channel: ``"whatsapp"`` requires an unsent lead with a usable phone;
            ``"email"`` requires an unsent lead with a usable email. Any
            other value applies no channel filter (all 'new' leads qualify).
        niche: Exact niche to match; ignored when falsy.
        limit: Maximum number of rows returned.

    Returns:
        A list of plain dicts ordered by ``priority`` then ``rating``,
        both descending.
    """
    query = "SELECT * FROM leads WHERE status = 'new'"
    params = []

    # Empty strings are excluded as well as NULLs, matching get_leads():
    # a lead with phone = '' cannot actually be messaged.
    if channel == "whatsapp":
        query += " AND whatsapp_sent = 0 AND phone IS NOT NULL AND phone != ''"
    elif channel == "email":
        query += " AND email_sent = 0 AND email IS NOT NULL AND email != ''"

    if niche:
        query += " AND niche = ?"
        params.append(niche)

    query += " ORDER BY priority DESC, rating DESC LIMIT ?"
    params.append(limit)

    conn = get_connection()
    try:
        rows = conn.cursor().execute(query, params).fetchall()
    finally:
        # Close the handle even if the query raises.
        conn.close()

    return [dict(row) for row in rows]
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
def mark_outreach(
    lead_id: int,
    channel: str,
    message: str,
    status: str = "sent"
) -> None:
    """Flag a lead as contacted on ``channel`` and append to ``outreach_log``.

    Args:
        lead_id: Id of the lead row to update.
        channel: ``"whatsapp"`` or ``"email"``. Any other value skips the
            lead update but is still logged.
        message: Text stored in ``outreach_log.message_sent``. For WhatsApp
            it is also written to ``leads.whatsapp_link``.
        status: Value stored in ``outreach_log.status`` (default ``"sent"``).

    The lead update and the log insert are committed together; the
    connection is closed even if a statement fails.
    """
    now = datetime.now().isoformat()

    conn = get_connection()
    try:
        cursor = conn.cursor()

        # Update lead
        # NOTE(review): the lead is flagged as sent/contacted regardless of
        # `status`; confirm callers never pass a failure status here.
        if channel == "whatsapp":
            # NOTE(review): `message` is stored as whatsapp_link — presumably
            # callers pass the wa.me link as the message; verify.
            cursor.execute("""
                UPDATE leads
                SET whatsapp_sent = 1, whatsapp_sent_at = ?, whatsapp_link = ?, status = 'contacted',
                    updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
            """, (now, message, lead_id))
        elif channel == "email":
            cursor.execute("""
                UPDATE leads
                SET email_sent = 1, email_sent_at = ?, status = 'contacted',
                    updated_at = CURRENT_TIMESTAMP
                WHERE id = ?
            """, (now, lead_id))

        # Log outreach
        cursor.execute("""
            INSERT INTO outreach_log (lead_id, channel, message_sent, status)
            VALUES (?, ?, ?, ?)
        """, (lead_id, channel, message, status))

        conn.commit()
    finally:
        conn.close()
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def create_opportunity(
    lead_id: int,
    value: float,
    notes: str = "",
    stage: str = "qualified"
) -> int:
    """Promote a lead to an opportunity.

    Marks the lead row as an opportunity (value and notes copied onto it)
    and inserts a matching row in ``opportunities``; both writes are
    committed together.

    Args:
        lead_id: Id of the lead being promoted.
        value: Estimated opportunity value.
        notes: Free-form notes (default empty).
        stage: Initial pipeline stage (default ``"qualified"``).

    Returns:
        The id of the new ``opportunities`` row.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()

        # Mark lead as opportunity
        cursor.execute("""
            UPDATE leads
            SET is_opportunity = 1, opportunity_value = ?, opportunity_notes = ?,
                updated_at = CURRENT_TIMESTAMP
            WHERE id = ?
        """, (value, notes, lead_id))

        # Create opportunity record
        cursor.execute("""
            INSERT INTO opportunities (lead_id, stage, value, notes)
            VALUES (?, ?, ?, ?)
        """, (lead_id, stage, value, notes))

        opp_id = cursor.lastrowid
        conn.commit()
    finally:
        # Close the handle even if a statement raises.
        conn.close()

    return opp_id
|
| 407 |
+
|
| 408 |
+
|
| 409 |
+
def get_stats() -> Dict[str, Any]:
    """Collect summary counters over the leads and opportunities tables.

    Returns:
        A dict with the keys ``total_leads``, ``by_status``, ``by_niche``,
        ``by_country`` (each a value -> count mapping), ``enriched``,
        ``with_email``, ``whatsapp_sent``, ``email_sent``, ``replied`` and
        ``opportunities`` (``{"count": ..., "total_value": ...}``).
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()

        def scalar(sql: str) -> Any:
            """Run a single-value query and return that value."""
            cursor.execute(sql)
            return cursor.fetchone()[0]

        def grouped(sql: str) -> Dict[Any, int]:
            """Run a two-column GROUP BY query and return it as a mapping."""
            cursor.execute(sql)
            return dict(cursor.fetchall())

        stats = {}

        # Total leads
        stats["total_leads"] = scalar("SELECT COUNT(*) FROM leads")

        # Breakdown by status / niche / country
        stats["by_status"] = grouped("SELECT status, COUNT(*) FROM leads GROUP BY status")
        stats["by_niche"] = grouped("SELECT niche, COUNT(*) FROM leads GROUP BY niche")
        stats["by_country"] = grouped("SELECT country, COUNT(*) FROM leads GROUP BY country")

        # Enrichment stats
        stats["enriched"] = scalar("SELECT COUNT(*) FROM leads WHERE enriched = 1")
        # Empty strings are not usable emails; same rule as get_leads(with_email=True).
        stats["with_email"] = scalar(
            "SELECT COUNT(*) FROM leads WHERE email IS NOT NULL AND email != ''"
        )

        # Outreach stats
        stats["whatsapp_sent"] = scalar("SELECT COUNT(*) FROM leads WHERE whatsapp_sent = 1")
        stats["email_sent"] = scalar("SELECT COUNT(*) FROM leads WHERE email_sent = 1")
        stats["replied"] = scalar("SELECT COUNT(*) FROM leads WHERE replied = 1")

        # Opportunity stats
        cursor.execute("SELECT COUNT(*), COALESCE(SUM(value), 0) FROM opportunities")
        row = cursor.fetchone()
        stats["opportunities"] = {"count": row[0], "total_value": row[1]}
    finally:
        # Close the handle even if a query raises.
        conn.close()

    return stats
|
| 456 |
+
|
| 457 |
+
|
| 458 |
+
def get_leads_count_last_30_days() -> int:
    """Count leads whose ``scraped_at`` falls within the last 30 days.

    ``scraped_at`` is passed through SQLite's ``datetime()`` before the
    comparison so that ISO-8601 values written with a ``T`` separator are
    compared as timestamps rather than as raw text against the
    space-separated output of ``datetime('now', ...)``. Rows whose
    ``scraped_at`` is NULL or unparseable are not counted.

    Returns:
        Number of matching leads.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT COUNT(*) FROM leads
            WHERE datetime(scraped_at) >= datetime('now', '-30 days')
        """)
        count = cursor.fetchone()[0]
    finally:
        # Close the handle even if the query raises.
        conn.close()
    return count
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
if __name__ == "__main__":
    # Script entry point: make sure the schema exists, then print a summary.
    init_db()
    summary = get_stats()

    print(f"\n[ESTADISTICAS] Estadísticas DOE:")
    print(f" Total leads: {summary['total_leads']}")
    print(f" Con email: {summary['with_email']}")
    print(f" Enriquecidos: {summary['enriched']}")
    print(f" WhatsApp enviados: {summary['whatsapp_sent']}")
    print(f" Emails enviados: {summary['email_sent']}")
|
lead_gen_pro/l3_execution/enrichment.py
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
L3 Execution: Data Enrichment Module
|
| 3 |
+
=====================================
|
| 4 |
+
Enriquece leads con emails, teléfonos adicionales y perfiles sociales.
|
| 5 |
+
Implementa la capa L3 del framework DOE.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import re
|
| 10 |
+
import json
|
| 11 |
+
import time
|
| 12 |
+
from typing import Optional, List, Dict, Any
|
| 13 |
+
from urllib.parse import urlparse, quote
|
| 14 |
+
|
| 15 |
+
# Optional: requests for web scraping
|
| 16 |
+
try:
|
| 17 |
+
import requests
|
| 18 |
+
REQUESTS_AVAILABLE = True
|
| 19 |
+
except ImportError:
|
| 20 |
+
REQUESTS_AVAILABLE = False
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def extract_email_from_website(url: str, timeout: int = 10) -> Optional[str]:
    """Fetch a website's homepage and return the first plausible email in it.

    Args:
        url: Website URL; ``https://`` is prepended when it does not start
            with ``http``.
        timeout: Request timeout in seconds.

    Returns:
        The first matching address, lower-cased, or None when ``requests``
        is unavailable, ``url`` is empty, the response is not HTTP 200, no
        acceptable address is found, or any error occurs (the error is
        printed, not raised).
    """
    if not REQUESTS_AVAILABLE or not url:
        return None

    # Placeholder domains commonly left in site templates.
    excluded_domains = {"example.com", "domain.com", "email.com", "test.com"}
    # The email regex also matches asset names such as "logo@2x.png";
    # reject anything whose "TLD" is really a file extension.
    asset_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp", ".css", ".js")

    try:
        # Clean URL
        if not url.startswith("http"):
            url = f"https://{url}"

        response = requests.get(url, timeout=timeout, headers={
            "User-Agent": "Mozilla/5.0 (compatible; LeadGen/1.0)"
        })

        if response.status_code != 200:
            return None

        email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
        for match in re.finditer(email_pattern, response.text):
            candidate = match.group().lower()
            domain = candidate.split("@")[1]
            if domain in excluded_domains:
                continue
            if candidate.endswith(asset_suffixes):
                continue
            return candidate

        return None

    except Exception as e:
        print(f" ⚠️ Error extrayendo email de {url}: {e}")
        return None
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def extract_social_profiles(url: str, timeout: int = 10) -> Dict[str, Optional[str]]:
    """Scan a website's homepage for links to its social media profiles.

    Args:
        url: Website URL; ``https://`` is prepended when it does not start
            with ``http``.
        timeout: Request timeout in seconds.

    Returns:
        A dict with the keys ``facebook``, ``instagram``, ``linkedin`` and
        ``twitter``; each value is the first matching URL in the page or
        None. All values stay None when ``requests`` is unavailable, ``url``
        is empty, the response is not HTTP 200 or the request fails (the
        error is printed, not raised).
    """
    profiles = dict.fromkeys(("facebook", "instagram", "linkedin", "twitter"))

    if not REQUESTS_AVAILABLE or not url:
        return profiles

    # One regex per network, checked in the same order as the result keys.
    network_patterns = (
        ("facebook", r'https?://(?:www\.)?facebook\.com/[a-zA-Z0-9._-]+'),
        ("instagram", r'https?://(?:www\.)?instagram\.com/[a-zA-Z0-9._-]+'),
        ("linkedin", r'https?://(?:www\.)?linkedin\.com/(?:in|company)/[a-zA-Z0-9._-]+'),
        ("twitter", r'https?://(?:www\.)?(?:twitter|x)\.com/[a-zA-Z0-9._-]+'),
    )

    try:
        if not url.startswith("http"):
            url = f"https://{url}"

        response = requests.get(url, timeout=timeout, headers={
            "User-Agent": "Mozilla/5.0 (compatible; LeadGen/1.0)"
        })

        if response.status_code == 200:
            page = response.text
            for network, pattern in network_patterns:
                hit = re.search(pattern, page)
                if hit:
                    profiles[network] = hit.group()

    except Exception as e:
        print(f" ⚠️ Error extrayendo perfiles de {url}: {e}")

    return profiles
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def format_phone(phone: str, country: str = "usa") -> str:
    """Normalise a phone number towards international ``+<digits>`` form.

    Rules, applied in order:
      * A number written with a leading ``+`` keeps it; all other
        punctuation is dropped.
      * A leading ``00`` international prefix is replaced by ``+``.
      * ``usa``: 10 digits get ``+1``; 11 digits starting with ``1`` get ``+``.
      * ``venezuela``: 10 digits get ``+58``; 11 digits starting with the
        trunk ``0`` drop it and get ``+58``; 12 digits starting with ``58``
        get ``+``.
      * Anything else is returned as bare digits, unchanged in length.

    Args:
        phone: Raw phone number; may contain spaces, dashes, parentheses.
        country: Country name, matched case-insensitively.

    Returns:
        The normalised number, or "" when ``phone`` is empty or has no digits.
    """
    if not phone:
        return ""

    raw = str(phone).strip()
    digits = re.sub(r'\D', '', raw)
    if not digits:
        return ""

    # Already international: keep the leading plus, drop any stray ones.
    if raw.startswith("+"):
        return f"+{digits}"
    if digits.startswith("00"):
        return f"+{digits[2:]}"

    region = (country or "").strip().lower()
    if region == "usa":
        if len(digits) == 10:
            return f"+1{digits}"
        if len(digits) == 11 and digits.startswith("1"):
            return f"+{digits}"
    elif region == "venezuela":
        if len(digits) == 10:  # local with area code
            return f"+58{digits}"
        if len(digits) == 11 and digits.startswith("0"):  # trunk prefix 0
            return f"+58{digits[1:]}"
        if len(digits) == 12 and digits.startswith("58"):
            return f"+{digits}"

    return digits
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def generate_whatsapp_link_raw(phone: str, message: str) -> str:
    """Build a ``wa.me`` link without importing the WhatsApp sender module.

    Only the digits of ``phone`` are kept, a leading ``00`` is dropped, and
    ``message`` is percent-encoded into the ``text`` query parameter.
    """
    digits_only = "".join(filter(str.isdigit, phone))
    if digits_only[:2] == "00":
        digits_only = digits_only[2:]
    encoded_text = quote(message)
    return f"https://wa.me/{digits_only}?text={encoded_text}"
|
| 150 |
+
|
| 151 |
+
def get_default_message(name: str, city: str, niche: str) -> str:
    """Return the generic fallback outreach message.

    ``niche`` is accepted for signature compatibility but does not affect
    the text.
    """
    greeting = f"Hola {name}, vi tu negocio en {city}. Me gustaría hablar contigo."
    return greeting
|
| 154 |
+
|
| 155 |
+
def clean_text(text: Optional[str]) -> Optional[str]:
    """Collapse whitespace runs to single spaces and convert to Title Case.

    Falsy input (None or "") is returned unchanged.
    """
    if text:
        words = text.split()
        # Plain str.title(): acronyms are not preserved ("LLC" -> "Llc").
        return " ".join(words).title()
    return text
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def calculate_quality_score(lead: Dict[str, Any]) -> int:
    """Score a lead from 0 to 100 by how much contact data it carries.

    Weights: email 30, phone 20, website 15, social profiles 15 (one third
    per profile found, capped at three), rating 10, address 10.

    Args:
        lead: Lead dictionary. ``social_profiles`` is expected to be a dict
            of network -> URL; None or any non-dict value counts as no
            profiles instead of raising.

    Returns:
        The score truncated to an int.
    """
    score = 0
    weights = {
        "email": 30,
        "phone": 20,
        "website": 15,
        "social_profiles": 15,
        "rating": 10,
        "address": 10
    }

    for field in ("email", "phone", "website", "rating", "address"):
        if lead.get(field):
            score += weights[field]

    social = lead.get("social_profiles")
    if isinstance(social, dict):
        social_count = sum(1 for v in social.values() if v)
        if social_count > 0:
            # Multiply before dividing so 1/2/3 profiles give exactly 5/10/15.
            score += weights["social_profiles"] * min(social_count, 3) / 3

    return int(score)
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def enrich_lead(lead: Dict[str, Any]) -> Dict[str, Any]:
    """Return an enriched copy of ``lead``; the input dict is not modified.

    Steps:
      1. Normalise ``name`` and ``address`` with ``clean_text``.
      2. Compute ``phone_formatted`` from ``phone`` and ``country``.
      3. If there is no email but there is a website, try to extract one.
      4. If there is a website, extract social profile links.
      5. If a formatted phone exists, build ``whatsapp_link`` from the
         niche's WhatsApp template in the project config.
      6. Compute ``quality_score`` and set ``enriched`` to True.

    Args:
        lead: Lead dictionary.

    Returns:
        A shallow copy of ``lead`` with the enrichment fields added.
    """
    enriched = lead.copy()

    # 1. Hygiene: normalize names and addresses
    enriched["name"] = clean_text(lead.get("name"))
    enriched["address"] = clean_text(lead.get("address"))

    # "name" is always set above (possibly to None), so use `or` for the fallback.
    print(f" 🔄 Enriqueciendo: {enriched.get('name') or 'Unknown'}")

    # 2. Format phone
    if lead.get("phone"):
        enriched["phone_formatted"] = format_phone(
            lead["phone"],
            lead.get("country", "usa")
        )

    # 3. Extract email from website if not present
    if not lead.get("email") and lead.get("website"):
        email = extract_email_from_website(lead["website"])
        if email:
            enriched["email"] = email
            print(f" ✅ Email encontrado: {email}")

    # 4. Extract social profiles
    if lead.get("website"):
        profiles = extract_social_profiles(lead["website"])
        enriched["social_profiles"] = profiles

        found = [k for k, v in profiles.items() if v]
        if found:
            print(f" ✅ Perfiles sociales: {', '.join(found)}")

    # 5. WhatsApp link auto-generation
    if enriched.get("phone_formatted"):
        # Load the project config. The module may be importable as part of
        # the l3_execution package or as a sibling script; if neither works,
        # fall back to an empty config rather than aborting enrichment.
        try:
            from l3_execution.apify_scraper import get_config
        except ImportError:
            try:
                from apify_scraper import get_config
            except ImportError:
                get_config = None
        config = (get_config() if get_config else None) or {}

        niche_cfg = config.get("niches", {}).get(lead.get("niche", "real_estate"), {})
        template = niche_cfg.get("templates", {}).get("whatsapp", "Hola {name}")

        class _KeepUnknown(dict):
            """Leave placeholders the lead cannot fill as literal text."""

            def __missing__(self, key):
                return "{" + key + "}"

        name = enriched.get("name") or "there"
        city = enriched.get("city") or "your area"
        try:
            msg = template.format_map(_KeepUnknown(name=name, city=city))
        except (ValueError, IndexError, AttributeError):
            # Malformed or positional template: use the generic message.
            msg = get_default_message(name, city, lead.get("niche", "real_estate"))

        enriched["whatsapp_link"] = generate_whatsapp_link_raw(enriched["phone_formatted"], msg)

    # 6. Quality score
    enriched["quality_score"] = calculate_quality_score(enriched)

    # Mark as enriched
    enriched["enriched"] = True

    return enriched
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def enrich_leads(leads: List[Dict[str, Any]], delay: float = 1.0) -> List[Dict[str, Any]]:
    """Enrich every lead in ``leads``, pausing between consecutive leads.

    Args:
        leads: List of lead dictionaries.
        delay: Seconds to sleep between two enrichments; there is no sleep
            after the last lead.

    Returns:
        A new list with the enriched leads, in input order. A short summary
        (leads with email, leads with social profiles) is printed.
    """
    print(f"\n🔍 Enriqueciendo {len(leads)} leads...\n")

    results = []
    last_index = len(leads) - 1

    for position, raw_lead in enumerate(leads):
        results.append(enrich_lead(raw_lead))
        if position < last_index:
            time.sleep(delay)

    # Stats
    email_hits = len([item for item in results if item.get("email")])
    social_hits = len([
        item for item in results
        if any(item.get("social_profiles", {}).values())
    ])
    total = len(results)

    print(f"\n📊 Resultados del enriquecimiento:")
    print(f" - Con email: {email_hits}/{total}")
    print(f" - Con perfiles sociales: {social_hits}/{total}")

    return results
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
if __name__ == "__main__":
    # Demo leads for testing
    demo_leads = [
        dict(
            name="Miami Luxury Realty",
            phone="786-555-0101",
            website="https://www.rei.com",  # Using real site for demo
            country="usa",
        ),
        dict(
            name="Test Business",
            phone="212-555-0202",
            website="https://example.com",
            country="venezuela",
        ),
    ]

    enriched = enrich_leads(demo_leads)

    print("\n📋 Leads enriquecidos:")
    for item in enriched:
        print(f" - {item.get('name')}")
        print(f" 📧 Email: {item.get('email', 'N/A')}")
        print(f" 📱 Phone: {item.get('phone_formatted', item.get('phone'))}")
|
lead_gen_pro/l3_execution/enrichment_pro.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
L3 Execution: Pro Enrichment Pipeline
|
| 3 |
+
====================================
|
| 4 |
+
Integración con Apollo, Clearbit y Lusha (Simulada vía API calls).
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from typing import Dict, Any, List
|
| 8 |
+
|
| 9 |
+
class ProEnrichment:
    """Simulated "pro" enrichment pipeline (Apollo and Clearbit stand-ins).

    No external API is called; each step only tags the lead dict with
    placeholder values when its feature flag is enabled in the config.
    """

    def __init__(self, config: Dict[str, Any]):
        """Store the ``enrichment`` section of ``config``.

        Args:
            config: Project configuration. None, a missing ``enrichment``
                key or a null ``enrichment`` value all mean "every
                enrichment disabled".
        """
        self.config = (config or {}).get("enrichment") or {}

    def enrich_with_apollo(self, lead: Dict[str, Any]) -> Dict[str, Any]:
        """Simulate Apollo enrichment for emails/phones.

        Applies only when ``apollo_enabled`` is truthy and the lead has an
        email. Mutates ``lead`` in place and returns it.
        """
        if self.config.get("apollo_enabled") and lead.get("email"):
            # The real call would go here: requests.post("https://api.apollo.io/v1/...", ...)
            lead["enrichment_source"] = "Apollo"
            lead["verified_email"] = True
            lead["direct_phone"] = lead.get("phone")  # Simulation
        return lead

    def enrich_with_clearbit(self, lead: Dict[str, Any]) -> Dict[str, Any]:
        """Simulate Clearbit enrichment for company data.

        Applies only when ``clearbit_enabled`` is truthy and the lead has a
        ``company``. Mutates ``lead`` in place and returns it.
        """
        if self.config.get("clearbit_enabled") and lead.get("company"):
            lead["company_size"] = "50-200"
            lead["company_revenue"] = "$10M+"
            lead["industry_nicho"] = lead.get("niche")
        return lead

    def full_enrichment_flow(self, leads: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Run Apollo then Clearbit enrichment on every lead.

        The result is built from the values the enrichers return, so the
        flow stays correct even if an enricher is later changed to return a
        new dict instead of mutating its argument.

        Returns:
            A new list with the enriched leads, in input order.
        """
        return [
            self.enrich_with_clearbit(self.enrich_with_apollo(lead))
            for lead in leads
        ]
|
lead_gen_pro/l3_execution/instantly_sender.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
L3 Execution: Instantly.ai Email Sender
|
| 3 |
+
=========================================
|
| 4 |
+
Envía emails profesionales usando la API de Instantly.ai.
|
| 5 |
+
Implementa la capa L3 del framework DOE.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import json
|
| 10 |
+
import time
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
from typing import Optional, List, Dict, Any
|
| 13 |
+
|
| 14 |
+
try:
|
| 15 |
+
import requests
|
| 16 |
+
REQUESTS_AVAILABLE = True
|
| 17 |
+
except ImportError:
|
| 18 |
+
REQUESTS_AVAILABLE = False
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Root URL of the Instantly.ai REST API (v1).
INSTANTLY_API_BASE = "https://api.instantly.ai/api/v1"

# Built-in email templates, keyed by niche.
# They are only a fallback: templates declared in config.json are used first.
DEFAULT_TEMPLATES = dict(
    real_estate=dict(
        subject="Quick question about {city} real estate",
        body="Hi {name},\n\nI saw your work in {city}.\n\nBest,\n{sender_name}",
    ),
)
|
| 32 |
+
|
| 33 |
+
def get_config():
    """
    Load the project configuration from ``config.json``.

    The file is looked up one directory above this module.

    Returns:
        The parsed JSON document, or an empty dict when the file does not exist.

    Raises:
        json.JSONDecodeError: if the file exists but does not contain valid JSON.
    """
    config_path = os.path.join(os.path.dirname(__file__), "..", "config.json")
    if not os.path.exists(config_path):
        return {}

    # Decode explicitly as UTF-8: relying on the platform default encoding
    # corrupts (or fails on) non-ASCII template text on non-UTF-8 locales.
    with open(config_path, "r", encoding="utf-8") as f:
        return json.load(f)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def get_instantly_api_key() -> Optional[str]:
    """
    Resolve the Instantly API key.

    Lookup order:
        1. the ``INSTANTLY_API_KEY`` environment variable (ignored when empty);
        2. the ``instantly_api_key`` entry of ``config.json`` located one
           directory above this module.

    Returns:
        The key, or None when neither source provides one.
    """
    key = os.environ.get("INSTANTLY_API_KEY")
    if key:
        return key

    config_path = os.path.join(os.path.dirname(__file__), "..", "config.json")
    if os.path.exists(config_path):
        # Explicit UTF-8 so that a config file containing non-ASCII text
        # (e.g. localized templates) loads regardless of the platform locale.
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        return config.get("instantly_api_key")

    return None
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def format_email_template(
    template_type: str,
    lead: Dict[str, Any],
    sender_name: str = "Your Name"
) -> Dict[str, str]:
    """
    Render the subject and body of an email for one lead.

    The template is taken from ``niches.<template_type>.templates.email`` in
    the configuration; when that entry is missing, DEFAULT_TEMPLATES is used
    (falling back to its "real_estate" entry for unknown niches).

    Args:
        template_type: Niche key used to select the template (e.g. 'real_estate').
        lead: Lead dictionary; its 'name' and 'city' values fill the placeholders.
        sender_name: Signature name, used only when the configuration does not
            define ``sender.name``.

    Returns:
        Dictionary with 'subject' and 'body'.
    """
    config = get_config()
    niche_config = config.get("niches", {}).get(template_type, {})
    template = niche_config.get("templates", {}).get("email")

    if not template:
        # Fallback to the built-in defaults
        template = DEFAULT_TEMPLATES.get(template_type, DEFAULT_TEMPLATES["real_estate"])

    # One shared set of placeholders for subject AND body, so a subject that
    # uses {sender_name} no longer raises KeyError.  `or` (instead of a .get
    # default) also covers keys that are present but None/empty, which would
    # otherwise be rendered as the literal text "None".
    fields = {
        "name": lead.get("name") or "there",
        "city": lead.get("city") or "your area",
        # The configured sender name wins over the argument when available
        "sender_name": config.get("sender", {}).get("name", sender_name),
    }

    return {
        "subject": template.get("subject", "Hello").format(**fields),
        "body": template.get("body", "Hi").format(**fields),
    }
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def send_email_instantly(
    to_email: str,
    subject: str,
    body: str,
    from_email: Optional[str] = None,
    campaign_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Post a single email to the Instantly.ai ``/email/send`` endpoint.

    Args:
        to_email: Recipient address.
        subject: Subject line.
        body: Plain-text body.
        from_email: Sender address; only included in the request when truthy.
        campaign_id: Instantly campaign ID; only included when truthy.

    Returns:
        ``{"success": True, "response": <decoded JSON>}`` on HTTP 200,
        otherwise ``{"success": False, "error": ...}``.  No exception is
        propagated: any failure is reported through the returned dict.
    """
    api_key = get_instantly_api_key()
    if not api_key:
        print("❌ INSTANTLY_API_KEY no configurado")
        return {"success": False, "error": "API key missing", "mode": "demo"}

    if not REQUESTS_AVAILABLE:
        return {"success": False, "error": "requests not available"}

    try:
        endpoint = f"{INSTANTLY_API_BASE}/email/send"

        request_body = {"api_key": api_key, "to": to_email, "subject": subject, "body": body}
        # Optional fields are sent only when the caller supplied a value
        optional_fields = (("from", from_email), ("campaign_id", campaign_id))
        request_body.update({field: value for field, value in optional_fields if value})

        reply = requests.post(endpoint, json=request_body, timeout=30)

        if reply.status_code != 200:
            print(f"❌ Error enviando email: {reply.status_code}")
            return {"success": False, "error": reply.text}

        print(f"✅ Email enviado a {to_email}")
        return {"success": True, "response": reply.json()}

    except Exception as exc:
        print(f"❌ Error: {exc}")
        return {"success": False, "error": str(exc)}
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def send_email_to_lead(
    lead: Dict[str, Any],
    sender_name: str = "Your Name"
) -> Dict[str, Any]:
    """
    Render the niche template for a lead and send it.

    Args:
        lead: Lead dictionary; 'email' is required, 'niche' defaults to
            'real_estate', 'id' is echoed back in the result.
        sender_name: Name used to sign the email.

    Returns:
        The sender's result dictionary, extended with 'lead_id' and
        'to_email'; or a failure dictionary when the lead has no email.
    """
    recipient = lead.get("email")
    if not recipient:
        return {"success": False, "error": "No email address", "lead_id": lead.get("id")}

    message = format_email_template(lead.get("niche", "real_estate"), lead, sender_name)
    outcome = send_email_instantly(
        to_email=recipient,
        subject=message["subject"],
        body=message["body"],
    )

    # Tag the result so callers can correlate it with the lead
    outcome.update(lead_id=lead.get("id"), to_email=recipient)
    return outcome
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def batch_send_emails(
    leads: List[Dict[str, Any]],
    sender_name: str = "Your Name",
    delay_seconds: float = 30.0,
    max_per_hour: int = 20
) -> List[Dict[str, Any]]:
    """
    Send emails to a batch of leads, pausing between consecutive sends.

    Args:
        leads: Leads to contact; entries without an 'email' are ignored.
        sender_name: Name used to sign the emails.
        delay_seconds: Pause between two consecutive sends.
        max_per_hour: Maximum number of leads contacted by this call.  The
            function does not track wall-clock time; it simply caps the batch.

    Returns:
        One result dictionary per attempted send, in order.
    """
    # Filter leads with emails
    leads_with_email = [lead for lead in leads if lead.get("email")]

    if not leads_with_email:
        print("❌ No hay leads con email")
        return []

    # Only the capped batch is actually contacted; max(0, ...) keeps a negative
    # cap from being interpreted as "everything but the last N" by the slice.
    batch = leads_with_email[:max(0, max_per_hour)]

    print(f"\n📧 Enviando {len(batch)} emails...")

    results = []
    sent_count = 0
    last_index = len(batch) - 1

    for i, lead in enumerate(batch):
        result = send_email_to_lead(lead, sender_name)
        results.append(result)

        if result.get("success"):
            sent_count += 1

        # Rate limiting: wait only BETWEEN sends.  Comparing against the capped
        # batch (not the full list) avoids a pointless sleep after the last send.
        if i < last_index:
            print(f" ⏳ Esperando {delay_seconds}s...")
            time.sleep(delay_seconds)

    print(f"\n📊 Emails enviados: {sent_count}/{len(batch)}")
    return results
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def demo_email_preview(lead: Dict[str, Any], sender_name: str = "Your Name") -> None:
    """Render the email for ``lead`` and print it to stdout; nothing is sent."""
    preview = format_email_template(lead.get("niche", "real_estate"), lead, sender_name)
    rule = "=" * 50

    print(f"\n{rule}")
    print(f"📧 PREVIEW - Email para: {lead.get('email', 'N/A')}")
    print(rule)
    print(f"Subject: {preview['subject']}")
    print(f"\n{preview['body']}")
    print(f"{rule}\n")
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
if __name__ == "__main__":
    # Manual demo: render the email for a sample lead without sending anything.
    demo_lead = dict(
        id=1,
        name="Miami Luxury Realty",
        email="info@example.com",
        city="Miami, FL",
        niche="real_estate",
    )

    print("🎭 Modo demo - Preview de email:")
    demo_email_preview(demo_lead, sender_name="Test User")
|
lead_gen_pro/l3_execution/lead_scoring.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
L3 Execution: Lead Scoring & Qualification (MQL/SQL)
|
| 3 |
+
===================================================
|
| 4 |
+
Modelo de scoring avanzado para calificar leads basados en datos firmográficos y geográficos.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from typing import Dict, Any, List
|
| 8 |
+
|
| 9 |
+
class LeadScorer:
    """
    Rule-based lead scorer producing a 0-100 score and a qualification tier.

    Points are awarded for geography (max 30), contactability (max 40) and
    relevance (max 30).
    """

    # Full state names for the default region codes, so that a location
    # written as "Austin, Texas" still counts as a hot region.
    _REGION_NAMES = {
        "CA": "CALIFORNIA",
        "NY": "NEW YORK",
        "TX": "TEXAS",
        "FL": "FLORIDA",
        "IL": "ILLINOIS",
    }

    # Niches that earn the relevance bonus, in normalized form.
    _PREMIUM_NICHES = frozenset({"real_estate", "healthcare", "saas"})

    def __init__(self):
        # Key US regions and cities
        self.hot_regions = ["CA", "NY", "TX", "FL", "IL"]
        self.priority_cities = ["San Francisco", "New York", "Austin", "Miami", "Chicago"]

    @staticmethod
    def _words(text) -> str:
        """Upper-case ``text`` and reduce it to single-space separated alphanumeric words."""
        cleaned = "".join(ch if ch.isalnum() else " " for ch in str(text).upper())
        return " ".join(cleaned.split())

    def _is_hot_region(self, location) -> bool:
        """
        Tell whether ``location`` names one of the hot regions.

        Matching is done on whole words: a plain substring test would make
        "CA" match "CARACAS" or "CHICAGO", and "IL" match "BRAZIL".
        """
        haystack = f" {self._words(location)} "
        for region in self.hot_regions:
            for candidate in (region, self._REGION_NAMES.get(str(region).upper())):
                needle = self._words(candidate) if candidate else ""
                if needle and f" {needle} " in haystack:
                    return True
        return False

    def calculate_score(self, lead: Dict[str, Any]) -> Dict[str, Any]:
        """
        Compute the score of a lead, from 0 to 100.

        Args:
            lead: Lead dictionary. The keys read are 'location', 'city',
                'email', 'phone', 'niche' and 'linkedin_url'; missing or None
                values simply earn no points.

        Returns:
            Dictionary with 'score' (int), 'tier' ("SQL (Hot)" from 80,
            "MQL (Warm)" from 40, otherwise "Lead (Cold)") and
            'score_details' (comma-separated list of the rules that fired).
        """
        score = 0
        details = []

        # 1. Geographic scoring (max 30 pts); `or ""` turns None into a string
        location = lead.get("location") or ""
        city = lead.get("city") or ""

        if self._is_hot_region(location):
            score += 20
            details.append("Region de alta prioridad (+20)")

        # Case-insensitive so that "miami" and "Miami" are treated alike
        city_lower = str(city).lower()
        if city and any(str(p_city).lower() in city_lower for p_city in self.priority_cities):
            score += 10
            details.append("Ciudad estratégica (+10)")

        # 2. Contactability scoring (max 40 pts)
        if lead.get("email"):
            score += 25
            details.append("Email disponible (+25)")
        if lead.get("phone"):
            score += 15
            details.append("Teléfono disponible (+15)")

        # 3. Relevance scoring (max 30 pts)
        # Normalize so that a None niche does not crash and human-readable
        # labels such as "Real Estate" map to the "real_estate" key.
        niche = str(lead.get("niche") or "").strip().lower().replace(" ", "_").replace("-", "_")
        if niche in self._PREMIUM_NICHES:
            score += 20
            details.append("Nicho premium (+20)")

        if lead.get("linkedin_url"):
            score += 10
            details.append("Enlace LinkedIn (+10)")

        # Determine the tier
        if score >= 80:
            tier = "SQL (Hot)"
        elif score >= 40:
            tier = "MQL (Warm)"
        else:
            tier = "Lead (Cold)"

        return {
            "score": score,
            "tier": tier,
            "score_details": ", ".join(details)
        }

    def process_batch(self, leads: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Score every lead of ``leads`` in place.

        Each dictionary is updated with the 'score', 'tier' and
        'score_details' keys; the same list object is returned.
        """
        for lead in leads:
            lead.update(self.calculate_score(lead))
        return leads
|
lead_gen_pro/l3_execution/supabase_sync.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
|
| 5 |
+
# Supabase configuration.
#
# SECURITY: credentials are read from the environment and must never be
# committed.  A service_role key grants full, policy-bypassing access to the
# project; the key that used to be hard-coded at this spot has been published
# with the repository and must be rotated in the Supabase dashboard.
#
#   SUPABASE_URL               - project URL (defaults to the original project)
#   SUPABASE_SERVICE_ROLE_KEY  - service role key (no default; requests are
#                                rejected by Supabase until it is provided)
import os

SUPABASE_URL = os.environ.get("SUPABASE_URL", "https://nvssvykqxaurtlgwxwwy.supabase.co")
SERVICE_ROLE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY", "")

# Headers sent with every REST call.
HEADERS = {
    "apikey": SERVICE_ROLE_KEY,
    "Authorization": f"Bearer {SERVICE_ROLE_KEY}",
    "Content-Type": "application/json",
    # NOTE(review): presumably the PostgREST upsert preference - confirm
    "Prefer": "resolution=merge-duplicates"
}
|
| 15 |
+
|
| 16 |
+
def sync_lead_to_supabase(lead_data):
    """
    Sync a single lead to the Supabase 'leads' table.

    Args:
        lead_data: Mapping with the local (English) lead fields.  Missing
            fields are sent as None, except the ones given a default below.

    Returns:
        True when Supabase answers 200, 201 or 204; False on any other
        status or when the request itself fails (the error is printed).
    """
    now = datetime.now().isoformat()

    # Keys are the Spanish column names of the 'leads' table
    # (per the original author's note on the schema).
    payload = {
        "nombre": lead_data.get("name") or "Sin nombre",
        "telefono": lead_data.get("phone"),
        "telefono_formateado": lead_data.get("phone_formatted"),
        "correo": lead_data.get("email"),
        "direccion": lead_data.get("address"),
        "website": lead_data.get("website"),
        "rating": lead_data.get("rating"),
        "reviews_count": lead_data.get("reviews_count"),
        "categoria": lead_data.get("category") or lead_data.get("niche"),
        "fuente": lead_data.get("source") or "lead_gen_pro",
        "nicho": lead_data.get("niche"),
        "pais": lead_data.get("country"),
        "ciudad": lead_data.get("city"),
        "enriquecido": lead_data.get("enriched", False),
        "estado": lead_data.get("status") or "nuevo",
        "prioridad": lead_data.get("priority", 0),
        "whatsapp_enviado": lead_data.get("whatsapp_sent", False),
        "whatsapp_link": lead_data.get("whatsapp_link"),
        "correo_enviado": lead_data.get("email_sent", False),
        "es_oportunidad": lead_data.get("is_opportunity", False),
        "valor_oportunidad": lead_data.get("opportunity_value"),
        "notas_oportunidad": lead_data.get("opportunity_notes"),
        "quality_score": lead_data.get("quality_score", 0),
        "creado_en": lead_data.get("created_at") or now,
        "actualizado_en": now
    }

    try:
        # A timeout is mandatory here: without one, a stalled connection
        # would block the whole batch sync indefinitely.
        response = requests.post(
            f"{SUPABASE_URL}/rest/v1/leads",
            headers=HEADERS,
            json=payload,
            timeout=30,
        )
        return response.status_code in [200, 201, 204]
    except Exception as e:
        # Best effort: report and let the caller count this lead as failed
        print(f"[ERROR] Lead Sync Failed: {e}")
        return False
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def sync_all_leads_to_supabase(db_path: str = None, limit: int = 500) -> dict:
    """
    Read the most recent leads from the local SQLite database and push them
    to Supabase one by one.

    Note that no "already synced" filter is applied: the ``limit`` most
    recently created rows are sent on every call.

    Args:
        db_path: Path of the SQLite file; defaults to ``leads_doe.db`` one
            directory above this module.
        limit: Maximum number of rows to read (most recent first).

    Returns:
        ``{"synced", "errors", "total"}`` counters; when the database is
        missing or cannot be read, the counters are 0 and a "message" key
        describes the problem.
    """
    import sqlite3
    import os

    if db_path is None:
        db_path = os.path.join(os.path.dirname(__file__), "..", "leads_doe.db")

    if not os.path.exists(db_path):
        return {"synced": 0, "errors": 0, "total": 0, "message": "DB not found"}

    try:
        conn = sqlite3.connect(db_path, timeout=10)
        try:
            conn.row_factory = sqlite3.Row
            # Bound parameter instead of string interpolation: the limit can
            # never be interpreted as SQL.
            rows = conn.execute(
                "SELECT * FROM leads ORDER BY created_at DESC LIMIT ?",
                (int(limit),),
            ).fetchall()
        finally:
            # Always release the connection, even when the query fails
            conn.close()
    except Exception as e:
        return {"synced": 0, "errors": 0, "total": 0, "message": str(e)}

    synced, errors = 0, 0
    for row in rows:
        if sync_lead_to_supabase(dict(row)):
            synced += 1
        else:
            errors += 1

    return {"synced": synced, "errors": errors, "total": len(rows)}
|
lead_gen_pro/main.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Main Entry Point for Lead Generation System
|
| 3 |
+
============================================
|
| 4 |
+
Wrapper script that runs the DOE orchestrator.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
|
| 10 |
+
# Ensure we're using the project's venv interpreter when one exists.
# Paths are made absolute before comparing: a relative __file__ would yield a
# relative venv_path that can never equal sys.executable, so the script would
# re-exec itself forever.  The environment flag is a second safety net that
# guarantees at most one re-exec even if the two paths still differ in form
# (e.g. through symlinks).
_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
venv_path = os.path.join(_BASE_DIR, "venv", "bin", "python")
if (
    os.path.exists(venv_path)
    and os.path.abspath(sys.executable) != venv_path
    and os.environ.get("LEAD_GEN_VENV_REEXEC") != "1"
):
    os.environ["LEAD_GEN_VENV_REEXEC"] = "1"
    os.execv(venv_path, [venv_path] + sys.argv)

# Make the sibling packages (l2_orchestration, l3_execution) importable
sys.path.insert(0, _BASE_DIR)
|
| 17 |
+
|
| 18 |
+
from l2_orchestration.orchestrator import (
|
| 19 |
+
run_full_pipeline,
|
| 20 |
+
run_scraping_only,
|
| 21 |
+
run_whatsapp_outreach,
|
| 22 |
+
run_email_outreach,
|
| 23 |
+
show_dashboard
|
| 24 |
+
)
|
| 25 |
+
from l3_execution.database_doe import init_db
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
if __name__ == "__main__":
    # Command-line front end: parse the requested action and delegate to the
    # orchestrator.  Exactly one action runs per invocation (first match wins).
    import argparse

    parser = argparse.ArgumentParser(
        description="🚀 Lead Generation DOE System",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Ejemplos:
  python main.py --pipeline --niche real_estate --country usa --location "Miami, FL"
  python main.py --scrape --niche insurance --country venezuela --location "Caracas"
  python main.py --outreach-wa --limit 10
  python main.py --dashboard
  python main.py --streamlit
"""
    )

    # Actions
    parser.add_argument("--pipeline", action="store_true", help="Ejecutar pipeline completo")
    parser.add_argument("--scrape", action="store_true", help="Solo scraping de leads")
    parser.add_argument("--outreach-wa", action="store_true", help="Generar links de WhatsApp")
    parser.add_argument("--outreach-email", action="store_true", help="Preview/enviar emails")
    parser.add_argument("--dashboard", action="store_true", help="Dashboard en texto")
    parser.add_argument("--streamlit", action="store_true", help="Dashboard web Streamlit")

    # Options
    parser.add_argument("--niche", choices=["real_estate", "insurance"], default="real_estate",
                        help="Nicho objetivo (default: real_estate)")
    parser.add_argument("--country", choices=["usa", "venezuela"], default="usa",
                        help="País objetivo (default: usa)")
    parser.add_argument("--limit", type=int, default=10, help="Límite de leads (default: 10)")
    parser.add_argument("--location", type=str, help="Ubicación específica (Ciudad, Estado o ZIP)")
    parser.add_argument("--demo", action="store_true", help="Modo demo sin API")

    args = parser.parse_args()

    # Initialize DB
    init_db()

    # Execute action
    if args.pipeline:
        run_full_pipeline(
            niche=args.niche,
            country=args.country,
            location=args.location,
            limit_per_city=args.limit,
            demo=args.demo
        )
    elif args.scrape:
        run_scraping_only(
            niche=args.niche,
            country=args.country,
            location=args.location,
            limit=args.limit
        )
    elif args.outreach_wa:
        run_whatsapp_outreach(limit=args.limit)
    elif args.outreach_email:
        # Emails are only previewed from this entry point, never sent
        run_email_outreach(limit=args.limit, preview_only=True)
    elif args.dashboard:
        show_dashboard()
    elif args.streamlit:
        import subprocess
        dashboard_path = os.path.join(os.path.dirname(__file__), "dashboard", "app.py")
        venv_streamlit = os.path.join(os.path.dirname(__file__), "venv", "bin", "streamlit")
        # The venv launcher does not exist in containerized deployments (the
        # venv directory is excluded from the image); fall back to running
        # Streamlit as a module of the current interpreter instead of failing
        # with FileNotFoundError.
        if os.path.exists(venv_streamlit):
            launcher = [venv_streamlit]
        else:
            launcher = [sys.executable, "-m", "streamlit"]
        subprocess.run(launcher + ["run", dashboard_path, "--server.headless", "true"])
    else:
        # Show help if no action
        parser.print_help()
        print("\n📊 Estado actual:")
        show_dashboard()
|
lead_gen_pro/main_pro.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Main Entry Point for Lead Generation PRO - COMPLETAMENTE COMENTADO
|
| 3 |
+
===================================================================
|
| 4 |
+
|
| 5 |
+
Este es el punto de entrada principal del sistema Lead Generation PRO.
|
| 6 |
+
Funciona como una CLI (Command Line Interface) que permite ejecutar el
|
| 7 |
+
pipeline completo de generación de leads mediante argumentos de línea de comandos.
|
| 8 |
+
|
| 9 |
+
AUTOR: Lead Generation PRO System
|
| 10 |
+
FECHA: 2025
|
| 11 |
+
VERSION: 2.0
|
| 12 |
+
|
| 13 |
+
USO DESDE LÍNEA DE COMANDOS:
|
| 14 |
+
python3 main_pro.py --pipeline --niche "Insurance" --city "Miami" --state "FL" --country "USA"
|
| 15 |
+
|
| 16 |
+
COMPONENTES PRINCIPALES:
|
| 17 |
+
- Parser de argumentos CLI (argparse)
|
| 18 |
+
- Inicialización de base de datos
|
| 19 |
+
- Delegación al ProOrchestrator para ejecución del pipeline
|
| 20 |
+
|
| 21 |
+
FLUJO DE EJECUCIÓN:
|
| 22 |
+
1. Parsear argumentos de línea de comandos
|
| 23 |
+
2. Inicializar base de datos SQLite
|
| 24 |
+
3. Construir objeto de configuración con datos de ubicación
|
| 25 |
+
4. Llamar a ProOrchestrator.run_pro_pipeline()
|
| 26 |
+
5. Ejecutar scraping según tipo (B2B, B2C o ambos)
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
# =============================================================================
|
| 30 |
+
# IMPORTS
|
| 31 |
+
# =============================================================================
|
| 32 |
+
|
| 33 |
+
import os # Para operaciones con sistema de archivos y variables de entorno
|
| 34 |
+
import argparse # Para parsear argumentos de línea de comandos
|
| 35 |
+
|
| 36 |
+
# Importar el orquestador principal desde la capa L2
|
| 37 |
+
# El orquestador coordina todo el flujo de trabajo
|
| 38 |
+
from l2_orchestration.orchestrator_pro import ProOrchestrator
|
| 39 |
+
|
| 40 |
+
# Importar función de inicialización de base de datos
|
| 41 |
+
# Esta función crea las tablas necesarias si no existen
|
| 42 |
+
from l3_execution.database_doe import init_db
|
| 43 |
+
|
| 44 |
+
# =============================================================================
|
| 45 |
+
# FUNCIÓN PRINCIPAL
|
| 46 |
+
# =============================================================================
|
| 47 |
+
|
| 48 |
+
def main():
    """
    Command-line entry point of the PRO lead generation pipeline.

    Steps:
        1. Declare and parse the CLI arguments.
        2. Initialize the database.
        3. Without ``--pipeline``: print the help text and stop.
        4. Otherwise run ``ProOrchestrator.run_pro_pipeline`` with the
           parsed options.
    """
    cli = argparse.ArgumentParser(
        description="Lead Generation PRO DOE System - Pipeline de generacion de leads B2B y B2C",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Ejemplos de uso:
  # Buscar leads B2B de seguros en Miami
  python3 main_pro.py --pipeline --niche "Insurance" --city "Miami" --state "FL" --type b2b

  # Buscar leads B2C en redes sociales
  python3 main_pro.py --pipeline --niche "Real Estate" --city "Austin" --type b2c --b2c-platforms twitter facebook

  # Buscar ambos tipos con todas las plataformas
  python3 main_pro.py --pipeline --niche "Healthcare" --country "USA" --type both --limit 50
"""
    )

    # --- Main arguments -------------------------------------------------
    # --pipeline: flag that actually starts the pipeline
    cli.add_argument(
        "--pipeline",
        action="store_true",
        help="Ejecutar Pipeline Pro (requerido para iniciar el scraping)"
    )
    # --niche: market niche / industry to search for
    cli.add_argument(
        "--niche",
        type=str,
        default="Real Estate",
        help="Nicho de mercado a buscar (ej: Real Estate, Insurance, Healthcare)"
    )
    # --region: legacy region, kept for backward compatibility
    cli.add_argument(
        "--region",
        type=str,
        default="CA Bay Area",
        help="Región legacy (opcional si se usa city/state/country)"
    )

    # --- Location arguments (geo-targeting) -----------------------------
    cli.add_argument(
        "--city",
        type=str,
        help="Ciudad objetivo (ej: Miami, Austin, New York)"
    )
    cli.add_argument(
        "--state",
        type=str,
        help="Estado/Provincia (ej: FL, CA, TX)"
    )
    cli.add_argument(
        "--country",
        type=str,
        help="País (ej: USA, Canada, Mexico)"
    )

    # --- Lead type (B2B, B2C or both) -----------------------------------
    cli.add_argument(
        "--type",
        choices=["b2b", "b2c", "both"],
        default="both",
        help="""
        Tipo de leads a buscar:
        - b2b: Solo empresas y negocios (LinkedIn, Google Maps)
        - b2c: Solo consumidores (Twitter, Reddit, YouTube, Facebook)
        - both: Ambos tipos (por defecto)
        """
    )

    # --- B2B platforms (one or more values) -----------------------------
    cli.add_argument(
        "--b2b-platforms",
        nargs="+",
        choices=["linkedin", "youtube", "tiktok", "instagram", "google_maps"],
        default=["linkedin", "google_maps"],
        help="""
        Plataformas B2B a escanear:
        - linkedin: Perfiles profesionales
        - google_maps: Negocios locales
        - youtube: Canales de YouTube
        - tiktok: Cuentas por hashtags
        - instagram: Perfiles por hashtags
        """
    )

    # --- B2C platforms (one or more values) -----------------------------
    cli.add_argument(
        "--b2c-platforms",
        nargs="+",
        choices=["youtube", "reddit", "twitter", "facebook"],
        default=["youtube", "reddit", "twitter"],
        help="""
        Plataformas B2C a escanear:
        - youtube: Comentarios en videos
        - reddit: Posts en subreddits
        - twitter: Tweets públicos
        - facebook: Posts públicos y grupos públicos
        """
    )

    # --- Configuration --------------------------------------------------
    # --limit: maximum number of leads per platform
    cli.add_argument(
        "--limit",
        type=int,
        default=20,
        help="Límite de leads a obtener por cada plataforma (default: 20)"
    )

    options = cli.parse_args()

    # The database is initialized on every invocation, before the
    # --pipeline check below.
    init_db()

    if not options.pipeline:
        # Nothing to run without --pipeline: show the usage instead
        cli.print_help()
        return

    # Keep only the location fields the user actually provided
    location_data = {
        field: value
        for field, value in (
            ("city", options.city),
            ("state", options.state),
            ("country", options.country),
        )
        if value
    }

    # Delegate the run itself to the orchestrator
    ProOrchestrator().run_pro_pipeline(
        niche=options.niche,
        region=options.region,
        location_data=location_data,
        lead_type=options.type,
        b2b_platforms=options.b2b_platforms,
        b2c_platforms=options.b2c_platforms,
        limit=options.limit
    )
|
| 248 |
+
|
| 249 |
+
# =============================================================================
|
| 250 |
+
# PUNTO DE ENTRADA
|
| 251 |
+
# =============================================================================
|
| 252 |
+
|
| 253 |
+
if __name__ == "__main__":
    # Standard Python entry point: run the CLI only when this file is
    # executed directly, not when it is imported as a module.
    main()
|
| 261 |
+
|
| 262 |
+
# =============================================================================
|
| 263 |
+
# FIN DEL ARCHIVO
|
| 264 |
+
# =============================================================================
|
lead_gen_pro/requirements.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Lead Generation System - Requirements
|
| 2 |
+
# =====================================
|
| 3 |
+
|
| 4 |
+
# Core
|
| 5 |
+
requests>=2.31.0
|
| 6 |
+
streamlit>=1.30.0
|
| 7 |
+
pandas>=2.0.0
|
| 8 |
+
plotly>=5.18.0
|
| 9 |
+
apify-client>=1.6.0
|
| 10 |
+
|
| 11 |
+
# WhatsApp & Automation
|
| 12 |
+
playwright>=1.40.0
|
| 13 |
+
|
| 14 |
+
# Utilities
|
| 15 |
+
python-dotenv>=1.0.0
|
| 16 |
+
schedule>=1.2.0
|
lead_gen_pro/scraper.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Google Maps Lead Scraper Module
|
| 3 |
+
================================
|
| 4 |
+
Uses SerpApi to search for business leads on Google Maps.
|
| 5 |
+
Supports real estate agents and insurance brokers in USA and Venezuela.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import json
|
| 10 |
+
import time
|
| 11 |
+
from typing import Optional, List, Dict, Any
|
| 12 |
+
from dataclasses import dataclass
|
| 13 |
+
|
| 14 |
+
# You'll need to install: pip install google-search-results
|
| 15 |
+
try:
|
| 16 |
+
from serpapi import GoogleSearch
|
| 17 |
+
SERPAPI_AVAILABLE = True
|
| 18 |
+
except ImportError:
|
| 19 |
+
SERPAPI_AVAILABLE = False
|
| 20 |
+
print("⚠️ SerpApi no está instalado. Ejecuta: pip install google-search-results")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dataclass
class SearchConfig:
    """Settings that describe a single lead search."""

    # Free-text search query
    query: str
    # Location string to search in
    location: str
    # Country code: 'usa' or 'venezuela'
    country: str
    # Niche code: 'real_estate' or 'insurance'
    niche: str
    # Maximum number of results (defaults to 20)
    limit: int = 20
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# Predefined search queries for each niche
|
| 34 |
+
SEARCH_QUERIES = {
|
| 35 |
+
"real_estate": {
|
| 36 |
+
"usa": [
|
| 37 |
+
"real estate agent",
|
| 38 |
+
"realtor",
|
| 39 |
+
"real estate broker",
|
| 40 |
+
"property agent",
|
| 41 |
+
],
|
| 42 |
+
"venezuela": [
|
| 43 |
+
"inmobiliaria",
|
| 44 |
+
"agente inmobiliario",
|
| 45 |
+
"bienes raices",
|
| 46 |
+
"corredor inmobiliario",
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
"insurance": {
|
| 50 |
+
"usa": [
|
| 51 |
+
"insurance agent",
|
| 52 |
+
"insurance broker",
|
| 53 |
+
"insurance agency",
|
| 54 |
+
"life insurance agent",
|
| 55 |
+
],
|
| 56 |
+
"venezuela": [
|
| 57 |
+
"corredor de seguros",
|
| 58 |
+
"agente de seguros",
|
| 59 |
+
"aseguradora",
|
| 60 |
+
"seguros de vida",
|
| 61 |
+
]
|
| 62 |
+
}
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
# Major cities for each country
|
| 66 |
+
CITIES = {
|
| 67 |
+
"usa": [
|
| 68 |
+
"Miami, FL",
|
| 69 |
+
"Houston, TX",
|
| 70 |
+
"Los Angeles, CA",
|
| 71 |
+
"New York, NY",
|
| 72 |
+
"Chicago, IL",
|
| 73 |
+
"Dallas, TX",
|
| 74 |
+
"Phoenix, AZ",
|
| 75 |
+
"Orlando, FL",
|
| 76 |
+
],
|
| 77 |
+
"venezuela": [
|
| 78 |
+
"Caracas",
|
| 79 |
+
"Maracaibo",
|
| 80 |
+
"Valencia",
|
| 81 |
+
"Barquisimeto",
|
| 82 |
+
"Maracay",
|
| 83 |
+
"Puerto La Cruz",
|
| 84 |
+
"San Cristobal",
|
| 85 |
+
]
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def get_serpapi_key() -> Optional[str]:
    """
    Return the SerpApi key, or None when no key is configured.

    Lookup order:
        1. The ``SERPAPI_KEY`` environment variable.
        2. The ``serpapi_key`` entry of ``config.json`` located next to this module.

    Returns:
        The key string, or None if neither source provides a non-empty value.

    Raises:
        OSError / ValueError: if ``config.json`` exists but cannot be read or
        is not valid JSON.
    """
    # Environment variable takes precedence over the config file
    key = os.environ.get("SERPAPI_KEY")
    if key:
        return key

    config_path = os.path.join(os.path.dirname(__file__), "config.json")
    if not os.path.exists(config_path):
        return None

    # Decode explicitly: the platform default encoding (e.g. cp1252 on Windows)
    # breaks on UTF-8 content; "utf-8-sig" also tolerates a leading BOM.
    with open(config_path, "r", encoding="utf-8-sig") as f:
        config = json.load(f)

    # A JSON root that is not an object cannot hold the key
    if not isinstance(config, dict):
        return None

    # Normalise an empty/missing entry to None
    return config.get("serpapi_key") or None
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def search_google_maps(
    query: str,
    location: str,
    api_key: str,
    limit: int = 20
) -> List[Dict[str, Any]]:
    """
    Search Google Maps via SerpApi.

    The location is appended to the query text ("<query> in <location>").

    Args:
        query: Search query (e.g., "real estate agent")
        location: Location string (e.g., "Miami, FL")
        api_key: SerpApi API key
        limit: Maximum number of results; values <= 0 return an empty list
            without calling the API

    Returns:
        List of dicts with keys name, address, phone, website, rating,
        reviews_count, place_id and thumbnail (missing fields are None).
        An empty list is returned on any error, which is printed.
    """
    if not SERPAPI_AVAILABLE:
        # Nothing is installed automatically: tell the user what to run
        print("❌ SerpApi no disponible. Ejecuta: pip install google-search-results")
        return []

    if limit <= 0:
        return []

    results = []

    params = {
        "engine": "google_maps",
        "q": f"{query} in {location}",
        "type": "search",
        "api_key": api_key,
    }

    try:
        search = GoogleSearch(params)
        data = search.get_dict()

        # SerpApi reports failures (invalid key, no results, ...) in an
        # "error" field instead of raising; surface it rather than silently
        # returning nothing.
        error = data.get("error")
        if error:
            print(f"❌ Error en búsqueda: {error}")
            return results

        places = data.get("local_results") or []

        for place in places[:limit]:
            results.append({
                "name": place.get("title"),
                "address": place.get("address"),
                "phone": place.get("phone"),
                "website": place.get("website"),
                "rating": place.get("rating"),
                "reviews_count": place.get("reviews"),
                "place_id": place.get("place_id"),
                "thumbnail": place.get("thumbnail"),
            })

    except Exception as e:
        # Best effort: a failed query must not abort the whole scraping run
        print(f"❌ Error en búsqueda: {e}")

    return results
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def scrape_leads(
    niche: str,
    country: str,
    cities: Optional[List[str]] = None,
    limit_per_city: int = 10
) -> List[Dict[str, Any]]:
    """
    Collect leads for one niche/country pair.

    For every city, the first two predefined queries of the niche are run
    through search_google_maps. Results without a phone number, or whose
    phone number was already seen, are discarded.

    Args:
        niche: 'real_estate' or 'insurance'
        country: 'usa' or 'venezuela'
        cities: Cities to search; when None the first 3 default cities
            of the country are used
        limit_per_city: Max results requested per search

    Returns:
        List of lead dictionaries ready for database insertion.
    """
    api_key = get_serpapi_key()
    if not api_key:
        print("❌ No se encontró SERPAPI_KEY. Configura la variable de entorno o config.json")
        return []

    if cities is None:
        # Fall back to the first 3 default cities
        cities = CITIES.get(country, [])[:3]

    queries = SEARCH_QUERIES.get(niche, {}).get(country, [])
    if not queries:
        print(f"❌ No hay queries definidas para {niche}/{country}")
        return []

    top_queries = queries[:2]  # only the first 2 queries are used per city
    all_leads = []
    seen_phones = set()  # phone numbers already turned into a lead

    for city in cities:
        for query in top_queries:
            print(f"🔍 Buscando: '{query}' en {city}...")

            found = search_google_maps(
                query=query,
                location=city,
                api_key=api_key,
                limit=limit_per_city
            )

            for item in found:
                phone = item.get("phone")
                if not phone or phone in seen_phones:
                    continue
                seen_phones.add(phone)
                all_leads.append(dict(
                    name=item.get("name"),
                    phone=phone,
                    email=None,  # usually not available in Maps
                    address=item.get("address"),
                    website=item.get("website"),
                    rating=item.get("rating"),
                    reviews_count=item.get("reviews_count"),
                    source="google_maps",
                    niche=niche,
                    country=country,
                    city=city,
                ))

            # Pause between API calls (rate limiting)
            time.sleep(1)

    print(f"✅ Total leads encontrados: {len(all_leads)}")
    return all_leads
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def demo_mode() -> List[Dict[str, Any]]:
    """
    Build a fixed set of sample leads without performing any API call.

    Handy for exercising the rest of the pipeline.
    """
    # (name, phone, address, website, rating, reviews_count, niche, country, city)
    samples = (
        ("Miami Luxury Realty", "+1-786-555-0101", "1234 Brickell Ave, Miami, FL",
         "https://example-realty.com", 4.8, 127, "real_estate", "usa", "Miami, FL"),
        ("Seguros Caracas Plus", "+58-212-555-0202", "Av. Francisco de Miranda, Caracas",
         "https://example-seguros.com", 4.5, 89, "insurance", "venezuela", "Caracas"),
        ("Texas Home Experts", "+1-713-555-0303", "5678 Westheimer Rd, Houston, TX",
         "https://example-homes.com", 4.9, 215, "real_estate", "usa", "Houston, TX"),
    )

    demo_leads = []
    for name, phone, address, website, rating, reviews, niche, country, city in samples:
        demo_leads.append({
            "name": name,
            "phone": phone,
            "email": None,
            "address": address,
            "website": website,
            "rating": rating,
            "reviews_count": reviews,
            "source": "demo",
            "niche": niche,
            "country": country,
            "city": city,
        })
    return demo_leads
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
# Command-line entry point: scrape (or load demo) leads and print the first five.
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description="Google Maps Lead Scraper")
    cli.add_argument("--niche", choices=["real_estate", "insurance"], default="real_estate")
    cli.add_argument("--country", choices=["usa", "venezuela"], default="usa")
    cli.add_argument("--demo", action="store_true", help="Use demo data instead of API")
    cli.add_argument("--limit", type=int, default=5, help="Leads per city")
    options = cli.parse_args()

    if not options.demo:
        leads = scrape_leads(
            niche=options.niche,
            country=options.country,
            limit_per_city=options.limit
        )
    else:
        print("🎭 Modo demo activado")
        leads = demo_mode()

    print(f"\n📋 Leads encontrados: {len(leads)}")
    # Only a short preview is printed
    for lead in leads[:5]:
        print(f" - {lead['name']} | {lead['phone']} | {lead['city']}")
|
lead_gen_pro/whatsapp_sender.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
WhatsApp Web Sender Module
|
| 3 |
+
==========================
|
| 4 |
+
Automates WhatsApp Web using Playwright for lead outreach.
|
| 5 |
+
Includes rate limiting and human-like delays to avoid detection.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import time
|
| 10 |
+
import random
|
| 11 |
+
import json
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
from typing import Optional, List, Dict, Any
|
| 14 |
+
|
| 15 |
+
# Playwright is optional - will work in semi-manual mode without it
|
| 16 |
+
try:
|
| 17 |
+
from playwright.sync_api import sync_playwright, Browser, Page
|
| 18 |
+
PLAYWRIGHT_AVAILABLE = True
|
| 19 |
+
except ImportError:
|
| 20 |
+
PLAYWRIGHT_AVAILABLE = False
|
| 21 |
+
print("⚠️ Playwright no está instalado. Modo semi-automático disponible.")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# Message templates for each niche, keyed by niche and then by country.
# These are only defaults: a template found in config.json takes precedence.
# The wording is niche-neutral, so both supported niches share the same text;
# without an "insurance" entry those leads would fall back to a bare greeting.
DEFAULT_TEMPLATES = {
    "real_estate": {
        "usa": "Hi {name}! I saw your work in {city}. I have a proposal for you!",
        "venezuela": "¡Hola {name}! Vi tu trabajo en {city}. ¡Tengo una propuesta!"
    },
    "insurance": {
        "usa": "Hi {name}! I saw your work in {city}. I have a proposal for you!",
        "venezuela": "¡Hola {name}! Vi tu trabajo en {city}. ¡Tengo una propuesta!"
    }
}
|
| 32 |
+
|
| 33 |
+
def get_config():
    """
    Load ``config.json`` located next to this module.

    Returns:
        The parsed JSON content, or an empty dict when the file does not exist.

    Raises:
        OSError / ValueError: if the file exists but cannot be read or parsed.
    """
    config_path = os.path.join(os.path.dirname(__file__), "config.json")
    if os.path.exists(config_path):
        # Decode explicitly: templates contain accented characters, and the
        # platform default encoding (e.g. cp1252 on Windows) would garble or
        # reject UTF-8 text. "utf-8-sig" also tolerates a leading BOM.
        with open(config_path, "r", encoding="utf-8-sig") as f:
            return json.load(f)
    return {}
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def get_random_delay(min_seconds: int = 5, max_seconds: int = 15) -> float:
    """Pick a random pause length (seconds) between the two bounds, to mimic a human."""
    pause = random.uniform(min_seconds, max_seconds)
    return pause
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def format_phone_for_whatsapp(phone: str) -> str:
    """
    Format a phone number for a WhatsApp link.

    Keeps only the digits of the number (as ASCII), dropping "+", spaces,
    dashes, brackets and any other character, and removes a leading "00"
    international dialing prefix.

    Args:
        phone: Phone number in any common notation, e.g. "+1-786-555-0101".

    Returns:
        Digits-only string, e.g. "17865550101"; "" for an empty/None input.
    """
    if not phone:
        return ""

    # str.isdecimal() also matches non-ASCII decimal digits (e.g. Arabic-Indic);
    # int() maps each of them to its value so the URL only ever contains 0-9.
    # "+" is dropped everywhere, not just at the start.
    cleaned = ''.join(str(int(c)) for c in str(phone) if c.isdecimal())

    # "00" is an international call prefix, not part of the number itself
    if cleaned.startswith('00'):
        cleaned = cleaned[2:]

    return cleaned
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def get_message_for_lead(lead: Dict[str, Any]) -> str:
    """
    Build a personalized message for a lead based on its niche and country.

    The template comes from config.json
    (``niches.<niche>.templates.whatsapp``) when present, otherwise from
    DEFAULT_TEMPLATES, otherwise a bare "Hello {name}". A list of templates
    is supported: one is picked at random.

    Args:
        lead: Lead dictionary; reads "niche", "country", "name" and "city".

    Returns:
        The template with {name} and {city} filled in. Missing, None or empty
        values fall back to "there" / "your area".
    """
    # `or` (not a .get default) so that keys present with a None value,
    # as produced by the scraper, also fall back.
    niche = lead.get("niche") or "real_estate"
    country = lead.get("country") or "usa"

    config = get_config()
    niche_config = config.get("niches", {}).get(niche, {})
    template = niche_config.get("templates", {}).get("whatsapp")

    if not template:
        template = DEFAULT_TEMPLATES.get(niche, {}).get(country, "Hello {name}")

    if isinstance(template, list):
        template = random.choice(template)

    return template.format(
        name=lead.get("name") or "there",
        city=lead.get("city") or "your area",
    )
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def generate_whatsapp_link(phone: str, message: str) -> str:
    """
    Build the wa.me link that opens a chat with `phone` pre-filled with `message`.

    The link can be opened by hand or by an automation.
    """
    from urllib.parse import quote

    number = format_phone_for_whatsapp(phone)
    text = quote(message)
    return "https://wa.me/{}?text={}".format(number, text)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def send_whatsapp_message_manual(lead: Dict[str, Any]) -> Dict[str, Any]:
    """
    Prepare a WhatsApp link that the user can send by hand.

    Returns a dict with "success": True plus the phone, message, link and
    lead id, or {"success": False, "error": ...} when the lead has no phone.
    """
    phone = lead.get("phone")
    if phone:
        message = get_message_for_lead(lead)
        link = generate_whatsapp_link(phone, message)
        outcome = {
            "success": True,
            "mode": "manual",
            "phone": phone,
            "message": message,
            "link": link,
            "lead_id": lead.get("id"),
        }
    else:
        outcome = {"success": False, "error": "No phone number"}
    return outcome
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
class WhatsAppWebAutomation:
    """
    Automates WhatsApp Web using Playwright.

    A persistent browser profile stored in ``.whatsapp_session`` next to this
    module keeps the login between runs. Requires a logged-in WhatsApp Web
    session: start() waits for the chat list to appear.

    Typical use: ``start()``, one or more ``send_message()`` calls, ``stop()``.
    """

    # Max wait (ms) for the chat list, i.e. for the user to be logged in
    LOGIN_TIMEOUT_MS = 120000
    # Max wait (ms) for the send button after opening a chat
    SEND_TIMEOUT_MS = 30000

    def __init__(self, headless: bool = False):
        self.headless = headless
        self.playwright = None
        # NOTE(review): launch_persistent_context presumably returns a browser
        # context rather than a Browser; the annotation is kept as in the
        # original — confirm against the Playwright API.
        self.browser: Optional[Browser] = None
        self.page: Optional[Page] = None
        self.session_dir = os.path.join(os.path.dirname(__file__), ".whatsapp_session")

    def start(self) -> bool:
        """
        Start the browser and load WhatsApp Web.

        Returns:
            True once the chat list is visible; False if Playwright is not
            installed or anything fails (in which case every resource opened
            so far is released).
        """
        if not PLAYWRIGHT_AVAILABLE:
            print("❌ Playwright no disponible. Usa el modo manual.")
            return False

        try:
            self.playwright = sync_playwright().start()

            # Use persistent context to keep login
            self.browser = self.playwright.chromium.launch_persistent_context(
                self.session_dir,
                headless=self.headless,
                args=["--start-maximized"]
            )

            self.page = self.browser.pages[0] if self.browser.pages else self.browser.new_page()
            self.page.goto("https://web.whatsapp.com")

            print("🟢 WhatsApp Web abierto. Esperando login...")

            # Wait for the main interface to load (indicates successful login)
            self.page.wait_for_selector('[data-testid="chat-list"]', timeout=self.LOGIN_TIMEOUT_MS)

            print("✅ WhatsApp Web listo!")
            return True

        except Exception as e:
            print(f"❌ Error iniciando WhatsApp: {e}")
            # Do not leave a half-started browser / Playwright driver behind
            self.stop()
            return False

    def send_message(self, phone: str, message: str) -> bool:
        """
        Send a message to a phone number.

        Opens the chat through the WhatsApp Web "send" URL (inside the
        logged-in session), waits for the send button and clicks it.

        Args:
            phone: Destination number in any notation.
            message: Text to send.

        Returns:
            True if the send button was clicked, False otherwise (browser not
            started, button never appeared, or any other error).
        """
        if not self.page:
            print("❌ Navegador no iniciado")
            return False

        from urllib.parse import quote

        try:
            formatted_phone = format_phone_for_whatsapp(phone)
            # wa.me links lead to a landing page outside WhatsApp Web, where
            # the send button does not exist; this URL opens the chat directly
            # in the logged-in Web client.
            link = f"https://web.whatsapp.com/send?phone={formatted_phone}&text={quote(message)}"

            self.page.goto(link)

            # Wait until the chat is actually loaded instead of sleeping a
            # fixed time; raises on timeout, which is reported below.
            # NOTE(review): the data-testid selector depends on WhatsApp Web's
            # current markup — confirm it still matches.
            send_button = self.page.wait_for_selector(
                '[data-testid="send"]', timeout=self.SEND_TIMEOUT_MS
            )
            if send_button:
                # Short human-like pause before clicking
                time.sleep(get_random_delay(1, 3))
                send_button.click()
                print(f"✅ Mensaje enviado a {phone}")
                return True
            else:
                print(f"⚠️ No se encontró botón de enviar para {phone}")
                return False

        except Exception as e:
            print(f"❌ Error enviando mensaje a {phone}: {e}")
            return False

    def stop(self):
        """Close the browser and stop Playwright. Safe to call more than once."""
        try:
            if self.browser:
                self.browser.close()
        finally:
            # Always release the Playwright driver, even if close() failed
            self.browser = None
            self.page = None
            driver = getattr(self, 'playwright', None)
            self.playwright = None
            if driver:
                driver.stop()
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def batch_generate_links(leads: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Produce one manual-send result (link, message, ...) per lead.

    The output list has the same order and length as `leads`.
    """
    return [send_whatsapp_message_manual(entry) for entry in leads]
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
# Command-line entry point: generate demo links or send the demo messages
# through an automated WhatsApp Web session.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="WhatsApp Web Sender")
    parser.add_argument("--demo", action="store_true", help="Generate demo links")
    parser.add_argument("--auto", action="store_true", help="Use automated sending (requires Playwright)")

    args = parser.parse_args()

    # Demo leads for testing
    demo_leads = [
        {
            "id": 1,
            "name": "Miami Luxury Realty",
            "phone": "+1-786-555-0101",
            "niche": "real_estate",
            "country": "usa",
            "city": "Miami, FL",
        },
        {
            "id": 2,
            "name": "Seguros Caracas",
            "phone": "+58-212-555-0202",
            "niche": "insurance",
            "country": "venezuela",
            "city": "Caracas",
        },
    ]

    if args.demo:
        print("🔗 Generando links de WhatsApp en modo demo...\n")
        results = batch_generate_links(demo_leads)

        for r in results:
            if r["success"]:
                print(f"📱 {r['phone']}")
                print(f" Mensaje: {r['message'][:60]}...")
                print(f" Link: {r['link'][:80]}...\n")

    elif args.auto:
        print("🤖 Modo automático (requiere Playwright instalado)")
        wa = WhatsAppWebAutomation(headless=False)
        try:
            if wa.start():
                last_index = len(demo_leads) - 1
                for index, lead in enumerate(demo_leads):
                    message = get_message_for_lead(lead)
                    wa.send_message(lead["phone"], message)
                    # Long delay between messages; pointless after the last one
                    if index < last_index:
                        time.sleep(get_random_delay(20, 40))
        finally:
            # Always close the browser, even on Ctrl+C or an unexpected error
            wa.stop()

    else:
        print("Usa --demo para generar links o --auto para envío automático")
|
lead_gen_pro/workflow.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Lead Generation Workflow Orchestrator
|
| 3 |
+
======================================
|
| 4 |
+
Main entry point that orchestrates the entire lead generation pipeline:
|
| 5 |
+
1. Scrape leads from Google Maps
|
| 6 |
+
2. Store leads in database
|
| 7 |
+
3. Generate outreach messages
|
| 8 |
+
4. Track outreach status
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
import sys
|
| 13 |
+
import time
|
| 14 |
+
import json
|
| 15 |
+
from datetime import datetime
|
| 16 |
+
from typing import Optional, List, Dict, Any
|
| 17 |
+
|
| 18 |
+
# Import local modules
|
| 19 |
+
from database import init_db, add_lead, get_leads, get_pending_outreach, mark_outreach_sent, get_stats
|
| 20 |
+
from scraper import scrape_leads, demo_mode as scraper_demo
|
| 21 |
+
from whatsapp_sender import batch_generate_links, send_whatsapp_message_manual
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def load_config() -> Dict[str, Any]:
    """
    Load configuration from ``config.json`` located next to this module.

    Returns:
        The parsed JSON content, or an empty dict when the file does not exist.

    Raises:
        OSError / ValueError: if the file exists but cannot be read or parsed.
    """
    config_path = os.path.join(os.path.dirname(__file__), "config.json")
    if os.path.exists(config_path):
        # Decode explicitly instead of relying on the platform default
        # encoding (e.g. cp1252 on Windows), which breaks on non-ASCII text;
        # "utf-8-sig" also tolerates a leading BOM.
        with open(config_path, "r", encoding="utf-8-sig") as f:
            return json.load(f)
    return {}
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def run_scraping_pipeline(
    niche: str,
    country: str,
    limit_per_city: int = 5,
    demo: bool = False
) -> int:
    """
    Scrape leads for one niche/country pair and store them in the database.

    Args:
        niche: 'real_estate' or 'insurance'
        country: 'usa' or 'venezuela'
        limit_per_city: Max leads per city
        demo: Use the built-in demo leads (filtered by niche and country)
            instead of calling the API

    Returns:
        Number of leads added to database.
    """
    divider = "=" * 50
    print(f"\n{divider}")
    print(f"🔍 SCRAPING: {niche.upper()} en {country.upper()}")
    print(f"{divider}\n")

    if not demo:
        leads = scrape_leads(
            niche=niche,
            country=country,
            limit_per_city=limit_per_city
        )
    else:
        # Keep only the demo leads matching the requested niche/country
        leads = [
            candidate for candidate in scraper_demo()
            if candidate.get("niche") == niche and candidate.get("country") == country
        ]

    # Insert one by one so that a single failure does not stop the rest
    added_count = 0
    for lead in leads:
        try:
            lead_id = add_lead(lead)
            added_count += 1
            print(f" ✅ Agregado: {lead['name']} (ID: {lead_id})")
        except Exception as e:
            print(f" ⚠️ Error agregando lead: {e}")

    print(f"\n📊 Total leads agregados: {added_count}")
    return added_count
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def run_outreach_pipeline(
    niche: Optional[str] = None,
    channel: str = "whatsapp",
    limit: int = 10,
    auto_send: bool = False
) -> List[Dict[str, Any]]:
    """
    Run the outreach pipeline for pending leads.

    Only the 'whatsapp' channel is implemented: a manual-send link is
    generated and printed for every pending lead that has a phone number.
    Leads are NOT marked as contacted here.

    Args:
        niche: Filter by niche (optional)
        channel: 'whatsapp' or 'instagram' (only 'whatsapp' produces results)
        limit: Max leads to process
        auto_send: Reserved for automatic sending; not implemented yet, a
            notice is printed and links are still generated for manual use

    Returns:
        List of successful outreach results (one per generated link).
        Leads for which no link could be generated are reported and skipped.
    """
    print(f"\n{'='*50}")
    print(f"📤 OUTREACH: {channel.upper()} ({limit} leads)")
    print(f"{'='*50}\n")

    # Get pending leads
    leads = get_pending_outreach(channel=channel, niche=niche, limit=limit)

    if not leads:
        print("✅ No hay leads pendientes de contactar.")
        return []

    print(f"📋 {len(leads)} leads pendientes encontrados\n")

    results = []

    if channel != "whatsapp":
        # Say so explicitly instead of silently returning nothing
        print(f"⚠️ Canal '{channel}' aún no soportado; no se generó outreach.")
        return results

    if auto_send:
        # TODO: Implement actual sending with Playwright
        print("⚠️ Envío automático aún no implementado; solo se generan links manuales.\n")

    # Generate WhatsApp links; results come back in the same order as leads
    link_results = batch_generate_links(leads)

    for position, (lead, result) in enumerate(zip(leads, link_results)):
        if not result["success"]:
            # Report skipped leads so they do not vanish without a trace
            reason = result.get("error", "error desconocido")
            print(f"⚠️ Lead #{lead.get('id', position)} omitido: {reason}\n")
            continue

        print(f"📱 Lead #{lead.get('id', position)}: {lead.get('name', 'Unknown')}")
        print(f" Teléfono: {result['phone']}")
        print(f" Mensaje: {result['message'][:50]}...")
        print(f" 🔗 Link: {result['link'][:60]}...\n")

        results.append(result)

    return results
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def print_dashboard():
    """Print the current statistics returned by get_stats() as a text dashboard."""
    stats = get_stats()
    divider = "=" * 50

    print(f"\n{divider}")
    print("📊 DASHBOARD - Lead Generation System")
    print(divider)
    print(f"\n📈 Total de Leads: {stats['total_leads']}")

    # Breakdown sections; each one is printed only when it has entries
    breakdowns = (
        ("by_niche", "\n🏷️ Por Nicho:"),
        ("by_country", "\n🌎 Por País:"),
        ("by_status", "\n📌 Por Estado:"),
    )
    for stat_key, heading in breakdowns:
        if stats[stat_key]:
            print(heading)
            for label, count in stats[stat_key].items():
                print(f" - {label}: {count}")

    print("\n📤 Outreach Enviado:")
    print(f" - WhatsApp: {stats['whatsapp_sent']}")
    print(f" - Instagram: {stats['instagram_sent']}")
    print(f"\n{divider}\n")
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def full_pipeline_demo():
    """
    Walk through the whole pipeline using demo data only.

    Steps: initialize the database, load demo leads for every niche/country
    pair, print the dashboard and generate WhatsApp links for up to 5 leads.
    Returns the list of outreach results.
    """
    print("\n🚀 INICIANDO PIPELINE COMPLETO EN MODO DEMO\n")

    # Step 1: database
    print("1️⃣ Inicializando base de datos...")
    init_db()

    # Step 2: demo scraping for every niche/country combination
    print("\n2️⃣ Scraping de leads (modo demo)...")
    targets = [
        (niche, country)
        for niche in ("real_estate", "insurance")
        for country in ("usa", "venezuela")
    ]
    for niche, country in targets:
        run_scraping_pipeline(niche=niche, country=country, demo=True)

    # Step 3: dashboard
    print("\n3️⃣ Estado actual del sistema:")
    print_dashboard()

    # Step 4: outreach links
    print("\n4️⃣ Generando links de WhatsApp para outreach...")
    outreach = run_outreach_pipeline(channel="whatsapp", limit=5)

    print(f"\n✅ Pipeline demo completado. {len(outreach)} links generados.")
    return outreach
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
# Command-line entry point: dispatch to the demo, scraping, outreach or
# stats action selected by the flags.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Lead Generation Workflow")
    parser.add_argument("--demo", action="store_true", help="Run full demo pipeline")
    parser.add_argument("--scrape", action="store_true", help="Run scraping only")
    parser.add_argument("--outreach", action="store_true", help="Run outreach only")
    parser.add_argument("--stats", action="store_true", help="Show dashboard only")
    parser.add_argument("--niche", choices=["real_estate", "insurance"], default="real_estate")
    parser.add_argument("--country", choices=["usa", "venezuela"], default="usa")
    parser.add_argument("--limit", type=int, default=10)

    args = parser.parse_args()

    if args.demo:
        full_pipeline_demo()
    elif args.scrape:
        init_db()
        run_scraping_pipeline(
            niche=args.niche,
            country=args.country,
            limit_per_city=args.limit,
            demo=True  # Change to False when SerpApi is configured
        )
    elif args.outreach:
        # Initialize the database first, like the other actions that query
        # it, so a fresh install does not fail here.
        init_db()
        run_outreach_pipeline(niche=args.niche, limit=args.limit)
    elif args.stats:
        init_db()
        print_dashboard()
    else:
        print("Usa --demo para ejecutar el pipeline completo en modo demo")
        print("Usa --scrape para ejecutar solo el scraping")
        print("Usa --outreach para ejecutar solo el outreach")
        print("Usa --stats para ver las estadísticas")
|
index.html → static/index.html
RENAMED
|
@@ -1,216 +1,595 @@
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
<html lang="es">
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
<title>NexusCRM - CRM + Lead Gen</title>
|
| 7 |
<style>
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
body {
|
| 11 |
-
font-family: -apple-system,
|
| 12 |
-
background:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
min-height: 100vh;
|
|
|
|
| 14 |
}
|
| 15 |
-
|
| 16 |
.header {
|
| 17 |
-
background:
|
|
|
|
|
|
|
| 18 |
color: white;
|
| 19 |
-
padding:
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
}
|
| 22 |
-
|
| 23 |
-
.header h1 { font-size: 1.5rem; }
|
| 24 |
-
.header .subtitle { font-size: 0.875rem; opacity: 0.9; }
|
| 25 |
-
|
| 26 |
.nav {
|
| 27 |
-
|
| 28 |
-
padding: 0.5rem 2rem;
|
| 29 |
-
border-bottom: 1px solid #e0e0e0;
|
| 30 |
display: flex;
|
| 31 |
-
gap: 0.
|
| 32 |
flex-wrap: wrap;
|
| 33 |
}
|
| 34 |
-
|
| 35 |
.nav button {
|
| 36 |
-
padding: 0.
|
| 37 |
-
border:
|
| 38 |
-
background:
|
|
|
|
|
|
|
| 39 |
cursor: pointer;
|
| 40 |
font-size: 0.9rem;
|
| 41 |
-
|
| 42 |
-
|
|
|
|
| 43 |
}
|
| 44 |
-
|
| 45 |
-
.nav button:hover {
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
.nav button.active {
|
| 48 |
-
background:
|
| 49 |
-
color:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
}
|
| 51 |
-
|
| 52 |
-
.main { padding: 1.5rem; }
|
| 53 |
-
|
| 54 |
.stats {
|
| 55 |
display: grid;
|
| 56 |
-
grid-template-columns: repeat(auto-fit, minmax(
|
| 57 |
-
gap:
|
| 58 |
-
margin-bottom:
|
| 59 |
}
|
| 60 |
-
|
| 61 |
.stat-card {
|
| 62 |
-
background:
|
| 63 |
-
|
| 64 |
-
border-radius: 8px;
|
| 65 |
-
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 66 |
-
}
|
| 67 |
-
|
| 68 |
-
.stat-card h3 { color: #666; font-size: 0.8rem; margin-bottom: 0.5rem; text-transform: uppercase; }
|
| 69 |
-
.stat-card .value { font-size: 1.75rem; font-weight: bold; color: #667eea; }
|
| 70 |
-
.stat-card .value.green { color: #4caf50; }
|
| 71 |
-
.stat-card .value.orange { color: #f57c00; }
|
| 72 |
-
.stat-card .value.red { color: #f44336; }
|
| 73 |
-
|
| 74 |
-
.card {
|
| 75 |
-
background: white;
|
| 76 |
padding: 1.5rem;
|
| 77 |
-
border-radius:
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
}
|
| 81 |
-
|
| 82 |
-
.card
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
.filters {
|
| 85 |
display: flex;
|
| 86 |
gap: 1rem;
|
| 87 |
-
margin-bottom:
|
| 88 |
flex-wrap: wrap;
|
| 89 |
}
|
| 90 |
-
|
| 91 |
-
.filters select,
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
border
|
|
|
|
|
|
|
|
|
|
| 95 |
font-size: 0.9rem;
|
|
|
|
|
|
|
| 96 |
}
|
| 97 |
-
|
| 98 |
-
.filters
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
table {
|
| 101 |
width: 100%;
|
| 102 |
-
border-collapse:
|
|
|
|
| 103 |
}
|
| 104 |
-
|
| 105 |
-
th
|
| 106 |
-
padding:
|
| 107 |
text-align: left;
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
}
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
.badge {
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
font-size: 0.75rem;
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
}
|
| 122 |
-
|
| 123 |
-
.
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
.badge-perdido { background: #ffebee; color: #c62828; }
|
| 130 |
-
|
| 131 |
.btn {
|
| 132 |
-
padding: 0.
|
| 133 |
-
border:
|
| 134 |
-
|
| 135 |
cursor: pointer;
|
| 136 |
-
font-size: 0.8rem;
|
| 137 |
transition: all 0.2s;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
}
|
| 139 |
-
|
| 140 |
-
.btn
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
.btn-whatsapp { background: #25d366; color: white; }
|
| 145 |
-
.btn-whatsapp:hover { background: #20b655; }
|
| 146 |
-
|
| 147 |
.pipeline-stages {
|
| 148 |
display: flex;
|
| 149 |
-
gap:
|
| 150 |
overflow-x: auto;
|
| 151 |
padding-bottom: 1rem;
|
| 152 |
}
|
| 153 |
-
|
| 154 |
.pipeline-stage {
|
| 155 |
-
min-width:
|
| 156 |
-
background:
|
| 157 |
-
border-radius:
|
| 158 |
-
padding:
|
|
|
|
| 159 |
}
|
| 160 |
-
|
| 161 |
.pipeline-stage h4 {
|
| 162 |
-
color:
|
| 163 |
-
margin-bottom:
|
| 164 |
-
|
| 165 |
-
|
| 166 |
}
|
| 167 |
-
|
| 168 |
.pipeline-item {
|
| 169 |
-
background:
|
| 170 |
-
padding:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
border-radius: 4px;
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
width: 100%;
|
| 184 |
-
|
| 185 |
-
border: 1px solid
|
| 186 |
-
border-radius:
|
|
|
|
|
|
|
| 187 |
font-size: 0.9rem;
|
|
|
|
|
|
|
| 188 |
}
|
| 189 |
-
|
| 190 |
-
.
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
border-radius: 8px;
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
}
|
| 199 |
-
|
| 200 |
-
.whatsapp-card .phone { font-size: 1.1rem; font-weight: 600; color: #166534; }
|
| 201 |
-
.whatsapp-card .name { color: #166534; }
|
| 202 |
-
|
| 203 |
-
.loading { text-align: center; padding: 2rem; color: #666; }
|
| 204 |
-
|
| 205 |
-
.action-buttons { display: flex; gap: 0.5rem; }
|
| 206 |
</style>
|
| 207 |
</head>
|
|
|
|
| 208 |
<body>
|
| 209 |
<div class="header">
|
| 210 |
<h1>NexusCRM</h1>
|
| 211 |
<div class="subtitle">CRM + Lead Gen Pro</div>
|
| 212 |
</div>
|
| 213 |
-
|
| 214 |
<div class="nav">
|
| 215 |
<button class="active" onclick="showTab('dashboard')">Dashboard</button>
|
| 216 |
<button onclick="showTab('leads')">Leads</button>
|
|
@@ -218,8 +597,9 @@
|
|
| 218 |
<button onclick="showTab('outreach')">WhatsApp</button>
|
| 219 |
<button onclick="showTab('customers')">Clientes</button>
|
| 220 |
<button onclick="showTab('nuevo')">+ Nuevo</button>
|
|
|
|
| 221 |
</div>
|
| 222 |
-
|
| 223 |
<div class="main">
|
| 224 |
<!-- Dashboard Tab -->
|
| 225 |
<div id="dashboard" class="tab-content">
|
|
@@ -249,7 +629,7 @@
|
|
| 249 |
<div class="value" id="stat-enriquecidos">0</div>
|
| 250 |
</div>
|
| 251 |
</div>
|
| 252 |
-
|
| 253 |
<div class="grid-2">
|
| 254 |
<div class="card">
|
| 255 |
<h2>Leads por Estado</h2>
|
|
@@ -260,13 +640,13 @@
|
|
| 260 |
<div id="leads-by-niche"></div>
|
| 261 |
</div>
|
| 262 |
</div>
|
| 263 |
-
|
| 264 |
<div class="card">
|
| 265 |
<h2>Leads Recientes</h2>
|
| 266 |
<div id="recent-leads"></div>
|
| 267 |
</div>
|
| 268 |
</div>
|
| 269 |
-
|
| 270 |
<!-- Leads Tab -->
|
| 271 |
<div id="leads" class="tab-content" style="display: none;">
|
| 272 |
<div class="card">
|
|
@@ -287,7 +667,7 @@
|
|
| 287 |
<div id="leads-list"></div>
|
| 288 |
</div>
|
| 289 |
</div>
|
| 290 |
-
|
| 291 |
<!-- Pipeline Tab -->
|
| 292 |
<div id="pipeline" class="tab-content" style="display: none;">
|
| 293 |
<div class="card">
|
|
@@ -295,7 +675,7 @@
|
|
| 295 |
<div class="pipeline-stages" id="pipeline-view"></div>
|
| 296 |
</div>
|
| 297 |
</div>
|
| 298 |
-
|
| 299 |
<!-- Outreach Tab -->
|
| 300 |
<div id="outreach" class="tab-content" style="display: none;">
|
| 301 |
<div class="card">
|
|
@@ -311,16 +691,18 @@
|
|
| 311 |
<div id="whatsapp-list"></div>
|
| 312 |
</div>
|
| 313 |
</div>
|
| 314 |
-
|
| 315 |
<!-- Customers Tab -->
|
| 316 |
<div id="customers" class="tab-content" style="display: none;">
|
| 317 |
<div class="card">
|
| 318 |
<h2>Clientes</h2>
|
| 319 |
-
<input type="text" class="search-box" placeholder="Buscar clientes..."
|
|
|
|
|
|
|
| 320 |
<div id="customers-list"></div>
|
| 321 |
</div>
|
| 322 |
</div>
|
| 323 |
-
|
| 324 |
<!-- Nuevo Tab -->
|
| 325 |
<div id="nuevo" class="tab-content" style="display: none;">
|
| 326 |
<div class="card">
|
|
@@ -390,18 +772,18 @@
|
|
| 390 |
<script>
|
| 391 |
const SUPABASE_URL = 'https://nvssvykqxaurtlgwxwwy.supabase.co';
|
| 392 |
const SUPABASE_KEY = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im52c3N2eWtxeGF1cnRsZ3d4d3d5Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3NzI4OTIxNjUsImV4cCI6MjA4ODQ2ODE2NX0.onzmNQGBy6jDxWO7TcjZuyvgvId9HiGNzUNs1HmOAMk';
|
| 393 |
-
|
| 394 |
let customers = [];
|
| 395 |
let leads = [];
|
| 396 |
let opportunities = [];
|
| 397 |
-
|
| 398 |
async function apiGet(table) {
|
| 399 |
const response = await fetch(`${SUPABASE_URL}/rest/v1/${table}?select=*`, {
|
| 400 |
headers: { 'apikey': SUPABASE_KEY, 'Authorization': `Bearer ${SUPABASE_KEY}` }
|
| 401 |
});
|
| 402 |
return response.json();
|
| 403 |
}
|
| 404 |
-
|
| 405 |
async function apiPost(table, data) {
|
| 406 |
const response = await fetch(`${SUPABASE_URL}/rest/v1/${table}`, {
|
| 407 |
method: 'POST',
|
|
@@ -415,7 +797,7 @@
|
|
| 415 |
});
|
| 416 |
return response;
|
| 417 |
}
|
| 418 |
-
|
| 419 |
async function apiPatch(table, id, data) {
|
| 420 |
const response = await fetch(`${SUPABASE_URL}/rest/v1/${table}?id=eq.${id}`, {
|
| 421 |
method: 'PATCH',
|
|
@@ -429,13 +811,13 @@
|
|
| 429 |
});
|
| 430 |
return response;
|
| 431 |
}
|
| 432 |
-
|
| 433 |
async function loadData() {
|
| 434 |
try {
|
| 435 |
customers = await apiGet('customers');
|
| 436 |
leads = await apiGet('leads');
|
| 437 |
opportunities = await apiGet('opportunities');
|
| 438 |
-
|
| 439 |
populateNichoFilter();
|
| 440 |
updateStats();
|
| 441 |
renderRecentLeads();
|
|
@@ -447,7 +829,7 @@
|
|
| 447 |
console.error('Error:', error);
|
| 448 |
}
|
| 449 |
}
|
| 450 |
-
|
| 451 |
function populateNichoFilter() {
|
| 452 |
const niches = [...new Set(leads.map(l => l.nicho).filter(n => n))];
|
| 453 |
const select = document.getElementById('lead-nicho-filter');
|
|
@@ -458,7 +840,7 @@
|
|
| 458 |
select.appendChild(opt);
|
| 459 |
});
|
| 460 |
}
|
| 461 |
-
|
| 462 |
function updateStats() {
|
| 463 |
document.getElementById('stat-leads').textContent = leads.length;
|
| 464 |
document.getElementById('stat-customers').textContent = customers.length;
|
|
@@ -466,15 +848,15 @@
|
|
| 466 |
document.getElementById('stat-contactados').textContent = leads.filter(l => l.estado === 'contactado').length;
|
| 467 |
document.getElementById('stat-con-telefono').textContent = leads.filter(l => l.telefono || l.telefono_formateado).length;
|
| 468 |
document.getElementById('stat-enriquecidos').textContent = leads.filter(l => l.enriquecido).length;
|
| 469 |
-
|
| 470 |
renderLeadsByStatus();
|
| 471 |
renderLeadsByNiche();
|
| 472 |
}
|
| 473 |
-
|
| 474 |
function renderLeadsByStatus() {
|
| 475 |
const counts = {};
|
| 476 |
leads.forEach(l => { counts[l.estado || 'nuevo'] = (counts[l.estado || 'nuevo'] || 0) + 1; });
|
| 477 |
-
|
| 478 |
let html = '<table><thead><tr><th>Estado</th><th>Cantidad</th></tr></thead><tbody>';
|
| 479 |
Object.entries(counts).forEach(([estado, count]) => {
|
| 480 |
const badge = getBadgeClass(estado);
|
|
@@ -483,11 +865,11 @@
|
|
| 483 |
html += '</tbody></table>';
|
| 484 |
document.getElementById('leads-by-status').innerHTML = html;
|
| 485 |
}
|
| 486 |
-
|
| 487 |
function renderLeadsByNiche() {
|
| 488 |
const counts = {};
|
| 489 |
-
leads.forEach(l => { if(l.nicho) { counts[l.nicho] = (counts[l.nicho] || 0) + 1; }});
|
| 490 |
-
|
| 491 |
let html = '<table><thead><tr><th>Nicho</th><th>Cantidad</th></tr></thead><tbody>';
|
| 492 |
Object.entries(counts).slice(0, 10).forEach(([nicho, count]) => {
|
| 493 |
html += `<tr><td>${nicho}</td><td>${count}</td></tr>`;
|
|
@@ -495,32 +877,45 @@
|
|
| 495 |
html += '</tbody></table>';
|
| 496 |
document.getElementById('leads-by-niche').innerHTML = html;
|
| 497 |
}
|
| 498 |
-
|
| 499 |
function getBadgeClass(estado) {
|
| 500 |
const classes = { 'nuevo': 'badge-nuevo', 'contactado': 'badge-contactado', 'calificado': 'badge-calificado', 'cliente': 'badge-cliente', 'convertido': 'badge-ganado' };
|
| 501 |
return classes[estado] || 'badge-lead';
|
| 502 |
}
|
| 503 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
function renderRecentLeads() {
|
| 505 |
const container = document.getElementById('recent-leads');
|
| 506 |
-
|
| 507 |
-
|
|
|
|
| 508 |
if (!recent.length) {
|
| 509 |
container.innerHTML = '<p class="loading">No hay leads</p>';
|
| 510 |
return;
|
| 511 |
}
|
| 512 |
-
|
| 513 |
-
let html = '<table><thead><tr><th>Nombre</th><th>Nicho</th><th>Ciudad</th><th>Estado</th><th>Acciones</th></tr></thead><tbody>';
|
| 514 |
recent.forEach(lead => {
|
| 515 |
const badge = getBadgeClass(lead.estado);
|
|
|
|
|
|
|
| 516 |
html += `
|
| 517 |
<tr>
|
| 518 |
-
<td>
|
|
|
|
| 519 |
<td>${lead.nicho || '-'}</td>
|
| 520 |
<td>${lead.ciudad || '-'}</td>
|
| 521 |
<td><span class="badge ${badge}">${lead.estado || 'nuevo'}</span></td>
|
| 522 |
<td>
|
| 523 |
-
|
|
|
|
|
|
|
|
|
|
| 524 |
</td>
|
| 525 |
</tr>
|
| 526 |
`;
|
|
@@ -528,27 +923,27 @@
|
|
| 528 |
html += '</tbody></table>';
|
| 529 |
container.innerHTML = html;
|
| 530 |
}
|
| 531 |
-
|
| 532 |
function generateWhatsAppLink(lead) {
|
| 533 |
const phone = lead.telefono_formateado || lead.telefono || '';
|
| 534 |
const cleanPhone = phone.replace(/\D/g, '');
|
| 535 |
const message = encodeURIComponent(`Hola ${lead.nombre || ''}, te contactamos de DonNadie Apps. ¿Cómo podemos ayudarte?`);
|
| 536 |
return `https://wa.me/${cleanPhone}?text=${message}`;
|
| 537 |
}
|
| 538 |
-
|
| 539 |
function renderCustomers(filter = '') {
|
| 540 |
const container = document.getElementById('customers-list');
|
| 541 |
-
const filtered = customers.filter(c =>
|
| 542 |
c.nombre?.toLowerCase().includes(filter.toLowerCase()) ||
|
| 543 |
c.cedula?.includes(filter) ||
|
| 544 |
c.correo?.toLowerCase().includes(filter.toLowerCase())
|
| 545 |
);
|
| 546 |
-
|
| 547 |
if (!filtered.length) {
|
| 548 |
container.innerHTML = '<p class="loading">No hay clientes</p>';
|
| 549 |
return;
|
| 550 |
}
|
| 551 |
-
|
| 552 |
let html = '<table><thead><tr><th>Nombre</th><th>Cédula</th><th>Teléfono</th><th>WhatsApp</th><th>Correo</th><th>Estado</th></tr></thead><tbody>';
|
| 553 |
filtered.forEach(c => {
|
| 554 |
html += `
|
|
@@ -565,18 +960,18 @@
|
|
| 565 |
html += '</tbody></table>';
|
| 566 |
container.innerHTML = html;
|
| 567 |
}
|
| 568 |
-
|
| 569 |
function filterCustomers(value) {
|
| 570 |
renderCustomers(value);
|
| 571 |
}
|
| 572 |
-
|
| 573 |
function renderLeads() {
|
| 574 |
const search = document.getElementById('lead-search').value.toLowerCase();
|
| 575 |
const estado = document.getElementById('lead-estado-filter').value;
|
| 576 |
const nicho = document.getElementById('lead-nicho-filter').value;
|
| 577 |
-
|
| 578 |
const filtered = leads.filter(l => {
|
| 579 |
-
const matchSearch = !search ||
|
| 580 |
l.nombre?.toLowerCase().includes(search) ||
|
| 581 |
l.nicho?.toLowerCase().includes(search) ||
|
| 582 |
l.ciudad?.toLowerCase().includes(search);
|
|
@@ -584,28 +979,32 @@
|
|
| 584 |
const matchNicho = !nicho || l.nicho === nicho;
|
| 585 |
return matchSearch && matchEstado && matchNicho;
|
| 586 |
});
|
| 587 |
-
|
| 588 |
const container = document.getElementById('leads-list');
|
| 589 |
-
|
| 590 |
if (!filtered.length) {
|
| 591 |
container.innerHTML = '<p class="loading">No hay leads</p>';
|
| 592 |
return;
|
| 593 |
}
|
| 594 |
-
|
| 595 |
-
let html = '<table><thead><tr><th>
|
| 596 |
filtered.forEach(l => {
|
| 597 |
const badge = getBadgeClass(l.estado);
|
|
|
|
|
|
|
| 598 |
const hasPhone = l.telefono || l.telefono_formateado;
|
| 599 |
html += `
|
| 600 |
<tr>
|
| 601 |
-
<td>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 602 |
<td>${l.nicho || '-'}</td>
|
| 603 |
<td>${l.ciudad || '-'}</td>
|
| 604 |
-
<td>${hasPhone ? 'Sí' : 'No'}</td>
|
| 605 |
-
<td><span class="badge ${badge}">${l.estado || 'nuevo'}</span></td>
|
| 606 |
<td class="action-buttons">
|
| 607 |
-
${hasPhone ? `<a href="${generateWhatsAppLink(l)}" target="_blank" class="btn btn-whatsapp">
|
| 608 |
-
<button class="btn btn-primary" onclick="
|
| 609 |
</td>
|
| 610 |
</tr>
|
| 611 |
`;
|
|
@@ -613,11 +1012,17 @@
|
|
| 613 |
html += '</tbody></table>';
|
| 614 |
container.innerHTML = html;
|
| 615 |
}
|
| 616 |
-
|
| 617 |
function filterLeads() {
|
| 618 |
renderLeads();
|
| 619 |
}
|
| 620 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
async function updateLeadStatus(id, estado) {
|
| 622 |
try {
|
| 623 |
await apiPatch('leads', id, { estado: estado, actualizado_en: new Date().toISOString() });
|
|
@@ -626,11 +1031,11 @@
|
|
| 626 |
alert('Error actualizando: ' + error.message);
|
| 627 |
}
|
| 628 |
}
|
| 629 |
-
|
| 630 |
function renderPipeline() {
|
| 631 |
const stages = ['nuevo', 'contactado', 'calificado', 'convertido', 'perdido'];
|
| 632 |
const stageNames = { 'nuevo': 'Nuevo', 'contactado': 'Contactado', 'calificado': 'Calificado', 'convertido': 'Ganado', 'perdido': 'Perdido' };
|
| 633 |
-
|
| 634 |
let html = '';
|
| 635 |
stages.forEach(stage => {
|
| 636 |
const stageLeads = leads.filter(l => l.estado === stage);
|
|
@@ -648,60 +1053,67 @@
|
|
| 648 |
});
|
| 649 |
document.getElementById('pipeline-view').innerHTML = html;
|
| 650 |
}
|
| 651 |
-
|
| 652 |
function renderWhatsApp() {
|
| 653 |
const search = document.getElementById('wa-search').value.toLowerCase();
|
| 654 |
const filter = document.getElementById('wa-filter').value;
|
| 655 |
-
|
| 656 |
const filtered = leads.filter(l => {
|
| 657 |
const phone = l.telefono || l.telefono_formateado;
|
| 658 |
if (!phone) return false;
|
| 659 |
-
|
| 660 |
-
const matchSearch = !search ||
|
| 661 |
l.nombre?.toLowerCase().includes(search) ||
|
| 662 |
l.nicho?.toLowerCase().includes(search);
|
| 663 |
-
const matchFilter = filter === 'todos' ||
|
| 664 |
(filter === 'sin-contactar' && l.estado === 'nuevo') ||
|
| 665 |
(filter === 'contactados' && l.whatsapp_enviado);
|
| 666 |
return matchSearch && matchFilter;
|
| 667 |
});
|
| 668 |
-
|
| 669 |
const container = document.getElementById('whatsapp-list');
|
| 670 |
-
|
| 671 |
if (!filtered.length) {
|
| 672 |
container.innerHTML = '<p class="loading">No hay leads con teléfono</p>';
|
| 673 |
return;
|
| 674 |
}
|
| 675 |
-
|
| 676 |
let html = '';
|
| 677 |
filtered.forEach(l => {
|
| 678 |
const phone = l.telefono_formateado || l.telefono;
|
|
|
|
|
|
|
| 679 |
html += `
|
| 680 |
-
<div class="whatsapp-card">
|
| 681 |
-
<div
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 686 |
</div>
|
| 687 |
-
<div
|
| 688 |
-
<a href="${generateWhatsAppLink(l)}" target="_blank" class="btn btn-whatsapp">Abrir
|
| 689 |
-
<button class="btn btn-primary" onclick="markWhatsAppSent('${l.id}')"
|
| 690 |
</div>
|
| 691 |
</div>
|
| 692 |
`;
|
| 693 |
});
|
| 694 |
container.innerHTML = html;
|
| 695 |
}
|
| 696 |
-
|
| 697 |
function filterWhatsApp() {
|
| 698 |
renderWhatsApp();
|
| 699 |
}
|
| 700 |
-
|
| 701 |
async function markWhatsAppSent(id) {
|
| 702 |
try {
|
| 703 |
-
await apiPatch('leads', id, {
|
| 704 |
-
whatsapp_enviado: true,
|
| 705 |
whatsapp_enviado_en: new Date().toISOString(),
|
| 706 |
estado: 'contactado'
|
| 707 |
});
|
|
@@ -711,20 +1123,25 @@
|
|
| 711 |
alert('Error: ' + error.message);
|
| 712 |
}
|
| 713 |
}
|
| 714 |
-
|
| 715 |
function showTab(tabId) {
|
| 716 |
document.querySelectorAll('.tab-content').forEach(t => t.style.display = 'none');
|
| 717 |
document.querySelectorAll('.nav button').forEach(b => b.classList.remove('active'));
|
| 718 |
document.getElementById(tabId).style.display = 'block';
|
| 719 |
event.target.classList.add('active');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 720 |
}
|
| 721 |
-
|
| 722 |
async function saveNew(e) {
|
| 723 |
e.preventDefault();
|
| 724 |
-
|
| 725 |
const tipo = document.getElementById('nuevo-tipo').value;
|
| 726 |
const isLead = tipo === 'lead';
|
| 727 |
-
|
| 728 |
const data = {
|
| 729 |
nombre: document.getElementById('nuevo-nombre').value,
|
| 730 |
correo: document.getElementById('nuevo-correo').value || null,
|
|
@@ -739,9 +1156,9 @@
|
|
| 739 |
created_at: new Date().toISOString(),
|
| 740 |
updated_at: new Date().toISOString()
|
| 741 |
};
|
| 742 |
-
|
| 743 |
const table = isLead ? 'leads' : 'customers';
|
| 744 |
-
|
| 745 |
try {
|
| 746 |
await apiPost(table, data);
|
| 747 |
alert('Guardado correctamente');
|
|
@@ -752,8 +1169,320 @@
|
|
| 752 |
alert('Error: ' + error.message);
|
| 753 |
}
|
| 754 |
}
|
| 755 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 756 |
loadData();
|
| 757 |
</script>
|
| 758 |
</body>
|
| 759 |
-
|
|
|
|
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
<html lang="es">
|
| 3 |
+
|
| 4 |
<head>
|
| 5 |
<meta charset="UTF-8">
|
| 6 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
<title>NexusCRM - CRM + Lead Gen</title>
|
| 8 |
<style>
|
| 9 |
+
:root {
|
| 10 |
+
--primary: #6366f1;
|
| 11 |
+
--primary-glow: rgba(99, 102, 241, 0.5);
|
| 12 |
+
--secondary: #a855f7;
|
| 13 |
+
--bg-dark: #0f172a;
|
| 14 |
+
--glass-bg: rgba(255, 255, 255, 0.05);
|
| 15 |
+
--glass-border: rgba(255, 255, 255, 0.1);
|
| 16 |
+
--text-main: #f8fafc;
|
| 17 |
+
--text-dim: #94a3b8;
|
| 18 |
+
--success: #22c55e;
|
| 19 |
+
--warning: #f59e0b;
|
| 20 |
+
--danger: #ef4444;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
* {
|
| 24 |
+
margin: 0;
|
| 25 |
+
padding: 0;
|
| 26 |
+
box-sizing: border-box;
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
body {
|
| 30 |
+
font-family: 'Inter', -apple-system, system-ui, sans-serif;
|
| 31 |
+
background: var(--bg-dark);
|
| 32 |
+
background-image:
|
| 33 |
+
radial-gradient(at 0% 0%, rgba(99, 102, 241, 0.15) 0, transparent 50%),
|
| 34 |
+
radial-gradient(at 100% 100%, rgba(168, 85, 247, 0.15) 0, transparent 50%);
|
| 35 |
+
color: var(--text-main);
|
| 36 |
min-height: 100vh;
|
| 37 |
+
overflow-x: hidden;
|
| 38 |
}
|
| 39 |
+
|
| 40 |
.header {
|
| 41 |
+
background: rgba(15, 23, 42, 0.8);
|
| 42 |
+
backdrop-filter: blur(12px);
|
| 43 |
+
border-bottom: 1px solid var(--glass-border);
|
| 44 |
color: white;
|
| 45 |
+
padding: 1.5rem 2rem;
|
| 46 |
+
position: sticky;
|
| 47 |
+
top: 0;
|
| 48 |
+
z-index: 100;
|
| 49 |
+
display: flex;
|
| 50 |
+
justify-content: space-between;
|
| 51 |
+
align-items: center;
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
.header h1 {
|
| 55 |
+
font-size: 1.75rem;
|
| 56 |
+
font-weight: 800;
|
| 57 |
+
background: linear-gradient(to right, #818cf8, #c084fc);
|
| 58 |
+
-webkit-background-clip: text;
|
| 59 |
+
background-clip: text;
|
| 60 |
+
-webkit-text-fill-color: transparent;
|
| 61 |
+
letter-spacing: -0.025em;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
.header .subtitle {
|
| 65 |
+
font-size: 0.875rem;
|
| 66 |
+
color: var(--text-dim);
|
| 67 |
+
font-weight: 500;
|
| 68 |
}
|
| 69 |
+
|
|
|
|
|
|
|
|
|
|
| 70 |
.nav {
|
| 71 |
+
padding: 1rem 2rem;
|
|
|
|
|
|
|
| 72 |
display: flex;
|
| 73 |
+
gap: 0.75rem;
|
| 74 |
flex-wrap: wrap;
|
| 75 |
}
|
| 76 |
+
|
| 77 |
.nav button {
|
| 78 |
+
padding: 0.6rem 1.25rem;
|
| 79 |
+
border: 1px solid var(--glass-border);
|
| 80 |
+
background: var(--glass-bg);
|
| 81 |
+
backdrop-filter: blur(4px);
|
| 82 |
+
color: var(--text-main);
|
| 83 |
cursor: pointer;
|
| 84 |
font-size: 0.9rem;
|
| 85 |
+
font-weight: 600;
|
| 86 |
+
border-radius: 9999px;
|
| 87 |
+
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
|
| 88 |
}
|
| 89 |
+
|
| 90 |
+
.nav button:hover {
|
| 91 |
+
background: rgba(255, 255, 255, 0.1);
|
| 92 |
+
border-color: rgba(255, 255, 255, 0.2);
|
| 93 |
+
transform: translateY(-1px);
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
.nav button.active {
|
| 97 |
+
background: var(--primary);
|
| 98 |
+
border-color: var(--primary);
|
| 99 |
+
box-shadow: 0 0 20px var(--primary-glow);
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
.main {
|
| 103 |
+
padding: 0 2rem 2rem 2rem;
|
| 104 |
}
|
| 105 |
+
|
|
|
|
|
|
|
| 106 |
.stats {
|
| 107 |
display: grid;
|
| 108 |
+
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
| 109 |
+
gap: 1.5rem;
|
| 110 |
+
margin-bottom: 2rem;
|
| 111 |
}
|
| 112 |
+
|
| 113 |
.stat-card {
|
| 114 |
+
background: var(--glass-bg);
|
| 115 |
+
backdrop-filter: blur(20px);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
padding: 1.5rem;
|
| 117 |
+
border-radius: 16px;
|
| 118 |
+
border: 1px solid var(--glass-border);
|
| 119 |
+
transition: transform 0.3s ease;
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
.stat-card:hover {
|
| 123 |
+
transform: translateY(-4px);
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
.stat-card h3 {
|
| 127 |
+
color: var(--text-dim);
|
| 128 |
+
font-size: 0.75rem;
|
| 129 |
+
margin-bottom: 0.75rem;
|
| 130 |
+
text-transform: uppercase;
|
| 131 |
+
letter-spacing: 0.05em;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
.stat-card .value {
|
| 135 |
+
font-size: 2.25rem;
|
| 136 |
+
font-weight: 800;
|
| 137 |
+
color: white;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
.stat-card .value.green {
|
| 141 |
+
color: var(--success);
|
| 142 |
}
|
| 143 |
+
|
| 144 |
+
.stat-card .value.orange {
|
| 145 |
+
color: var(--warning);
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
.card {
|
| 149 |
+
background: var(--glass-bg);
|
| 150 |
+
backdrop-filter: blur(20px);
|
| 151 |
+
padding: 2rem;
|
| 152 |
+
border-radius: 20px;
|
| 153 |
+
border: 1px solid var(--glass-border);
|
| 154 |
+
margin-bottom: 2rem;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.card h2 {
|
| 158 |
+
margin-bottom: 1.5rem;
|
| 159 |
+
color: white;
|
| 160 |
+
font-size: 1.5rem;
|
| 161 |
+
font-weight: 700;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
.filters {
|
| 165 |
display: flex;
|
| 166 |
gap: 1rem;
|
| 167 |
+
margin-bottom: 1.5rem;
|
| 168 |
flex-wrap: wrap;
|
| 169 |
}
|
| 170 |
+
|
| 171 |
+
.filters select,
|
| 172 |
+
.filters input {
|
| 173 |
+
background: rgba(255, 255, 255, 0.05);
|
| 174 |
+
border: 1px solid var(--glass-border);
|
| 175 |
+
padding: 0.75rem 1rem;
|
| 176 |
+
border-radius: 12px;
|
| 177 |
+
color: white;
|
| 178 |
font-size: 0.9rem;
|
| 179 |
+
outline: none;
|
| 180 |
+
transition: border-color 0.2s;
|
| 181 |
}
|
| 182 |
+
|
| 183 |
+
.filters select:focus,
|
| 184 |
+
.filters input:focus {
|
| 185 |
+
border-color: var(--primary);
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
+
.filters input[type="text"] {
|
| 189 |
+
flex: 1;
|
| 190 |
+
min-width: 280px;
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
.filters select option {
|
| 194 |
+
background: var(--bg-dark);
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
table {
|
| 198 |
width: 100%;
|
| 199 |
+
border-collapse: separate;
|
| 200 |
+
border-spacing: 0 0.5rem;
|
| 201 |
}
|
| 202 |
+
|
| 203 |
+
th {
|
| 204 |
+
padding: 1rem;
|
| 205 |
text-align: left;
|
| 206 |
+
font-size: 0.75rem;
|
| 207 |
+
text-transform: uppercase;
|
| 208 |
+
color: var(--text-dim);
|
| 209 |
+
letter-spacing: 0.05em;
|
| 210 |
+
font-weight: 600;
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
td {
|
| 214 |
+
padding: 1rem;
|
| 215 |
+
background: rgba(255, 255, 255, 0.02);
|
| 216 |
+
border-top: 1px solid var(--glass-border);
|
| 217 |
+
border-bottom: 1px solid var(--glass-border);
|
| 218 |
+
font-size: 0.95rem;
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
td:first-child {
|
| 222 |
+
border-left: 1px solid var(--glass-border);
|
| 223 |
+
border-radius: 12px 0 0 12px;
|
| 224 |
}
|
| 225 |
+
|
| 226 |
+
td:last-child {
|
| 227 |
+
border-right: 1px solid var(--glass-border);
|
| 228 |
+
border-radius: 0 12px 12px 0;
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
tr:hover td {
|
| 232 |
+
background: rgba(255, 255, 255, 0.05);
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
.badge {
|
| 236 |
+
padding: 0.35rem 0.75rem;
|
| 237 |
+
border-radius: 9999px;
|
| 238 |
+
font-size: 0.7rem;
|
| 239 |
+
font-weight: 700;
|
| 240 |
+
text-transform: uppercase;
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
.badge-lead {
|
| 244 |
+
background: rgba(99, 102, 241, 0.1);
|
| 245 |
+
color: #818cf8;
|
| 246 |
+
border: 1px solid rgba(99, 102, 241, 0.2);
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
.badge-cliente {
|
| 250 |
+
background: rgba(34, 197, 94, 0.1);
|
| 251 |
+
color: #4ade80;
|
| 252 |
+
border: 1px solid rgba(34, 197, 94, 0.2);
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
.badge-nuevo {
|
| 256 |
+
background: rgba(245, 158, 11, 0.1);
|
| 257 |
+
color: #fbbf24;
|
| 258 |
+
border: 1px solid rgba(245, 158, 11, 0.2);
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
.badge-contactado {
|
| 262 |
+
background: rgba(14, 165, 233, 0.1);
|
| 263 |
+
color: #38bdf8;
|
| 264 |
+
border: 1px solid rgba(14, 165, 233, 0.2);
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
.badge-ganado {
|
| 268 |
+
background: rgba(34, 197, 94, 0.1);
|
| 269 |
+
color: #4ade80;
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
.quality-ring {
|
| 273 |
+
display: inline-flex;
|
| 274 |
+
width: 32px;
|
| 275 |
+
height: 32px;
|
| 276 |
+
border-radius: 50%;
|
| 277 |
+
align-items: center;
|
| 278 |
+
justify-content: center;
|
| 279 |
+
font-weight: 800;
|
| 280 |
font-size: 0.75rem;
|
| 281 |
+
border: 2px solid;
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
.q-high {
|
| 285 |
+
color: var(--success);
|
| 286 |
+
border-color: var(--success);
|
| 287 |
+
background: rgba(34, 197, 94, 0.1);
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
.q-med {
|
| 291 |
+
color: var(--warning);
|
| 292 |
+
border-color: var(--warning);
|
| 293 |
+
background: rgba(245, 158, 11, 0.1);
|
| 294 |
}
|
| 295 |
+
|
| 296 |
+
.q-low {
|
| 297 |
+
color: var(--danger);
|
| 298 |
+
border-color: var(--danger);
|
| 299 |
+
background: rgba(239, 68, 68, 0.1);
|
| 300 |
+
}
|
| 301 |
+
|
|
|
|
|
|
|
| 302 |
.btn {
|
| 303 |
+
padding: 0.6rem 1rem;
|
| 304 |
+
border-radius: 10px;
|
| 305 |
+
font-weight: 600;
|
| 306 |
cursor: pointer;
|
|
|
|
| 307 |
transition: all 0.2s;
|
| 308 |
+
border: none;
|
| 309 |
+
display: inline-flex;
|
| 310 |
+
align-items: center;
|
| 311 |
+
gap: 0.5rem;
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
.btn-primary {
|
| 315 |
+
background: var(--primary);
|
| 316 |
+
color: white;
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
.btn-whatsapp {
|
| 320 |
+
background: #25d366;
|
| 321 |
+
color: white;
|
| 322 |
+
text-decoration: none;
|
| 323 |
}
|
| 324 |
+
|
| 325 |
+
.btn:active {
|
| 326 |
+
transform: scale(0.95);
|
| 327 |
+
}
|
| 328 |
+
|
|
|
|
|
|
|
|
|
|
| 329 |
.pipeline-stages {
|
| 330 |
display: flex;
|
| 331 |
+
gap: 1.5rem;
|
| 332 |
overflow-x: auto;
|
| 333 |
padding-bottom: 1rem;
|
| 334 |
}
|
| 335 |
+
|
| 336 |
.pipeline-stage {
|
| 337 |
+
min-width: 280px;
|
| 338 |
+
background: rgba(255, 255, 255, 0.02);
|
| 339 |
+
border-radius: 20px;
|
| 340 |
+
padding: 1.25rem;
|
| 341 |
+
border: 1px solid var(--glass-border);
|
| 342 |
}
|
| 343 |
+
|
| 344 |
.pipeline-stage h4 {
|
| 345 |
+
color: white;
|
| 346 |
+
margin-bottom: 1rem;
|
| 347 |
+
display: flex;
|
| 348 |
+
justify-content: space-between;
|
| 349 |
}
|
| 350 |
+
|
| 351 |
.pipeline-item {
|
| 352 |
+
background: rgba(255, 255, 255, 0.05);
|
| 353 |
+
padding: 1rem;
|
| 354 |
+
border-radius: 12px;
|
| 355 |
+
margin-bottom: 0.75rem;
|
| 356 |
+
border: 1px solid var(--glass-border);
|
| 357 |
+
cursor: pointer;
|
| 358 |
+
}
|
| 359 |
+
|
| 360 |
+
.pipeline-item:hover {
|
| 361 |
+
border-color: var(--primary);
|
| 362 |
+
}
|
| 363 |
+
|
| 364 |
+
#recent-leads table tr td:last-child {
|
| 365 |
+
text-align: right;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
::-webkit-scrollbar {
|
| 369 |
+
width: 8px;
|
| 370 |
+
}
|
| 371 |
+
|
| 372 |
+
::-webkit-scrollbar-track {
|
| 373 |
+
background: var(--bg-dark);
|
| 374 |
+
}
|
| 375 |
+
|
| 376 |
+
::-webkit-scrollbar-thumb {
|
| 377 |
+
background: var(--glass-border);
|
| 378 |
border-radius: 4px;
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
::-webkit-scrollbar-thumb:hover {
|
| 382 |
+
background: var(--text-dim);
|
| 383 |
+
}
|
| 384 |
+
|
| 385 |
+
/* ─── Lead Gen Tab ─────────────────────────── */
|
| 386 |
+
.leadgen-grid {
|
| 387 |
+
display: grid;
|
| 388 |
+
grid-template-columns: 380px 1fr;
|
| 389 |
+
gap: 1.5rem;
|
| 390 |
+
align-items: start;
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
.leadgen-form-card {
|
| 394 |
+
background: var(--glass-bg);
|
| 395 |
+
border: 1px solid var(--glass-border);
|
| 396 |
+
border-radius: 16px;
|
| 397 |
+
padding: 1.75rem;
|
| 398 |
+
backdrop-filter: blur(8px);
|
| 399 |
+
}
|
| 400 |
+
|
| 401 |
+
.leadgen-form-card h3 {
|
| 402 |
+
font-size: 1rem;
|
| 403 |
+
font-weight: 700;
|
| 404 |
+
margin-bottom: 1.25rem;
|
| 405 |
+
color: #818cf8;
|
| 406 |
+
text-transform: uppercase;
|
| 407 |
+
letter-spacing: 0.08em;
|
| 408 |
+
}
|
| 409 |
+
|
| 410 |
+
.form-row {
|
| 411 |
+
margin-bottom: 0.9rem;
|
| 412 |
+
}
|
| 413 |
+
|
| 414 |
+
.form-row label {
|
| 415 |
+
display: block;
|
| 416 |
+
font-size: 0.75rem;
|
| 417 |
+
font-weight: 600;
|
| 418 |
+
color: var(--text-dim);
|
| 419 |
+
text-transform: uppercase;
|
| 420 |
+
letter-spacing: 0.05em;
|
| 421 |
+
margin-bottom: 0.4rem;
|
| 422 |
+
}
|
| 423 |
+
|
| 424 |
+
.form-row select,
|
| 425 |
+
.form-row input[type=number],
|
| 426 |
+
.form-row input[type=text] {
|
| 427 |
width: 100%;
|
| 428 |
+
background: rgba(255, 255, 255, 0.05);
|
| 429 |
+
border: 1px solid var(--glass-border);
|
| 430 |
+
border-radius: 10px;
|
| 431 |
+
color: var(--text-main);
|
| 432 |
+
padding: 0.6rem 0.9rem;
|
| 433 |
font-size: 0.9rem;
|
| 434 |
+
outline: none;
|
| 435 |
+
transition: border-color 0.2s;
|
| 436 |
}
|
| 437 |
+
|
| 438 |
+
.form-row select:focus,
|
| 439 |
+
.form-row input:focus {
|
| 440 |
+
border-color: var(--primary);
|
| 441 |
+
}
|
| 442 |
+
|
| 443 |
+
.platform-grid {
|
| 444 |
+
display: grid;
|
| 445 |
+
grid-template-columns: 1fr 1fr;
|
| 446 |
+
gap: 0.4rem;
|
| 447 |
+
margin-top: 0.4rem;
|
| 448 |
+
}
|
| 449 |
+
|
| 450 |
+
.platform-check {
|
| 451 |
+
display: flex;
|
| 452 |
+
align-items: center;
|
| 453 |
+
gap: 0.5rem;
|
| 454 |
+
font-size: 0.85rem;
|
| 455 |
+
cursor: pointer;
|
| 456 |
+
padding: 0.4rem 0.5rem;
|
| 457 |
border-radius: 8px;
|
| 458 |
+
transition: background 0.15s;
|
| 459 |
+
}
|
| 460 |
+
|
| 461 |
+
.platform-check:hover {
|
| 462 |
+
background: rgba(255, 255, 255, 0.06);
|
| 463 |
+
}
|
| 464 |
+
|
| 465 |
+
.platform-check input {
|
| 466 |
+
accent-color: var(--primary);
|
| 467 |
+
}
|
| 468 |
+
|
| 469 |
+
.btn-run {
|
| 470 |
+
width: 100%;
|
| 471 |
+
padding: 0.9rem;
|
| 472 |
+
background: linear-gradient(135deg, var(--primary), var(--secondary));
|
| 473 |
+
border: none;
|
| 474 |
+
border-radius: 12px;
|
| 475 |
+
color: white;
|
| 476 |
+
font-weight: 700;
|
| 477 |
+
font-size: 1rem;
|
| 478 |
+
cursor: pointer;
|
| 479 |
+
margin-top: 1rem;
|
| 480 |
+
transition: opacity 0.2s, transform 0.15s;
|
| 481 |
+
letter-spacing: 0.03em;
|
| 482 |
+
}
|
| 483 |
+
|
| 484 |
+
.btn-run:hover {
|
| 485 |
+
opacity: 0.9;
|
| 486 |
+
transform: translateY(-1px);
|
| 487 |
+
}
|
| 488 |
+
|
| 489 |
+
.btn-run:disabled {
|
| 490 |
+
opacity: 0.5;
|
| 491 |
+
cursor: not-allowed;
|
| 492 |
+
transform: none;
|
| 493 |
+
}
|
| 494 |
+
|
| 495 |
+
.log-terminal {
|
| 496 |
+
background: #0a0f1e;
|
| 497 |
+
border: 1px solid rgba(99, 102, 241, 0.3);
|
| 498 |
+
border-radius: 12px;
|
| 499 |
+
padding: 1rem;
|
| 500 |
+
height: 280px;
|
| 501 |
+
overflow-y: auto;
|
| 502 |
+
font-family: 'Courier New', monospace;
|
| 503 |
+
font-size: 0.78rem;
|
| 504 |
+
color: #4ade80;
|
| 505 |
+
line-height: 1.5;
|
| 506 |
+
white-space: pre-wrap;
|
| 507 |
+
word-break: break-all;
|
| 508 |
+
}
|
| 509 |
+
|
| 510 |
+
.log-terminal .log-err {
|
| 511 |
+
color: #f87171;
|
| 512 |
+
}
|
| 513 |
+
|
| 514 |
+
.log-terminal .log-done {
|
| 515 |
+
color: #818cf8;
|
| 516 |
+
font-weight: 700;
|
| 517 |
+
}
|
| 518 |
+
|
| 519 |
+
.server-banner {
|
| 520 |
+
background: rgba(245, 158, 11, 0.1);
|
| 521 |
+
border: 1px solid rgba(245, 158, 11, 0.3);
|
| 522 |
+
border-radius: 12px;
|
| 523 |
+
padding: 1rem 1.25rem;
|
| 524 |
+
margin-bottom: 1.5rem;
|
| 525 |
+
display: flex;
|
| 526 |
+
align-items: center;
|
| 527 |
+
gap: 0.75rem;
|
| 528 |
+
font-size: 0.88rem;
|
| 529 |
+
}
|
| 530 |
+
|
| 531 |
+
.server-banner.online {
|
| 532 |
+
background: rgba(34, 197, 94, 0.1);
|
| 533 |
+
border-color: rgba(34, 197, 94, 0.3);
|
| 534 |
+
color: #4ade80;
|
| 535 |
+
}
|
| 536 |
+
|
| 537 |
+
.server-banner.offline {
|
| 538 |
+
color: #fbbf24;
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
.lg-stat-row {
|
| 542 |
+
display: grid;
|
| 543 |
+
grid-template-columns: repeat(3, 1fr);
|
| 544 |
+
gap: 1rem;
|
| 545 |
+
margin-bottom: 1.5rem;
|
| 546 |
+
}
|
| 547 |
+
|
| 548 |
+
.lg-stat {
|
| 549 |
+
background: var(--glass-bg);
|
| 550 |
+
border: 1px solid var(--glass-border);
|
| 551 |
+
border-radius: 12px;
|
| 552 |
+
padding: 1rem;
|
| 553 |
+
text-align: center;
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
.lg-stat .val {
|
| 557 |
+
font-size: 1.75rem;
|
| 558 |
+
font-weight: 800;
|
| 559 |
+
color: #818cf8;
|
| 560 |
+
}
|
| 561 |
+
|
| 562 |
+
.lg-stat .lbl {
|
| 563 |
+
font-size: 0.7rem;
|
| 564 |
+
color: var(--text-dim);
|
| 565 |
+
text-transform: uppercase;
|
| 566 |
+
margin-top: 0.2rem;
|
| 567 |
+
}
|
| 568 |
+
|
| 569 |
+
.btn-sync {
|
| 570 |
+
padding: 0.6rem 1.2rem;
|
| 571 |
+
background: rgba(99, 102, 241, 0.15);
|
| 572 |
+
border: 1px solid rgba(99, 102, 241, 0.4);
|
| 573 |
+
border-radius: 10px;
|
| 574 |
+
color: #818cf8;
|
| 575 |
+
font-weight: 600;
|
| 576 |
+
font-size: 0.85rem;
|
| 577 |
+
cursor: pointer;
|
| 578 |
+
transition: background 0.2s;
|
| 579 |
+
}
|
| 580 |
+
|
| 581 |
+
.btn-sync:hover {
|
| 582 |
+
background: rgba(99, 102, 241, 0.3);
|
| 583 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
</style>
|
| 585 |
</head>
|
| 586 |
+
|
| 587 |
<body>
|
| 588 |
<div class="header">
|
| 589 |
<h1>NexusCRM</h1>
|
| 590 |
<div class="subtitle">CRM + Lead Gen Pro</div>
|
| 591 |
</div>
|
| 592 |
+
|
| 593 |
<div class="nav">
|
| 594 |
<button class="active" onclick="showTab('dashboard')">Dashboard</button>
|
| 595 |
<button onclick="showTab('leads')">Leads</button>
|
|
|
|
| 597 |
<button onclick="showTab('outreach')">WhatsApp</button>
|
| 598 |
<button onclick="showTab('customers')">Clientes</button>
|
| 599 |
<button onclick="showTab('nuevo')">+ Nuevo</button>
|
| 600 |
+
<button onclick="showTab('leadgen')" id="nav-leadgen">🔍 Lead Gen</button>
|
| 601 |
</div>
|
| 602 |
+
|
| 603 |
<div class="main">
|
| 604 |
<!-- Dashboard Tab -->
|
| 605 |
<div id="dashboard" class="tab-content">
|
|
|
|
| 629 |
<div class="value" id="stat-enriquecidos">0</div>
|
| 630 |
</div>
|
| 631 |
</div>
|
| 632 |
+
|
| 633 |
<div class="grid-2">
|
| 634 |
<div class="card">
|
| 635 |
<h2>Leads por Estado</h2>
|
|
|
|
| 640 |
<div id="leads-by-niche"></div>
|
| 641 |
</div>
|
| 642 |
</div>
|
| 643 |
+
|
| 644 |
<div class="card">
|
| 645 |
<h2>Leads Recientes</h2>
|
| 646 |
<div id="recent-leads"></div>
|
| 647 |
</div>
|
| 648 |
</div>
|
| 649 |
+
|
| 650 |
<!-- Leads Tab -->
|
| 651 |
<div id="leads" class="tab-content" style="display: none;">
|
| 652 |
<div class="card">
|
|
|
|
| 667 |
<div id="leads-list"></div>
|
| 668 |
</div>
|
| 669 |
</div>
|
| 670 |
+
|
| 671 |
<!-- Pipeline Tab -->
|
| 672 |
<div id="pipeline" class="tab-content" style="display: none;">
|
| 673 |
<div class="card">
|
|
|
|
| 675 |
<div class="pipeline-stages" id="pipeline-view"></div>
|
| 676 |
</div>
|
| 677 |
</div>
|
| 678 |
+
|
| 679 |
<!-- Outreach Tab -->
|
| 680 |
<div id="outreach" class="tab-content" style="display: none;">
|
| 681 |
<div class="card">
|
|
|
|
| 691 |
<div id="whatsapp-list"></div>
|
| 692 |
</div>
|
| 693 |
</div>
|
| 694 |
+
|
| 695 |
<!-- Customers Tab -->
|
| 696 |
<div id="customers" class="tab-content" style="display: none;">
|
| 697 |
<div class="card">
|
| 698 |
<h2>Clientes</h2>
|
| 699 |
+
<input type="text" class="search-box" placeholder="Buscar clientes..."
|
| 700 |
+
onkeyup="filterCustomers(this.value)"
|
| 701 |
+
style="width: 100%; padding: 0.75rem; border: 1px solid #e0e0e0; border-radius: 4px; margin-bottom: 1rem;">
|
| 702 |
<div id="customers-list"></div>
|
| 703 |
</div>
|
| 704 |
</div>
|
| 705 |
+
|
| 706 |
<!-- Nuevo Tab -->
|
| 707 |
<div id="nuevo" class="tab-content" style="display: none;">
|
| 708 |
<div class="card">
|
|
|
|
| 772 |
<script>
|
| 773 |
const SUPABASE_URL = 'https://nvssvykqxaurtlgwxwwy.supabase.co';
|
| 774 |
const SUPABASE_KEY = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im52c3N2eWtxeGF1cnRsZ3d4d3d5Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3NzI4OTIxNjUsImV4cCI6MjA4ODQ2ODE2NX0.onzmNQGBy6jDxWO7TcjZuyvgvId9HiGNzUNs1HmOAMk';
|
| 775 |
+
|
| 776 |
let customers = [];
|
| 777 |
let leads = [];
|
| 778 |
let opportunities = [];
|
| 779 |
+
|
| 780 |
/**
 * Fetch every row of a Supabase table through its REST endpoint.
 *
 * @param {string} table - Table name appended to `/rest/v1/`.
 * @returns {Promise<any>} Parsed JSON body of the response.
 * @throws {Error} When the server answers with a non-2xx status, so callers
 *   never mistake an error payload for table rows.
 */
async function apiGet(table) {
    const response = await fetch(`${SUPABASE_URL}/rest/v1/${encodeURIComponent(table)}?select=*`, {
        headers: { 'apikey': SUPABASE_KEY, 'Authorization': `Bearer ${SUPABASE_KEY}` }
    });
    // Without this check an error body would be returned as if it were data.
    if (!response.ok) {
        throw new Error(`GET ${table} failed: ${response.status} ${response.statusText}`);
    }
    return response.json();
}
|
| 786 |
+
|
| 787 |
async function apiPost(table, data) {
|
| 788 |
const response = await fetch(`${SUPABASE_URL}/rest/v1/${table}`, {
|
| 789 |
method: 'POST',
|
|
|
|
| 797 |
});
|
| 798 |
return response;
|
| 799 |
}
|
| 800 |
+
|
| 801 |
async function apiPatch(table, id, data) {
|
| 802 |
const response = await fetch(`${SUPABASE_URL}/rest/v1/${table}?id=eq.${id}`, {
|
| 803 |
method: 'PATCH',
|
|
|
|
| 811 |
});
|
| 812 |
return response;
|
| 813 |
}
|
| 814 |
+
|
| 815 |
async function loadData() {
|
| 816 |
try {
|
| 817 |
customers = await apiGet('customers');
|
| 818 |
leads = await apiGet('leads');
|
| 819 |
opportunities = await apiGet('opportunities');
|
| 820 |
+
|
| 821 |
populateNichoFilter();
|
| 822 |
updateStats();
|
| 823 |
renderRecentLeads();
|
|
|
|
| 829 |
console.error('Error:', error);
|
| 830 |
}
|
| 831 |
}
|
| 832 |
+
|
| 833 |
function populateNichoFilter() {
|
| 834 |
const niches = [...new Set(leads.map(l => l.nicho).filter(n => n))];
|
| 835 |
const select = document.getElementById('lead-nicho-filter');
|
|
|
|
| 840 |
select.appendChild(opt);
|
| 841 |
});
|
| 842 |
}
|
| 843 |
+
|
| 844 |
function updateStats() {
|
| 845 |
document.getElementById('stat-leads').textContent = leads.length;
|
| 846 |
document.getElementById('stat-customers').textContent = customers.length;
|
|
|
|
| 848 |
document.getElementById('stat-contactados').textContent = leads.filter(l => l.estado === 'contactado').length;
|
| 849 |
document.getElementById('stat-con-telefono').textContent = leads.filter(l => l.telefono || l.telefono_formateado).length;
|
| 850 |
document.getElementById('stat-enriquecidos').textContent = leads.filter(l => l.enriquecido).length;
|
| 851 |
+
|
| 852 |
renderLeadsByStatus();
|
| 853 |
renderLeadsByNiche();
|
| 854 |
}
|
| 855 |
+
|
| 856 |
function renderLeadsByStatus() {
|
| 857 |
const counts = {};
|
| 858 |
leads.forEach(l => { counts[l.estado || 'nuevo'] = (counts[l.estado || 'nuevo'] || 0) + 1; });
|
| 859 |
+
|
| 860 |
let html = '<table><thead><tr><th>Estado</th><th>Cantidad</th></tr></thead><tbody>';
|
| 861 |
Object.entries(counts).forEach(([estado, count]) => {
|
| 862 |
const badge = getBadgeClass(estado);
|
|
|
|
| 865 |
html += '</tbody></table>';
|
| 866 |
document.getElementById('leads-by-status').innerHTML = html;
|
| 867 |
}
|
| 868 |
+
|
| 869 |
function renderLeadsByNiche() {
|
| 870 |
const counts = {};
|
| 871 |
+
leads.forEach(l => { if (l.nicho) { counts[l.nicho] = (counts[l.nicho] || 0) + 1; } });
|
| 872 |
+
|
| 873 |
let html = '<table><thead><tr><th>Nicho</th><th>Cantidad</th></tr></thead><tbody>';
|
| 874 |
Object.entries(counts).slice(0, 10).forEach(([nicho, count]) => {
|
| 875 |
html += `<tr><td>${nicho}</td><td>${count}</td></tr>`;
|
|
|
|
| 877 |
html += '</tbody></table>';
|
| 878 |
document.getElementById('leads-by-niche').innerHTML = html;
|
| 879 |
}
|
| 880 |
+
|
| 881 |
/**
 * Map a lead status to the CSS class used for its badge.
 *
 * @param {string} estado - Lead status (e.g. 'nuevo', 'contactado').
 * @returns {string} The matching badge class, or 'badge-lead' for any
 *   status that has no entry in the table.
 */
function getBadgeClass(estado) {
    // NOTE(review): no `.badge-calificado` rule appears next to the other badge styles — confirm it exists.
    const classes = { 'nuevo': 'badge-nuevo', 'contactado': 'badge-contactado', 'calificado': 'badge-calificado', 'cliente': 'badge-cliente', 'convertido': 'badge-ganado' };
    // Own-property check so inherited keys such as 'constructor' or
    // 'toString' fall back to the default instead of returning a function.
    return Object.hasOwn(classes, estado) ? classes[estado] : 'badge-lead';
}
|
| 885 |
+
|
| 886 |
+
/**
 * Pick the CSS modifier class for a lead's quality ring.
 *
 * @param {number} score - Quality score; callers in this file pass `quality_score || 0`.
 * @returns {string} 'q-high' for scores >= 8, 'q-med' for scores >= 5, otherwise 'q-low'.
 */
function getQualityClass(score) {
|
| 887 |
+
if (score >= 8) return 'q-high';
|
| 888 |
+
if (score >= 5) return 'q-med';
|
| 889 |
+
return 'q-low';
|
| 890 |
+
}
|
| 891 |
+
|
| 892 |
function renderRecentLeads() {
|
| 893 |
const container = document.getElementById('recent-leads');
|
| 894 |
+
// Sort by creation date descending
|
| 895 |
+
const recent = [...leads].sort((a, b) => new Date(b.created_at) - new Date(a.created_at)).slice(0, 8);
|
| 896 |
+
|
| 897 |
if (!recent.length) {
|
| 898 |
container.innerHTML = '<p class="loading">No hay leads</p>';
|
| 899 |
return;
|
| 900 |
}
|
| 901 |
+
|
| 902 |
+
let html = '<table><thead><tr><th>Score</th><th>Nombre</th><th>Nicho</th><th>Ciudad</th><th>Estado</th><th>Acciones</th></tr></thead><tbody>';
|
| 903 |
recent.forEach(lead => {
|
| 904 |
const badge = getBadgeClass(lead.estado);
|
| 905 |
+
const qScore = lead.quality_score || 0;
|
| 906 |
+
const qClass = getQualityClass(qScore);
|
| 907 |
html += `
|
| 908 |
<tr>
|
| 909 |
+
<td width="50"><div class="quality-ring ${qClass}">${qScore}</div></td>
|
| 910 |
+
<td><div style="font-weight:700">${lead.nombre || 'Sin nombre'}</div><div style="font-size: 0.75rem; color: var(--text-dim)">${lead.origen || 'lead_gen'}</div></td>
|
| 911 |
<td>${lead.nicho || '-'}</td>
|
| 912 |
<td>${lead.ciudad || '-'}</td>
|
| 913 |
<td><span class="badge ${badge}">${lead.estado || 'nuevo'}</span></td>
|
| 914 |
<td>
|
| 915 |
+
<div class="action-buttons">
|
| 916 |
+
${lead.telefono || lead.telefono_formateado ? `<a href="${generateWhatsAppLink(lead)}" target="_blank" class="btn btn-whatsapp">WA</a>` : ''}
|
| 917 |
+
<button class="btn btn-primary" onclick="showLeadDetails('${lead.id}')">Ver</button>
|
| 918 |
+
</div>
|
| 919 |
</td>
|
| 920 |
</tr>
|
| 921 |
`;
|
|
|
|
| 923 |
html += '</tbody></table>';
|
| 924 |
container.innerHTML = html;
|
| 925 |
}
|
| 926 |
+
|
| 927 |
/**
 * Build a `wa.me` deep link with a pre-filled greeting for a lead.
 *
 * @param {{telefono_formateado?: (string|number), telefono?: (string|number), nombre?: string}} lead
 *   Lead record; `telefono_formateado` is preferred over `telefono`.
 * @returns {string} URL of the form `https://wa.me/<digits>?text=<encoded message>`.
 */
function generateWhatsAppLink(lead) {
    // String() keeps this working when the phone value arrives as a number.
    const rawPhone = String(lead.telefono_formateado || lead.telefono || '');
    const cleanPhone = rawPhone.replace(/\D/g, '');
    // Avoid the dangling "Hola , ..." when the lead has no name.
    const name = String(lead.nombre ?? '').trim();
    const greeting = name ? `Hola ${name}` : 'Hola';
    const message = encodeURIComponent(`${greeting}, te contactamos de DonNadie Apps. ¿Cómo podemos ayudarte?`);
    return `https://wa.me/${cleanPhone}?text=${message}`;
}
|
| 933 |
+
|
| 934 |
function renderCustomers(filter = '') {
|
| 935 |
const container = document.getElementById('customers-list');
|
| 936 |
+
const filtered = customers.filter(c =>
|
| 937 |
c.nombre?.toLowerCase().includes(filter.toLowerCase()) ||
|
| 938 |
c.cedula?.includes(filter) ||
|
| 939 |
c.correo?.toLowerCase().includes(filter.toLowerCase())
|
| 940 |
);
|
| 941 |
+
|
| 942 |
if (!filtered.length) {
|
| 943 |
container.innerHTML = '<p class="loading">No hay clientes</p>';
|
| 944 |
return;
|
| 945 |
}
|
| 946 |
+
|
| 947 |
let html = '<table><thead><tr><th>Nombre</th><th>Cédula</th><th>Teléfono</th><th>WhatsApp</th><th>Correo</th><th>Estado</th></tr></thead><tbody>';
|
| 948 |
filtered.forEach(c => {
|
| 949 |
html += `
|
|
|
|
| 960 |
html += '</tbody></table>';
|
| 961 |
container.innerHTML = html;
|
| 962 |
}
|
| 963 |
+
|
| 964 |
/**
 * Re-render the customers table for a search term.
 * Wired to the customers search box through its inline `onkeyup` handler.
 *
 * @param {string} value - Current text of the search box, forwarded to renderCustomers.
 */
function filterCustomers(value) {
|
| 965 |
renderCustomers(value);
|
| 966 |
}
|
| 967 |
+
|
| 968 |
function renderLeads() {
|
| 969 |
const search = document.getElementById('lead-search').value.toLowerCase();
|
| 970 |
const estado = document.getElementById('lead-estado-filter').value;
|
| 971 |
const nicho = document.getElementById('lead-nicho-filter').value;
|
| 972 |
+
|
| 973 |
const filtered = leads.filter(l => {
|
| 974 |
+
const matchSearch = !search ||
|
| 975 |
l.nombre?.toLowerCase().includes(search) ||
|
| 976 |
l.nicho?.toLowerCase().includes(search) ||
|
| 977 |
l.ciudad?.toLowerCase().includes(search);
|
|
|
|
| 979 |
const matchNicho = !nicho || l.nicho === nicho;
|
| 980 |
return matchSearch && matchEstado && matchNicho;
|
| 981 |
});
|
| 982 |
+
|
| 983 |
const container = document.getElementById('leads-list');
|
| 984 |
+
|
| 985 |
if (!filtered.length) {
|
| 986 |
container.innerHTML = '<p class="loading">No hay leads</p>';
|
| 987 |
return;
|
| 988 |
}
|
| 989 |
+
|
| 990 |
+
let html = '<table><thead><tr><th>Score</th><th>Nombre</th><th>Nicho</th><th>Ciudad</th><th>Acciones</th></tr></thead><tbody>';
|
| 991 |
filtered.forEach(l => {
|
| 992 |
const badge = getBadgeClass(l.estado);
|
| 993 |
+
const qScore = l.quality_score || 0;
|
| 994 |
+
const qClass = getQualityClass(qScore);
|
| 995 |
const hasPhone = l.telefono || l.telefono_formateado;
|
| 996 |
html += `
|
| 997 |
<tr>
|
| 998 |
+
<td><div class="quality-ring ${qClass}">${qScore}</div></td>
|
| 999 |
+
<td>
|
| 1000 |
+
<div style="font-weight:700">${l.nombre || 'Sin nombre'}</div>
|
| 1001 |
+
<span class="badge ${badge}">${l.estado || 'nuevo'}</span>
|
| 1002 |
+
</td>
|
| 1003 |
<td>${l.nicho || '-'}</td>
|
| 1004 |
<td>${l.ciudad || '-'}</td>
|
|
|
|
|
|
|
| 1005 |
<td class="action-buttons">
|
| 1006 |
+
${hasPhone ? `<a href="${generateWhatsAppLink(l)}" target="_blank" class="btn btn-whatsapp">WA</a>` : ''}
|
| 1007 |
+
<button class="btn btn-primary" onclick="showLeadDetails('${l.id}')">Ver</button>
|
| 1008 |
</td>
|
| 1009 |
</tr>
|
| 1010 |
`;
|
|
|
|
| 1012 |
html += '</tbody></table>';
|
| 1013 |
container.innerHTML = html;
|
| 1014 |
}
|
| 1015 |
+
|
| 1016 |
/**
 * Re-render the leads table.
 * Takes no arguments: renderLeads reads the search box and the status and
 * niche selects directly from the DOM.
 */
function filterLeads() {
|
| 1017 |
renderLeads();
|
| 1018 |
}
|
| 1019 |
+
|
| 1020 |
+
/**
 * Show a summary of one lead in an alert dialog.
 *
 * @param {string|number} id - Lead id. The inline `onclick` handlers always
 *   pass it as a string, so ids are compared by their string form.
 * @returns {void} Does nothing when no lead has that id.
 */
function showLeadDetails(id) {
    // Strict `===` against a numeric primary key would never match the
    // string coming from the markup, so normalise both sides.
    const wanted = String(id);
    const lead = leads.find(l => String(l.id) === wanted);
    if (!lead) return;
    // Same fallbacks the tables use, so missing fields never print "undefined".
    alert(`Detalles de ${lead.nombre || 'Sin nombre'}:\n\nNicho: ${lead.nicho || '-'}\nCiudad: ${lead.ciudad || '-'}\nEstado: ${lead.estado || 'nuevo'}\nScore: ${lead.quality_score || 0}\n\nNotas: ${lead.observaciones || 'Sin notas'}`);
}
|
| 1025 |
+
|
| 1026 |
async function updateLeadStatus(id, estado) {
|
| 1027 |
try {
|
| 1028 |
await apiPatch('leads', id, { estado: estado, actualizado_en: new Date().toISOString() });
|
|
|
|
| 1031 |
alert('Error actualizando: ' + error.message);
|
| 1032 |
}
|
| 1033 |
}
|
| 1034 |
+
|
| 1035 |
function renderPipeline() {
|
| 1036 |
const stages = ['nuevo', 'contactado', 'calificado', 'convertido', 'perdido'];
|
| 1037 |
const stageNames = { 'nuevo': 'Nuevo', 'contactado': 'Contactado', 'calificado': 'Calificado', 'convertido': 'Ganado', 'perdido': 'Perdido' };
|
| 1038 |
+
|
| 1039 |
let html = '';
|
| 1040 |
stages.forEach(stage => {
|
| 1041 |
const stageLeads = leads.filter(l => l.estado === stage);
|
|
|
|
| 1053 |
});
|
| 1054 |
document.getElementById('pipeline-view').innerHTML = html;
|
| 1055 |
}
|
| 1056 |
+
|
| 1057 |
/**
 * Render the WhatsApp outreach list into `#whatsapp-list`.
 *
 * Only leads with a phone number are listed. They are narrowed by the text in
 * `#wa-search` (matched against name and niche) and by `#wa-filter`
 * ('todos', 'sin-contactar' or 'contactados').
 *
 * Every lead field is HTML-escaped before it is written through `innerHTML`,
 * so markup inside a lead's text cannot be injected into the page.
 */
function renderWhatsApp() {
    const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    // Escape a value for use in HTML text or a double-quoted attribute.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

    const search = document.getElementById('wa-search').value.toLowerCase();
    const filter = document.getElementById('wa-filter').value;

    const filtered = leads.filter(l => {
        const phone = l.telefono || l.telefono_formateado;
        if (!phone) return false;

        const matchSearch = !search ||
            l.nombre?.toLowerCase().includes(search) ||
            l.nicho?.toLowerCase().includes(search);
        const matchFilter = filter === 'todos' ||
            (filter === 'sin-contactar' && l.estado === 'nuevo') ||
            (filter === 'contactados' && l.whatsapp_enviado);
        return matchSearch && matchFilter;
    });

    const container = document.getElementById('whatsapp-list');

    if (!filtered.length) {
        container.innerHTML = '<p class="loading">No hay leads con teléfono</p>';
        return;
    }

    let html = '';
    filtered.forEach(l => {
        const phone = l.telefono_formateado || l.telefono;
        const qScore = l.quality_score || 0;
        const qClass = getQualityClass(qScore);
        const location = `${l.nicho || ''} ${l.ciudad ? '- ' + l.ciudad : ''}`;
        // JSON-quote the id as a JS string literal, then escape it for the attribute.
        const idLiteral = escapeHtml(JSON.stringify(String(l.id)));
        html += `
            <div class="whatsapp-card" style="display: flex; justify-content: space-between; align-items: center;">
                <div>
                    <div style="display:flex; align-items:center; gap:10px;">
                        <div class="quality-ring ${qClass}">${escapeHtml(qScore)}</div>
                        <div class="name" style="font-weight:700; font-size:1.1rem">${escapeHtml(l.nombre || 'Sin nombre')}</div>
                    </div>
                    <div class="phone" style="margin-top:5px; opacity:0.8">${escapeHtml(phone)}</div>
                    <div style="margin-top: 0.5rem;">
                        <span class="badge ${getBadgeClass(l.estado)}">${escapeHtml(l.estado || 'nuevo')}</span>
                        <span style="color: var(--text-dim); margin-left: 1rem; font-size:0.8rem">${escapeHtml(location)}</span>
                    </div>
                </div>
                <div class="action-buttons">
                    <a href="${escapeHtml(generateWhatsAppLink(l))}" target="_blank" rel="noopener noreferrer" class="btn btn-whatsapp">Abrir Chat</a>
                    <button class="btn btn-primary" onclick="markWhatsAppSent(${idLiteral})">Marcar Enviado</button>
                </div>
            </div>
        `;
    });
    container.innerHTML = html;
}
|
| 1108 |
+
|
| 1109 |
/**
 * Re-render the WhatsApp outreach list.
 * Takes no arguments: renderWhatsApp reads `#wa-search` and `#wa-filter`
 * directly from the DOM.
 */
function filterWhatsApp() {
|
| 1110 |
renderWhatsApp();
|
| 1111 |
}
|
| 1112 |
+
|
| 1113 |
async function markWhatsAppSent(id) {
|
| 1114 |
try {
|
| 1115 |
+
await apiPatch('leads', id, {
|
| 1116 |
+
whatsapp_enviado: true,
|
| 1117 |
whatsapp_enviado_en: new Date().toISOString(),
|
| 1118 |
estado: 'contactado'
|
| 1119 |
});
|
|
|
|
| 1123 |
alert('Error: ' + error.message);
|
| 1124 |
}
|
| 1125 |
}
|
| 1126 |
+
|
| 1127 |
/**
 * Switch the visible tab and highlight its navigation button.
 *
 * The button is found through its inline `onclick="showTab('<id>')"`
 * attribute rather than the implicit global `event`, so the function also
 * works when called from code and never marks a non-nav element as active.
 *
 * @param {string} tabId - Element id of the `.tab-content` panel to show.
 * @returns {void} Leaves the current view untouched when the id is unknown.
 */
function showTab(tabId) {
    const panel = document.getElementById(tabId);
    if (!panel) {
        // Bail out before hiding anything so a bad id cannot blank the page.
        console.warn(`showTab: unknown tab "${tabId}"`);
        return;
    }

    document.querySelectorAll('.tab-content').forEach((tab) => { tab.style.display = 'none'; });
    const buttons = [...document.querySelectorAll('.nav button')];
    buttons.forEach((button) => button.classList.remove('active'));
    panel.style.display = 'block';

    const marker = `showTab('${tabId}')`;
    const trigger = buttons.find((button) => (button.getAttribute('onclick') || '').includes(marker))
        // Fallback for buttons wired without the inline attribute.
        ?? globalThis.event?.target?.closest?.('.nav button');
    trigger?.classList.add('active');

    // Lead Gen tab side-effects
    if (tabId === 'leadgen') {
        if (typeof checkServer === 'function') checkServer();
        if (typeof updatePlatforms === 'function') updatePlatforms();
    }
}
|
| 1138 |
+
|
| 1139 |
async function saveNew(e) {
|
| 1140 |
e.preventDefault();
|
| 1141 |
+
|
| 1142 |
const tipo = document.getElementById('nuevo-tipo').value;
|
| 1143 |
const isLead = tipo === 'lead';
|
| 1144 |
+
|
| 1145 |
const data = {
|
| 1146 |
nombre: document.getElementById('nuevo-nombre').value,
|
| 1147 |
correo: document.getElementById('nuevo-correo').value || null,
|
|
|
|
| 1156 |
created_at: new Date().toISOString(),
|
| 1157 |
updated_at: new Date().toISOString()
|
| 1158 |
};
|
| 1159 |
+
|
| 1160 |
const table = isLead ? 'leads' : 'customers';
|
| 1161 |
+
|
| 1162 |
try {
|
| 1163 |
await apiPost(table, data);
|
| 1164 |
alert('Guardado correctamente');
|
|
|
|
| 1169 |
alert('Error: ' + error.message);
|
| 1170 |
}
|
| 1171 |
}
|
| 1172 |
+
|
| 1173 |
+
loadData();
|
| 1174 |
+
|
| 1175 |
+
/* ════════════════════════════════════════════════════
|
| 1176 |
+
LEAD GEN PRO INTEGRATION
|
| 1177 |
+
API bridge: same origin as the CRM (API_BASE = '')
|
| 1178 |
+
════════════════════════════════════════════════════ */
|
| 1179 |
+
|
| 1180 |
+
// 100% CLOUD: La API se sirve desde la misma raíz que el CRM
|
| 1181 |
+
const API_BASE = '';
|
| 1182 |
+
let sseSource = null;
|
| 1183 |
+
let scrapeRunning = false;
|
| 1184 |
+
|
| 1185 |
+
// Insert Lead Gen tab HTML
|
| 1186 |
+
(function () {
|
| 1187 |
+
const leadgenHtml = `
|
| 1188 |
+
<div id="leadgen" class="tab-content" style="display:none">
|
| 1189 |
+
<div id="server-banner" class="server-banner offline">
|
| 1190 |
+
<span id="server-icon">⚠️</span>
|
| 1191 |
+
<span id="server-msg">Verificando conexión con API Server (localhost:5000)...</span>
|
| 1192 |
+
<button class="btn-sync" onclick="checkServer()" style="margin-left:auto">Reconectar</button>
|
| 1193 |
+
</div>
|
| 1194 |
+
|
| 1195 |
+
<div class="lg-stat-row">
|
| 1196 |
+
<div class="lg-stat">
|
| 1197 |
+
<div class="val" id="lg-total">—</div>
|
| 1198 |
+
<div class="lbl">Total Local</div>
|
| 1199 |
+
</div>
|
| 1200 |
+
<div class="lg-stat">
|
| 1201 |
+
<div class="val" id="lg-30d">—</div>
|
| 1202 |
+
<div class="lbl">Últimos 30d</div>
|
| 1203 |
+
</div>
|
| 1204 |
+
<div class="lg-stat">
|
| 1205 |
+
<div class="val" id="lg-pending-wa">—</div>
|
| 1206 |
+
<div class="lbl">Pendiente WA</div>
|
| 1207 |
+
</div>
|
| 1208 |
+
</div>
|
| 1209 |
+
|
| 1210 |
+
<div class="leadgen-grid">
|
| 1211 |
+
<!-- Panel Izquierdo: Configuración de Scraping -->
|
| 1212 |
+
<div>
|
| 1213 |
+
<div class="leadgen-form-card">
|
| 1214 |
+
<h3>🎯 Configurar Búsqueda</h3>
|
| 1215 |
+
|
| 1216 |
+
<div class="form-row">
|
| 1217 |
+
<label>Nicho / Industria</label>
|
| 1218 |
+
<input type="text" id="lg-niche" value="Real Estate" placeholder="Real Estate, Insurance, Healthcare...">
|
| 1219 |
+
</div>
|
| 1220 |
+
|
| 1221 |
+
<div class="form-row">
|
| 1222 |
+
<label>Tipo de Lead</label>
|
| 1223 |
+
<select id="lg-type" onchange="updatePlatforms()">
|
| 1224 |
+
<option value="both">Ambos (B2B + B2C)</option>
|
| 1225 |
+
<option value="b2b">B2B — Empresas</option>
|
| 1226 |
+
<option value="b2c">B2C — Consumidores</option>
|
| 1227 |
+
</select>
|
| 1228 |
+
</div>
|
| 1229 |
+
|
| 1230 |
+
<div class="form-row">
|
| 1231 |
+
<label>País</label>
|
| 1232 |
+
<select id="lg-country" onchange="updatePlatforms()">
|
| 1233 |
+
<option value="USA">🇺🇸 USA</option>
|
| 1234 |
+
<option value="Venezuela">🇻🇪 Venezuela</option>
|
| 1235 |
+
<option value="Mexico">🇲🇽 México</option>
|
| 1236 |
+
<option value="Colombia">🇨🇴 Colombia</option>
|
| 1237 |
+
<option value="Argentina">🇦🇷 Argentina</option>
|
| 1238 |
+
<option value="Spain">🇪🇸 España</option>
|
| 1239 |
+
</select>
|
| 1240 |
+
</div>
|
| 1241 |
+
|
| 1242 |
+
<div class="form-row">
|
| 1243 |
+
<label>Estado / Provincia</label>
|
| 1244 |
+
<input type="text" id="lg-state" placeholder="FL, California, Miranda...">
|
| 1245 |
+
</div>
|
| 1246 |
+
|
| 1247 |
+
<div class="form-row">
|
| 1248 |
+
<label>Ciudad</label>
|
| 1249 |
+
<input type="text" id="lg-city" placeholder="Miami, Caracas, Bogotá...">
|
| 1250 |
+
</div>
|
| 1251 |
+
|
| 1252 |
+
<div class="form-row">
|
| 1253 |
+
<label>Límite por plataforma</label>
|
| 1254 |
+
<input type="number" id="lg-limit" value="15" min="5" max="50">
|
| 1255 |
+
</div>
|
| 1256 |
+
|
| 1257 |
+
<div class="form-row" id="b2b-platforms">
|
| 1258 |
+
<label>Plataformas B2B</label>
|
| 1259 |
+
<div class="platform-grid">
|
| 1260 |
+
<label class="platform-check"><input type="checkbox" id="p-linkedin" checked> LinkedIn</label>
|
| 1261 |
+
<label class="platform-check"><input type="checkbox" id="p-gmaps" checked> Google Maps</label>
|
| 1262 |
+
<label class="platform-check"><input type="checkbox" id="p-youtube-b2b"> YouTube</label>
|
| 1263 |
+
</div>
|
| 1264 |
+
</div>
|
| 1265 |
+
|
| 1266 |
+
<div class="form-row" id="b2c-platforms">
|
| 1267 |
+
<label>Plataformas B2C</label>
|
| 1268 |
+
<div class="platform-grid">
|
| 1269 |
+
<label class="platform-check"><input type="checkbox" id="p-twitter" checked> Twitter/X</label>
|
| 1270 |
+
<label class="platform-check"><input type="checkbox" id="p-reddit" checked> Reddit</label>
|
| 1271 |
+
<label class="platform-check"><input type="checkbox" id="p-youtube-b2c" checked> YouTube</label>
|
| 1272 |
+
<label class="platform-check"><input type="checkbox" id="p-facebook"> Facebook</label>
|
| 1273 |
+
</div>
|
| 1274 |
+
</div>
|
| 1275 |
+
|
| 1276 |
+
<button class="btn-run" id="btn-run-scrape" onclick="startScraping()">
|
| 1277 |
+
⚡ Ejecutar Scraping
|
| 1278 |
+
</button>
|
| 1279 |
+
|
| 1280 |
+
<button class="btn-sync" style="width:100%;margin-top:0.75rem" onclick="syncToCloud()">
|
| 1281 |
+
☁️ Sincronizar a Cloud (Supabase)
|
| 1282 |
+
</button>
|
| 1283 |
+
</div>
|
| 1284 |
+
</div>
|
| 1285 |
+
|
| 1286 |
+
<!-- Panel Derecho: Terminal + Info -->
|
| 1287 |
+
<div>
|
| 1288 |
+
<div class="leadgen-form-card" style="margin-bottom:1.5rem">
|
| 1289 |
+
<h3>🖥 Terminal de Logs</h3>
|
| 1290 |
+
<div class="log-terminal" id="log-terminal">[Esperando comando...]
|
| 1291 |
+
</div>
|
| 1292 |
+
<div style="display:flex;gap:0.5rem;margin-top:0.75rem">
|
| 1293 |
+
<button class="btn-sync" onclick="clearLogs()">Limpiar</button>
|
| 1294 |
+
<button class="btn-sync" onclick="loadLocalStats()">Actualizar Stats</button>
|
| 1295 |
+
<span id="scrape-status" style="margin-left:auto;font-size:0.8rem;color:var(--text-dim)">Inactivo</span>
|
| 1296 |
+
</div>
|
| 1297 |
+
</div>
|
| 1298 |
+
|
| 1299 |
+
<div class="leadgen-form-card">
|
| 1300 |
+
<h3>📊 Top Nichos (Local)</h3>
|
| 1301 |
+
<div id="lg-nicho-breakdown" style="color:var(--text-dim);font-size:0.85rem">Cargando...</div>
|
| 1302 |
+
</div>
|
| 1303 |
+
</div>
|
| 1304 |
+
</div>
|
| 1305 |
+
</div>`;
|
| 1306 |
+
document.querySelector('.main').insertAdjacentHTML('beforeend', leadgenHtml);
|
| 1307 |
+
})();
|
| 1308 |
+
|
| 1309 |
+
/**
 * Probe the Lead Gen API health endpoint and update the status banner.
 *
 * Requests `${API_BASE}/api/health` with a 3 second timeout. On a 2xx answer
 * the banner turns "online" and the local statistics are refreshed. Any other
 * outcome (non-2xx, network error, timeout) turns it "offline".
 *
 * @returns {Promise<boolean>} `true` when the API answered OK, `false` otherwise.
 */
async function checkServer() {
    const banner = document.getElementById('server-banner');
    const msg = document.getElementById('server-msg');
    const icon = document.getElementById('server-icon');
    try {
        const res = await fetch(`${API_BASE}/api/health`, { signal: AbortSignal.timeout(3000) });
        if (res.ok) {
            // Show the address actually probed instead of a hard-coded localhost URL.
            const endpoint = API_BASE || globalThis.location?.origin || '';
            banner.className = 'server-banner online';
            icon.textContent = '🟢';
            msg.textContent = endpoint ? `API Server en línea — ${endpoint}` : 'API Server en línea';
            loadLocalStats();
            return true;
        }
    } catch (err) {
        // Still best-effort: record why the probe failed, then report offline.
        console.warn('checkServer: health check failed', err);
    }
    banner.className = 'server-banner offline';
    icon.textContent = '⚠️';
    msg.innerHTML = 'API Server offline. Ejecuta <code>start_api_server.bat</code> en lead_gen_pro/';
    return false;
}
|
| 1329 |
+
|
| 1330 |
+
/**
 * Fetch lead statistics from `${API_BASE}/api/stats` and render them.
 *
 * Fills the three counters (`#lg-total`, `#lg-30d`, `#lg-pending-wa`) and
 * rebuilds the per-niche table inside `#lg-nicho-breakdown`. Missing values
 * are shown as an em dash.
 *
 * Niche names come from the server and are treated as untrusted: the table
 * is built from DOM nodes with `textContent`, never by concatenating them
 * into an HTML string.
 *
 * Errors (network failure, non-2xx status, invalid JSON) are not rethrown;
 * the counters are reset to an em dash so stale numbers are not displayed.
 *
 * @returns {Promise<void>}
 */
async function loadLocalStats() {
    const setCounter = (id, value) => {
        document.getElementById(id).textContent = value ?? '—';
    };

    try {
        const res = await fetch(`${API_BASE}/api/stats`);
        if (!res.ok) {
            throw new Error(`HTTP ${res.status}`);
        }
        const d = await res.json();

        setCounter('lg-total', d.total);
        setCounter('lg-30d', d.leads_30d);
        setCounter('lg-pending-wa', d.pending_whatsapp);

        // Niche breakdown
        const elem = document.getElementById('lg-nicho-breakdown');
        const entries = Object.entries(d.by_niche ?? {});
        if (entries.length === 0) {
            elem.textContent = 'No hay datos aún.';
            return;
        }

        const table = document.createElement('table');
        table.style.cssText = 'width:100%;border-collapse:collapse';
        for (const [niche, count] of entries) {
            const nameCell = document.createElement('td');
            nameCell.style.cssText = 'padding:0.35rem 0;color:var(--text-main)';
            nameCell.textContent = niche;

            const countCell = document.createElement('td');
            countCell.style.cssText = 'text-align:right;color:#818cf8;font-weight:700';
            countCell.textContent = String(count);

            const row = document.createElement('tr');
            row.append(nameCell, countCell);
            table.appendChild(row);
        }
        elem.replaceChildren(table);
    } catch (e) {
        console.warn('[loadLocalStats] could not load stats:', e);
        setCounter('lg-total', null);
        setCounter('lg-30d', null);
        setCounter('lg-pending-wa', null);
    }
}
|
| 1356 |
+
|
| 1357 |
+
/**
 * Toggle the B2B / B2C platform checkbox groups to match the lead type
 * currently selected in `#lg-type`.
 *
 * Each group is hidden only when the *opposite* type is selected; any other
 * value leaves both groups visible.
 */
function updatePlatforms() {
    const leadType = document.getElementById('lg-type').value;

    // group element id -> lead type that hides it
    const hiddenWhen = {
        'b2b-platforms': 'b2c',
        'b2c-platforms': 'b2b',
    };

    for (const [groupId, hidingType] of Object.entries(hiddenWhen)) {
        const group = document.getElementById(groupId);
        group.style.display = leadType === hidingType ? 'none' : 'block';
    }
}
|
| 1363 |
+
|
| 1364 |
+
/**
 * Append one line to the `#log-terminal` element and keep it scrolled to
 * the bottom.
 *
 * Lines containing `[ERR]` or `Error` get the `log-err` class; otherwise
 * lines containing `[DONE]` or `[SYNC]` get `log-done`; all other lines are
 * left unstyled.
 *
 * @param {string} line - Text to append (a newline is added automatically).
 */
function appendLog(line) {
    const terminal = document.getElementById('log-terminal');
    const entry = document.createElement('span');

    let cssClass = '';
    if (line.includes('[ERR]') || line.includes('Error')) {
        cssClass = 'log-err';
    } else if (line.includes('[DONE]') || line.includes('[SYNC]')) {
        cssClass = 'log-done';
    }
    if (cssClass) {
        entry.className = cssClass;
    }

    entry.textContent = line + '\n';
    terminal.appendChild(entry);

    // Follow the newest output.
    terminal.scrollTop = terminal.scrollHeight;
}
|
| 1375 |
+
|
| 1376 |
+
/**
 * Discard everything shown in `#log-terminal` and leave a single
 * placeholder line in its place.
 */
function clearLogs() {
    const terminal = document.getElementById('log-terminal');
    terminal.textContent = '[Logs limpios]\n';
}
|
| 1379 |
+
|
| 1380 |
+
/**
 * Launch the scraping pipeline from the form values and stream its logs.
 *
 * Steps:
 *  1. Claim the run (`scrapeRunning`) and verify the API server is reachable.
 *  2. Build the payload from the form (niche, lead type, location, limit and
 *     the checked platforms for the selected lead type).
 *  3. POST it to `${API_BASE}/api/scrape`.
 *  4. Subscribe to `${API_BASE}/api/stream` (SSE) and append each message to
 *     the terminal until the `[END]` sentinel arrives; `[PING]` messages are
 *     ignored.
 *
 * Does nothing if a run is already in progress or the server is offline.
 *
 * @returns {Promise<void>}
 */
async function startScraping() {
    if (scrapeRunning) return;

    // Claim the run *before* the first await. Otherwise a second click while
    // the health check is still pending passes the guard above and launches
    // a duplicate pipeline.
    const runButton = document.getElementById('btn-run-scrape');
    scrapeRunning = true;
    runButton.disabled = true;

    let online = false;
    try {
        online = await checkServer();
    } finally {
        if (!online) {
            // Nothing was started: release the claim without showing the
            // "completed" status.
            scrapeRunning = false;
            runButton.disabled = false;
        }
    }
    if (!online) return;

    const type = document.getElementById('lg-type').value;
    const isChecked = (id) => document.getElementById(id).checked;

    const b2b = [];
    if (type !== 'b2c') {
        if (isChecked('p-linkedin')) b2b.push('linkedin');
        if (isChecked('p-gmaps')) b2b.push('google_maps');
        if (isChecked('p-youtube-b2b')) b2b.push('youtube');
    }
    const b2c = [];
    if (type !== 'b2b') {
        if (isChecked('p-twitter')) b2c.push('twitter');
        if (isChecked('p-reddit')) b2c.push('reddit');
        if (isChecked('p-youtube-b2c')) b2c.push('youtube');
        if (isChecked('p-facebook')) b2c.push('facebook');
    }

    const payload = {
        niche: document.getElementById('lg-niche').value || 'Real Estate',
        lead_type: type,
        country: document.getElementById('lg-country').value,
        state: document.getElementById('lg-state').value,
        city: document.getElementById('lg-city').value,
        // Empty, non-numeric or zero input falls back to 15.
        limit: Number.parseInt(document.getElementById('lg-limit').value, 10) || 15,
        b2b_platforms: b2b,
        b2c_platforms: b2c
    };

    clearLogs();
    appendLog(`[INICIO] ${new Date().toLocaleTimeString()} — Lanzando pipeline...`);
    appendLog(`Nicho: ${payload.niche} | Tipo: ${payload.lead_type} | Ciudad: ${payload.city || '—'}`);

    document.getElementById('scrape-status').textContent = '⏳ Ejecutando...';

    // POST to start the scrape
    try {
        const res = await fetch(`${API_BASE}/api/scrape`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload)
        });
        if (!res.ok) {
            // Error bodies are not guaranteed to be JSON (e.g. a proxy's HTML
            // error page); don't let a parse failure be reported as a
            // connection problem.
            const errBody = await res.json().catch(() => null);
            appendLog('[ERR] ' + (errBody?.error || 'Error desconocido'));
            resetScrapeUI();
            return;
        }
    } catch (e) {
        appendLog('[ERR] No se pudo conectar: ' + e.message);
        resetScrapeUI();
        return;
    }

    // Subscribe to SSE log stream. The handlers close over their own
    // `source` so they never act on a stream opened by a later run.
    if (sseSource) sseSource.close();
    const source = new EventSource(`${API_BASE}/api/stream`);
    sseSource = source;
    source.onmessage = (e) => {
        if (e.data === '[END]') {
            source.close();
            resetScrapeUI();
            loadLocalStats();
            // Refresh cloud leads
            loadData();
            return;
        }
        if (e.data !== '[PING]') appendLog(e.data);
    };
    source.onerror = () => {
        // Close explicitly so the browser does not auto-reconnect.
        source.close();
        resetScrapeUI();
    };
}
|
| 1459 |
+
|
| 1460 |
+
/**
 * Return the scraping controls to their idle state.
 *
 * Clears the `scrapeRunning` flag, re-enables the run button and shows the
 * "completed" status, which falls back to "Inactivo" after five seconds.
 */
function resetScrapeUI() {
    scrapeRunning = false;
    document.getElementById('btn-run-scrape').disabled = false;
    document.getElementById('scrape-status').textContent = '✅ Completado';
    setTimeout(() => {
        // A new run may have started during the delay; don't overwrite its
        // "running" status with "Inactivo".
        if (scrapeRunning) return;
        document.getElementById('scrape-status').textContent = 'Inactivo';
    }, 5000);
}
|
| 1468 |
+
|
| 1469 |
+
/**
 * Ask the API server to push the local leads to Supabase.
 *
 * Checks that the server is reachable, POSTs to `${API_BASE}/api/sync`,
 * writes the outcome to the log terminal and, on success, reloads the lead
 * data via `loadData()`. Failures (network error, non-2xx status, invalid
 * JSON) are logged as `[ERR]` lines and never rethrown.
 *
 * @returns {Promise<void>}
 */
async function syncToCloud() {
    const online = await checkServer();
    if (!online) return;

    appendLog('[SYNC] Iniciando sincronización con Supabase...');
    try {
        const res = await fetch(`${API_BASE}/api/sync`, { method: 'POST' });
        if (!res.ok) {
            // Don't report an HTTP failure as a successful sync. The error
            // body may not be JSON, so parse it tolerantly.
            const errBody = await res.json().catch(() => null);
            throw new Error(errBody?.error || errBody?.message || `HTTP ${res.status}`);
        }
        const d = await res.json();
        // `??` rather than `||` so a legitimate result of 0 is still shown.
        appendLog(`[SYNC] ${d.message} — Sincronizados: ${d.result ?? ''}`);
        loadData();
    } catch (e) {
        appendLog('[ERR] Error sincronizando: ' + e.message);
    }
}
|
| 1483 |
+
|
| 1484 |
loadData();
|
| 1485 |
</script>
|
| 1486 |
</body>
|
| 1487 |
+
|
| 1488 |
+
</html>
|