Lukeetah committed on
Commit
3d30d20
·
verified ·
1 Parent(s): 699ed82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +262 -279
app.py CHANGED
@@ -1,20 +1,23 @@
1
  import os
2
  import re
3
- import time
4
- import math
5
  import json
 
 
 
6
  import asyncio
7
  import random
 
 
8
  from dataclasses import dataclass, asdict
9
  from typing import List, Optional, Dict, Any, Tuple
10
  import urllib.parse as ul
11
- from pathlib import Path
12
 
13
  import httpx
14
  from bs4 import BeautifulSoup
15
  from rapidfuzz import fuzz
16
  import pandas as pd
17
  import gradio as gr
 
18
 
19
  # =========================
20
  # Configuración principal
@@ -23,6 +26,7 @@ import gradio as gr
23
  DEFAULT_MAX_USD = 90000
24
  DEFAULT_NEIGHBORHOODS = [
25
  "Saavedra", "Nuñez", "La Lucila", "Florida Oeste", "Munro", "Carapachay",
 
26
  "Olivos", "Villa Martelli"
27
  ]
28
  DEFAULT_TYPES = ["casa", "ph"] # casa / ph
@@ -31,34 +35,46 @@ REQUIRE_BIDET = True
31
  REQUIRE_PET_FRIENDLY = True
32
  REQUIRE_OUTDOOR = True # patio o terraza
33
 
34
- # Alertas por Telegram
35
- TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN", "").strip()
36
- TELEGRAM_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID", "").strip()
37
- DEFAULT_MIN_SCORE_ALERT = 2.2
38
- DEFAULT_MONITOR_INTERVAL_MIN = 60
39
-
40
- # Persistencia ligera (para no reenviar duplicados)
41
- CACHE_PATH = Path("cache_listings.json")
42
 
 
43
  USER_AGENT_POOL = [
44
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
45
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
46
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
47
  ]
48
-
 
 
 
 
49
  TIMEOUT = httpx.Timeout(20.0, connect=10.0)
50
  MAX_CONCURRENCY = 6
51
  RETRIES = 2
52
- BACKOFF_BASE = 0.8
 
53
 
54
- # Microzonas residenciales priorizadas (heurística positiva)
55
- MICROZONAS_PRIORITARIAS = [
56
- "Parque Saavedra", "Parque Sarmiento", "Av. Balbín", "Ruiz Huidobro",
57
- "Lomas de Nuñez", "Cabildo", "Plaza Alberti",
58
- "Estación La Lucila", "Rawson", "Paraná", "Maipú",
59
- "Estación Florida", "Estación Carapachay", "Estación Munro",
60
- "Ugarte", "San Martín", "Panamericana", "Pelliza", "Melo",
61
- ]
 
 
 
 
 
 
62
 
63
  # =========================
64
  # Modelos y utilidades
@@ -84,14 +100,17 @@ class Listing:
84
  description: Optional[str]
85
  score: float
86
 
 
 
 
87
  def to_float_price(value: str) -> Optional[float]:
88
  if not value:
89
  return None
90
  txt = value.replace(".", "").replace(",", ".").upper()
91
- if "USD" in txt or "U$S" in txt or "U$D" in txt or "DOLAR" in txt or "US$" in txt:
92
  m = re.search(r"(\d+(?:\.\d+)?)", txt)
93
  return float(m.group(1)) if m else None
94
- return None
95
 
96
  def extract_int(text: str) -> Optional[int]:
97
  if not text:
@@ -99,15 +118,6 @@ def extract_int(text: str) -> Optional[int]:
99
  m = re.search(r"(\d+)", text)
100
  return int(m.group(1)) if m else None
101
 
102
- def clean_text(s: str) -> str:
103
- return re.sub(r"\s+", " ", (s or "").strip())
104
-
105
- def text_has_any(text: str, keywords: List[str]) -> bool:
106
- if not text:
107
- return False
108
- t = text.lower()
109
- return any(kw.lower() in t for kw in keywords)
110
-
111
  def fuzzy_any(text: str, keywords: List[str], thresh: int = 80) -> bool:
112
  if not text:
113
  return False
@@ -135,69 +145,89 @@ def compute_score(lst: Listing, filters: Dict[str, Any]) -> float:
135
  score += (filters["max_price_usd"] - lst.price_usd) / max(filters["max_price_usd"], 1) * 1.0
136
  if lst.rooms and lst.rooms >= filters["min_rooms"]:
137
  score += 1.0
138
- if filters["require_outdoor"]:
139
- if (lst.has_patio or lst.has_terrace):
140
- score += 1.0
141
- if not filters["require_pet"]:
142
- score += 0.2
143
- else:
144
  if lst.pet_friendly:
145
  score += 0.6
146
- if not filters["require_bidet"]:
147
- score += 0.2
148
  else:
 
 
149
  if lst.has_bidet:
150
  score += 0.6
 
 
151
  score += residential_score(lst.address or "", lst.neighborhood or "", lst.description or "")
152
  return round(score, 3)
153
 
154
- def headers():
155
- return {"User-Agent": random.choice(USER_AGENT_POOL)}
 
 
 
 
 
 
 
156
 
157
- async def fetch(client: httpx.AsyncClient, url: str) -> Optional[str]:
 
 
 
 
 
158
  for i in range(RETRIES + 1):
159
  try:
160
- r = await client.get(url, headers=headers(), timeout=TIMEOUT)
161
- if r.status_code == 200 and r.text:
162
- return r.text
163
- await asyncio.sleep(BACKOFF_BASE * (2 ** i))
 
 
164
  except Exception:
165
- await asyncio.sleep(BACKOFF_BASE * (2 ** i))
166
  return None
167
 
168
- async def fetch_detail_and_enrich(client: httpx.AsyncClient, lst: Listing) -> Listing:
169
- html = await fetch(client, lst.link)
170
  if not html:
171
  return lst
172
  soup = BeautifulSoup(html, "lxml")
173
 
 
174
  desc_el = soup.find(["div", "section"], attrs={"class": re.compile(r"(description|Description|post|body)")}) or soup.find("p")
175
  if desc_el:
176
  desc = clean_text(desc_el.get_text(" ", strip=True))
177
  else:
178
- desc = clean_text(" ".join(t.get_text(" ", strip=True) for t in soup.find_all(["p", "li"])[:30]))
179
 
180
  patio, terraza, mascotas, bidet = feature_guess(desc)
181
 
 
182
  features_text = " ".join(
183
  el.get_text(" ", strip=True)
184
  for el in soup.find_all(["li", "span", "div"])
185
  if el and el.get_text() and any(x in el.get_text().lower() for x in ["ambiente", "dorm", "bañ"])
186
- )
187
- ft = features_text.lower()
188
- rooms = extract_int(re.search(r"(\d+)\s*ambiente", ft).group(1)) if re.search(r"(\d+)\s*ambiente", ft) else lst.rooms
189
- bathrooms = extract_int(re.search(r"(\d+)\s*bañ", ft).group(1)) if re.search(r"(\d+)\s*bañ", ft) else lst.bathrooms
190
- bedrooms = extract_int(re.search(r"(\d+)\s*dorm", ft).group(1)) if re.search(r"(\d+)\s*dorm", ft) else lst.bedrooms
 
 
 
 
 
191
 
192
  addr_guess = soup.find(attrs={"class": re.compile(r"(address|ubicacion|location|inmo-location)")})
193
  if addr_guess and not lst.address:
194
  lst.address = clean_text(addr_guess.get_text(" ", strip=True))[:200]
195
 
196
  lst.description = desc or lst.description
197
- lst.has_patio = lst.has_patio if lst.has_patio is not None else patio
198
- lst.has_terrace = lst.has_terrace if lst.has_terrace is not None else terraza
199
- lst.pet_friendly = lst.pet_friendly if lst.pet_friendly is not None else mascotas
200
- lst.has_bidet = lst.has_bidet if lst.has_bidet is not None else bidet
201
  lst.rooms = rooms
202
  lst.bathrooms = bathrooms
203
  lst.bedrooms = bedrooms
@@ -252,23 +282,27 @@ def generic_card_extractor(soup: BeautifulSoup, domain: str) -> List[Dict[str, A
252
  price_text = (m.group(0) if m else "")
253
  addr_m = re.search(r"(Saavedra|Nu[eñ]ez|La Lucila|Florida|Munro|Carapachay|Olivos|Martelli)[^|,]*", block_text, re.IGNORECASE)
254
  address_text = addr_m.group(0) if addr_m else ""
 
 
255
  cards.append({
256
  "title": title or "",
257
- "link": href if href.startswith("http") else f"https://{domain}{href}",
258
  "price_text": price_text,
259
  "addr_text": address_text
260
  })
 
261
  filtered = []
262
  for c in cards:
263
  if len(c["title"]) < 8:
264
  continue
265
- if any(tok in c["link"] for tok in ["/perfil/", "/inmobiliaria/", "/ayuda", "/faq", "/login", "/like"]):
266
  continue
267
  filtered.append(c)
268
  return filtered
269
 
270
- async def scrape_search_page(client: httpx.AsyncClient, url: str, domain: str) -> List[Listing]:
271
- html = await fetch(client, url)
 
272
  if not html:
273
  return []
274
  soup = BeautifulSoup(html, "lxml")
@@ -290,17 +324,18 @@ async def scrape_search_page(client: httpx.AsyncClient, url: str, domain: str) -
290
  description=None,
291
  score=0.0
292
  ))
 
293
  return listings[:25]
294
 
295
- async def scrape_portal(client: httpx.AsyncClient, portal: str, urls: List[str]) -> List[Listing]:
296
  out: List[Listing] = []
 
297
  for u in urls[:4]:
298
  try:
299
- res = await scrape_search_page(client, u, portal)
300
  out.extend(res)
301
- await asyncio.sleep(0.5)
302
  except Exception:
303
- continue
304
  return out
305
 
306
  # =========================
@@ -324,57 +359,63 @@ async def run_agent(
324
  require_pet=require_pet,
325
  )
326
 
327
- async with httpx.AsyncClient(follow_redirects=True) as client:
328
- z_urls = zonaprop_search_urls(neighborhoods, max_price_usd, types)
329
- a_urls = argenprop_search_urls(neighborhoods, max_price_usd, types)
330
- p_urls = properati_search_urls(neighborhoods, max_price_usd, types)
331
-
332
- tasks = [
333
- scrape_portal(client, "www.zonaprop.com.ar", z_urls),
334
- scrape_portal(client, "www.argenprop.com", a_urls),
335
- scrape_portal(client, "www.properati.com.ar", p_urls),
336
- ]
337
- batch_lists = await asyncio.gather(*tasks)
338
- listings = [l for batch in batch_lists for l in batch]
339
-
340
- seen = set()
341
- unique: List[Listing] = []
342
- for l in listings:
343
- if l.link in seen:
344
- continue
345
- seen.add(l.link)
346
- unique.append(l)
347
-
348
- sem = asyncio.Semaphore(MAX_CONCURRENCY)
349
- async def enrich_guarded(l: Listing):
350
- async with sem:
351
- return await fetch_detail_and_enrich(client, l)
352
-
353
- enriched = await asyncio.gather(*[enrich_guarded(l) for l in unique])
354
-
355
- def passes(l: Listing) -> bool:
356
- if l.price_usd is None or l.price_usd > max_price_usd:
357
- return False
358
- if l.rooms is not None and l.rooms < min_rooms:
359
- return False
360
- if require_outdoor and not ((l.has_patio is True) or (l.has_terrace is True)):
361
- return False
362
- if require_bidet and l.has_bidet is not True:
363
- return False
364
- if require_pet and l.pet_friendly is not True:
365
- return False
366
- type_hit = any(t in (l.title.lower() + " " + (l.description or "").lower()) for t in types)
367
- if not type_hit:
368
- type_hit = True
369
- return type_hit
370
-
371
- filtered = [l for l in enriched if passes(l)]
372
-
373
- for l in filtered:
374
- l.score = compute_score(l, filters)
375
-
376
- filtered.sort(key=lambda x: (-x.score, x.price_usd or 1e9))
377
- return filtered
 
 
 
 
 
 
378
 
379
  def listings_to_df(listings: List[Listing]) -> pd.DataFrame:
380
  rows = []
@@ -401,118 +442,63 @@ def listings_to_df(listings: List[Listing]) -> pd.DataFrame:
401
  return df
402
 
403
  # =========================
404
- # Cache + Telegram
405
- # =========================
406
-
407
- def load_cache() -> Dict[str, Any]:
408
- if CACHE_PATH.exists():
409
- try:
410
- return json.loads(CACHE_PATH.read_text(encoding="utf-8"))
411
- except Exception:
412
- return {"sent_links": []}
413
- return {"sent_links": []}
414
-
415
- def save_cache(cache: Dict[str, Any]) -> None:
416
- try:
417
- CACHE_PATH.write_text(json.dumps(cache, ensure_ascii=False, indent=2), encoding="utf-8")
418
- except Exception:
419
- pass
420
-
421
- async def telegram_send_message(text: str, disable_web_page_preview: bool = False) -> bool:
422
- if not TELEGRAM_BOT_TOKEN or not TELEGRAM_CHAT_ID:
423
- return False
424
- api = f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage"
425
- payload = {
426
- "chat_id": TELEGRAM_CHAT_ID,
427
- "text": text,
428
- "parse_mode": "HTML",
429
- "disable_web_page_preview": disable_web_page_preview
430
- }
431
- try:
432
- async with httpx.AsyncClient() as client:
433
- r = await client.post(api, data=payload, timeout=TIMEOUT)
434
- return r.status_code == 200
435
- except Exception:
436
- return False
437
-
438
- def fmt_listing_msg(l: Listing) -> str:
439
- price = f"USD {int(l.price_usd)}" if l.price_usd else "USD -"
440
- flags = []
441
- if l.has_patio: flags.append("Patio")
442
- if l.has_terrace: flags.append("Terraza")
443
- if l.pet_friendly: flags.append("Mascotas")
444
- if l.has_bidet: flags.append("Bidet")
445
- flags_txt = " · ".join(flags) if flags else "—"
446
- addr = l.address or "Zona: —"
447
- return (
448
- f"🏡 <b>{l.title[:70]}</b>\n"
449
- f"{addr}\n"
450
- f"💰 {price} · ⭐ {l.score}\n"
451
- f"🔖 {l.rooms or '-'} amb · {l.bedrooms or '-'} dorm · {l.bathrooms or '-'} baños\n"
452
- f"✅ {flags_txt}\n"
453
- f"🔗 <a href=\"{l.link}\">Ver aviso</a> · {l.source.replace('www.', '')}"
454
- )
455
-
456
- # =========================
457
- # Monitor en background
458
  # =========================
459
 
460
- monitor_task: Optional[asyncio.Task] = None
461
- monitor_stop_event = asyncio.Event()
462
- monitor_running = False
463
-
464
- async def monitor_loop(
465
- neighs: List[str],
466
- max_usd: int,
467
- types: List[str],
468
- min_rooms: int,
469
- req_outdoor: bool,
470
- req_bidet: bool,
471
- req_pet: bool,
472
- min_score_alert: float,
473
- interval_min: int,
474
- max_alerts_per_run: int = 5
475
- ):
476
- global monitor_running
477
- cache = load_cache()
478
- sent_links = set(cache.get("sent_links", []))
479
- monitor_running = True
480
- await telegram_send_message("✅ Monitor de avisos iniciado. Te aviso lo que valga la pena. 🐶🏡", True)
481
  try:
482
- while not monitor_stop_event.is_set():
483
- try:
484
- results = await run_agent(
485
- neighborhoods=neighs,
486
- max_price_usd=max_usd,
487
- types=types,
488
- min_rooms=min_rooms,
489
- require_outdoor=req_outdoor,
490
- require_bidet=req_bidet,
491
- require_pet=req_pet
492
- )
493
- # Filtrar nuevos con buen score
494
- new_hits = [l for l in results if l.score >= min_score_alert and l.link not in sent_links]
495
- if new_hits:
496
- for l in new_hits[:max_alerts_per_run]:
497
- ok = await telegram_send_message(fmt_listing_msg(l))
498
- if ok:
499
- sent_links.add(l.link)
500
- cache["sent_links"] = list(sent_links)
501
- save_cache(cache)
502
- else:
503
- # ping silencioso cada tanto para saber que sigue vivo (opcional)
504
- pass
505
- except Exception:
506
- # Evita caída total del loop
507
- await asyncio.sleep(3)
508
- # Espera
509
- await asyncio.wait_for(monitor_stop_event.wait(), timeout=interval_min * 60)
510
- except asyncio.TimeoutError:
511
- # Timeout esperado por wait_for; continúa loop
512
- pass
513
- finally:
514
- monitor_running = False
515
- await telegram_send_message("⏹️ Monitor de avisos detenido.", True)
516
 
517
  # =========================
518
  # UI (Gradio)
@@ -521,47 +507,45 @@ async def monitor_loop(
521
  DESCRIPTION = """
522
  Agente agregador de avisos (Zonaprop, Argenprop, Properati) para Saavedra → La Lucila y alrededores.
523
  Filtra: USD ≤ 90k, ≥ 3 ambientes (para oficina), patio/terraza, mascotas, bidet (si figura en descripción).
524
-
525
- Alertas por Telegram: configurá TELEGRAM_BOT_TOKEN y TELEGRAM_CHAT_ID en los Secrets del Space. Luego, iniciá el monitor.
526
  """
527
 
528
- async def run_and_present(neighs, max_usd, types, min_rooms, req_outdoor, req_bidet, req_pet):
529
- neighs = [n.strip() for n in neighs.split(",") if n.strip()]
530
- types = [t.strip().lower() for t in types.split(",") if t.strip()]
 
531
  results = await run_agent(
532
- neighborhoods=neighs,
533
- max_price_usd=max_usd,
534
- types=types,
535
- min_rooms=min_rooms,
536
- require_outdoor=req_outdoor,
537
- require_bidet=req_bidet,
538
- require_pet=req_pet
539
  )
540
  df = listings_to_df(results)
541
- json_blob = json.dumps([asdict(l) for l in results], ensure_ascii=False, indent=2)
542
- return df, json_blob
543
-
544
- async def start_monitor(neighs, max_usd, types, min_rooms, req_outdoor, req_bidet, req_pet, min_score_alert, interval_min):
545
- global monitor_task
546
- if monitor_task and not monitor_task.done():
547
- return "El monitor ya está corriendo."
548
- if not TELEGRAM_BOT_TOKEN or not TELEGRAM_CHAT_ID:
549
- return "Faltan TELEGRAM_BOT_TOKEN o TELEGRAM_CHAT_ID en los Secrets del Space."
550
- monitor_stop_event.clear()
551
- neighs_l = [n.strip() for n in neighs.split(",") if n.strip()]
552
- types_l = [t.strip().lower() for t in types.split(",") if t.strip()]
553
- monitor_task = asyncio.create_task(monitor_loop(
554
- neighs_l, int(max_usd), types_l, int(min_rooms),
555
- bool(req_outdoor), bool(req_bidet), bool(req_pet),
556
- float(min_score_alert), int(interval_min)
557
- ))
558
- return "Monitor iniciado. Te aviso por Telegram."
559
-
560
- async def stop_monitor():
561
- if monitor_task and not monitor_task.done():
562
- monitor_stop_event.set()
563
- return "Solicitada detención. Se detendrá en el próximo ciclo."
564
- return "El monitor no estaba corriendo."
565
 
566
  with gr.Blocks(title="Agente Inmuebles Norte BA (≤ USD 90k)") as demo:
567
  gr.Markdown("# Agente de casas/PH norte BA (≤ 90 000 USD)")
@@ -576,27 +560,26 @@ with gr.Blocks(title="Agente Inmuebles Norte BA (≤ USD 90k)") as demo:
576
  req_outdoor = gr.Checkbox(label="Requerir patio o terraza", value=REQUIRE_OUTDOOR)
577
  req_bidet = gr.Checkbox(label="Requerir bidet (solo si aparece en descripción)", value=REQUIRE_BIDET)
578
  req_pet = gr.Checkbox(label="Requerir pet-friendly (si aparece en descripción)", value=REQUIRE_PET_FRIENDLY)
 
 
 
 
579
 
580
  btn = gr.Button("Buscar ahora", variant="primary")
 
581
  with gr.Tabs():
582
  with gr.Tab("Resultados"):
583
- table = gr.Dataframe(interactive=False, wrap=True, overflow_row_behaviour="paginate", max_rows=300)
584
  with gr.Tab("JSON"):
585
  j = gr.Code(language="json")
 
 
586
 
587
- gr.Markdown("---")
588
- gr.Markdown("## Alertas por Telegram")
589
- with gr.Row():
590
- min_score_alert = gr.Slider(label="Score mínimo para alertar", minimum=1.0, maximum=4.0, step=0.1, value=DEFAULT_MIN_SCORE_ALERT)
591
- interval_min = gr.Slider(label="Intervalo de monitoreo (minutos)", minimum=10, maximum=240, step=5, value=DEFAULT_MONITOR_INTERVAL_MIN)
592
- with gr.Row():
593
- start_btn = gr.Button("Iniciar monitor", variant="primary")
594
- stop_btn = gr.Button("Detener monitor")
595
- status = gr.Markdown("Estado: —")
596
-
597
- btn.click(run_and_present, inputs=[neighs, max_usd, types, min_rooms, req_outdoor, req_bidet, req_pet], outputs=[table, j])
598
- start_btn.click(start_monitor, inputs=[neighs, max_usd, types, min_rooms, req_outdoor, req_bidet, req_pet, min_score_alert, interval_min], outputs=[status])
599
- stop_btn.click(stop_monitor, outputs=[status])
600
 
601
  if __name__ == "__main__":
602
- demo.launch()
 
1
  import os
2
  import re
 
 
3
  import json
4
+ import time
5
+ import ssl
6
+ import smtplib
7
  import asyncio
8
  import random
9
+ import mimetypes
10
+ from pathlib import Path
11
  from dataclasses import dataclass, asdict
12
  from typing import List, Optional, Dict, Any, Tuple
13
  import urllib.parse as ul
 
14
 
15
  import httpx
16
  from bs4 import BeautifulSoup
17
  from rapidfuzz import fuzz
18
  import pandas as pd
19
  import gradio as gr
20
+ from email.message import EmailMessage
21
 
22
  # =========================
23
  # Configuración principal
 
26
  DEFAULT_MAX_USD = 90000
27
  DEFAULT_NEIGHBORHOODS = [
28
  "Saavedra", "Nuñez", "La Lucila", "Florida Oeste", "Munro", "Carapachay",
29
+ # Cercanos útiles
30
  "Olivos", "Villa Martelli"
31
  ]
32
  DEFAULT_TYPES = ["casa", "ph"] # casa / ph
 
35
  REQUIRE_PET_FRIENDLY = True
36
  REQUIRE_OUTDOOR = True # patio o terraza
37
 
38
+ # Microzonas residenciales priorizadas (heurística positiva)
39
+ MICROZONAS_PRIORITARIAS = [
40
+ "Parque Saavedra", "Parque Sarmiento", "Av. Balbín", "Ruiz Huidobro",
41
+ "Lomas de Nuñez", "Cabildo", "Plaza Alberti",
42
+ "Estación La Lucila", "Rawson", "Paraná", "Maipú",
43
+ "Estación Florida", "Estación Carapachay", "Estación Munro",
44
+ "Ugarte", "San Martín", "Panamericana", "Pelliza", "Melo",
45
+ ]
46
 
47
+ # Anti-scraping: headers y tiempos
48
  USER_AGENT_POOL = [
49
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
50
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
51
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
52
  ]
53
+ REFERER_POOL = [
54
+ "https://www.google.com/",
55
+ "https://www.bing.com/",
56
+ "https://duckduckgo.com/",
57
+ ]
58
  TIMEOUT = httpx.Timeout(20.0, connect=10.0)
59
  MAX_CONCURRENCY = 6
60
  RETRIES = 2
61
+ BACKOFF_BASE = 0.9
62
+ JITTER_RANGE = (0.15, 0.6) # segundos
63
 
64
+ # Proxy opcional (si definís en Secrets)
65
+ # Ejemplos: http://user:pass@host:port
66
+ PROXY_URL = os.getenv("PROXY_URL", "").strip() # se aplica a todo el cliente si está presente
67
+
68
+ # =========================
69
+ # Email (usa tu SMTP)
70
+ # =========================
71
+ # Configuralo en Settings → Secrets del Space
72
+ SMTP_HOST = os.getenv("SMTP_HOST", "").strip() # ej: smtp.gmail.com
73
+ SMTP_PORT = int(os.getenv("SMTP_PORT", "587")) # 587 (STARTTLS) o 465 (SSL)
74
+ SMTP_USER = os.getenv("SMTP_USER", "").strip() # tu usuario/alias
75
+ SMTP_PASS = os.getenv("SMTP_PASS", "").strip() # password o app password
76
+ SMTP_FROM = os.getenv("SMTP_FROM", SMTP_USER).strip()
77
+ SMTP_USE_SSL = os.getenv("SMTP_USE_SSL", "false").lower() in ("1", "true", "yes")
78
 
79
  # =========================
80
  # Modelos y utilidades
 
100
  description: Optional[str]
101
  score: float
102
 
103
def clean_text(s: str) -> str:
    """Normalize whitespace: trim the ends and collapse internal runs to one space."""
    trimmed = (s or "").strip()
    return re.sub(r"\s+", " ", trimmed)
105
+
106
def to_float_price(value: str) -> Optional[float]:
    """Parse a USD price string into a float; return None for ARS/unknown currency.

    NOTE(review): the normalization assumes Argentine formatting ("." as
    thousands separator, "," as decimal) — US-style "90,000" would misparse.
    Confirm against the portals' actual price strings.
    """
    if not value:
        return None
    # Drop thousands dots, turn decimal commas into dots, uppercase for tag match.
    normalized = value.replace(".", "").replace(",", ".").upper()
    is_usd = any(tag in normalized for tag in ("USD", "U$S", "U$D", "DOLAR", "US$"))
    if not is_usd:
        return None  # ARS (or unrecognized) prices are skipped
    match = re.search(r"(\d+(?:\.\d+)?)", normalized)
    return float(match.group(1)) if match else None
114
 
115
  def extract_int(text: str) -> Optional[int]:
116
  if not text:
 
118
  m = re.search(r"(\d+)", text)
119
  return int(m.group(1)) if m else None
120
 
 
 
 
 
 
 
 
 
 
121
  def fuzzy_any(text: str, keywords: List[str], thresh: int = 80) -> bool:
122
  if not text:
123
  return False
 
145
  score += (filters["max_price_usd"] - lst.price_usd) / max(filters["max_price_usd"], 1) * 1.0
146
  if lst.rooms and lst.rooms >= filters["min_rooms"]:
147
  score += 1.0
148
+ if filters["require_outdoor"] and (lst.has_patio or lst.has_terrace):
149
+ score += 1.0
150
+ if filters["require_pet"]:
 
 
 
151
  if lst.pet_friendly:
152
  score += 0.6
 
 
153
  else:
154
+ score += 0.2
155
+ if filters["require_bidet"]:
156
  if lst.has_bidet:
157
  score += 0.6
158
+ else:
159
+ score += 0.2
160
  score += residential_score(lst.address or "", lst.neighborhood or "", lst.description or "")
161
  return round(score, 3)
162
 
163
def make_headers() -> Dict[str, str]:
    """Build browser-like request headers with a randomized UA and Referer.

    Randomizing per request makes the scraper look less like a single bot.
    """
    headers = {
        "User-Agent": random.choice(USER_AGENT_POOL),
        "Accept-Language": "es-AR,es;q=0.9,en;q=0.8",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }
    headers["Referer"] = random.choice(REFERER_POOL)
    headers["Cache-Control"] = "no-cache"
    headers["Pragma"] = "no-cache"
    return headers
172
 
173
async def polite_pause():
    """Sleep a small random jitter between requests to stay gentle on servers."""
    low, high = JITTER_RANGE
    await asyncio.sleep(random.uniform(low, high))
175
+
176
async def fetch(url: str) -> Optional[str]:
    """Download *url* and return the HTML body, or None once all retries fail.

    A fresh AsyncClient is built per request so headers vary each time
    (basic anti-fingerprinting). Retries use exponential backoff with jitter.

    NOTE(review): httpx's ``proxies=`` kwarg is deprecated in newer httpx
    (renamed ``proxy=``) and ``http2=True`` needs the ``h2`` extra — confirm
    against the pinned httpx version.
    """
    proxies = {"all://": PROXY_URL} if PROXY_URL else None
    for attempt in range(RETRIES + 1):
        try:
            async with httpx.AsyncClient(follow_redirects=True, http2=True, proxies=proxies, timeout=TIMEOUT) as client:
                r = await client.get(url, headers=make_headers())
            # Non-streaming get() fully reads the body, so r is usable after close.
            if r.status_code == 200 and r.text:
                return r.text
        except Exception:
            pass  # network/HTTP error: fall through to backoff/retry
        # Fix: only sleep when another attempt follows; the original also slept
        # after the last attempt (and while holding the connection open).
        if attempt < RETRIES:
            await asyncio.sleep(BACKOFF_BASE * (2 ** attempt) + random.uniform(0, 0.3))
    return None
190
 
191
+ async def fetch_detail_and_enrich(lst: Listing) -> Listing:
192
+ html = await fetch(lst.link)
193
  if not html:
194
  return lst
195
  soup = BeautifulSoup(html, "lxml")
196
 
197
+ # Descripción
198
  desc_el = soup.find(["div", "section"], attrs={"class": re.compile(r"(description|Description|post|body)")}) or soup.find("p")
199
  if desc_el:
200
  desc = clean_text(desc_el.get_text(" ", strip=True))
201
  else:
202
+ desc = clean_text(" ".join(t.get_text(" ", strip=True) for t in soup.find_all(["p", "li"])[:40]))
203
 
204
  patio, terraza, mascotas, bidet = feature_guess(desc)
205
 
206
+ # Features (ambientes / baños / dormitorios)
207
  features_text = " ".join(
208
  el.get_text(" ", strip=True)
209
  for el in soup.find_all(["li", "span", "div"])
210
  if el and el.get_text() and any(x in el.get_text().lower() for x in ["ambiente", "dorm", "bañ"])
211
+ ).lower()
212
+ rooms = lst.rooms
213
+ bathrooms = lst.bathrooms
214
+ bedrooms = lst.bedrooms
215
+ m = re.search(r"(\d+)\s*ambiente", features_text)
216
+ if m: rooms = extract_int(m.group(1))
217
+ m = re.search(r"(\d+)\s*bañ", features_text)
218
+ if m: bathrooms = extract_int(m.group(1))
219
+ m = re.search(r"(\d+)\s*dorm", features_text)
220
+ if m: bedrooms = extract_int(m.group(1))
221
 
222
  addr_guess = soup.find(attrs={"class": re.compile(r"(address|ubicacion|location|inmo-location)")})
223
  if addr_guess and not lst.address:
224
  lst.address = clean_text(addr_guess.get_text(" ", strip=True))[:200]
225
 
226
  lst.description = desc or lst.description
227
+ if lst.has_patio is None: lst.has_patio = patio
228
+ if lst.has_terrace is None: lst.has_terrace = terraza
229
+ if lst.pet_friendly is None: lst.pet_friendly = mascotas
230
+ if lst.has_bidet is None: lst.has_bidet = bidet
231
  lst.rooms = rooms
232
  lst.bathrooms = bathrooms
233
  lst.bedrooms = bedrooms
 
282
  price_text = (m.group(0) if m else "")
283
  addr_m = re.search(r"(Saavedra|Nu[eñ]ez|La Lucila|Florida|Munro|Carapachay|Olivos|Martelli)[^|,]*", block_text, re.IGNORECASE)
284
  address_text = addr_m.group(0) if addr_m else ""
285
+ # Armar link absoluto si fuera relativo
286
+ link_abs = href if href.startswith("http") else f"https://{domain}{href}"
287
  cards.append({
288
  "title": title or "",
289
+ "link": link_abs,
290
  "price_text": price_text,
291
  "addr_text": address_text
292
  })
293
+ # Filtrar ruido
294
  filtered = []
295
  for c in cards:
296
  if len(c["title"]) < 8:
297
  continue
298
+ if any(tok in c["link"] for tok in ["/perfil/", "/inmobiliaria/", "/ayuda", "/faq", "/login", "/like", "/favorito"]):
299
  continue
300
  filtered.append(c)
301
  return filtered
302
 
303
+ async def scrape_search_page(url: str, domain: str) -> List[Listing]:
304
+ html = await fetch(url)
305
+ await polite_pause()
306
  if not html:
307
  return []
308
  soup = BeautifulSoup(html, "lxml")
 
324
  description=None,
325
  score=0.0
326
  ))
327
+ # Limitar por página para evitar ruido excesivo
328
  return listings[:25]
329
 
330
async def scrape_portal(urls: List[str], domain: str) -> List[Listing]:
    """Scrape up to four search-result pages of one portal, ignoring failures.

    Best-effort: a single broken page must not abort the whole portal.
    """
    collected: List[Listing] = []
    # Cap at 4 queries per portal to keep runs fast and polite.
    for search_url in urls[:4]:
        try:
            collected.extend(await scrape_search_page(search_url, domain))
        except Exception:
            pass  # skip this page, keep the rest
    return collected
340
 
341
  # =========================
 
359
  require_pet=require_pet,
360
  )
361
 
362
+ # 1) Generar URLs de búsqueda
363
+ z_urls = zonaprop_search_urls(neighborhoods, max_price_usd, types)
364
+ a_urls = argenprop_search_urls(neighborhoods, max_price_usd, types)
365
+ p_urls = properati_search_urls(neighborhoods, max_price_usd, types)
366
+
367
+ # 2) Scrapeo base
368
+ batch_lists = await asyncio.gather(
369
+ scrape_portal(z_urls, "www.zonaprop.com.ar"),
370
+ scrape_portal(a_urls, "www.argenprop.com"),
371
+ scrape_portal(p_urls, "www.properati.com.ar"),
372
+ )
373
+ listings = [l for batch in batch_lists for l in batch]
374
+
375
+ # 3) Deduplicar por link
376
+ seen = set()
377
+ unique: List[Listing] = []
378
+ for l in listings:
379
+ if l.link in seen:
380
+ continue
381
+ seen.add(l.link)
382
+ unique.append(l)
383
+
384
+ # 4) Enriquecer con detalle en paralelo (concurrencia acotada)
385
+ sem = asyncio.Semaphore(MAX_CONCURRENCY)
386
+ async def enrich_guarded(l: Listing):
387
+ async with sem:
388
+ enriched = await fetch_detail_and_enrich(l)
389
+ await polite_pause()
390
+ return enriched
391
+
392
+ enriched = await asyncio.gather(*[enrich_guarded(l) for l in unique])
393
+
394
+ # 5) Filtros duros
395
+ def passes(l: Listing) -> bool:
396
+ if l.price_usd is None or l.price_usd > max_price_usd:
397
+ return False
398
+ if l.rooms is not None and l.rooms < min_rooms:
399
+ return False
400
+ if require_outdoor and not ((l.has_patio is True) or (l.has_terrace is True)):
401
+ return False
402
+ if require_bidet and l.has_bidet is not True:
403
+ return False
404
+ if require_pet and l.pet_friendly is not True:
405
+ return False
406
+ # Tipo: tolerante si no se menciona explícitamente
407
+ text_mix = (l.title + " " + (l.description or "")).lower()
408
+ if not any(t in text_mix for t in types):
409
+ pass
410
+ return True
411
+
412
+ filtered = [l for l in enriched if passes(l)]
413
+
414
+ # 6) Scoring y orden
415
+ for l in filtered:
416
+ l.score = compute_score(l, filters)
417
+ filtered.sort(key=lambda x: (-x.score, x.price_usd or 1e9))
418
+ return filtered
419
 
420
  def listings_to_df(listings: List[Listing]) -> pd.DataFrame:
421
  rows = []
 
442
  return df
443
 
444
  # =========================
445
+ # Email sender
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
  # =========================
447
 
448
+ EMAIL_REGEX = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")
449
+
450
def build_email(subject: str, sender: str, to_addr: str, body_html: str, attachments: List[Tuple[str, bytes, str]]) -> EmailMessage:
    """Assemble a multipart email: plain-text fallback, HTML body, attachments.

    Each attachment is a ``(filename, raw_bytes, mime_type)`` tuple; a mime
    type without a "/" falls back to application/octet-stream.
    """
    msg = EmailMessage()
    msg["Subject"] = subject
    msg["From"] = sender
    msg["To"] = to_addr
    msg.set_content("Este mensaje requiere un cliente compatible HTML.")
    msg.add_alternative(body_html, subtype="html")
    for filename, payload, mime_type in attachments:
        if "/" in mime_type:
            maintype, subtype = mime_type.split("/", 1)
        else:
            maintype, subtype = "application", "octet-stream"
        msg.add_attachment(payload, maintype=maintype, subtype=subtype, filename=filename)
    return msg
461
+
462
def send_email(to_addr: str, subject: str, html_body: str, attachments: List[Tuple[str, bytes, str]]) -> str:
    """Send an HTML email with attachments through the configured SMTP server.

    Returns "OK" on success, otherwise a human-readable error string
    (missing config, invalid recipient, or the SMTP exception text).
    """
    smtp_ready = all([SMTP_HOST, SMTP_PORT, SMTP_USER, SMTP_PASS, SMTP_FROM])
    if not smtp_ready:
        return "Error: SMTP no configurado en Secrets (SMTP_HOST, SMTP_PORT, SMTP_USER, SMTP_PASS, SMTP_FROM)."
    if not EMAIL_REGEX.match(to_addr):
        return "Error: email destino inválido."
    message = build_email(subject, SMTP_FROM, to_addr, html_body, attachments)
    use_implicit_tls = SMTP_USE_SSL or SMTP_PORT == 465
    try:
        if use_implicit_tls:
            # Implicit TLS (typically port 465)
            tls_context = ssl.create_default_context()
            with smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT, context=tls_context) as conn:
                conn.login(SMTP_USER, SMTP_PASS)
                conn.send_message(message)
        else:
            # Plain connection upgraded via STARTTLS (typically port 587)
            with smtplib.SMTP(SMTP_HOST, SMTP_PORT) as conn:
                conn.ehlo()
                conn.starttls()
                conn.ehlo()
                conn.login(SMTP_USER, SMTP_PASS)
                conn.send_message(message)
    except Exception as e:
        return f"Error enviando email: {e}"
    return "OK"
484
+
485
def df_to_csv_bytes(df: pd.DataFrame) -> bytes:
    """Serialize a DataFrame to UTF-8 CSV bytes, without the index column."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode("utf-8")
487
+
488
def json_to_bytes(obj: Any) -> bytes:
    """Serialize *obj* to pretty-printed, UTF-8-encoded JSON bytes."""
    text = json.dumps(obj, ensure_ascii=False, indent=2)
    return text.encode("utf-8")
490
+
491
def render_summary_html(df: pd.DataFrame, neighborhoods: List[str], max_usd: int, min_rooms: int) -> str:
    """Render the results-email HTML: filter summary plus the top-10 listings.

    Listings are ranked by (Score desc, Precio USD asc). Security fix: titles,
    addresses and links are scraped from third-party pages (untrusted input),
    so they are HTML-escaped before being interpolated into the markup —
    the original injected them raw, allowing HTML injection into the email.
    """
    from html import escape  # stdlib; local import to leave the file header untouched

    count = len(df)
    head = (
        f"<h2>Resultados de tu búsqueda</h2>"
        f"<p><b>Zonas:</b> {escape(', '.join(neighborhoods))}<br>"
        f"<b>Precio máx.:</b> USD {max_usd}<br>"
        f"<b>Ambientes mín.:</b> {min_rooms}<br>"
        f"<b>Total:</b> {count}</p>"
    )
    if count == 0:
        return head + "<p>No se encontraron resultados con los filtros actuales.</p>"
    top_rows = df.sort_values(by=["Score", "Precio USD"], ascending=[False, True]).head(10)
    items = []
    for _, row in top_rows.iterrows():
        flags = " · ".join(k for k in ["Patio", "Terraza", "Mascotas", "Bidet"] if bool(row.get(k))) or "—"
        price = int(row["Precio USD"]) if pd.notna(row["Precio USD"]) else "-"
        title = escape(str(row["Título"]))
        area = escape(str(row.get("Dirección/Área") or ""))
        link = escape(str(row["Link"]), quote=True)
        items.append(f"<li><b>{title}</b> — USD {price} — {area} — {flags} — <a href='{link}'>Link</a></li>")
    return head + "<ol>" + "\n".join(items) + "</ol>"
 
502
 
503
  # =========================
504
  # UI (Gradio)
 
507
  DESCRIPTION = """
508
  Agente agregador de avisos (Zonaprop, Argenprop, Properati) para Saavedra → La Lucila y alrededores.
509
  Filtra: USD ≤ 90k, ≥ 3 ambientes (para oficina), patio/terraza, mascotas, bidet (si figura en descripción).
510
+ Al finalizar, podés enviar el resumen a tu email con CSV y JSON adjuntos.
 
511
  """
512
 
513
async def run_and_present(neighs, max_usd, types, min_rooms, req_outdoor, req_bidet, req_pet, email_to, send_email_flag):
    """Gradio callback: run the scraping agent and optionally email the results.

    Args mirror the UI widgets (comma-separated text for neighs/types, numeric
    and boolean filters, recipient email plus a send toggle).

    Returns (results DataFrame, pretty-printed JSON string, email status text).
    """
    # Parse comma-separated UI text inputs into clean lists.
    neighs_list = [n.strip() for n in str(neighs).split(",") if n.strip()]
    types_list = [t.strip().lower() for t in str(types).split(",") if t.strip()]

    results = await run_agent(
        neighborhoods=neighs_list,
        max_price_usd=int(max_usd),
        types=types_list,
        min_rooms=int(min_rooms),
        require_outdoor=bool(req_outdoor),
        require_bidet=bool(req_bidet),
        require_pet=bool(req_pet)
    )
    df = listings_to_df(results)
    json_blob = [asdict(l) for l in results]

    # Optional email delivery
    email_status = "Email no enviado."
    if send_email_flag:
        if not EMAIL_REGEX.match(email_to or ""):
            email_status = "Error: email destino inválido."
        else:
            html = render_summary_html(df, neighs_list, int(max_usd), int(min_rooms))
            attachments = []
            if not df.empty:
                attachments.append(("resultados.csv", df_to_csv_bytes(df), "text/csv"))
            attachments.append(("resultados.json", json_to_bytes(json_blob), "application/json"))
            # Fix: send_email uses blocking smtplib; running it directly in this
            # coroutine stalled the event loop (and the whole Gradio UI) for the
            # duration of the SMTP exchange. Offload it to a worker thread.
            status = await asyncio.to_thread(
                send_email,
                to_addr=email_to,
                subject="Resultados de casas/PH (≤ USD 90k) – Norte BA",
                html_body=html,
                attachments=attachments
            )
            email_status = "Enviado" if status == "OK" else status

    return df, json.dumps(json_blob, ensure_ascii=False, indent=2), email_status
 
 
549
 
550
  with gr.Blocks(title="Agente Inmuebles Norte BA (≤ USD 90k)") as demo:
551
  gr.Markdown("# Agente de casas/PH norte BA (≤ 90 000 USD)")
 
560
  req_outdoor = gr.Checkbox(label="Requerir patio o terraza", value=REQUIRE_OUTDOOR)
561
  req_bidet = gr.Checkbox(label="Requerir bidet (solo si aparece en descripción)", value=REQUIRE_BIDET)
562
  req_pet = gr.Checkbox(label="Requerir pet-friendly (si aparece en descripción)", value=REQUIRE_PET_FRIENDLY)
563
+ gr.Markdown("### Envío por email (opcional al finalizar)")
564
+ with gr.Row():
565
+ email_to = gr.Textbox(label="Tu email para recibir los resultados", placeholder="tu@correo.com")
566
+ send_email_flag = gr.Checkbox(label="Enviar email al finalizar", value=True)
567
 
568
  btn = gr.Button("Buscar ahora", variant="primary")
569
+
570
  with gr.Tabs():
571
  with gr.Tab("Resultados"):
572
+ table = gr.Dataframe(interactive=False, wrap=True, max_rows=300)
573
  with gr.Tab("JSON"):
574
  j = gr.Code(language="json")
575
+ with gr.Tab("Estado de email"):
576
+ status = gr.Markdown("—")
577
 
578
+ btn.click(
579
+ run_and_present,
580
+ inputs=[neighs, max_usd, types, min_rooms, req_outdoor, req_bidet, req_pet, email_to, send_email_flag],
581
+ outputs=[table, j, status]
582
+ )
 
 
 
 
 
 
 
 
583
 
584
  if __name__ == "__main__":
585
+ demo.launch()