Spaces:

Lukeetah
/

SamuelHouseFinderApp

Sleeping

App Files Files Community

Lukeetah committed on Aug 11, 2025

Commit

db669b7

verified ·

1 Parent(s): fff1f95

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -124

app.py CHANGED Viewed

@@ -6,11 +6,9 @@ import time
 import smtplib
 import random
 import asyncio
-import mimetypes
 from dataclasses import dataclass, asdict
 from typing import List, Optional, Dict, Any, Tuple
 from email.message import EmailMessage
-from pathlib import Path
 import urllib.parse as ul
 import httpx
@@ -26,15 +24,25 @@ import gradio as gr
 DEFAULT_MAX_USD = 90000
 DEFAULT_NEIGHBORHOODS = [
     "Saavedra", "Nuñez", "La Lucila", "Florida Oeste", "Munro", "Carapachay",
-    "Olivos", "Villa Martelli"
 ]
 DEFAULT_TYPES = ["casa", "ph"]   # "casa", "ph"
-DEFAULT_MIN_ROOMS = 3            # ambientes (asegura oficina)
 REQUIRE_BIDET = True
 REQUIRE_PET_FRIENDLY = True
-REQUIRE_OUTDOOR = True           # patio o terraza
-# Microzonas residenciales priorizadas (heurística positiva)
 MICROZONAS_PRIORITARIAS = [
     "Parque Saavedra", "Parque Sarmiento", "Av. Balbín", "Ruiz Huidobro",
     "Lomas de Nuñez", "Cabildo", "Plaza Alberti",
@@ -43,7 +51,7 @@ MICROZONAS_PRIORITARIAS = [
     "Ugarte", "San Martín", "Panamericana", "Pelliza", "Melo",
 ]
-# Anti-scraping: headers, tiempos, rate limit por dominio
 USER_AGENT_POOL = [
     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
     "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
@@ -54,9 +62,9 @@ REFERER_POOL = ["https://www.google.com/", "https://www.bing.com/", "https://duc
 TIMEOUT = httpx.Timeout(25.0, connect=12.0)
 RETRIES = 2
 BACKOFF_BASE = 0.9
-JITTER_RANGE = (0.12, 0.55)  # segundos
-# Rate-limit suave por dominio (segundos min entre hits)
 DOMAIN_RATE_LIMIT = {
     "www.zonaprop.com.ar": 0.6,
     "www.argenprop.com": 0.6,
@@ -70,19 +78,16 @@ DOMAIN_RATE_LIMIT = {
     "www.buscatucasa.com.ar": 0.8,
 }
-# Proxy opcional (definí PROXY_URL en Secrets si tenés pool)
 PROXY_URL = os.getenv("PROXY_URL", "").strip()
-# =========================
-# Email (configurado vía Secrets)
-# =========================
 SMTP_HOST = os.getenv("SMTP_HOST", "").strip()
 SMTP_PORT = int(os.getenv("SMTP_PORT", "587"))
 SMTP_USER = os.getenv("SMTP_USER", "").strip()
 SMTP_PASS = os.getenv("SMTP_PASS", "").strip()
 SMTP_FROM = os.getenv("SMTP_FROM", SMTP_USER).strip()
 SMTP_USE_SSL = os.getenv("SMTP_USE_SSL", "false").lower() in ("1", "true", "yes")
 EMAIL_REGEX = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")
 # =========================
@@ -121,7 +126,7 @@ def to_float_price(value: str) -> Optional[float]:
         return float(m.group(1)) if m else None
     return None
-def extract_int_from_text(text: str, pattern: str) -> Optional[int]:
     if not text:
         return None
     m = re.search(pattern, text)
@@ -158,13 +163,11 @@ def compute_score(lst: Listing, filters: Dict[str, Any]) -> float:
     if filters["require_outdoor"] and (lst.has_patio or lst.has_terrace):
         score += 1.0
     if filters["require_pet"]:
-        if lst.pet_friendly:
-            score += 0.6
     else:
         score += 0.2
     if filters["require_bidet"]:
-        if lst.has_bidet:
-            score += 0.6
     else:
         score += 0.2
     score += residential_score(lst.address or "", lst.neighborhood or "", lst.description or "")
@@ -195,7 +198,6 @@ async def domain_throttle(domain: str):
     if wait > 0:
         await asyncio.sleep(wait)
     _last_hit[domain] = time.time()
-    # jitter suave
     await asyncio.sleep(random.uniform(*JITTER_RANGE))
 async def fetch(url: str) -> Optional[str]:
@@ -206,41 +208,36 @@ async def fetch(url: str) -> Optional[str]:
         try:
             async with httpx.AsyncClient(follow_redirects=True, http2=True, proxies=proxies, timeout=TIMEOUT) as client:
                 r = await client.get(url, headers=make_headers())
-                # algunos portales sirven HTML con 200 pero bloquean por JS -> intentamos igualmente parsear
-                if r.status_code == 200 and r.text and len(r.text) > 1000:
                     return r.text
-                # backoff
                 await asyncio.sleep(BACKOFF_BASE * (2 ** i) + random.uniform(0, 0.35))
         except Exception:
             await asyncio.sleep(BACKOFF_BASE * (2 ** i) + random.uniform(0, 0.35))
     return None
 # =========================
-# Portal adapters
 # =========================
 class Portal:
-    def __init__(self, domain: str, search_builder, card_hint: Optional[str] = None):
         self.domain = domain
-        self.search_builder = search_builder  # fn(neighs, max_usd, types) -> [urls]
-        self.card_hint = card_hint  # texto para filtrar anchors
 def sb_qparam(base: str, param: str = "q"):
     def _builder(neighs: List[str], max_usd: int, types: List[str]) -> List[str]:
         urls = []
-        # permutamos consultas para reducir cache y mejorar recall
-        syn_outdoor = ["patio", "terraza", "exterior", "pulmón"]
-        syn_pets = ["mascotas", "pet friendly", "apta mascotas"]
-        syn_rooms = ["3 ambientes", "tres ambientes", ">=3 ambientes"]
         for n in neighs:
-            for o in syn_outdoor[:2]:
-                for p in syn_pets[:2]:
-                    q = f"{' o '.join(types)} venta {n} hasta {max_usd} dolares {random.choice(syn_rooms)} {o} {p} bidet"
-                    urls.append(f"{base}?{param}={ul.quote(q)}")
         return urls
     return _builder
-# Portales contemplados (agregar más es trivial)
 PORTALS: List[Portal] = [
     Portal("www.zonaprop.com.ar", sb_qparam("https://www.zonaprop.com.ar/propiedades.html", "q")),
     Portal("www.argenprop.com", sb_qparam("https://www.argenprop.com/propiedades", "text")),
@@ -254,12 +251,12 @@ PORTALS: List[Portal] = [
     Portal("www.buscatucasa.com.ar", sb_qparam("https://www.buscatucasa.com.ar/buscar", "q")),
 ]
 def generic_card_extractor(soup: BeautifulSoup, domain: str) -> List[Dict[str, Any]]:
-    """
-    Heurística universal:
-    - Encuentra <a href> del mismo dominio que parezcan links a avisos.
-    - Extrae texto cercano para precio y zona.
-    """
     anchors = soup.select("a[href]")
     seen = set()
     cards = []
@@ -267,44 +264,46 @@ def generic_card_extractor(soup: BeautifulSoup, domain: str) -> List[Dict[str, A
         href = a.get("href", "")
         if not href:
             continue
-        # normalizar a absoluto
         if href.startswith("//"):
             href = "https:" + href
         elif href.startswith("/"):
             href = f"https://{domain}{href}"
         if domain not in href:
             continue
         if any(x in href for x in ["/login", "/perfil", "/ayuda", "/faq", "/favorito", "/mi-cuenta"]):
             continue
         if href in seen:
             continue
         seen.add(href)
         title = clean_text(a.get_text(" ", strip=True))
         parent = a.find_parent()
         block_text = clean_text(parent.get_text(" ", strip=True)) if parent else ""
-        # precio en USD
         m = re.search(r"(U\$S|USD|US\$|D[oó]lares?)\s*([\d\.\,]+)", block_text, re.IGNORECASE)
         price_text = m.group(0) if m else ""
-        # address o barrio clave
-        addr_m = re.search(r"(Saavedra|Nu[eñ]ez|La Lucila|Florida(?: Oeste)?|Munro|Carapachay|Olivos|Martelli)[^|,]*", block_text, re.IGNORECASE)
         addr_text = addr_m.group(0) if addr_m else ""
-        # Filtros mínimos para evitar ruido
-        if len(title) < 8:
-            continue
-        if not any(x in href.lower() for x in ["propiedad", "inmueble", "inmuebles", "departamento", "casa", "ph", "detalle", "item", "listing", "publicacion", "aviso", "id"]):
-            # permitir de todos modos: muchos sitios usan slugs
-            pass
         cards.append({
-            "title": title,
             "link": href,
             "price_text": price_text,
             "addr_text": addr_text
         })
-    # quedarnos con primeras N tarjetas decentes
-    return cards[:40]
 async def scrape_search_page(url: str, domain: str) -> List[Listing]:
     html = await fetch(url)
@@ -334,8 +333,7 @@ async def scrape_search_page(url: str, domain: str) -> List[Listing]:
 async def scrape_portal(portal: Portal, neighborhoods: List[str], max_usd: int, types: List[str]) -> List[Listing]:
     urls = portal.search_builder(neighborhoods, max_usd, types)
     results: List[Listing] = []
-    # tomar un subconjunto para diversidad sin abusar
-    for u in urls[:6]:
         try:
             res = await scrape_search_page(u, portal.domain)
             results.extend(res)
@@ -343,37 +341,37 @@ async def scrape_portal(portal: Portal, neighborhoods: List[str], max_usd: int,
             pass
     return results
-async def fetch_detail_and_enrich(lst: Listing) -> Listing:
     html = await fetch(lst.link)
     if not html:
         return lst
     soup = BeautifulSoup(html, "lxml")
     # Descripción
-    desc_el = soup.find(["div", "section"], attrs={"class": re.compile(r"(description|descripcion|post|body|texto)")}) or soup.find("p")
-    if desc_el:
-        desc = clean_text(desc_el.get_text(" ", strip=True))
-    else:
-        desc = clean_text(" ".join(x.get_text(" ", strip=True) for x in soup.find_all(["p", "li"])[:50]))
     # Inferencias
     patio, terraza, mascotas, bidet = feature_guess(desc)
     # Características
-    features_text = " ".join(
         el.get_text(" ", strip=True) for el in soup.find_all(["li", "span", "div"])
         if el and el.get_text() and any(x in el.get_text().lower() for x in ["ambiente", "dorm", "bañ"])
     ).lower()
-    rooms = extract_int_from_text(features_text, r"(\d+)\s*ambiente")
-    bathrooms = extract_int_from_text(features_text, r"(\d+)\s*bañ")
-    bedrooms = extract_int_from_text(features_text, r"(\d+)\s*dorm")
-    # Dirección si aparece
     addr_guess = soup.find(attrs={"class": re.compile(r"(address|ubicacion|ubicación|location|inmo-location)")})
     if addr_guess and not lst.address:
         lst.address = clean_text(addr_guess.get_text(" ", strip=True))[:200]
     lst.description = desc or lst.description
     lst.has_patio = lst.has_patio if lst.has_patio is not None else patio
     lst.has_terrace = lst.has_terrace if lst.has_terrace is not None else terraza
     lst.pet_friendly = lst.pet_friendly if lst.pet_friendly is not None else mascotas
@@ -387,15 +385,18 @@ async def fetch_detail_and_enrich(lst: Listing) -> Listing:
 # Orquestación
 # =========================
-async def run_agent(
-    neighborhoods: List[str],
-    max_price_usd: int,
-    types: List[str],
-    min_rooms: int,
-    require_outdoor: bool,
-    require_bidet: bool,
-    require_pet: bool
-) -> List[Listing]:
     filters = dict(
         max_price_usd=max_price_usd,
         min_rooms=min_rooms,
@@ -403,24 +404,12 @@ async def run_agent(
         require_bidet=require_bidet,
         require_pet=require_pet,
     )
-    # 1) Scrapeo base multi-portal
     tasks = [scrape_portal(p, neighborhoods, max_price_usd, types) for p in PORTALS]
     batch = await asyncio.gather(*tasks)
     listings = [l for sub in batch for l in sub]
-    # 2) Deduplicación por link canónico
-    def canon(url: str) -> str:
-        # quitar parámetros de tracking
-        try:
-            parsed = ul.urlparse(url)
-            q = ul.parse_qsl(parsed.query)
-            q = [(k, v) for (k, v) in q if k.lower() not in {"utm_source", "utm_medium", "utm_campaign", "gclid", "s"}]
-            new_q = ul.urlencode(q, doseq=True)
-            return ul.urlunparse((parsed.scheme, parsed.netloc, parsed.path, "", new_q, ""))
-        except Exception:
-            return url
     seen = set()
     unique: List[Listing] = []
     for l in listings:
@@ -431,17 +420,16 @@ async def run_agent(
         l.link = key
         unique.append(l)
-    # 3) Enriquecer en paralelo con control de concurrencia
     sem = asyncio.Semaphore(8)
-    async def enrich_guarded(item: Listing):
         async with sem:
-            enriched = await fetch_detail_and_enrich(item)
-            # pausa suave entre detalles
             await asyncio.sleep(random.uniform(*JITTER_RANGE))
             return enriched
-    enriched = await asyncio.gather(*[enrich_guarded(l) for l in unique])
-    # 4) Filtros duros
     def passes(l: Listing) -> bool:
         if l.price_usd is None or l.price_usd > max_price_usd:
             return False
@@ -461,11 +449,43 @@ async def run_agent(
     filtered = [l for l in enriched if passes(l)]
-    # 5) Scoring y orden
     for l in filtered:
         l.score = compute_score(l, filters)
     filtered.sort(key=lambda x: (-x.score, x.price_usd or 1e9))
-    return filtered
 def listings_to_df(listings: List[Listing]) -> pd.DataFrame:
     rows = []
@@ -500,7 +520,7 @@ def build_email(subject: str, sender: str, to_addr: str, body_html: str, attachm
     msg["Subject"] = subject
     msg["From"] = sender
     msg["To"] = to_addr
-    msg.set_content("Este mensaje tiene una versión HTML y adjuntos.")
     msg.add_alternative(body_html, subtype="html")
     for filename, content, mimetype in attachments:
         maintype, subtype = (mimetype.split("/", 1) if "/" in mimetype else ("application", "octet-stream"))
@@ -521,9 +541,7 @@ def send_email(to_addr: str, subject: str, html_body: str, attachments: List[Tup
                 server.send_message(msg)
         else:
             with smtplib.SMTP(SMTP_HOST, SMTP_PORT) as server:
-                server.ehlo()
-                server.starttls()
-                server.ehlo()
                 server.login(SMTP_USER, SMTP_PASS)
                 server.send_message(msg)
         return "OK"
@@ -536,11 +554,12 @@ def df_to_csv_bytes(df: pd.DataFrame) -> bytes:
 def json_to_bytes(obj: Any) -> bytes:
     return json.dumps(obj, ensure_ascii=False, indent=2).encode("utf-8")
-def render_summary_html(df: pd.DataFrame, neighborhoods: List[str], max_usd: int, min_rooms: int) -> str:
     count = len(df)
-    head = f"<h2>Resultados de tu búsqueda</h2><p><b>Zonas:</b> {', '.join(neighborhoods)}<br><b>Precio máx.:</b> USD {max_usd}<br><b>Ambientes mín.:</b> {min_rooms}<br><b>Total:</b> {count}</p>"
     if count == 0:
-        return head + "<p>No se encontraron resultados con los filtros actuales.</p>"
     top_rows = df.sort_values(by=['Score','Precio USD'], ascending=[False, True]).head(12)
     items = []
     for _, r in top_rows.iterrows():
@@ -548,30 +567,29 @@ def render_summary_html(df: pd.DataFrame, neighborhoods: List[str], max_usd: int
         price = f"USD {int(r['Precio USD'])}" if pd.notna(r['Precio USD']) else "USD —"
         addr = r.get("Dirección/Área") or ""
         items.append(f"<li><b>{r['Título']}</b> — {price} — {addr} — {flags} — <a href='{r['Link']}'>Abrir</a></li>")
-    return head + "<ol>" + "\n".join(items) + "</ol>"
 # =========================
 # UI (Gradio)
 # =========================
 DESCRIPTION = """
-Meta-buscador multi-portales para casas/PH entre Saavedra y La Lucila y alrededores.
-Filtra: USD ≤ 90k, ≥ 3 ambientes (para oficina), patio/terraza, mascotas, bidet (si figura en descripción).
-Al terminar, te puede enviar el resumen a tu email con CSV y JSON adjuntos.
 """
-async def run_and_present(neighs, max_usd, types, min_rooms, req_outdoor, req_bidet, req_pet, email_to, send_email_flag):
     neighs_list = [n.strip() for n in str(neighs).split(",") if n.strip()]
     types_list = [t.strip().lower() for t in str(types).split(",") if t.strip()]
-    results = await run_agent(
-        neighborhoods=neighs_list,
-        max_price_usd=int(max_usd),
-        types=types_list,
-        min_rooms=int(min_rooms),
-        require_outdoor=bool(req_outdoor),
-        require_bidet=bool(req_bidet),
-        require_pet=bool(req_pet)
     )
     df = listings_to_df(results)
     json_blob = [asdict(l) for l in results]
@@ -581,7 +599,7 @@ async def run_and_present(neighs, max_usd, types, min_rooms, req_outdoor, req_bi
         if not EMAIL_REGEX.match(email_to or ""):
             email_status = "Error: email destino inválido."
         else:
-            html = render_summary_html(df, neighs_list, int(max_usd), int(min_rooms))
             attachments: List[Tuple[str, bytes, str]] = []
             if not df.empty:
                 attachments.append(("resultados.csv", df_to_csv_bytes(df), "text/csv"))
@@ -594,7 +612,8 @@ async def run_and_present(neighs, max_usd, types, min_rooms, req_outdoor, req_bi
             )
             email_status = "Enviado" if status == "OK" else status
-    return df, json.dumps(json_blob, ensure_ascii=False, indent=2), email_status
 with gr.Blocks(title="Meta-buscador Inmuebles Norte BA (≤ USD 90k)") as demo:
     gr.Markdown("# Meta-buscador de casas/PH norte BA (≤ 90 000 USD)")
@@ -609,6 +628,7 @@ with gr.Blocks(title="Meta-buscador Inmuebles Norte BA (≤ USD 90k)") as demo:
         req_outdoor = gr.Checkbox(label="Requerir patio o terraza", value=REQUIRE_OUTDOOR)
         req_bidet = gr.Checkbox(label="Requerir bidet (si aparece en descripción)", value=REQUIRE_BIDET)
         req_pet = gr.Checkbox(label="Requerir pet-friendly (si aparece en descripción)", value=REQUIRE_PET_FRIENDLY)
     gr.Markdown("### Envío por email al finalizar (opcional)")
     with gr.Row():
@@ -618,16 +638,18 @@ with gr.Blocks(title="Meta-buscador Inmuebles Norte BA (≤ USD 90k)") as demo:
     btn = gr.Button("Buscar ahora", variant="primary")
     with gr.Tabs():
         with gr.Tab("Resultados"):
-            table = gr.Dataframe(interactive=False)
         with gr.Tab("JSON"):
             j = gr.Code(language="json")
         with gr.Tab("Estado de email"):
             status = gr.Markdown("—")
     btn.click(
         run_and_present,
-        inputs=[neighs, max_usd, types, min_rooms, req_outdoor, req_bidet, req_pet, email_to, send_email_flag],
-        outputs=[table, j, status]
     )
 if __name__ == "__main__":

 import smtplib
 import random
 import asyncio
 from dataclasses import dataclass, asdict
 from typing import List, Optional, Dict, Any, Tuple
 from email.message import EmailMessage
 import urllib.parse as ul
 import httpx
 DEFAULT_MAX_USD = 90000
 DEFAULT_NEIGHBORHOODS = [
     "Saavedra", "Nuñez", "La Lucila", "Florida Oeste", "Munro", "Carapachay",
+    "Olivos", "Villa Martelli", "Florida", "Vicente López"
 ]
 DEFAULT_TYPES = ["casa", "ph"]   # "casa", "ph"
+DEFAULT_MIN_ROOMS = 3
 REQUIRE_BIDET = True
 REQUIRE_PET_FRIENDLY = True
+REQUIRE_OUTDOOR = True
+# Auto-relajación si no hay resultados (escalonada)
+AUTO_RELAX_ENABLED = True
+RELAX_STEPS = [
+    {"require_bidet": False},                           # 1) liberar bidet
+    {"require_pet": False},                             # 2) liberar mascotas
+    {"min_rooms": 2},                                   # 3) bajar ambientes a 2
+    {"require_outdoor": False},                         # 4) exterior opcional
+    {"max_price_usd_delta": 10000},                     # 5) subir precio máx. +10k
+]
+# Microzonas (boost de score)
 MICROZONAS_PRIORITARIAS = [
     "Parque Saavedra", "Parque Sarmiento", "Av. Balbín", "Ruiz Huidobro",
     "Lomas de Nuñez", "Cabildo", "Plaza Alberti",
     "Ugarte", "San Martín", "Panamericana", "Pelliza", "Melo",
 ]
+# Anti-scraping
 USER_AGENT_POOL = [
     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
     "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
 TIMEOUT = httpx.Timeout(25.0, connect=12.0)
 RETRIES = 2
 BACKOFF_BASE = 0.9
+JITTER_RANGE = (0.13, 0.55)  # s
+# Rate-limit por dominio
 DOMAIN_RATE_LIMIT = {
     "www.zonaprop.com.ar": 0.6,
     "www.argenprop.com": 0.6,
     "www.buscatucasa.com.ar": 0.8,
 }
+# Proxy opcional (configurable en Secrets)
 PROXY_URL = os.getenv("PROXY_URL", "").strip()
+# Email (configurable en Secrets)
 SMTP_HOST = os.getenv("SMTP_HOST", "").strip()
 SMTP_PORT = int(os.getenv("SMTP_PORT", "587"))
 SMTP_USER = os.getenv("SMTP_USER", "").strip()
 SMTP_PASS = os.getenv("SMTP_PASS", "").strip()
 SMTP_FROM = os.getenv("SMTP_FROM", SMTP_USER).strip()
 SMTP_USE_SSL = os.getenv("SMTP_USE_SSL", "false").lower() in ("1", "true", "yes")
 EMAIL_REGEX = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")
 # =========================
         return float(m.group(1)) if m else None
     return None
+def extract_int_from(text: str, pattern: str) -> Optional[int]:
     if not text:
         return None
     m = re.search(pattern, text)
     if filters["require_outdoor"] and (lst.has_patio or lst.has_terrace):
         score += 1.0
     if filters["require_pet"]:
+        score += 0.6 if lst.pet_friendly else 0.0
     else:
         score += 0.2
     if filters["require_bidet"]:
+        score += 0.6 if lst.has_bidet else 0.0
     else:
         score += 0.2
     score += residential_score(lst.address or "", lst.neighborhood or "", lst.description or "")
     if wait > 0:
         await asyncio.sleep(wait)
     _last_hit[domain] = time.time()
     await asyncio.sleep(random.uniform(*JITTER_RANGE))
 async def fetch(url: str) -> Optional[str]:
         try:
             async with httpx.AsyncClient(follow_redirects=True, http2=True, proxies=proxies, timeout=TIMEOUT) as client:
                 r = await client.get(url, headers=make_headers())
+                # aceptamos HTML corto; algunos portales entregan SSR mínimo
+                if r.status_code == 200 and r.text:
                     return r.text
                 await asyncio.sleep(BACKOFF_BASE * (2 ** i) + random.uniform(0, 0.35))
         except Exception:
             await asyncio.sleep(BACKOFF_BASE * (2 ** i) + random.uniform(0, 0.35))
     return None
 # =========================
+# Portales
 # =========================
 class Portal:
+    def __init__(self, domain: str, search_builder):
         self.domain = domain
+        self.search_builder = search_builder  # fn(neighs, max_usd, types)->[urls]
 def sb_qparam(base: str, param: str = "q"):
     def _builder(neighs: List[str], max_usd: int, types: List[str]) -> List[str]:
         urls = []
+        syn_outdoor = ["patio", "terraza", "exterior"]
+        syn_pets = ["mascotas", "pet friendly"]
+        rooms_variants = ["3 ambientes", "tres ambientes"]
         for n in neighs:
+            for o in syn_outdoor:
+                q = f"{' o '.join(types)} venta {n} hasta {max_usd} dolares {random.choice(rooms_variants)} {o} {random.choice(syn_pets)} bidet"
+                urls.append(f"{base}?{param}={ul.quote(q)}")
         return urls
     return _builder
 PORTALS: List[Portal] = [
     Portal("www.zonaprop.com.ar", sb_qparam("https://www.zonaprop.com.ar/propiedades.html", "q")),
     Portal("www.argenprop.com", sb_qparam("https://www.argenprop.com/propiedades", "text")),
     Portal("www.buscatucasa.com.ar", sb_qparam("https://www.buscatucasa.com.ar/buscar", "q")),
 ]
+ANCHOR_TOKENS = [
+    "propiedad", "inmueble", "inmuebles", "departamento", "casa", "ph",
+    "detalle", "item", "listing", "publicacion", "aviso", "MLA-"
+]
 def generic_card_extractor(soup: BeautifulSoup, domain: str) -> List[Dict[str, Any]]:
     anchors = soup.select("a[href]")
     seen = set()
     cards = []
         href = a.get("href", "")
         if not href:
             continue
+        # normalizar absoluto
         if href.startswith("//"):
             href = "https:" + href
         elif href.startswith("/"):
             href = f"https://{domain}{href}"
+        # solo mismo dominio
         if domain not in href:
             continue
+        # filtrar rutas no relevantes
         if any(x in href for x in ["/login", "/perfil", "/ayuda", "/faq", "/favorito", "/mi-cuenta"]):
             continue
+        # heurística de “parece aviso”
+        if not any(tok in href.lower() for tok in [t.lower() for t in ANCHOR_TOKENS]):
+            continue
+        # no duplicados
         if href in seen:
             continue
         seen.add(href)
         title = clean_text(a.get_text(" ", strip=True))
+        if len(title) < 8:
+            # algunos sitios tienen título en contenedor padre
+            parent = a.find_parent()
+            if parent:
+                title = clean_text(parent.get_text(" ", strip=True))[:160]
+        # texto de bloque cercano
         parent = a.find_parent()
         block_text = clean_text(parent.get_text(" ", strip=True)) if parent else ""
         m = re.search(r"(U\$S|USD|US\$|D[oó]lares?)\s*([\d\.\,]+)", block_text, re.IGNORECASE)
         price_text = m.group(0) if m else ""
+        addr_m = re.search(r"(Saavedra|Nu[eñ]ez|La Lucila|Florida(?: Oeste)?|Munro|Carapachay|Olivos|Martelli|Vicente L[oó]pez)[^|,]*", block_text, re.IGNORECASE)
         addr_text = addr_m.group(0) if addr_m else ""
         cards.append({
+            "title": title[:160],
             "link": href,
             "price_text": price_text,
             "addr_text": addr_text
         })
+    return cards[:50]
 async def scrape_search_page(url: str, domain: str) -> List[Listing]:
     html = await fetch(url)
 async def scrape_portal(portal: Portal, neighborhoods: List[str], max_usd: int, types: List[str]) -> List[Listing]:
     urls = portal.search_builder(neighborhoods, max_usd, types)
     results: List[Listing] = []
+    for u in urls[:6]:  # primeras 6 queries permutadas
         try:
             res = await scrape_search_page(u, portal.domain)
             results.extend(res)
             pass
     return results
+async def enrich_listing(lst: Listing) -> Listing:
     html = await fetch(lst.link)
     if not html:
         return lst
     soup = BeautifulSoup(html, "lxml")
     # Descripción
+    desc_el = soup.find(["div", "section"], attrs={"class": re.compile(r"(description|descripcion|post|body|texto|descripcion-larga)")}) or soup.find("p")
+    desc = clean_text(desc_el.get_text(" ", strip=True)) if desc_el else clean_text(" ".join(x.get_text(" ", strip=True) for x in soup.find_all(["p", "li"])[:60]))
     # Inferencias
     patio, terraza, mascotas, bidet = feature_guess(desc)
     # Características
+    feat_text = " ".join(
         el.get_text(" ", strip=True) for el in soup.find_all(["li", "span", "div"])
         if el and el.get_text() and any(x in el.get_text().lower() for x in ["ambiente", "dorm", "bañ"])
     ).lower()
+    # también mirar el título
+    coarse = (lst.title + " " + desc).lower()
+    rooms = extract_int_from(feat_text, r"(\d+)\s*ambiente") or extract_int_from(coarse, r"(\d+)\s*amb")
+    bathrooms = extract_int_from(feat_text, r"(\d+)\s*bañ") or extract_int_from(coarse, r"(\d+)\s*bañ")
+    bedrooms = extract_int_from(feat_text, r"(\d+)\s*dorm") or extract_int_from(coarse, r"(\d+)\s*dormi")
+    # Dirección
     addr_guess = soup.find(attrs={"class": re.compile(r"(address|ubicacion|ubicación|location|inmo-location)")})
     if addr_guess and not lst.address:
         lst.address = clean_text(addr_guess.get_text(" ", strip=True))[:200]
     lst.description = desc or lst.description
+    lst.has_patio = lst.has_patro if hasattr(lst, "has_patro") else lst.has_patio  # guard
     lst.has_patio = lst.has_patio if lst.has_patio is not None else patio
     lst.has_terrace = lst.has_terrace if lst.has_terrace is not None else terraza
     lst.pet_friendly = lst.pet_friendly if lst.pet_friendly is not None else mascotas
 # Orquestación
 # =========================
# Query keys that only carry click/campaign tracking and never identify a listing.
_TRACKING_PARAMS = frozenset({"gclid", "s"})


def canon(url: str) -> str:
    """Return *url* reduced to a canonical form suitable as a dedup key.

    Tracking parameters (any ``utm_*`` key, ``gclid`` and ``s``, compared
    case-insensitively) and the fragment are removed; scheme, host, path and
    every other query parameter are kept in their original order. Parameters
    with an empty value are preserved so the resulting link still addresses
    the same page.

    Args:
        url: Absolute or relative URL of a listing.

    Returns:
        The canonical URL, or *url* unchanged when it cannot be parsed.
    """
    try:
        parsed = ul.urlparse(url)
        kept = [
            (k, v)
            for k, v in ul.parse_qsl(parsed.query, keep_blank_values=True)
            if not (k.lower().startswith("utm_") or k.lower() in _TRACKING_PARAMS)
        ]
        return ul.urlunparse(
            (parsed.scheme, parsed.netloc, parsed.path, "", ul.urlencode(kept), "")
        )
    except (ValueError, TypeError, AttributeError):
        # Malformed URL (e.g. unbalanced IPv6 bracket) or a non-string value:
        # best effort, hand the input back untouched.
        return url
+async def run_agent_once(neighborhoods: List[str], max_price_usd: int, types: List[str],
+                         min_rooms: int, require_outdoor: bool, require_bidet: bool, require_pet: bool) -> Tuple[List[Listing], str]:
     filters = dict(
         max_price_usd=max_price_usd,
         min_rooms=min_rooms,
         require_bidet=require_bidet,
         require_pet=require_pet,
     )
+    # 1) Multi-portal
     tasks = [scrape_portal(p, neighborhoods, max_price_usd, types) for p in PORTALS]
     batch = await asyncio.gather(*tasks)
     listings = [l for sub in batch for l in sub]
+    # 2) Dedup
     seen = set()
     unique: List[Listing] = []
     for l in listings:
         l.link = key
         unique.append(l)
+    # 3) Enriquecer
     sem = asyncio.Semaphore(8)
+    async def guard(item: Listing):
         async with sem:
+            enriched = await enrich_listing(item)
             await asyncio.sleep(random.uniform(*JITTER_RANGE))
             return enriched
+    enriched = await asyncio.gather(*[guard(l) for l in unique])
+    # 4) Filtrar (tolerante: None no bloquea salvo que se exija explícito)
     def passes(l: Listing) -> bool:
         if l.price_usd is None or l.price_usd > max_price_usd:
             return False
     filtered = [l for l in enriched if passes(l)]
+    # 5) Score + Orden
     for l in filtered:
         l.score = compute_score(l, filters)
     filtered.sort(key=lambda x: (-x.score, x.price_usd or 1e9))
+    # Trace
+    trace = f"Portales: {len(PORTALS)} | Crudos: {len(listings)} | Únicos: {len(unique)} | Enriquecidos: {len(enriched)} | Final: {len(filtered)}"
+    return filtered, trace
async def run_agent_with_relax(neighborhoods: List[str], max_price_usd: int, types: List[str],
                               min_rooms: int, require_outdoor: bool, require_bidet: bool, require_pet: bool,
                               auto_relax: bool = True) -> Tuple[List[Listing], List[str]]:
    """Run the search and, if it comes back empty, retry with relaxed filters.

    The first pass uses the filters exactly as given. When it yields nothing
    and *auto_relax* is true, every entry of ``RELAX_STEPS`` is tried in
    order until one returns results. Each step overrides only the keys it
    names on top of the ORIGINAL filters (earlier relaxations are not carried
    over); the one exception is the price cap, which keeps its raised value
    for later steps once a ``max_price_usd_delta`` step has been applied.

    Returns:
        ``(results, log)`` where *log* holds one line per pass describing the
        filters used and the trace string reported by ``run_agent_once``.
        *results* is whatever the last executed pass returned.
    """
    log: List[str] = []

    results, trace = await run_agent_once(
        neighborhoods, max_price_usd, types,
        min_rooms, require_outdoor, require_bidet, require_pet,
    )
    log.append(f"[Base] {trace}")
    if not auto_relax or results:
        return results, log

    # Nothing found: walk the relaxation ladder.
    price = max_price_usd
    step_no = 0
    for step in RELAX_STEPS:
        step_no += 1
        mr = step.get("min_rooms", min_rooms)
        ro = step.get("require_outdoor", require_outdoor)
        rb = step.get("require_bidet", require_bidet)
        rp = step.get("require_pet", require_pet)
        if "max_price_usd_delta" in step:
            # Delta is always relative to the caller's cap, not the running one.
            price = max_price_usd + step["max_price_usd_delta"]

        log.append(f"[Relax {step_no}] rooms={mr} outdoor={ro} bidet={rb} pet={rp} price_max=USD {price}")
        results, trace = await run_agent_once(neighborhoods, price, types, mr, ro, rb, rp)
        log.append(f"[Relax {step_no}] {trace}")
        if results:
            break

    return results, log
 def listings_to_df(listings: List[Listing]) -> pd.DataFrame:
     rows = []
     msg["Subject"] = subject
     msg["From"] = sender
     msg["To"] = to_addr
+    msg.set_content("Este mensaje tiene versión HTML y adjuntos.")
     msg.add_alternative(body_html, subtype="html")
     for filename, content, mimetype in attachments:
         maintype, subtype = (mimetype.split("/", 1) if "/" in mimetype else ("application", "octet-stream"))
                 server.send_message(msg)
         else:
             with smtplib.SMTP(SMTP_HOST, SMTP_PORT) as server:
+                server.ehlo(); server.starttls(); server.ehlo()
                 server.login(SMTP_USER, SMTP_PASS)
                 server.send_message(msg)
         return "OK"
 def json_to_bytes(obj: Any) -> bytes:
     """Serialize *obj* as indented JSON and return the UTF-8 encoded bytes.

     Non-ASCII characters are written as-is instead of being escaped, and
     nested structures are indented by two spaces.
     """
     text = json.dumps(obj, indent=2, ensure_ascii=False)
     return text.encode("utf-8")
+def render_summary_html(df: pd.DataFrame, neighborhoods: List[str], max_usd: int, min_rooms: int, relax_log: List[str]) -> str:
     count = len(df)
+    head = f"<h2>Resultados</h2><p><b>Zonas:</b> {', '.join(neighborhoods)}<br><b>Precio máx.:</b> USD {max_usd}<br><b>Ambientes mín.:</b> {min_rooms}<br><b>Total:</b> {count}</p>"
+    trace = "<pre style='white-space:pre-wrap;font-size:12px;opacity:.85;'>" + "\n".join(relax_log) + "</pre>"
     if count == 0:
+        return head + "<p>No se encontraron resultados con los filtros actuales.</p>" + trace
     top_rows = df.sort_values(by=['Score','Precio USD'], ascending=[False, True]).head(12)
     items = []
     for _, r in top_rows.iterrows():
         price = f"USD {int(r['Precio USD'])}" if pd.notna(r['Precio USD']) else "USD —"
         addr = r.get("Dirección/Área") or ""
         items.append(f"<li><b>{r['Título']}</b> — {price} — {addr} — {flags} — <a href='{r['Link']}'>Abrir</a></li>")
+    return head + "<ol>" + "\n".join(items) + "</ol>" + trace
 # =========================
 # UI (Gradio)
 # =========================
 # Spanish-language blurb summarizing the app: multi-portal search scope,
 # default filters, anti-scraping measures, and the auto-relax fallback.
 # NOTE(review): presumably rendered in the Gradio UI — usage is not visible here.
 DESCRIPTION = """
+Meta-buscador multi-portales para casas/PH entre Saavedra y La Lucila y alrededores.
+• Filtros: USD ≤ 90k, ≥ 3 ambientes, patio/terraza, mascotas, bidet (si figura en descripción).
+• Anti-scraping: headers rotativos, referers, HTTP/2, rate limit con jitter, reintentos con backoff.
+• Si no hay resultados, activa auto-relajación escalonada (configurable) y documenta los pasos.
 """
+async def run_and_present(neighs, max_usd, types, min_rooms, req_outdoor, req_bidet, req_pet, auto_relax, email_to, send_email_flag):
     neighs_list = [n.strip() for n in str(neighs).split(",") if n.strip()]
     types_list = [t.strip().lower() for t in str(types).split(",") if t.strip()]
+    max_usd = int(max_usd); min_rooms = int(min_rooms)
+    req_outdoor = bool(req_outdoor); req_bidet = bool(req_bidet); req_pet = bool(req_pet); auto_relax = bool(auto_relax)
+    results, relax_log = await run_agent_with_relax(
+        neighborhoods=neighs_list, max_price_usd=max_usd, types=types_list,
+        min_rooms=min_rooms, require_outdoor=req_outdoor, require_bidet=req_bidet, require_pet=req_pet,
+        auto_relax=auto_relax
     )
     df = listings_to_df(results)
     json_blob = [asdict(l) for l in results]
         if not EMAIL_REGEX.match(email_to or ""):
             email_status = "Error: email destino inválido."
         else:
+            html = render_summary_html(df, neighs_list, max_usd, min_rooms, relax_log)
             attachments: List[Tuple[str, bytes, str]] = []
             if not df.empty:
                 attachments.append(("resultados.csv", df_to_csv_bytes(df), "text/csv"))
             )
             email_status = "Enviado" if status == "OK" else status
+    # Mostrar log en la pestaña de estado
+    return df, json.dumps(json_blob, ensure_ascii=False, indent=2), " | ".join(relax_log), email_status
 with gr.Blocks(title="Meta-buscador Inmuebles Norte BA (≤ USD 90k)") as demo:
     gr.Markdown("# Meta-buscador de casas/PH norte BA (≤ 90 000 USD)")
         req_outdoor = gr.Checkbox(label="Requerir patio o terraza", value=REQUIRE_OUTDOOR)
         req_bidet = gr.Checkbox(label="Requerir bidet (si aparece en descripción)", value=REQUIRE_BIDET)
         req_pet = gr.Checkbox(label="Requerir pet-friendly (si aparece en descripción)", value=REQUIRE_PET_FRIENDLY)
+        auto_relax = gr.Checkbox(label="Auto-relajar si no hay resultados", value=AUTO_RELAX_ENABLED)
     gr.Markdown("### Envío por email al finalizar (opcional)")
     with gr.Row():
     btn = gr.Button("Buscar ahora", variant="primary")
     with gr.Tabs():
         with gr.Tab("Resultados"):
+            table = gr.Dataframe(interactive=False)  # sin args raros
         with gr.Tab("JSON"):
             j = gr.Code(language="json")
+        with gr.Tab("Estado"):
+            trace = gr.Markdown("—")
         with gr.Tab("Estado de email"):
             status = gr.Markdown("—")
     btn.click(
         run_and_present,
+        inputs=[neighs, max_usd, types, min_rooms, req_outdoor, req_bidet, req_pet, auto_relax, email_to, send_email_flag],
+        outputs=[table, j, trace, status]
     )
 if __name__ == "__main__":