estamidaosderentav3

Running

App Files Community

jcalbornoz committed 14 days ago

Commit

08f1bba

verified ·

1 Parent(s): 2663862

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -18

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ except ImportError:
     subprocess.run(["pip", "install", "fake-useragent"], check=True)
     from fake_useragent import UserAgent
-# --- 1. GENERADOR DE URLS (CORREGIDO CON RUTA COMPLETA) ---
 def construir_urls_final(zona, ciudad, tipo, hab, ban, park, antiguedad):
     mapa_ant = {
         "Menos de 1 año": "de-0-a-1-anos",
@@ -36,17 +36,16 @@ def construir_urls_final(zona, ciudad, tipo, hab, ban, park, antiguedad):
     z_slug = zona.lower().strip().replace(" ", "-")
     c_slug = ciudad.lower().strip().replace(" ", "-")
-    # URL FINCA RAÍZ: /arriendo/barrio/ciudad/filtros
     url_fr = f"https://www.fincaraiz.com.co/arriendo/{z_slug}/{c_slug}/{int(hab)}-o-mas-habitaciones/{int(ban)}-o-mas-banos/{slug_park}/{slug_ant}"
-    # URL METROCUADRADO
-    url_mc = f"https://www.metrocuadrado.com/{tipo.lower()}-casa-oficina/arriendo/{c_slug}/{int(ban)}-banos-{int(hab)}-habitaciones/?search=form"
     return url_fr, url_mc
 # --- 2. EXTRACTOR DE PRECIO (REGEX) ---
 def extraer_precio_regex(texto):
-    # Detecta $ 1.500.000 o $1500000
     patron = r'\$\s?(\d{1,3}(?:[.,]\d{3})*)'
     coincidencias = re.findall(patron, texto)
     if coincidencias:
@@ -63,7 +62,6 @@ def motor_tramitia_final(zona, ciudad, area, tipo, hab, ban, park, antiguedad):
     ua = UserAgent()
     with sync_playwright() as p:
-        # Lanzamos navegador con perfil de evasión
         browser = p.chromium.launch(
             headless=True,
             args=[
@@ -71,7 +69,7 @@ def motor_tramitia_final(zona, ciudad, area, tipo, hab, ban, park, antiguedad):
                 '--no-sandbox',
                 '--disable-infobars',
                 '--window-position=0,0',
-                f'--user-agent={ua.random}' # Identidad aleatoria
             ]
         )
         context = browser.new_context(viewport={'width': 1366, 'height': 768})
@@ -82,24 +80,21 @@ def motor_tramitia_final(zona, ciudad, area, tipo, hab, ban, park, antiguedad):
             log_visible += "🔄 Escaneando Finca Raíz...\n"
             page.goto(url_fr, wait_until="domcontentloaded", timeout=60000)
-            # Movimiento humano
             page.mouse.move(random.randint(100, 500), random.randint(100, 500))
             for _ in range(4):
                 page.mouse.wheel(0, 1000)
-                time.sleep(1.5) # Espera para cargar elementos dinámicos
-            # Buscamos enlaces (Links <a>)
             elementos = page.query_selector_all("a")
             cont_fr = 0
-            # Extraemos hasta 15 para tener margen de descarte
             for el in elementos:
                 if cont_fr >= 15: break
                 txt = el.inner_text()
                 if "$" in txt:
                     precio = extraer_precio_regex(txt)
-                    if precio > 600000: # Filtro anti-ruido (precios muy bajos)
                         href = el.get_attribute("href")
                         full_url = f"https://www.fincaraiz.com.co{href}" if href and href.startswith("/") else href
@@ -122,7 +117,7 @@ def motor_tramitia_final(zona, ciudad, area, tipo, hab, ban, park, antiguedad):
             log_visible += "🔄 Escaneando Metrocuadrado...\n"
             page.goto(url_mc, wait_until="domcontentloaded", timeout=60000)
-            for _ in range(5): # Un poco más de scroll en MC
                 page.mouse.wheel(0, 1000)
                 time.sleep(1.5)
@@ -159,17 +154,13 @@ def motor_tramitia_final(zona, ciudad, area, tipo, hab, ban, park, antiguedad):
     if not resultados:
         return f"{log_visible}\n❌ NO SE ENCONTRARON DATOS VÁLIDOS.", None, None, "---"
-    # --- LIMPIEZA Y SELECCIÓN DE LOS 6 MEJORES ---
     df_crudo = pd.DataFrame(resultados)
-    # 1. Eliminar duplicados exactos de URL
     df_limpio = df_crudo.drop_duplicates(subset=['URL'])
-    # 2. Seleccionar top 6 por portal
     df_fr = df_limpio[df_limpio['Portal'] == 'Finca Raiz'].head(6)
     df_mc = df_limpio[df_limpio['Portal'] == 'Metrocuadrado'].head(6)
-    # 3. Combinar
     df_final = pd.concat([df_fr, df_mc]).reset_index(drop=True)
     log_visible += f"\n✨ PROCESADO FINAL: {len(df_final)} inmuebles únicos seleccionados para el reporte."

     subprocess.run(["pip", "install", "fake-useragent"], check=True)
     from fake_useragent import UserAgent
+# --- 1. GENERADOR DE URLS (CORREGIDO MC Y FR) ---
 def construir_urls_final(zona, ciudad, tipo, hab, ban, park, antiguedad):
     mapa_ant = {
         "Menos de 1 año": "de-0-a-1-anos",
     z_slug = zona.lower().strip().replace(" ", "-")
     c_slug = ciudad.lower().strip().replace(" ", "-")
+    # URL FINCA RAÍZ
     url_fr = f"https://www.fincaraiz.com.co/arriendo/{z_slug}/{c_slug}/{int(hab)}-o-mas-habitaciones/{int(ban)}-o-mas-banos/{slug_park}/{slug_ant}"
+    # URL METROCUADRADO CORREGIDA (Se añade {z_slug})
+    url_mc = f"https://www.metrocuadrado.com/{tipo.lower()}-casa-oficina/arriendo/{c_slug}/{z_slug}/{int(ban)}-banos-{int(hab)}-habitaciones/?search=form"
     return url_fr, url_mc
 # --- 2. EXTRACTOR DE PRECIO (REGEX) ---
 def extraer_precio_regex(texto):
     patron = r'\$\s?(\d{1,3}(?:[.,]\d{3})*)'
     coincidencias = re.findall(patron, texto)
     if coincidencias:
     ua = UserAgent()
     with sync_playwright() as p:
         browser = p.chromium.launch(
             headless=True,
             args=[
                 '--no-sandbox',
                 '--disable-infobars',
                 '--window-position=0,0',
+                f'--user-agent={ua.random}'
             ]
         )
         context = browser.new_context(viewport={'width': 1366, 'height': 768})
             log_visible += "🔄 Escaneando Finca Raíz...\n"
             page.goto(url_fr, wait_until="domcontentloaded", timeout=60000)
             page.mouse.move(random.randint(100, 500), random.randint(100, 500))
             for _ in range(4):
                 page.mouse.wheel(0, 1000)
+                time.sleep(1.5)
             elementos = page.query_selector_all("a")
             cont_fr = 0
             for el in elementos:
                 if cont_fr >= 15: break
                 txt = el.inner_text()
                 if "$" in txt:
                     precio = extraer_precio_regex(txt)
+                    if precio > 600000:
                         href = el.get_attribute("href")
                         full_url = f"https://www.fincaraiz.com.co{href}" if href and href.startswith("/") else href
             log_visible += "🔄 Escaneando Metrocuadrado...\n"
             page.goto(url_mc, wait_until="domcontentloaded", timeout=60000)
+            for _ in range(5):
                 page.mouse.wheel(0, 1000)
                 time.sleep(1.5)
     if not resultados:
         return f"{log_visible}\n❌ NO SE ENCONTRARON DATOS VÁLIDOS.", None, None, "---"
+    # --- LIMPIEZA Y SELECCIÓN ---
     df_crudo = pd.DataFrame(resultados)
     df_limpio = df_crudo.drop_duplicates(subset=['URL'])
     df_fr = df_limpio[df_limpio['Portal'] == 'Finca Raiz'].head(6)
     df_mc = df_limpio[df_limpio['Portal'] == 'Metrocuadrado'].head(6)
     df_final = pd.concat([df_fr, df_mc]).reset_index(drop=True)
     log_visible += f"\n✨ PROCESADO FINAL: {len(df_final)} inmuebles únicos seleccionados para el reporte."