Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,7 +20,7 @@ except ImportError:
|
|
| 20 |
subprocess.run(["pip", "install", "fake-useragent"], check=True)
|
| 21 |
from fake_useragent import UserAgent
|
| 22 |
|
| 23 |
-
# --- 1. GENERADOR DE URLS ---
|
| 24 |
def construir_urls_final(zona, ciudad, tipo, hab, ban, park, antiguedad):
|
| 25 |
mapa_ant = {
|
| 26 |
"Menos de 1 a帽o": "de-0-a-1-anos",
|
|
@@ -32,10 +32,15 @@ def construir_urls_final(zona, ciudad, tipo, hab, ban, park, antiguedad):
|
|
| 32 |
slug_ant = mapa_ant.get(antiguedad, "de-1-a-8-anios")
|
| 33 |
slug_park = f"{int(park)}-parqueadero" if int(park) == 1 else f"{int(park)}-parqueaderos"
|
| 34 |
|
|
|
|
| 35 |
z_slug = zona.lower().strip().replace(" ", "-")
|
| 36 |
c_slug = ciudad.lower().strip().replace(" ", "-")
|
|
|
|
| 37 |
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
| 39 |
url_mc = f"https://www.metrocuadrado.com/{tipo.lower()}-casa-oficina/arriendo/{c_slug}/{z_slug}/{int(ban)}-banos-{int(hab)}-habitaciones/?search=form"
|
| 40 |
|
| 41 |
return url_fr, url_mc
|
|
@@ -47,13 +52,11 @@ def extraer_precio_regex(texto):
|
|
| 47 |
if coincidencias:
|
| 48 |
precios = [int(p.replace('.', '').replace(',', '')) for p in coincidencias]
|
| 49 |
|
| 50 |
-
#
|
| 51 |
-
# y descartar valores muy bajos (como la administración sola)
|
| 52 |
precios_validos = [p for p in precios if 600000 <= p <= 40000000]
|
| 53 |
|
| 54 |
if precios_validos:
|
| 55 |
-
#
|
| 56 |
-
return precios_validos[0]
|
| 57 |
return 0
|
| 58 |
|
| 59 |
# --- 3. MOTOR DE EXTRACCIÓN ROBUSTO ---
|
|
@@ -97,7 +100,7 @@ def motor_tramitia_final(zona, ciudad, area, tipo, hab, ban, park, antiguedad):
|
|
| 97 |
|
| 98 |
if "$" in txt:
|
| 99 |
precio = extraer_precio_regex(txt)
|
| 100 |
-
if precio > 0:
|
| 101 |
href = el.get_attribute("href")
|
| 102 |
full_url = f"https://www.fincaraiz.com.co{href}" if href and href.startswith("/") else href
|
| 103 |
|
|
|
|
| 20 |
subprocess.run(["pip", "install", "fake-useragent"], check=True)
|
| 21 |
from fake_useragent import UserAgent
|
| 22 |
|
| 23 |
+
# --- 1. GENERADOR DE URLS (RESTAURADO EXACTO) ---
|
| 24 |
def construir_urls_final(zona, ciudad, tipo, hab, ban, park, antiguedad):
|
| 25 |
mapa_ant = {
|
| 26 |
"Menos de 1 a帽o": "de-0-a-1-anos",
|
|
|
|
| 32 |
slug_ant = mapa_ant.get(antiguedad, "de-1-a-8-anios")
|
| 33 |
slug_park = f"{int(park)}-parqueadero" if int(park) == 1 else f"{int(park)}-parqueaderos"
|
| 34 |
|
| 35 |
+
# Limpieza estricta de slugs
|
| 36 |
z_slug = zona.lower().strip().replace(" ", "-")
|
| 37 |
c_slug = ciudad.lower().strip().replace(" ", "-")
|
| 38 |
+
query_geo = f"{z_slug}-{c_slug}"
|
| 39 |
|
| 40 |
+
# URL FINCA RAÍZ RESTAURADA: Usa searchstring al final como indicaste
|
| 41 |
+
url_fr = f"https://www.fincaraiz.com.co/arriendo/{int(hab)}-o-mas-habitaciones/{int(ban)}-o-mas-banos/{slug_park}/{slug_ant}?&searchstring={query_geo}"
|
| 42 |
+
|
| 43 |
+
# URL METROCUADRADO: Mantiene el barrio (z_slug) en el path
|
| 44 |
url_mc = f"https://www.metrocuadrado.com/{tipo.lower()}-casa-oficina/arriendo/{c_slug}/{z_slug}/{int(ban)}-banos-{int(hab)}-habitaciones/?search=form"
|
| 45 |
|
| 46 |
return url_fr, url_mc
|
|
|
|
| 52 |
if coincidencias:
|
| 53 |
precios = [int(p.replace('.', '').replace(',', '')) for p in coincidencias]
|
| 54 |
|
| 55 |
+
# Filtro: Ignorar precios de venta (> 40 millones) y descartar admón
|
|
|
|
| 56 |
precios_validos = [p for p in precios if 600000 <= p <= 40000000]
|
| 57 |
|
| 58 |
if precios_validos:
|
| 59 |
+
return precios_validos[0] # Retorna el primer precio válido (canon principal)
|
|
|
|
| 60 |
return 0
|
| 61 |
|
| 62 |
# --- 3. MOTOR DE EXTRACCIÓN ROBUSTO ---
|
|
|
|
| 100 |
|
| 101 |
if "$" in txt:
|
| 102 |
precio = extraer_precio_regex(txt)
|
| 103 |
+
if precio > 0:
|
| 104 |
href = el.get_attribute("href")
|
| 105 |
full_url = f"https://www.fincaraiz.com.co{href}" if href and href.startswith("/") else href
|
| 106 |
|