Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,7 +20,7 @@ except ImportError:
|
|
| 20 |
subprocess.run(["pip", "install", "fake-useragent"], check=True)
|
| 21 |
from fake_useragent import UserAgent
|
| 22 |
|
| 23 |
-
# --- 1. GENERADOR DE URLS
|
| 24 |
def construir_urls_final(zona, ciudad, tipo, hab, ban, park, antiguedad):
|
| 25 |
mapa_ant = {
|
| 26 |
"Menos de 1 año": "de-0-a-1-anos",
|
|
@@ -32,25 +32,28 @@ def construir_urls_final(zona, ciudad, tipo, hab, ban, park, antiguedad):
|
|
| 32 |
slug_ant = mapa_ant.get(antiguedad, "de-1-a-8-anios")
|
| 33 |
slug_park = f"{int(park)}-parqueadero" if int(park) == 1 else f"{int(park)}-parqueaderos"
|
| 34 |
|
| 35 |
-
# Limpieza estricta de slugs
|
| 36 |
z_slug = zona.lower().strip().replace(" ", "-")
|
| 37 |
c_slug = ciudad.lower().strip().replace(" ", "-")
|
| 38 |
|
| 39 |
-
# URL FINCA RAÍZ
|
| 40 |
url_fr = f"https://www.fincaraiz.com.co/arriendo/{z_slug}/{c_slug}/{int(hab)}-o-mas-habitaciones/{int(ban)}-o-mas-banos/{slug_park}/{slug_ant}"
|
| 41 |
-
|
| 42 |
-
# URL METROCUADRADO CORREGIDA (Se añade {z_slug})
|
| 43 |
url_mc = f"https://www.metrocuadrado.com/{tipo.lower()}-casa-oficina/arriendo/{c_slug}/{z_slug}/{int(ban)}-banos-{int(hab)}-habitaciones/?search=form"
|
| 44 |
|
| 45 |
return url_fr, url_mc
|
| 46 |
|
| 47 |
-
# --- 2. EXTRACTOR DE PRECIO (REGEX) ---
|
| 48 |
def extraer_precio_regex(texto):
|
| 49 |
patron = r'\$\s?(\d{1,3}(?:[.,]\d{3})*)'
|
| 50 |
coincidencias = re.findall(patron, texto)
|
| 51 |
if coincidencias:
|
| 52 |
precios = [int(p.replace('.', '').replace(',', '')) for p in coincidencias]
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
return 0
|
| 55 |
|
| 56 |
# --- 3. MOTOR DE EXTRACCIÓN ROBUSTO ---
|
|
@@ -94,7 +97,7 @@ def motor_tramitia_final(zona, ciudad, area, tipo, hab, ban, park, antiguedad):
|
|
| 94 |
|
| 95 |
if "$" in txt:
|
| 96 |
precio = extraer_precio_regex(txt)
|
| 97 |
-
if precio >
|
| 98 |
href = el.get_attribute("href")
|
| 99 |
full_url = f"https://www.fincaraiz.com.co{href}" if href and href.startswith("/") else href
|
| 100 |
|
|
@@ -130,7 +133,7 @@ def motor_tramitia_final(zona, ciudad, area, tipo, hab, ban, park, antiguedad):
|
|
| 130 |
|
| 131 |
if "$" in txt:
|
| 132 |
precio = extraer_precio_regex(txt)
|
| 133 |
-
if precio >
|
| 134 |
enlace = card.query_selector("a")
|
| 135 |
if enlace:
|
| 136 |
href = enlace.get_attribute("href")
|
|
@@ -207,9 +210,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 207 |
|
| 208 |
with gr.Row():
|
| 209 |
with gr.Column(scale=1):
|
| 210 |
-
c = gr.Textbox(label="Ciudad", value="
|
| 211 |
-
z = gr.Textbox(label="Zona (Ej:
|
| 212 |
-
a = gr.Number(label="Área M2", value=
|
| 213 |
t = gr.Dropdown(["Apartamento", "Casa"], label="Tipo", value="Apartamento")
|
| 214 |
|
| 215 |
with gr.Row():
|
|
|
|
| 20 |
subprocess.run(["pip", "install", "fake-useragent"], check=True)
|
| 21 |
from fake_useragent import UserAgent
|
| 22 |
|
| 23 |
+
# --- 1. GENERADOR DE URLS ---
|
| 24 |
def construir_urls_final(zona, ciudad, tipo, hab, ban, park, antiguedad):
|
| 25 |
mapa_ant = {
|
| 26 |
"Menos de 1 año": "de-0-a-1-anos",
|
|
|
|
| 32 |
slug_ant = mapa_ant.get(antiguedad, "de-1-a-8-anios")
|
| 33 |
slug_park = f"{int(park)}-parqueadero" if int(park) == 1 else f"{int(park)}-parqueaderos"
|
| 34 |
|
|
|
|
| 35 |
z_slug = zona.lower().strip().replace(" ", "-")
|
| 36 |
c_slug = ciudad.lower().strip().replace(" ", "-")
|
| 37 |
|
|
|
|
| 38 |
url_fr = f"https://www.fincaraiz.com.co/arriendo/{z_slug}/{c_slug}/{int(hab)}-o-mas-habitaciones/{int(ban)}-o-mas-banos/{slug_park}/{slug_ant}"
|
|
|
|
|
|
|
| 39 |
url_mc = f"https://www.metrocuadrado.com/{tipo.lower()}-casa-oficina/arriendo/{c_slug}/{z_slug}/{int(ban)}-banos-{int(hab)}-habitaciones/?search=form"
|
| 40 |
|
| 41 |
return url_fr, url_mc
|
| 42 |
|
| 43 |
+
# --- 2. EXTRACTOR DE PRECIO INTELIGENTE (REGEX) ---
|
| 44 |
def extraer_precio_regex(texto):
|
| 45 |
patron = r'\$\s?(\d{1,3}(?:[.,]\d{3})*)'
|
| 46 |
coincidencias = re.findall(patron, texto)
|
| 47 |
if coincidencias:
|
| 48 |
precios = [int(p.replace('.', '').replace(',', '')) for p in coincidencias]
|
| 49 |
+
|
| 50 |
+
# FILTRO DE CORDURA: Ignorar precios de venta (> 40 millones)
|
| 51 |
+
# y descartar valores muy bajos (como la administración sola)
|
| 52 |
+
precios_validos = [p for p in precios if 600000 <= p <= 40000000]
|
| 53 |
+
|
| 54 |
+
if precios_validos:
|
| 55 |
+
# El primer precio válido que aparece en el texto es casi siempre el Canon principal
|
| 56 |
+
return precios_validos[0]
|
| 57 |
return 0
|
| 58 |
|
| 59 |
# --- 3. MOTOR DE EXTRACCIÓN ROBUSTO ---
|
|
|
|
| 97 |
|
| 98 |
if "$" in txt:
|
| 99 |
precio = extraer_precio_regex(txt)
|
| 100 |
+
if precio > 0: # Ya pasó por el filtro de cordura
|
| 101 |
href = el.get_attribute("href")
|
| 102 |
full_url = f"https://www.fincaraiz.com.co{href}" if href and href.startswith("/") else href
|
| 103 |
|
|
|
|
| 133 |
|
| 134 |
if "$" in txt:
|
| 135 |
precio = extraer_precio_regex(txt)
|
| 136 |
+
if precio > 0:
|
| 137 |
enlace = card.query_selector("a")
|
| 138 |
if enlace:
|
| 139 |
href = enlace.get_attribute("href")
|
|
|
|
| 210 |
|
| 211 |
with gr.Row():
|
| 212 |
with gr.Column(scale=1):
|
| 213 |
+
c = gr.Textbox(label="Ciudad", value="Barranquilla")
|
| 214 |
+
z = gr.Textbox(label="Zona (Ej: El Prado)", value="El Prado")
|
| 215 |
+
a = gr.Number(label="Área M2", value=70)
|
| 216 |
t = gr.Dropdown(["Apartamento", "Casa"], label="Tipo", value="Apartamento")
|
| 217 |
|
| 218 |
with gr.Row():
|