Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import sys
|
| 2 |
|
| 3 |
-
# Silenciador de errores fantasma de consola
|
| 4 |
def silenciador_errores_basura(unraisable):
    """Unraisable-exception hook that hides one known noise message.

    Args:
        unraisable: Object exposing ``exc_type`` and ``exc_value`` (the
            argument Python passes to an unraisable hook).

    A ``ValueError`` whose text contains ``"Invalid file descriptor: -1"``
    is dropped silently; anything else is handed to the interpreter's
    original hook, ``sys.__unraisablehook__``.
    """
    es_ruido_conocido = (
        unraisable.exc_type == ValueError
        and "Invalid file descriptor: -1" in str(unraisable.exc_value)
    )
    if es_ruido_conocido:
        return
    # Everything else keeps the default reporting behaviour.
    sys.__unraisablehook__(unraisable)
|
|
@@ -22,31 +21,21 @@ import io
|
|
| 22 |
import traceback
|
| 23 |
|
| 24 |
# --- INSTALACIÓN DE DEPENDENCIAS ---
|
| 25 |
-
# Best-effort download of the Chromium build used by Playwright. A failure
# here must not stop the app from starting, but it is reported instead of
# being swallowed silently, and KeyboardInterrupt/SystemExit are no longer
# intercepted.
try:
    subprocess.run(["playwright", "install", "chromium"], check=True)
except Exception as error_instalacion:
    print(f"[setup] 'playwright install chromium' failed: {error_instalacion}", file=sys.stderr)

# fake-useragent is installed on demand. Invoking pip through the running
# interpreter guarantees the package lands in the environment that will
# import it on the next line.
try:
    from fake_useragent import UserAgent
except ImportError:
    subprocess.run([sys.executable, "-m", "pip", "install", "fake-useragent"], check=True)
    from fake_useragent import UserAgent
|
| 34 |
|
| 35 |
-
# --- FILTRO ANTI-EMOJIS PARA EL PDF ---
|
| 36 |
def sanear_texto(texto):
    """Return ``texto`` reduced to Latin-1 characters, trimmed.

    Args:
        texto: Value to clean; anything that is not a ``str`` yields ``""``.

    Returns:
        str: The input without any character outside the Latin-1 range
        (emojis, for instance) and without leading/trailing whitespace.
    """
    if isinstance(texto, str):
        # Code points above 255 are exactly the ones Latin-1 cannot encode.
        solo_latin1 = "".join(caracter for caracter in texto if ord(caracter) < 256)
        return solo_latin1.strip()
    return ""
|
| 39 |
|
| 40 |
-
# --- DESCARGA SEGURA DE FOTOS ---
|
| 41 |
def descargar_imagen(url, idx):
|
| 42 |
-
if not url or len(url) < 5 or url.startswith("data:"):
|
| 43 |
-
return None
|
| 44 |
try:
|
| 45 |
-
headers = {
|
| 46 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
|
| 47 |
-
"Accept": "image/*",
|
| 48 |
-
"Referer": "https://www.fincaraiz.com.co/"
|
| 49 |
-
}
|
| 50 |
r = requests.get(url, timeout=8, headers=headers)
|
| 51 |
if r.status_code == 200:
|
| 52 |
img = Image.open(io.BytesIO(r.content))
|
|
@@ -57,31 +46,22 @@ def descargar_imagen(url, idx):
|
|
| 57 |
except: return None
|
| 58 |
return None
|
| 59 |
|
| 60 |
-
# --- 1. GENERADOR DE URLS ---
|
| 61 |
def construir_urls_final(operacion, barrio, ciudad, tipo, hab, ban, park, antiguedad, m2_min, m2_max, ascensor, piscina):
    """Build the listing-search URLs for Finca Raiz and Metrocuadrado.

    Args:
        operacion: Operation label (e.g. "Arriendo", "Venta"); lower-cased
            and trimmed for the URL.
        barrio: Neighbourhood name; converted to an ASCII, hyphenated slug.
        ciudad: City name; converted to an ASCII, hyphenated slug.
        tipo: Property type label (e.g. "Apartamento", "Bodega").
        hab: Minimum number of bedrooms (converted with ``int``).
        ban: Minimum number of bathrooms (converted with ``int``).
        park: Number of parking spots (converted with ``int``).
        antiguedad: Age-range label; unknown labels fall back to
            ``"de-1-a-8-anos"``.
        m2_min: Lower bound of the area filter (converted with ``int``).
        m2_max: Upper bound of the area filter (converted with ``int``).
        ascensor: When truthy, ``/con-ascensor`` is appended to the
            Finca Raiz URL.
        piscina: When truthy, ``/con-piscina`` is appended to the
            Finca Raiz URL.

    Returns:
        tuple[str, str]: ``(finca_raiz_url, metrocuadrado_url)``.
    """
    # Imported locally so the function does not depend on a file-level import.
    import unicodedata

    def _a_slug(texto):
        # Strip diacritics ("Concepción" -> "concepcion") and collapse any
        # run of whitespace into a single hyphen, matching the ASCII style of
        # the other slugs built here (e.g. "años" -> "anos").
        ascii_puro = unicodedata.normalize("NFKD", texto).encode("ascii", "ignore").decode("ascii")
        return "-".join(ascii_puro.lower().split())

    mapa_ant = {
        "Menos de 1 año": "de-0-a-1-anos",
        "1 a 8 años": "de-1-a-8-anos",
        "9 a 15 años": "de-9-a-15-anos",
        "16 a 30 años": "de-16-a-30-anos",
        "Más de 30 años": "mas-de-30-anos",
    }
    slug_ant = mapa_ant.get(antiguedad, "de-1-a-8-anos")

    # Singular only for exactly one parking spot.
    n_park = int(park)
    slug_park = f"{n_park}-parqueadero" if n_park == 1 else f"{n_park}-parqueaderos"

    b_slug = _a_slug(barrio)
    c_slug = _a_slug(ciudad)
    op_slug = operacion.lower().strip()

    tipo_slug = tipo.lower().strip()
    # Apartments and houses share one combined category in the Finca Raiz
    # path; every other type is simply pluralised with a trailing "s".
    tipo_fr = "casas-y-apartamentos-y-apartaestudios" if tipo_slug in ["apartamento", "casa"] else tipo_slug + "s"

    url_fr_base = f"https://www.fincaraiz.com.co/{op_slug}/{tipo_fr}/{b_slug}/{c_slug}/{int(hab)}-o-mas-habitaciones/{int(ban)}-o-mas-banos/{slug_park}/{slug_ant}/m2-desde-{int(m2_min)}/m2-hasta-{int(m2_max)}"
    if ascensor:
        url_fr_base += "/con-ascensor"
    if piscina:
        url_fr_base += "/con-piscina"

    url_mc = f"https://www.metrocuadrado.com/{tipo_slug}-casa-oficina/{op_slug}/{c_slug}/{b_slug}/{int(ban)}-banos-{int(hab)}-habitaciones/?search=form"

    return url_fr_base, url_mc
|
| 83 |
|
| 84 |
-
# --- 2. EXTRACTORES INTELIGENTES ---
|
| 85 |
def extraer_precio(texto, operacion):
|
| 86 |
patron = r'\$\s?(\d{1,3}(?:[.,]\d{3})*)'
|
| 87 |
coincidencias = re.findall(patron, texto)
|
|
@@ -98,23 +78,19 @@ def extraer_ubicacion(texto):
|
|
| 98 |
if "," in linea or " en " in linea.lower():
|
| 99 |
limpio = re.sub(r'(?i)(apartamento|casa|bodega|lote|oficina)\s+en\s+(arriendo|venta)\s+(en\s+)?', '', linea)
|
| 100 |
return limpio[:60].strip()
|
| 101 |
-
|
| 102 |
for linea in lineas[1:4]:
|
| 103 |
-
if "$" not in linea and not re.search(r'\d', linea):
|
| 104 |
-
return linea[:60]
|
| 105 |
return "Ubicacion en la zona"
|
| 106 |
|
| 107 |
-
# --- FILTRO FRANCOTIRADOR DE URLS ---
|
| 108 |
def es_inmueble_valido(href, portal):
    """Tell whether ``href`` looks like a property-listing link.

    Args:
        href: Link target taken from an anchor; may be ``None`` or empty.
        portal: ``"FR"`` (Finca Raiz) or ``"MC"`` (Metrocuadrado); any other
            value is rejected.

    Returns:
        bool: ``True`` when the link matches the listing pattern of the
        given portal and contains none of the excluded fragments.
    """
    if not href:
        return False

    # Script pseudo-links, blog posts and project pages are never listings.
    if any(vetado in href for vetado in ("javascript", "blog", "proyectos")):
        return False

    if portal == "FR":
        # Either a trailing 7-10 digit id or an operation marker in the path.
        termina_en_id = re.search(r'/\d{7,10}$', href) is not None
        return termina_en_id or "arriendo-en" in href or "venta-en" in href

    if portal == "MC":
        return "/inmueble/" in href or "-id-" in href

    return False
|
| 116 |
|
| 117 |
-
# ---
|
| 118 |
def motor_tramitia_visual(operacion, barrio, ciudad, area, m2_min, m2_max, tipo, hab, ban, park, antiguedad, ascensor, piscina):
|
| 119 |
resultados = []
|
| 120 |
log_visible = ""
|
|
@@ -129,17 +105,14 @@ def motor_tramitia_visual(operacion, barrio, ciudad, area, m2_min, m2_max, tipo,
|
|
| 129 |
browser = p.chromium.launch(headless=True, args=['--disable-blink-features=AutomationControlled', '--no-sandbox'])
|
| 130 |
context = browser.new_context(viewport={'width': 1366, 'height': 768}, user_agent=ua.random)
|
| 131 |
|
| 132 |
-
#
|
| 133 |
try:
|
| 134 |
page = context.new_page()
|
| 135 |
-
log_visible += "🔄 FR: Buscando inmuebles reales...\n"
|
| 136 |
page.goto(url_fr, wait_until="domcontentloaded", timeout=60000)
|
| 137 |
try: page.wait_for_load_state("networkidle", timeout=10000)
|
| 138 |
except: pass
|
| 139 |
-
|
| 140 |
for _ in range(4):
|
| 141 |
-
page.mouse.wheel(0, 1000)
|
| 142 |
-
page.wait_for_timeout(2000)
|
| 143 |
|
| 144 |
elementos = page.query_selector_all("a")
|
| 145 |
cont_fr = 0
|
|
@@ -148,16 +121,13 @@ def motor_tramitia_visual(operacion, barrio, ciudad, area, m2_min, m2_max, tipo,
|
|
| 148 |
try:
|
| 149 |
href = el.get_attribute("href")
|
| 150 |
if not es_inmueble_valido(href, "FR"): continue
|
| 151 |
-
|
| 152 |
full_url = f"https://www.fincaraiz.com.co{href}" if href.startswith("/") else href
|
| 153 |
if full_url in urls_vistas: continue
|
| 154 |
-
|
| 155 |
card = el.evaluate_handle("el => el.closest('article') || el.closest('[class*=\"card\"]') || el.parentElement.parentElement")
|
| 156 |
if not card: continue
|
| 157 |
|
| 158 |
txt = card.inner_text()
|
| 159 |
precio = extraer_precio(txt, operacion)
|
| 160 |
-
|
| 161 |
if precio > 0:
|
| 162 |
img_url = ""
|
| 163 |
img_el = card.query_selector("img")
|
|
@@ -165,33 +135,21 @@ def motor_tramitia_visual(operacion, barrio, ciudad, area, m2_min, m2_max, tipo,
|
|
| 165 |
img_url = img_el.get_attribute("src") or img_el.get_attribute("data-src") or ""
|
| 166 |
if img_url.startswith("/"): img_url = "https://www.fincaraiz.com.co" + img_url
|
| 167 |
|
| 168 |
-
resultados.append({
|
| 169 |
-
|
| 170 |
-
"Precio": precio,
|
| 171 |
-
"Precio_M2": precio / area,
|
| 172 |
-
"Ubicacion": extraer_ubicacion(txt),
|
| 173 |
-
"Descripcion": txt.replace('\n', ' | ')[:120] + "...",
|
| 174 |
-
"URL": full_url,
|
| 175 |
-
"Imagen": img_url
|
| 176 |
-
})
|
| 177 |
-
urls_vistas.add(full_url)
|
| 178 |
-
cont_fr += 1
|
| 179 |
except: continue
|
| 180 |
page.close()
|
| 181 |
-
log_visible += f"✅ FR:
|
| 182 |
-
except Exception as e: log_visible += f"⚠️ Error
|
| 183 |
|
| 184 |
-
#
|
| 185 |
try:
|
| 186 |
page = context.new_page()
|
| 187 |
-
log_visible += "🔄 MC: Buscando inmuebles reales...\n"
|
| 188 |
page.goto(url_mc, wait_until="domcontentloaded", timeout=60000)
|
| 189 |
try: page.wait_for_load_state("networkidle", timeout=10000)
|
| 190 |
except: pass
|
| 191 |
-
|
| 192 |
for _ in range(4):
|
| 193 |
-
page.mouse.wheel(0, 1000)
|
| 194 |
-
page.wait_for_timeout(2000)
|
| 195 |
|
| 196 |
elementos = page.query_selector_all("a")
|
| 197 |
cont_mc = 0
|
|
@@ -200,16 +158,13 @@ def motor_tramitia_visual(operacion, barrio, ciudad, area, m2_min, m2_max, tipo,
|
|
| 200 |
try:
|
| 201 |
href = el.get_attribute("href")
|
| 202 |
if not es_inmueble_valido(href, "MC"): continue
|
| 203 |
-
|
| 204 |
full_url = f"https://www.metrocuadrado.com{href}" if href.startswith("/") else href
|
| 205 |
if full_url in urls_vistas: continue
|
| 206 |
-
|
| 207 |
card = el.evaluate_handle("el => el.closest('li') || el.closest('[class*=\"card\"]') || el.closest('[class*=\"property\"]') || el.parentElement.parentElement.parentElement")
|
| 208 |
if not card: continue
|
| 209 |
|
| 210 |
txt = card.inner_text()
|
| 211 |
precio = extraer_precio(txt, operacion)
|
| 212 |
-
|
| 213 |
if precio > 0:
|
| 214 |
img_url = ""
|
| 215 |
img_el = card.query_selector("img")
|
|
@@ -217,55 +172,108 @@ def motor_tramitia_visual(operacion, barrio, ciudad, area, m2_min, m2_max, tipo,
|
|
| 217 |
img_url = img_el.get_attribute("src") or img_el.get_attribute("data-src") or ""
|
| 218 |
if img_url.startswith("/"): img_url = "https://www.metrocuadrado.com" + img_url
|
| 219 |
|
| 220 |
-
resultados.append({
|
| 221 |
-
|
| 222 |
-
"Precio": precio,
|
| 223 |
-
"Precio_M2": precio / area,
|
| 224 |
-
"Ubicacion": extraer_ubicacion(txt),
|
| 225 |
-
"Descripcion": txt.replace('\n', ' | ')[:120] + "...",
|
| 226 |
-
"URL": full_url,
|
| 227 |
-
"Imagen": img_url
|
| 228 |
-
})
|
| 229 |
-
urls_vistas.add(full_url)
|
| 230 |
-
cont_mc += 1
|
| 231 |
except: continue
|
| 232 |
page.close()
|
| 233 |
-
log_visible += f"✅ MC:
|
| 234 |
-
except Exception as e: log_visible += f"⚠️ Error
|
| 235 |
|
| 236 |
browser.close()
|
| 237 |
|
| 238 |
if not resultados:
|
| 239 |
-
return f"{log_visible}\n❌ NO
|
| 240 |
|
| 241 |
-
# --- SELECCIÓN FINAL ---
|
| 242 |
df_final_completo = pd.DataFrame(resultados)
|
| 243 |
-
|
| 244 |
df_fr = df_final_completo[df_final_completo['Portal'] == 'Finca Raiz'].head(6)
|
| 245 |
df_mc = df_final_completo[df_final_completo['Portal'] == 'Metrocuadrado'].head(6)
|
| 246 |
df_final = pd.concat([df_fr, df_mc]).reset_index(drop=True)
|
| 247 |
|
| 248 |
-
if df_final.empty:
|
| 249 |
-
return f"{log_visible}\n❌ DATOS VACÍOS.", pd.DataFrame(), None, "---"
|
| 250 |
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
-
# --- PDF
|
| 254 |
-
pdf_path = f"
|
| 255 |
pdf = FPDF()
|
|
|
|
|
|
|
| 256 |
pdf.add_page()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
pdf.set_font("Arial", 'B', 16)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
pdf.set_fill_color(40, 53, 147)
|
| 260 |
pdf.set_text_color(255, 255, 255)
|
| 261 |
-
|
| 262 |
-
pdf.cell(0, 15, encabezado, ln=True, fill=True)
|
| 263 |
pdf.set_text_color(0, 0, 0)
|
| 264 |
pdf.ln(5)
|
| 265 |
|
| 266 |
for idx, r in df_final.iterrows():
|
| 267 |
-
if pdf.get_y() >
|
| 268 |
-
|
| 269 |
y_start = pdf.get_y()
|
| 270 |
img_path = descargar_imagen(r['Imagen'], idx)
|
| 271 |
text_x = 10
|
|
@@ -277,36 +285,30 @@ def motor_tramitia_visual(operacion, barrio, ciudad, area, m2_min, m2_max, tipo,
|
|
| 277 |
except: pass
|
| 278 |
|
| 279 |
pdf.set_xy(text_x, y_start)
|
| 280 |
-
pdf.set_font("Arial", 'B',
|
| 281 |
pdf.cell(0, 6, f"${r['Precio']:,.0f} COP", ln=True)
|
| 282 |
|
| 283 |
pdf.set_x(text_x)
|
| 284 |
-
pdf.set_font("Arial", 'B',
|
| 285 |
pdf.set_text_color(100, 100, 100)
|
| 286 |
-
|
| 287 |
-
ubicacion_limpia = sanear_texto(r['Ubicacion'])
|
| 288 |
-
portal_limpio = sanear_texto(r['Portal'])
|
| 289 |
-
desc_limpia = sanear_texto(r['Descripcion'])
|
| 290 |
-
|
| 291 |
-
pdf.cell(0, 5, f"Ubicacion: {ubicacion_limpia} | Fuente: {portal_limpio}", ln=True)
|
| 292 |
|
| 293 |
pdf.set_x(text_x)
|
| 294 |
pdf.set_font("Arial", '', 8)
|
| 295 |
pdf.set_text_color(0, 0, 0)
|
| 296 |
-
pdf.multi_cell(0, 4,
|
| 297 |
|
| 298 |
pdf.set_x(text_x)
|
| 299 |
pdf.set_font("Arial", 'U', 8)
|
| 300 |
pdf.set_text_color(0, 102, 204)
|
| 301 |
-
pdf.cell(0,
|
| 302 |
pdf.set_text_color(0, 0, 0)
|
| 303 |
|
| 304 |
y_end = pdf.get_y()
|
| 305 |
pdf.set_y(max(y_start + 35, y_end + 5))
|
| 306 |
-
|
| 307 |
pdf.set_draw_color(200, 200, 200)
|
| 308 |
pdf.line(10, pdf.get_y(), 200, pdf.get_y())
|
| 309 |
-
pdf.ln(
|
| 310 |
|
| 311 |
if img_path and os.path.exists(img_path):
|
| 312 |
try: os.remove(img_path)
|
|
@@ -314,65 +316,60 @@ def motor_tramitia_visual(operacion, barrio, ciudad, area, m2_min, m2_max, tipo,
|
|
| 314 |
|
| 315 |
pdf.output(pdf_path)
|
| 316 |
|
| 317 |
-
# --- CÁLCULOS
|
| 318 |
-
promedio = df_final['Precio_M2'].mean() * area
|
| 319 |
resumen = (
|
| 320 |
-
f"
|
| 321 |
-
f"🔹 **
|
| 322 |
-
f"
|
| 323 |
-
f"
|
| 324 |
-
f"📈 Máximo en Zona: ${df_final['Precio'].max():,.0f}"
|
| 325 |
)
|
| 326 |
|
| 327 |
-
# Creamos una copia para formatear solo lo que se ve en pantalla
|
| 328 |
df_mostrar = df_final[['Portal', 'Precio', 'Precio_M2', 'Ubicacion', 'Descripcion', 'URL']].copy()
|
| 329 |
-
|
| 330 |
-
# Formateamos como moneda ($X,XXX) para que se vea impecable
|
| 331 |
df_mostrar['Precio'] = df_mostrar['Precio'].apply(lambda x: f"${x:,.0f}")
|
| 332 |
df_mostrar['Precio_M2'] = df_mostrar['Precio_M2'].apply(lambda x: f"${x:,.0f}")
|
| 333 |
|
| 334 |
-
return f"{log_visible}\n✅
|
| 335 |
|
| 336 |
except Exception as error_fatal:
|
| 337 |
traza = traceback.format_exc()
|
| 338 |
-
return f"❌ ERROR GRAVE DEL SISTEMA:\n{str(error_fatal)}
|
| 339 |
|
| 340 |
# --- INTERFAZ GRÁFICA ---
|
| 341 |
with gr.Blocks() as demo:
|
| 342 |
-
gr.Markdown("##
|
| 343 |
|
| 344 |
with gr.Row():
|
| 345 |
with gr.Column(scale=1):
|
| 346 |
-
op = gr.Radio(["Arriendo", "Venta"], label="
|
| 347 |
c = gr.Textbox(label="Ciudad", value="Barranquilla")
|
| 348 |
b = gr.Textbox(label="Barrio (Ej: La Concepcion)", value="La Concepcion")
|
| 349 |
|
| 350 |
with gr.Row():
|
| 351 |
-
t = gr.Dropdown(["Apartamento", "Casa", "Bodega", "Lote", "Oficina"], label="Tipo", value="Apartamento")
|
| 352 |
-
a = gr.Number(label="Área M2", value=70)
|
| 353 |
|
| 354 |
with gr.Row():
|
| 355 |
-
m2_min = gr.Number(label="M2 Mínimo", value=10)
|
| 356 |
-
m2_max = gr.Number(label="M2 Máximo", value=200)
|
| 357 |
|
| 358 |
with gr.Row():
|
| 359 |
ascensor = gr.Checkbox(label="Con Ascensor")
|
| 360 |
piscina = gr.Checkbox(label="Con Piscina")
|
| 361 |
|
| 362 |
with gr.Row():
|
| 363 |
-
h = gr.Number(label="
|
| 364 |
ban = gr.Number(label="Baños", value=2)
|
| 365 |
-
p = gr.Number(label="
|
| 366 |
|
| 367 |
e = gr.Dropdown(["Menos de 1 año", "1 a 8 años", "9 a 15 años", "16 a 30 años", "Más de 30 años"], label="Antigüedad", value="1 a 8 años")
|
| 368 |
-
btn = gr.Button("GENERAR
|
| 369 |
|
| 370 |
with gr.Column(scale=2):
|
| 371 |
-
res_fin = gr.Markdown("### 💰
|
| 372 |
with gr.Tabs():
|
| 373 |
-
with gr.TabItem("Descargar PDF"): out_pdf = gr.File()
|
| 374 |
-
with gr.TabItem("
|
| 375 |
-
with gr.TabItem("
|
| 376 |
|
| 377 |
btn.click(motor_tramitia_visual, [op, b, c, a, m2_min, m2_max, t, h, ban, p, e, ascensor, piscina], [msg, out_df, out_pdf, res_fin])
|
| 378 |
|
|
|
|
| 1 |
import sys
|
| 2 |
|
|
|
|
| 3 |
def silenciador_errores_basura(unraisable):
    """Unraisable-exception hook that hides one known noise message.

    Args:
        unraisable: Object exposing ``exc_type`` and ``exc_value`` (the
            argument Python passes to an unraisable hook).

    A ``ValueError`` whose text contains ``"Invalid file descriptor: -1"``
    is dropped silently; anything else is handed to the interpreter's
    original hook, ``sys.__unraisablehook__``.
    """
    es_ruido_conocido = (
        unraisable.exc_type == ValueError
        and "Invalid file descriptor: -1" in str(unraisable.exc_value)
    )
    if es_ruido_conocido:
        return
    # Everything else keeps the default reporting behaviour.
    sys.__unraisablehook__(unraisable)
|
|
|
|
| 21 |
import traceback
|
| 22 |
|
| 23 |
# --- INSTALACIÓN DE DEPENDENCIAS ---
|
| 24 |
+
# Best-effort download of the Chromium build used by Playwright. A failure
# here must not stop the app from starting, but it is reported instead of
# being swallowed silently, and KeyboardInterrupt/SystemExit are no longer
# intercepted.
try:
    subprocess.run(["playwright", "install", "chromium"], check=True)
except Exception as error_instalacion:
    print(f"[setup] 'playwright install chromium' failed: {error_instalacion}", file=sys.stderr)

# fake-useragent is installed on demand. Invoking pip through the running
# interpreter guarantees the package lands in the environment that will
# import it on the next line.
try:
    from fake_useragent import UserAgent
except ImportError:
    subprocess.run([sys.executable, "-m", "pip", "install", "fake-useragent"], check=True)
    from fake_useragent import UserAgent
|
| 30 |
|
|
|
|
| 31 |
def sanear_texto(texto):
    """Return ``texto`` reduced to Latin-1 characters, trimmed.

    Args:
        texto: Value to clean; anything that is not a ``str`` yields ``""``.

    Returns:
        str: The input without any character outside the Latin-1 range
        (emojis, for instance) and without leading/trailing whitespace.
    """
    if isinstance(texto, str):
        # Code points above 255 are exactly the ones Latin-1 cannot encode.
        solo_latin1 = "".join(caracter for caracter in texto if ord(caracter) < 256)
        return solo_latin1.strip()
    return ""
|
| 34 |
|
|
|
|
| 35 |
def descargar_imagen(url, idx):
|
| 36 |
+
if not url or len(url) < 5 or url.startswith("data:"): return None
|
|
|
|
| 37 |
try:
|
| 38 |
+
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", "Accept": "image/*", "Referer": "https://www.fincaraiz.com.co/"}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
r = requests.get(url, timeout=8, headers=headers)
|
| 40 |
if r.status_code == 200:
|
| 41 |
img = Image.open(io.BytesIO(r.content))
|
|
|
|
| 46 |
except: return None
|
| 47 |
return None
|
| 48 |
|
|
|
|
| 49 |
def construir_urls_final(operacion, barrio, ciudad, tipo, hab, ban, park, antiguedad, m2_min, m2_max, ascensor, piscina):
    """Build the listing-search URLs for Finca Raiz and Metrocuadrado.

    Args:
        operacion: Operation label (e.g. "Arriendo", "Venta"); lower-cased
            and trimmed for the URL.
        barrio: Neighbourhood name; converted to an ASCII, hyphenated slug.
        ciudad: City name; converted to an ASCII, hyphenated slug.
        tipo: Property type label (e.g. "Apartamento", "Bodega").
        hab: Minimum number of bedrooms (converted with ``int``).
        ban: Minimum number of bathrooms (converted with ``int``).
        park: Number of parking spots (converted with ``int``).
        antiguedad: Age-range label; unknown labels fall back to
            ``"de-1-a-8-anos"``.
        m2_min: Lower bound of the area filter (converted with ``int``).
        m2_max: Upper bound of the area filter (converted with ``int``).
        ascensor: When truthy, ``/con-ascensor`` is appended to the
            Finca Raiz URL.
        piscina: When truthy, ``/con-piscina`` is appended to the
            Finca Raiz URL.

    Returns:
        tuple[str, str]: ``(finca_raiz_url, metrocuadrado_url)``.
    """
    # Imported locally so the function does not depend on a file-level import.
    import unicodedata

    def _a_slug(texto):
        # Strip diacritics ("Concepción" -> "concepcion") and collapse any
        # run of whitespace into a single hyphen, matching the ASCII style of
        # the other slugs built here (e.g. "años" -> "anos").
        ascii_puro = unicodedata.normalize("NFKD", texto).encode("ascii", "ignore").decode("ascii")
        return "-".join(ascii_puro.lower().split())

    mapa_ant = {
        "Menos de 1 año": "de-0-a-1-anos",
        "1 a 8 años": "de-1-a-8-anos",
        "9 a 15 años": "de-9-a-15-anos",
        "16 a 30 años": "de-16-a-30-anos",
        "Más de 30 años": "mas-de-30-anos",
    }
    slug_ant = mapa_ant.get(antiguedad, "de-1-a-8-anos")

    # Singular only for exactly one parking spot.
    n_park = int(park)
    slug_park = f"{n_park}-parqueadero" if n_park == 1 else f"{n_park}-parqueaderos"

    b_slug = _a_slug(barrio)
    c_slug = _a_slug(ciudad)
    op_slug = operacion.lower().strip()

    tipo_slug = tipo.lower().strip()
    # Apartments and houses share one combined category in the Finca Raiz
    # path; every other type is simply pluralised with a trailing "s".
    tipo_fr = "casas-y-apartamentos-y-apartaestudios" if tipo_slug in ["apartamento", "casa"] else tipo_slug + "s"

    url_fr_base = f"https://www.fincaraiz.com.co/{op_slug}/{tipo_fr}/{b_slug}/{c_slug}/{int(hab)}-o-mas-habitaciones/{int(ban)}-o-mas-banos/{slug_park}/{slug_ant}/m2-desde-{int(m2_min)}/m2-hasta-{int(m2_max)}"
    if ascensor:
        url_fr_base += "/con-ascensor"
    if piscina:
        url_fr_base += "/con-piscina"

    url_mc = f"https://www.metrocuadrado.com/{tipo_slug}-casa-oficina/{op_slug}/{c_slug}/{b_slug}/{int(ban)}-banos-{int(hab)}-habitaciones/?search=form"

    return url_fr_base, url_mc
|
| 64 |
|
|
|
|
| 65 |
def extraer_precio(texto, operacion):
|
| 66 |
patron = r'\$\s?(\d{1,3}(?:[.,]\d{3})*)'
|
| 67 |
coincidencias = re.findall(patron, texto)
|
|
|
|
| 78 |
if "," in linea or " en " in linea.lower():
|
| 79 |
limpio = re.sub(r'(?i)(apartamento|casa|bodega|lote|oficina)\s+en\s+(arriendo|venta)\s+(en\s+)?', '', linea)
|
| 80 |
return limpio[:60].strip()
|
|
|
|
| 81 |
for linea in lineas[1:4]:
|
| 82 |
+
if "$" not in linea and not re.search(r'\d', linea): return linea[:60]
|
|
|
|
| 83 |
return "Ubicacion en la zona"
|
| 84 |
|
|
|
|
| 85 |
def es_inmueble_valido(href, portal):
    """Tell whether ``href`` looks like a property-listing link.

    Args:
        href: Link target taken from an anchor; may be ``None`` or empty.
        portal: ``"FR"`` (Finca Raiz) or ``"MC"`` (Metrocuadrado); any other
            value is rejected.

    Returns:
        bool: ``True`` when the link matches the listing pattern of the
        given portal and contains none of the excluded fragments.
    """
    if not href:
        return False

    # Script pseudo-links, blog posts and project pages are never listings.
    if any(vetado in href for vetado in ("javascript", "blog", "proyectos")):
        return False

    if portal == "FR":
        # Either a trailing 7-10 digit id or an operation marker in the path.
        termina_en_id = re.search(r'/\d{7,10}$', href) is not None
        return termina_en_id or "arriendo-en" in href or "venta-en" in href

    if portal == "MC":
        return "/inmueble/" in href or "-id-" in href

    return False
|
| 92 |
|
| 93 |
+
# --- MOTOR PRINCIPAL ---
|
| 94 |
def motor_tramitia_visual(operacion, barrio, ciudad, area, m2_min, m2_max, tipo, hab, ban, park, antiguedad, ascensor, piscina):
|
| 95 |
resultados = []
|
| 96 |
log_visible = ""
|
|
|
|
| 105 |
browser = p.chromium.launch(headless=True, args=['--disable-blink-features=AutomationControlled', '--no-sandbox'])
|
| 106 |
context = browser.new_context(viewport={'width': 1366, 'height': 768}, user_agent=ua.random)
|
| 107 |
|
| 108 |
+
# FINCA RAÍZ
|
| 109 |
try:
|
| 110 |
page = context.new_page()
|
|
|
|
| 111 |
page.goto(url_fr, wait_until="domcontentloaded", timeout=60000)
|
| 112 |
try: page.wait_for_load_state("networkidle", timeout=10000)
|
| 113 |
except: pass
|
|
|
|
| 114 |
for _ in range(4):
|
| 115 |
+
page.mouse.wheel(0, 1000); page.wait_for_timeout(2000)
|
|
|
|
| 116 |
|
| 117 |
elementos = page.query_selector_all("a")
|
| 118 |
cont_fr = 0
|
|
|
|
| 121 |
try:
|
| 122 |
href = el.get_attribute("href")
|
| 123 |
if not es_inmueble_valido(href, "FR"): continue
|
|
|
|
| 124 |
full_url = f"https://www.fincaraiz.com.co{href}" if href.startswith("/") else href
|
| 125 |
if full_url in urls_vistas: continue
|
|
|
|
| 126 |
card = el.evaluate_handle("el => el.closest('article') || el.closest('[class*=\"card\"]') || el.parentElement.parentElement")
|
| 127 |
if not card: continue
|
| 128 |
|
| 129 |
txt = card.inner_text()
|
| 130 |
precio = extraer_precio(txt, operacion)
|
|
|
|
| 131 |
if precio > 0:
|
| 132 |
img_url = ""
|
| 133 |
img_el = card.query_selector("img")
|
|
|
|
| 135 |
img_url = img_el.get_attribute("src") or img_el.get_attribute("data-src") or ""
|
| 136 |
if img_url.startswith("/"): img_url = "https://www.fincaraiz.com.co" + img_url
|
| 137 |
|
| 138 |
+
resultados.append({"Portal": "Finca Raiz", "Precio": precio, "Precio_M2": precio / area, "Ubicacion": extraer_ubicacion(txt), "Descripcion": txt.replace('\n', ' | ')[:120] + "...", "URL": full_url, "Imagen": img_url})
|
| 139 |
+
urls_vistas.add(full_url); cont_fr += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
except: continue
|
| 141 |
page.close()
|
| 142 |
+
log_visible += f"✅ FR: {cont_fr} inmuebles.\n"
|
| 143 |
+
except Exception as e: log_visible += f"⚠️ Error FR.\n"
|
| 144 |
|
| 145 |
+
# METROCUADRADO
|
| 146 |
try:
|
| 147 |
page = context.new_page()
|
|
|
|
| 148 |
page.goto(url_mc, wait_until="domcontentloaded", timeout=60000)
|
| 149 |
try: page.wait_for_load_state("networkidle", timeout=10000)
|
| 150 |
except: pass
|
|
|
|
| 151 |
for _ in range(4):
|
| 152 |
+
page.mouse.wheel(0, 1000); page.wait_for_timeout(2000)
|
|
|
|
| 153 |
|
| 154 |
elementos = page.query_selector_all("a")
|
| 155 |
cont_mc = 0
|
|
|
|
| 158 |
try:
|
| 159 |
href = el.get_attribute("href")
|
| 160 |
if not es_inmueble_valido(href, "MC"): continue
|
|
|
|
| 161 |
full_url = f"https://www.metrocuadrado.com{href}" if href.startswith("/") else href
|
| 162 |
if full_url in urls_vistas: continue
|
|
|
|
| 163 |
card = el.evaluate_handle("el => el.closest('li') || el.closest('[class*=\"card\"]') || el.closest('[class*=\"property\"]') || el.parentElement.parentElement.parentElement")
|
| 164 |
if not card: continue
|
| 165 |
|
| 166 |
txt = card.inner_text()
|
| 167 |
precio = extraer_precio(txt, operacion)
|
|
|
|
| 168 |
if precio > 0:
|
| 169 |
img_url = ""
|
| 170 |
img_el = card.query_selector("img")
|
|
|
|
| 172 |
img_url = img_el.get_attribute("src") or img_el.get_attribute("data-src") or ""
|
| 173 |
if img_url.startswith("/"): img_url = "https://www.metrocuadrado.com" + img_url
|
| 174 |
|
| 175 |
+
resultados.append({"Portal": "Metrocuadrado", "Precio": precio, "Precio_M2": precio / area, "Ubicacion": extraer_ubicacion(txt), "Descripcion": txt.replace('\n', ' | ')[:120] + "...", "URL": full_url, "Imagen": img_url})
|
| 176 |
+
urls_vistas.add(full_url); cont_mc += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
except: continue
|
| 178 |
page.close()
|
| 179 |
+
log_visible += f"✅ MC: {cont_mc} inmuebles.\n"
|
| 180 |
+
except Exception as e: log_visible += f"⚠️ Error MC.\n"
|
| 181 |
|
| 182 |
browser.close()
|
| 183 |
|
| 184 |
if not resultados:
|
| 185 |
+
return f"{log_visible}\n❌ NO HAY DATOS.", pd.DataFrame(), None, "---"
|
| 186 |
|
|
|
|
| 187 |
df_final_completo = pd.DataFrame(resultados)
|
|
|
|
| 188 |
df_fr = df_final_completo[df_final_completo['Portal'] == 'Finca Raiz'].head(6)
|
| 189 |
df_mc = df_final_completo[df_final_completo['Portal'] == 'Metrocuadrado'].head(6)
|
| 190 |
df_final = pd.concat([df_fr, df_mc]).reset_index(drop=True)
|
| 191 |
|
| 192 |
+
if df_final.empty: return f"{log_visible}\n❌ DATOS VACÍOS.", pd.DataFrame(), None, "---"
|
|
|
|
| 193 |
|
| 194 |
+
# --- CÁLCULOS TÉCNICOS (NIVEL SAE) ---
|
| 195 |
+
margen_negociacion = 0.08 # 8% de castigo sobre precio de oferta
|
| 196 |
+
mediana_m2 = df_final['Precio_M2'].median()
|
| 197 |
+
promedio_m2 = df_final['Precio_M2'].mean()
|
| 198 |
+
|
| 199 |
+
# El valor técnico usa la Mediana castigada por el margen de negociación
|
| 200 |
+
valor_tecnico_m2 = mediana_m2 * (1 - margen_negociacion)
|
| 201 |
+
valor_total_sugerido = valor_tecnico_m2 * area
|
| 202 |
|
| 203 |
+
# --- CREACIÓN DEL PDF FORMAL ---
|
| 204 |
+
pdf_path = f"Estudio_Mercado_{int(time.time())}.pdf"
|
| 205 |
pdf = FPDF()
|
| 206 |
+
|
| 207 |
+
# PÁGINA 1: PORTADA Y OBJETIVO
|
| 208 |
pdf.add_page()
|
| 209 |
+
pdf.set_font("Arial", 'B', 18)
|
| 210 |
+
pdf.set_text_color(20, 40, 90)
|
| 211 |
+
pdf.cell(0, 20, sanear_texto(f"ESTUDIO DE MERCADO INMOBILIARIO"), ln=True, align='C')
|
| 212 |
+
pdf.set_font("Arial", 'B', 14)
|
| 213 |
+
pdf.cell(0, 10, sanear_texto(f"ESTIMACION DE {operacion.upper()}"), ln=True, align='C')
|
| 214 |
+
pdf.line(20, 45, 190, 45)
|
| 215 |
+
pdf.ln(15)
|
| 216 |
+
|
| 217 |
+
pdf.set_font("Arial", 'B', 12)
|
| 218 |
+
pdf.set_text_color(0, 0, 0)
|
| 219 |
+
pdf.cell(0, 10, sanear_texto("1. DATOS DEL INMUEBLE SUJETO"), ln=True)
|
| 220 |
+
pdf.set_font("Arial", '', 11)
|
| 221 |
+
pdf.cell(0, 6, sanear_texto(f"- Tipo de Inmueble: {tipo.capitalize()}"), ln=True)
|
| 222 |
+
pdf.cell(0, 6, sanear_texto(f"- Ubicacion: Barrio {barrio.title()}, {ciudad.title()}"), ln=True)
|
| 223 |
+
pdf.cell(0, 6, sanear_texto(f"- Area Privada/Construida: {area} m2"), ln=True)
|
| 224 |
+
pdf.cell(0, 6, sanear_texto(f"- Caracteristicas: {hab} Hab, {ban} Banos, {park} Parqueaderos"), ln=True)
|
| 225 |
+
pdf.ln(10)
|
| 226 |
+
|
| 227 |
+
pdf.set_font("Arial", 'B', 12)
|
| 228 |
+
pdf.cell(0, 10, sanear_texto("2. METODOLOGIA (Metodo Comparativo de Mercado)"), ln=True)
|
| 229 |
+
pdf.set_font("Arial", '', 11)
|
| 230 |
+
pdf.multi_cell(0, 5, sanear_texto("El presente estudio utiliza el Metodo Comparativo de Mercado. Consiste en deducir el valor del inmueble sujeto mediante la comparacion sistematica de las ofertas recientes de inmuebles similares en la misma zona o zonas homogeneas comparables."))
|
| 231 |
+
pdf.ln(5)
|
| 232 |
+
pdf.multi_cell(0, 5, sanear_texto("Factores de Homogenizacion: Se ha aplicado un factor de comercializacion (Margen de Negociacion) del 8% sobre los precios de oferta publicos para estimar el valor real de cierre contractual, aislando factores especulativos."))
|
| 233 |
+
|
| 234 |
+
# PÁGINA 2: CONCLUSIONES ECONÓMICAS
|
| 235 |
+
pdf.add_page()
|
| 236 |
+
pdf.set_font("Arial", 'B', 14)
|
| 237 |
+
pdf.set_fill_color(230, 230, 230)
|
| 238 |
+
pdf.cell(0, 12, sanear_texto("3. RESULTADOS ESTADISTICOS Y VALOR SUGERIDO"), ln=True, fill=True)
|
| 239 |
+
pdf.ln(5)
|
| 240 |
|
| 241 |
+
pdf.set_font("Arial", '', 11)
|
| 242 |
+
pdf.cell(80, 8, sanear_texto("Total Inmuebles Analizados:"), border=1)
|
| 243 |
+
pdf.cell(50, 8, sanear_texto(f"{len(df_final)}"), border=1, ln=True, align='C')
|
| 244 |
+
|
| 245 |
+
pdf.cell(80, 8, sanear_texto("Valor Promedio Zona (M2):"), border=1)
|
| 246 |
+
pdf.cell(50, 8, sanear_texto(f"${promedio_m2:,.0f}"), border=1, ln=True, align='C')
|
| 247 |
+
|
| 248 |
+
pdf.cell(80, 8, sanear_texto("Valor Mediana Zona (M2):"), border=1)
|
| 249 |
+
pdf.cell(50, 8, sanear_texto(f"${mediana_m2:,.0f}"), border=1, ln=True, align='C')
|
| 250 |
+
|
| 251 |
+
pdf.cell(80, 8, sanear_texto("Margen Negociacion Aplicado:"), border=1)
|
| 252 |
+
pdf.cell(50, 8, sanear_texto("8.00%"), border=1, ln=True, align='C')
|
| 253 |
+
pdf.ln(10)
|
| 254 |
+
|
| 255 |
+
pdf.set_font("Arial", 'B', 14)
|
| 256 |
+
pdf.set_text_color(0, 100, 0) # Verde oscuro
|
| 257 |
+
pdf.cell(0, 10, sanear_texto(f"VALOR ESTIMADO DE {operacion.upper()} (M2): ${valor_tecnico_m2:,.0f} COP"), ln=True)
|
| 258 |
pdf.set_font("Arial", 'B', 16)
|
| 259 |
+
pdf.cell(0, 12, sanear_texto(f"VALOR TOTAL SUGERIDO: ${valor_total_sugerido:,.0f} COP"), ln=True)
|
| 260 |
+
|
| 261 |
+
pdf.set_text_color(0, 0, 0)
|
| 262 |
+
pdf.ln(15)
|
| 263 |
+
pdf.set_font("Arial", '', 9)
|
| 264 |
+
pdf.multi_cell(0, 4, sanear_texto("Nota Legal: Este documento es un Estudio de Mercado Comparativo generado de forma analitica. Sirve como anexo tecnico referencial para la toma de decisiones administrativas o comerciales."))
|
| 265 |
+
|
| 266 |
+
# PÁGINA 3+: ANEXO FOTOGRÁFICO DE COMPARABLES
|
| 267 |
+
pdf.add_page()
|
| 268 |
+
pdf.set_font("Arial", 'B', 14)
|
| 269 |
pdf.set_fill_color(40, 53, 147)
|
| 270 |
pdf.set_text_color(255, 255, 255)
|
| 271 |
+
pdf.cell(0, 12, sanear_texto("4. ANEXO TECNICO: TESTIGOS COMPARABLES"), ln=True, fill=True)
|
|
|
|
| 272 |
pdf.set_text_color(0, 0, 0)
|
| 273 |
pdf.ln(5)
|
| 274 |
|
| 275 |
for idx, r in df_final.iterrows():
|
| 276 |
+
if pdf.get_y() > 240: pdf.add_page()
|
|
|
|
| 277 |
y_start = pdf.get_y()
|
| 278 |
img_path = descargar_imagen(r['Imagen'], idx)
|
| 279 |
text_x = 10
|
|
|
|
| 285 |
except: pass
|
| 286 |
|
| 287 |
pdf.set_xy(text_x, y_start)
|
| 288 |
+
pdf.set_font("Arial", 'B', 11)
|
| 289 |
pdf.cell(0, 6, f"${r['Precio']:,.0f} COP", ln=True)
|
| 290 |
|
| 291 |
pdf.set_x(text_x)
|
| 292 |
+
pdf.set_font("Arial", 'B', 8)
|
| 293 |
pdf.set_text_color(100, 100, 100)
|
| 294 |
+
pdf.cell(0, 4, f"Ubicacion: {sanear_texto(r['Ubicacion'])} | Fuente: {sanear_texto(r['Portal'])}", ln=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
|
| 296 |
pdf.set_x(text_x)
|
| 297 |
pdf.set_font("Arial", '', 8)
|
| 298 |
pdf.set_text_color(0, 0, 0)
|
| 299 |
+
pdf.multi_cell(0, 4, sanear_texto(r['Descripcion']))
|
| 300 |
|
| 301 |
pdf.set_x(text_x)
|
| 302 |
pdf.set_font("Arial", 'U', 8)
|
| 303 |
pdf.set_text_color(0, 102, 204)
|
| 304 |
+
pdf.cell(0, 4, ">> Ver publicacion fuente", link=r['URL'], ln=True)
|
| 305 |
pdf.set_text_color(0, 0, 0)
|
| 306 |
|
| 307 |
y_end = pdf.get_y()
|
| 308 |
pdf.set_y(max(y_start + 35, y_end + 5))
|
|
|
|
| 309 |
pdf.set_draw_color(200, 200, 200)
|
| 310 |
pdf.line(10, pdf.get_y(), 200, pdf.get_y())
|
| 311 |
+
pdf.ln(4)
|
| 312 |
|
| 313 |
if img_path and os.path.exists(img_path):
|
| 314 |
try: os.remove(img_path)
|
|
|
|
| 316 |
|
| 317 |
pdf.output(pdf_path)
|
| 318 |
|
| 319 |
+
# --- CÁLCULOS INTERFAZ ---
|
|
|
|
| 320 |
resumen = (
|
| 321 |
+
f"📊 **RESULTADO ESTUDIO DE MERCADO**\n"
|
| 322 |
+
f"🔹 **Mediana Mercado:** ${mediana_m2:,.0f} / m2\n"
|
| 323 |
+
f"📉 **Margen Comercialización:** -8%\n"
|
| 324 |
+
f"✅ **VALOR SUGERIDO (Cierre):** ${valor_total_sugerido:,.0f}"
|
|
|
|
| 325 |
)
|
| 326 |
|
|
|
|
| 327 |
df_mostrar = df_final[['Portal', 'Precio', 'Precio_M2', 'Ubicacion', 'Descripcion', 'URL']].copy()
|
|
|
|
|
|
|
| 328 |
df_mostrar['Precio'] = df_mostrar['Precio'].apply(lambda x: f"${x:,.0f}")
|
| 329 |
df_mostrar['Precio_M2'] = df_mostrar['Precio_M2'].apply(lambda x: f"${x:,.0f}")
|
| 330 |
|
| 331 |
+
return f"{log_visible}\n✅ Reporte de Mercado Generado.", df_mostrar, pdf_path, resumen
|
| 332 |
|
| 333 |
except Exception as error_fatal:
|
| 334 |
traza = traceback.format_exc()
|
| 335 |
+
return f"❌ ERROR GRAVE DEL SISTEMA:\n{str(error_fatal)}", pd.DataFrame(), None, "⚠️ Falló la ejecución"
|
| 336 |
|
| 337 |
# --- INTERFAZ GRÁFICA ---
|
| 338 |
with gr.Blocks() as demo:
|
| 339 |
+
gr.Markdown("## 🏢 TramitIA Pro: Estudio de Mercado Institucional (SAE)")
|
| 340 |
|
| 341 |
with gr.Row():
|
| 342 |
with gr.Column(scale=1):
|
| 343 |
+
op = gr.Radio(["Arriendo", "Venta"], label="Tipo de Operación", value="Arriendo")
|
| 344 |
c = gr.Textbox(label="Ciudad", value="Barranquilla")
|
| 345 |
b = gr.Textbox(label="Barrio (Ej: La Concepcion)", value="La Concepcion")
|
| 346 |
|
| 347 |
with gr.Row():
|
| 348 |
+
t = gr.Dropdown(["Apartamento", "Casa", "Bodega", "Lote", "Oficina"], label="Tipo de Inmueble", value="Apartamento")
|
| 349 |
+
a = gr.Number(label="Área M2 a tasar", value=70)
|
| 350 |
|
| 351 |
with gr.Row():
|
| 352 |
+
m2_min = gr.Number(label="Filtro Mercado M2 Mínimo", value=10)
|
| 353 |
+
m2_max = gr.Number(label="Filtro Mercado M2 Máximo", value=200)
|
| 354 |
|
| 355 |
with gr.Row():
|
| 356 |
ascensor = gr.Checkbox(label="Con Ascensor")
|
| 357 |
piscina = gr.Checkbox(label="Con Piscina")
|
| 358 |
|
| 359 |
with gr.Row():
|
| 360 |
+
h = gr.Number(label="Habitaciones", value=3)
|
| 361 |
ban = gr.Number(label="Baños", value=2)
|
| 362 |
+
p = gr.Number(label="Parqueaderos", value=1)
|
| 363 |
|
| 364 |
e = gr.Dropdown(["Menos de 1 año", "1 a 8 años", "9 a 15 años", "16 a 30 años", "Más de 30 años"], label="Antigüedad", value="1 a 8 años")
|
| 365 |
+
btn = gr.Button("GENERAR ESTUDIO DE MERCADO OFICIAL", variant="primary")
|
| 366 |
|
| 367 |
with gr.Column(scale=2):
|
| 368 |
+
res_fin = gr.Markdown("### 💰 Resumen Financiero...")
|
| 369 |
with gr.Tabs():
|
| 370 |
+
with gr.TabItem("Descargar Estudio Técnico (PDF)"): out_pdf = gr.File()
|
| 371 |
+
with gr.TabItem("Matriz de Datos"): out_df = gr.Dataframe()
|
| 372 |
+
with gr.TabItem("Trazabilidad (Logs)"): msg = gr.Textbox(lines=10)
|
| 373 |
|
| 374 |
btn.click(motor_tramitia_visual, [op, b, c, a, m2_min, m2_max, t, h, ban, p, e, ascensor, piscina], [msg, out_df, out_pdf, res_fin])
|
| 375 |
|