Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,175 +2,103 @@ import os
|
|
| 2 |
import subprocess
|
| 3 |
import pandas as pd
|
| 4 |
import datetime
|
| 5 |
-
import folium
|
| 6 |
from fpdf import FPDF
|
| 7 |
import gradio as gr
|
| 8 |
-
import plotly.express as px
|
| 9 |
from playwright.sync_api import sync_playwright
|
| 10 |
import time
|
|
|
|
| 11 |
|
| 12 |
-
# ---
|
| 13 |
try:
|
| 14 |
subprocess.run(["playwright", "install", "chromium"], check=True)
|
| 15 |
-
except:
|
| 16 |
-
pass
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
pdf.set_font("Arial", 'B', 16)
|
| 23 |
-
pdf.set_text_color(26, 54, 93)
|
| 24 |
-
pdf.cell(0, 15, "TRAMITIA PRO - INFORME TÉCNICO DE ESTIMACIÓN", ln=True, align='C')
|
| 25 |
-
|
| 26 |
-
pdf.set_font("Arial", '', 10)
|
| 27 |
-
pdf.set_text_color(0, 0, 0)
|
| 28 |
-
pdf.cell(0, 5, f"Zona Analizada: {zona.upper()}", ln=True)
|
| 29 |
-
pdf.cell(0, 5, f"Área de Referencia: {area_m2} m2", ln=True)
|
| 30 |
-
pdf.cell(0, 5, f"Fecha: {datetime.datetime.now().strftime('%d/%m/%Y')}", ln=True)
|
| 31 |
-
pdf.ln(10)
|
| 32 |
-
|
| 33 |
-
for i, r in df.iterrows():
|
| 34 |
-
pdf.set_font("Arial", 'B', 11)
|
| 35 |
-
pdf.set_fill_color(230, 235, 245)
|
| 36 |
-
pdf.cell(0, 8, f"REFERENCIA #{i+1} - {r['Portal']}", ln=True, fill=True)
|
| 37 |
-
|
| 38 |
-
pdf.set_font("Arial", '', 9)
|
| 39 |
-
# Detalle técnico solicitado
|
| 40 |
-
detalle = f"Precio: ${r['Precio']:,.0f} | Hab: {r['Habitaciones']} | Baños: {r['Banos']} | Garajes: {r['Garajes']} | Edad: {r['Antiguedad']}"
|
| 41 |
-
pdf.cell(0, 7, detalle, ln=True)
|
| 42 |
-
|
| 43 |
-
pdf.set_font("Arial", 'I', 8)
|
| 44 |
-
pdf.multi_cell(0, 5, f"Descripción: {r['Descripcion']}")
|
| 45 |
-
|
| 46 |
-
pdf.set_font("Arial", 'U', 8)
|
| 47 |
-
pdf.set_text_color(0, 0, 255)
|
| 48 |
-
pdf.cell(0, 7, f"Link de la publicación: {r['URL']}", ln=True)
|
| 49 |
-
pdf.set_text_color(0, 0, 0)
|
| 50 |
-
pdf.ln(5)
|
| 51 |
-
|
| 52 |
-
if pdf.get_y() > 250: pdf.add_page()
|
| 53 |
-
|
| 54 |
-
path_pdf = "Estimacion_Renta_TramitIA.pdf"
|
| 55 |
-
pdf.output(path_pdf)
|
| 56 |
-
return path_pdf
|
| 57 |
-
|
| 58 |
-
# --- MOTOR DE BÚSQUEDA HÍBRIDO ---
|
| 59 |
-
def motor_tramitia_pro(zona, area, tipo, hab, ban, park, edad):
|
| 60 |
-
# Forzar la ciudad para evitar saltos de localización
|
| 61 |
-
busqueda_url = zona.lower().replace(" ", "-")
|
| 62 |
-
if "bogota" not in busqueda_url:
|
| 63 |
-
busqueda_url += "-bogota"
|
| 64 |
-
|
| 65 |
-
all_data = []
|
| 66 |
|
| 67 |
with sync_playwright() as p:
|
|
|
|
| 68 |
browser = p.chromium.launch(headless=True)
|
| 69 |
-
#
|
| 70 |
-
context = browser.new_context(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64)
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
]
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
| 91 |
|
| 92 |
-
#
|
| 93 |
-
|
| 94 |
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
desc_lineas = [l.strip() for l in full_text.split('\n') if len(l) > 20]
|
| 113 |
-
descripcion = desc_lineas[0] if desc_lineas else "Ver más detalles en el portal."
|
| 114 |
-
|
| 115 |
-
all_data.append({
|
| 116 |
-
"Portal": p_info["id"],
|
| 117 |
-
"Precio": max(precios),
|
| 118 |
-
"Precio_M2": max(precios) / area,
|
| 119 |
-
"Habitaciones": hab,
|
| 120 |
-
"Banos": ban,
|
| 121 |
-
"Garajes": park,
|
| 122 |
-
"Antiguedad": edad,
|
| 123 |
-
"Descripcion": descripcion,
|
| 124 |
-
"URL": url_final
|
| 125 |
-
})
|
| 126 |
-
except: continue
|
| 127 |
-
except: pass
|
| 128 |
-
finally: page.close()
|
| 129 |
-
browser.close()
|
| 130 |
|
| 131 |
-
if not
|
| 132 |
-
return "
|
| 133 |
|
| 134 |
-
|
| 135 |
-
pdf_path = generar_pdf_pro(zona, area, df)
|
| 136 |
|
| 137 |
-
#
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
-
return f"✅
|
| 146 |
|
| 147 |
-
# --- INTERFAZ
|
| 148 |
-
with gr.Blocks(
|
| 149 |
-
gr.
|
| 150 |
-
|
| 151 |
with gr.Row():
|
| 152 |
-
with gr.Column(
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
msg = gr.Markdown("Esperando consulta...")
|
| 166 |
-
with gr.Tabs():
|
| 167 |
-
with gr.TabItem("📋 Ficha Técnica y PDF"):
|
| 168 |
-
table = gr.Dataframe(interactive=False)
|
| 169 |
-
file = gr.File(label="Descargar Reporte de Consultoría")
|
| 170 |
-
with gr.TabItem("📊 Mapa y Gráficas"):
|
| 171 |
-
plot = gr.Plot()
|
| 172 |
-
mapa = gr.HTML()
|
| 173 |
-
|
| 174 |
-
btn.click(motor_tramitia_pro, [zona_in, area_in, tipo_in, hab_in, ban_in, gar_in, ant_in], [msg, table, file, plot, mapa])
|
| 175 |
|
| 176 |
demo.launch()
|
|
|
|
| 2 |
import subprocess
|
| 3 |
import pandas as pd
|
| 4 |
import datetime
|
|
|
|
| 5 |
from fpdf import FPDF
|
| 6 |
import gradio as gr
|
|
|
|
| 7 |
from playwright.sync_api import sync_playwright
|
| 8 |
import time
|
| 9 |
+
import random
|
| 10 |
|
| 11 |
+
# --- REINICIO DE NAVEGADOR ---
|
| 12 |
# Best-effort download of the Chromium build Playwright needs. A failure here
# must not stop the app from starting, but it is logged so that a later
# "browser not found" error can be traced back to this step.
try:
    subprocess.run(["playwright", "install", "chromium"], check=True)
except (OSError, subprocess.CalledProcessError) as exc:
    # OSError: the `playwright` executable is missing or not runnable.
    # CalledProcessError: the installer ran but exited with a non-zero code.
    print(f"[startup] 'playwright install chromium' failed: {exc!r}")
|
|
|
|
| 15 |
|
| 16 |
+
def _extract_prices(texto):
    """Return every integer of six or more digits found in a listing's text."""
    limpio = texto.replace('.', '').replace('$', '')
    # isascii() guards int(): str.isdigit() also accepts characters such as
    # "²" that int() cannot parse.
    return [
        int(token)
        for token in limpio.split()
        if token.isascii() and token.isdigit() and len(token) >= 6
    ]


def _latin1_safe(texto):
    """Replace characters that FPDF's built-in (Latin-1) fonts cannot encode."""
    return str(texto).encode("latin-1", "replace").decode("latin-1")


def motor_tramitia_limpio(zona, area, tipo, hab, ban, park, edad):
    """Scrape rental listings from Finca Raiz for a zone and build a PDF report.

    Args:
        zona: Free-text neighbourhood/city typed by the user.
        area: Reference area in m2, used to compute the price per m2.
        tipo: Property type; lower-cased and used as a URL path segment.
        hab: Number of bedrooms (sent as the ``habitaciones`` query filter).
        ban: Number of bathrooms (sent as the ``banos`` query filter).
        park: Parking spots; copied into each result row as entered.
        edad: Property age; copied into each result row as entered.

    Returns:
        A 3-tuple ``(message, dataframe, pdf_path)``, one value per output
        component wired to the search button. ``dataframe`` and ``pdf_path``
        are ``None`` when the input is invalid or nothing was found.
    """
    # Local imports: these stdlib names are not part of the file's import block.
    import uuid
    from urllib.parse import urljoin

    max_resultados = 5
    base_url = "https://fincaraiz.com.co"

    busqueda_clean = (zona or "").lower().strip()
    if not busqueda_clean:
        return "❌ Ingresa un barrio o zona para buscar.", None, None

    # Number inputs can arrive empty (None); validate before building the URL
    # or dividing by the area.
    try:
        area_m2 = float(area)
        n_hab = int(hab)
        n_ban = int(ban)
    except (TypeError, ValueError):
        return "❌ Revisa los datos: área, habitaciones y baños deben ser números.", None, None
    if area_m2 <= 0:
        return "❌ El área debe ser mayor que 0.", None, None

    resultados = []
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        try:
            # Fresh context with a varying User-Agent suffix on every search.
            context = browser.new_context(
                user_agent=f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) {random.randint(1,100)}"
            )
            page = context.new_page()

            query_url = busqueda_clean.replace(" ", "-")
            url = (
                f"https://www.fincaraiz.com.co/{tipo.lower()}/arriendo/{query_url}"
                f"?habitaciones={n_hab}&banos={n_ban}"
            )
            page.goto(url, wait_until="networkidle", timeout=60000)

            # Scroll down so listings below the promoted banner get rendered,
            # then give the page time to load them.
            page.mouse.wheel(0, 3000)
            page.wait_for_timeout(4000)

            for card in page.query_selector_all("article"):
                if len(resultados) >= max_resultados:
                    break

                content = card.inner_text()
                # Skip the recurring "Lagos de Torca" promoted listing unless
                # the user actually searched for that zone.
                if "Lagos de Torca" in content and "torca" not in busqueda_clean:
                    continue

                precios = _extract_prices(content)
                if not precios:
                    continue

                p_val = max(precios)
                enlace = card.query_selector("a")
                href = (enlace.get_attribute("href") if enlace else None) or ""
                resultados.append({
                    "Portal": "Finca Raiz",
                    "Precio": p_val,
                    "Precio_M2": p_val / area_m2,
                    "Habitaciones": hab,
                    "Banos": ban,
                    "Garajes": park,
                    "Antiguedad": edad,
                    "Descripcion": content.split('\n')[0][:100],
                    # urljoin handles relative and already-absolute hrefs alike.
                    "URL": urljoin(base_url, href),
                })
        except Exception as exc:
            # Scraping is best-effort: keep whatever was collected so far, but
            # leave a trace instead of failing silently.
            print(f"[motor_tramitia_limpio] scraping failed: {exc!r}")
        finally:
            browser.close()

    if not resultados:
        return "❌ No se encontraron datos nuevos. Intenta cambiar un poco el nombre del barrio.", None, None

    df_final = pd.DataFrame(resultados).drop_duplicates()

    # Unique file name per run so a previous report is never served or overwritten.
    pdf_path = f"Reporte_{uuid.uuid4().hex}.pdf"
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", 'B', 12)
    pdf.cell(0, 10, _latin1_safe(f"Analisis: {zona}"), ln=True)
    pdf.set_font("Arial", '', 10)
    for _, r in df_final.iterrows():
        pdf.cell(0, 8, _latin1_safe(f"Precio: {r['Precio']} - {r['Descripcion']}"), ln=True)
    pdf.output(pdf_path)

    return f"✅ Se encontraron {len(df_final)} registros nuevos para {zona}.", df_final, pdf_path
|
| 85 |
|
| 86 |
+
# --- INTERFAZ ---
|
| 87 |
+
# Gradio UI: search form in the left column, results (status message, table,
# downloadable PDF) in the right column.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 TramitIA Pro: Hard Reset de Datos")

    with gr.Row():
        with gr.Column():
            zona_in = gr.Textbox(
                label="Barrio y Ciudad (Escribe algo diferente, ej: Salitre Central Bogota)"
            )
            area_in = gr.Number(label="M2", value=80)
            tipo_in = gr.Dropdown(
                ["Apartamento", "Casa"],
                label="Tipo",
                value="Apartamento",
            )
            hab_in = gr.Number(label="Hab", value=3)
            ban_in = gr.Number(label="Banos", value=2)
            gar_in = gr.Number(label="Parqueaderos", value=1)
            ant_in = gr.Textbox(label="Antiguedad", value="5-10")
            btn = gr.Button("LIMPIAR CACHÉ Y BUSCAR")

        with gr.Column():
            msg = gr.Markdown()
            out_df = gr.Dataframe()
            out_pdf = gr.File()

    # Input order must match the parameter order of motor_tramitia_limpio.
    btn.click(
        fn=motor_tramitia_limpio,
        inputs=[zona_in, area_in, tipo_in, hab_in, ban_in, gar_in, ant_in],
        outputs=[msg, out_df, out_pdf],
    )

demo.launch()
|