Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,98 +9,108 @@ import plotly.express as px
|
|
| 9 |
from playwright.sync_api import sync_playwright
|
| 10 |
import time
|
| 11 |
|
| 12 |
-
# --- CONFIGURACIÓN DE
|
| 13 |
try:
|
| 14 |
subprocess.run(["playwright", "install", "chromium"], check=True)
|
| 15 |
except:
|
| 16 |
pass
|
| 17 |
|
| 18 |
-
# --- GENERADOR DE REPORTE TÉCNICO
|
| 19 |
-
def
|
| 20 |
pdf = FPDF()
|
| 21 |
pdf.add_page()
|
| 22 |
pdf.set_font("Arial", 'B', 16)
|
| 23 |
pdf.set_text_color(26, 54, 93)
|
| 24 |
-
pdf.cell(0, 15, "TRAMITIA PRO -
|
| 25 |
|
| 26 |
pdf.set_font("Arial", '', 10)
|
| 27 |
pdf.set_text_color(0, 0, 0)
|
| 28 |
-
pdf.cell(0, 5, f"Zona: {zona.upper()}
|
|
|
|
| 29 |
pdf.cell(0, 5, f"Fecha: {datetime.datetime.now().strftime('%d/%m/%Y')}", ln=True)
|
| 30 |
pdf.ln(10)
|
| 31 |
|
| 32 |
for i, r in df.iterrows():
|
| 33 |
-
pdf.set_font("Arial", 'B',
|
| 34 |
-
pdf.set_fill_color(
|
| 35 |
pdf.cell(0, 8, f"REFERENCIA #{i+1} - {r['Portal']}", ln=True, fill=True)
|
| 36 |
|
| 37 |
pdf.set_font("Arial", '', 9)
|
| 38 |
-
|
|
|
|
|
|
|
| 39 |
|
| 40 |
pdf.set_font("Arial", 'I', 8)
|
| 41 |
-
pdf.multi_cell(0, 5, f"
|
| 42 |
|
| 43 |
pdf.set_font("Arial", 'U', 8)
|
| 44 |
pdf.set_text_color(0, 0, 255)
|
| 45 |
-
|
| 46 |
-
pdf.cell(0, 7, f"Link Directo: {r['URL']}", ln=True)
|
| 47 |
pdf.set_text_color(0, 0, 0)
|
| 48 |
-
pdf.ln(
|
| 49 |
|
| 50 |
if pdf.get_y() > 250: pdf.add_page()
|
| 51 |
|
| 52 |
-
path_pdf = "
|
| 53 |
pdf.output(path_pdf)
|
| 54 |
return path_pdf
|
| 55 |
|
| 56 |
-
# --- MOTOR DE BÚSQUEDA
|
| 57 |
def motor_tramitia_pro(zona, area, tipo, hab, ban, park, edad):
|
| 58 |
-
#
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
| 62 |
all_data = []
|
| 63 |
|
| 64 |
with sync_playwright() as p:
|
| 65 |
browser = p.chromium.launch(headless=True)
|
| 66 |
-
|
|
|
|
| 67 |
|
| 68 |
-
#
|
| 69 |
portales = [
|
| 70 |
{
|
| 71 |
"id": "Finca Raiz",
|
| 72 |
-
"url": f"https://www.fincaraiz.com.co/{tipo.lower()}/arriendo/{
|
| 73 |
},
|
| 74 |
{
|
| 75 |
"id": "Metrocuadrado",
|
| 76 |
-
"url": f"https://www.metrocuadrado.com/{tipo.lower()}/arriendo/bogota/{
|
| 77 |
}
|
| 78 |
]
|
| 79 |
|
| 80 |
for p_info in portales:
|
| 81 |
page = context.new_page()
|
| 82 |
try:
|
|
|
|
| 83 |
page.goto(p_info["url"], wait_until="load", timeout=60000)
|
| 84 |
-
time.sleep(
|
| 85 |
-
page.mouse.wheel(0, 2000) # Scroll para cargar
|
| 86 |
|
| 87 |
-
|
|
|
|
| 88 |
|
| 89 |
for card in cards:
|
|
|
|
|
|
|
| 90 |
try:
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
link_elem = card.query_selector("a")
|
| 93 |
href = link_elem.get_attribute("href") if link_elem else ""
|
| 94 |
url_final = href if "http" in href else f"https://www.{p_info['id'].lower().replace(' ', '')}.com.co{href}"
|
| 95 |
|
| 96 |
-
|
| 97 |
-
texto_completo = card.inner_text()
|
| 98 |
-
precios = [int(s) for s in texto_completo.replace('.', '').replace('$', '').split() if s.isdigit() and len(s) >= 6]
|
| 99 |
|
| 100 |
if precios:
|
| 101 |
-
#
|
| 102 |
-
desc_lineas = [l for l in
|
| 103 |
-
descripcion =
|
| 104 |
|
| 105 |
all_data.append({
|
| 106 |
"Portal": p_info["id"],
|
|
@@ -119,38 +129,44 @@ def motor_tramitia_pro(zona, area, tipo, hab, ban, park, edad):
|
|
| 119 |
browser.close()
|
| 120 |
|
| 121 |
if not all_data:
|
| 122 |
-
return "⚠️
|
| 123 |
|
| 124 |
df = pd.DataFrame(all_data)
|
| 125 |
-
pdf_path =
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
m = folium.Map(location=[4.66, -74.11], zoom_start=14)
|
|
|
|
| 128 |
|
| 129 |
-
return f"✅ Análisis Exitoso
|
| 130 |
|
| 131 |
-
# --- INTERFAZ ---
|
| 132 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 133 |
-
gr.HTML("<h1 style='text-align:center'>🤖 TramitIA Pro:
|
| 134 |
|
| 135 |
with gr.Row():
|
| 136 |
with gr.Column(scale=1):
|
| 137 |
-
zona_in = gr.Textbox(label="Barrio y Ciudad", value="
|
| 138 |
-
area_in = gr.Number(label="Área M2", value=
|
| 139 |
-
tipo_in = gr.Dropdown(["Apartamento", "Casa"], label="Tipo", value="Apartamento")
|
| 140 |
with gr.Row():
|
| 141 |
hab_in = gr.Number(label="Habitaciones", value=3)
|
| 142 |
ban_in = gr.Number(label="Baños", value=2)
|
| 143 |
with gr.Row():
|
| 144 |
-
gar_in = gr.Number(label="
|
| 145 |
ant_in = gr.Dropdown(["Nuevo", "1-5 años", "5-10 años", "Más de 10"], label="Antigüedad", value="5-10 años")
|
| 146 |
-
btn = gr.Button("
|
| 147 |
|
| 148 |
with gr.Column(scale=2):
|
| 149 |
-
msg = gr.Markdown()
|
| 150 |
with gr.Tabs():
|
| 151 |
-
with gr.TabItem("📋 Ficha Técnica"):
|
| 152 |
table = gr.Dataframe(interactive=False)
|
| 153 |
-
file = gr.File(label="Descargar
|
| 154 |
with gr.TabItem("📊 Mapa y Gráficas"):
|
| 155 |
plot = gr.Plot()
|
| 156 |
mapa = gr.HTML()
|
|
|
|
| 9 |
from playwright.sync_api import sync_playwright
|
| 10 |
import time
|
| 11 |
|
| 12 |
+
# --- CONFIGURACIÓN DE ENTORNO ---
|
| 13 |
# Best-effort download of the Chromium build that Playwright drives. The app
# keeps starting even when this fails, but the failure is reported instead of
# being silently discarded so a missing browser can be diagnosed later.
try:
    subprocess.run(["playwright", "install", "chromium"], check=True)
except (subprocess.CalledProcessError, OSError) as exc:
    # CalledProcessError: the installer exited non-zero.
    # OSError: the "playwright" executable is missing or not runnable.
    print(f"[startup] 'playwright install chromium' failed: {exc}", flush=True)
|
| 17 |
|
| 18 |
+
# --- GENERADOR DE REPORTE TÉCNICO ---
|
| 19 |
+
def generar_pdf_pro(zona, area_m2, df):
    """Write the rent-estimate PDF report and return the path of the file.

    Args:
        zona: Location label typed by the user; printed upper-cased in the
            report header.
        area_m2: Reference area; printed as given, followed by "m2".
        df: DataFrame with one row per listing. The columns read here are
            ``Portal``, ``Precio``, ``Habitaciones``, ``Banos``, ``Garajes``,
            ``Antiguedad``, ``Descripcion`` and ``URL``.

    Returns:
        The path of the generated file, ``"Estimacion_Renta_TramitIA.pdf"``.
    """

    def _latin1(value):
        # The core "Arial" font only covers Latin-1. Listing text comes from
        # scraped pages and may contain emoji, curly quotes, dashes, etc.;
        # those are replaced with "?" so one odd character cannot abort the
        # whole report with an encoding error.
        return str(value).encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()

    # Report title.
    pdf.set_font("Arial", 'B', 16)
    pdf.set_text_color(26, 54, 93)
    pdf.cell(0, 15, "TRAMITIA PRO - INFORME TÉCNICO DE ESTIMACIÓN", ln=True, align='C')

    # Header block: zone, reference area and generation date.
    pdf.set_font("Arial", '', 10)
    pdf.set_text_color(0, 0, 0)
    pdf.cell(0, 5, _latin1(f"Zona Analizada: {zona.upper()}"), ln=True)
    pdf.cell(0, 5, _latin1(f"Área de Referencia: {area_m2} m2"), ln=True)
    pdf.cell(0, 5, f"Fecha: {datetime.datetime.now().strftime('%d/%m/%Y')}", ln=True)
    pdf.ln(10)

    # Number the references by position rather than by DataFrame index so the
    # numbering stays 1..N even if the frame was filtered or re-indexed.
    for numero, (_, r) in enumerate(df.iterrows(), start=1):
        # Shaded heading for the listing.
        pdf.set_font("Arial", 'B', 11)
        pdf.set_fill_color(230, 235, 245)
        pdf.cell(0, 8, _latin1(f"REFERENCIA #{numero} - {r['Portal']}"), ln=True, fill=True)

        # One-line technical summary.
        pdf.set_font("Arial", '', 9)
        detalle = f"Precio: ${r['Precio']:,.0f} | Hab: {r['Habitaciones']} | Baños: {r['Banos']} | Garajes: {r['Garajes']} | Edad: {r['Antiguedad']}"
        pdf.cell(0, 7, _latin1(detalle), ln=True)

        # Free-text description; multi_cell wraps long text across lines.
        pdf.set_font("Arial", 'I', 8)
        pdf.multi_cell(0, 5, _latin1(f"Descripción: {r['Descripcion']}"))

        # Listing URL, styled as a link (underlined, blue).
        pdf.set_font("Arial", 'U', 8)
        pdf.set_text_color(0, 0, 255)
        pdf.cell(0, 7, _latin1(f"Link de la publicación: {r['URL']}"), ln=True)
        pdf.set_text_color(0, 0, 0)
        pdf.ln(5)

        # Start a new page once the cursor is near the bottom of the page.
        if pdf.get_y() > 250:
            pdf.add_page()

    # NOTE(review): fixed file name in the working directory; concurrent
    # requests would overwrite each other's report — confirm this is acceptable.
    path_pdf = "Estimacion_Renta_TramitIA.pdf"
    pdf.output(path_pdf)
    return path_pdf
|
| 57 |
|
| 58 |
+
# --- MOTOR DE BÚSQUEDA HÍBRIDO ---
|
| 59 |
def motor_tramitia_pro(zona, area, tipo, hab, ban, park, edad):
|
| 60 |
+
# Forzar la ciudad para evitar saltos de localización
|
| 61 |
+
busqueda_url = zona.lower().replace(" ", "-")
|
| 62 |
+
if "bogota" not in busqueda_url:
|
| 63 |
+
busqueda_url += "-bogota"
|
| 64 |
+
|
| 65 |
all_data = []
|
| 66 |
|
| 67 |
with sync_playwright() as p:
|
| 68 |
browser = p.chromium.launch(headless=True)
|
| 69 |
+
# Cambiamos el contexto para parecer un navegador real en Colombia
|
| 70 |
+
context = browser.new_context(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
|
| 71 |
|
| 72 |
+
# FUENTES
|
| 73 |
portales = [
|
| 74 |
{
|
| 75 |
"id": "Finca Raiz",
|
| 76 |
+
"url": f"https://www.fincaraiz.com.co/{tipo.lower()}/arriendo/{busqueda_url}?habitaciones={int(hab)}&banos={int(ban)}"
|
| 77 |
},
|
| 78 |
{
|
| 79 |
"id": "Metrocuadrado",
|
| 80 |
+
"url": f"https://www.metrocuadrado.com/{tipo.lower()}/arriendo/bogota/{busqueda_url.replace('-bogota','')}/{int(hab)}-habitaciones/{int(ban)}-banos/"
|
| 81 |
}
|
| 82 |
]
|
| 83 |
|
| 84 |
for p_info in portales:
|
| 85 |
page = context.new_page()
|
| 86 |
try:
|
| 87 |
+
# Navegar y esperar carga real
|
| 88 |
page.goto(p_info["url"], wait_until="load", timeout=60000)
|
| 89 |
+
time.sleep(5)
|
| 90 |
+
page.mouse.wheel(0, 2000) # Scroll para cargar contenido dinámico
|
| 91 |
|
| 92 |
+
# Buscamos anuncios que coincidan con la zona para evitar falsos positivos
|
| 93 |
+
cards = page.query_selector_all("article, [class*='Card'], .listing-item")
|
| 94 |
|
| 95 |
for card in cards:
|
| 96 |
+
if len(all_data) >= 6: break # Límite de referencias
|
| 97 |
+
|
| 98 |
try:
|
| 99 |
+
full_text = card.inner_text()
|
| 100 |
+
# Validación de seguridad: Si el texto del anuncio no menciona la zona o ciudad, lo ignoramos
|
| 101 |
+
if "medellin" in full_text.lower() and "bogota" in busqueda_url:
|
| 102 |
+
continue
|
| 103 |
+
|
| 104 |
link_elem = card.query_selector("a")
|
| 105 |
href = link_elem.get_attribute("href") if link_elem else ""
|
| 106 |
url_final = href if "http" in href else f"https://www.{p_info['id'].lower().replace(' ', '')}.com.co{href}"
|
| 107 |
|
| 108 |
+
precios = [int(s) for s in full_text.replace('.', '').replace('$', '').split() if s.isdigit() and len(s) >= 6]
|
|
|
|
|
|
|
| 109 |
|
| 110 |
if precios:
|
| 111 |
+
# Extraer descripción real (buscando líneas con texto largo)
|
| 112 |
+
desc_lineas = [l.strip() for l in full_text.split('\n') if len(l) > 20]
|
| 113 |
+
descripcion = desc_lineas[0] if desc_lineas else "Ver más detalles en el portal."
|
| 114 |
|
| 115 |
all_data.append({
|
| 116 |
"Portal": p_info["id"],
|
|
|
|
| 129 |
browser.close()
|
| 130 |
|
| 131 |
if not all_data:
|
| 132 |
+
return "⚠️ No se hallaron datos exactos. Intenta con una zona más amplia o verifica la ortografía.", None, None, None, None
|
| 133 |
|
| 134 |
df = pd.DataFrame(all_data)
|
| 135 |
+
pdf_path = generar_pdf_pro(zona, area, df)
|
| 136 |
+
|
| 137 |
+
# Gráfica comparativa
|
| 138 |
+
fig = px.bar(df, x="Portal", y="Precio_M2", color="Portal", barmode="group",
|
| 139 |
+
title="Comparativa Valor M2 por Fuente")
|
| 140 |
+
|
| 141 |
+
# Mapa centrado en Bogotá
|
| 142 |
m = folium.Map(location=[4.66, -74.11], zoom_start=14)
|
| 143 |
+
folium.Marker([4.66, -74.11], popup=f"Análisis {zona}").add_to(m)
|
| 144 |
|
| 145 |
+
return f"✅ Análisis Exitoso para {zona}. Se encontraron {len(df)} referencias justificadas.", df, pdf_path, fig, m._repr_html_()
|
| 146 |
|
| 147 |
+
# --- INTERFAZ PANEL DE CONTROL ---
|
| 148 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 149 |
+
gr.HTML("<h1 style='text-align:center'>🤖 TramitIA Pro</h1><p style='text-align:center;'>Sistema de Estimación de Renta Justificada</p>")
|
| 150 |
|
| 151 |
with gr.Row():
|
| 152 |
with gr.Column(scale=1):
|
| 153 |
+
zona_in = gr.Textbox(label="Ubicación (Barrio y Ciudad)", value="Salitre Bogota")
|
| 154 |
+
area_in = gr.Number(label="Área M2", value=150)
|
| 155 |
+
tipo_in = gr.Dropdown(["Apartamento", "Casa"], label="Tipo de Inmueble", value="Apartamento")
|
| 156 |
with gr.Row():
|
| 157 |
hab_in = gr.Number(label="Habitaciones", value=3)
|
| 158 |
ban_in = gr.Number(label="Baños", value=2)
|
| 159 |
with gr.Row():
|
| 160 |
+
gar_in = gr.Number(label="Parqueaderos", value=1)
|
| 161 |
ant_in = gr.Dropdown(["Nuevo", "1-5 años", "5-10 años", "Más de 10"], label="Antigüedad", value="5-10 años")
|
| 162 |
+
btn = gr.Button("EJECUTAR ANÁLISIS PROFUNDO", variant="primary")
|
| 163 |
|
| 164 |
with gr.Column(scale=2):
|
| 165 |
+
msg = gr.Markdown("Esperando consulta...")
|
| 166 |
with gr.Tabs():
|
| 167 |
+
with gr.TabItem("📋 Ficha Técnica y PDF"):
|
| 168 |
table = gr.Dataframe(interactive=False)
|
| 169 |
+
file = gr.File(label="Descargar Reporte de Consultoría")
|
| 170 |
with gr.TabItem("📊 Mapa y Gráficas"):
|
| 171 |
plot = gr.Plot()
|
| 172 |
mapa = gr.HTML()
|