Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,171 +1,140 @@
|
|
| 1 |
import os
|
| 2 |
import subprocess
|
|
|
|
| 3 |
import pandas as pd
|
| 4 |
import datetime
|
| 5 |
import folium
|
| 6 |
-
|
| 7 |
-
import gradio as gr
|
| 8 |
import random
|
| 9 |
import time
|
| 10 |
-
import
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
# --- CONFIGURACIÓN DE ENTORNO ---
|
| 14 |
try:
|
| 15 |
subprocess.run(["playwright", "install", "chromium"], check=True)
|
| 16 |
except:
|
| 17 |
pass
|
| 18 |
|
|
|
|
|
|
|
| 19 |
# --- SISTEMA DE CACHÉ ---
|
| 20 |
cache_tramitia = {}
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
if clave in cache_tramitia:
|
| 25 |
-
datos, pdf, timestamp = cache_tramitia[clave]
|
| 26 |
-
if datetime.datetime.now() - timestamp < datetime.timedelta(hours=24):
|
| 27 |
-
return datos, pdf
|
| 28 |
-
return None, None
|
| 29 |
-
|
| 30 |
-
# --- GENERADOR DE PDF CON QR ---
|
| 31 |
-
def generar_pdf_profesional(zona, area, df):
|
| 32 |
pdf = FPDF()
|
| 33 |
pdf.add_page()
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
pdf.set_font("Arial", 'B', 18)
|
| 37 |
-
pdf.set_text_color(30, 64, 175)
|
| 38 |
-
pdf.cell(0, 15, "TRAMITIA PRO - INFORME DE VALORACIÓN DETALLADA", ln=True, align='C')
|
| 39 |
-
pdf.set_font("Arial", '', 10)
|
| 40 |
-
pdf.set_text_color(0, 0, 0)
|
| 41 |
-
pdf.cell(0, 5, f"Fecha de emisión: {datetime.datetime.now().strftime('%d/%m/%Y %H:%M')}", ln=True, align='C')
|
| 42 |
-
pdf.ln(10)
|
| 43 |
-
|
| 44 |
-
# Resumen
|
| 45 |
-
pdf.set_fill_color(243, 244, 246)
|
| 46 |
-
pdf.set_font("Arial", 'B', 11)
|
| 47 |
-
pdf.cell(0, 10, f" UBICACIÓN: {zona.upper()} | ÁREA OBJETIVO: {area} m2", ln=True, fill=True)
|
| 48 |
pdf.ln(5)
|
| 49 |
-
|
| 50 |
-
# Detalle de Referencias
|
| 51 |
for i, r in df.iterrows():
|
| 52 |
-
if pdf.get_y() > 230: pdf.add_page() # Salto de página
|
| 53 |
-
|
| 54 |
-
# Generar QR
|
| 55 |
qr = qrcode.make(r['URL'])
|
| 56 |
qr_path = f"qr_{i}.png"
|
| 57 |
qr.save(qr_path)
|
| 58 |
-
|
| 59 |
-
pdf.set_font("Arial", 'B', 10)
|
| 60 |
-
pdf.cell(0, 8, f"REF {i+1}: {r['Tipo']} - {r['Precio_M2']:,.0f}/m2", ln=True)
|
| 61 |
|
|
|
|
|
|
|
| 62 |
pdf.set_font("Arial", '', 9)
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
pdf.
|
| 66 |
-
|
| 67 |
-
# Posicionar QR a la derecha
|
| 68 |
-
pdf.image(qr_path, x=165, y=pdf.get_y()-12, w=25)
|
| 69 |
-
pdf.ln(12)
|
| 70 |
if os.path.exists(qr_path): os.remove(qr_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
return pdf_path
|
| 75 |
-
|
| 76 |
-
# --- DEEP SCRAPER (Entra a cada anuncio) ---
|
| 77 |
-
def extraer_detalle_anuncio(context, url):
|
| 78 |
page = context.new_page()
|
| 79 |
detalle = {"Baños": "N/A", "Garajes": "0", "Antigüedad": "N/A", "Conjunto": "No"}
|
| 80 |
try:
|
| 81 |
-
page.goto(url, wait_until="domcontentloaded", timeout=
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
if "
|
| 87 |
-
try: detalle["Baños"] = [s for s in contenido.split() if s.isdigit() and int(s) < 10][0]
|
| 88 |
-
except: pass
|
| 89 |
-
|
| 90 |
-
if "garaje" in contenido.lower() or "parqueadero" in contenido.lower():
|
| 91 |
-
detalle["Garajes"] = "1" # Valor base si se menciona
|
| 92 |
-
|
| 93 |
-
if "año" in contenido.lower():
|
| 94 |
-
detalle["Antigüedad"] = "1-10 años"
|
| 95 |
-
|
| 96 |
-
if "conjunto" in contenido.lower() or "vigilancia" in contenido.lower():
|
| 97 |
-
detalle["Conjunto"] = "Sí"
|
| 98 |
-
|
| 99 |
except: pass
|
| 100 |
page.close()
|
| 101 |
return detalle
|
| 102 |
|
| 103 |
-
def
|
| 104 |
-
#
|
| 105 |
-
|
| 106 |
-
if
|
| 107 |
-
return
|
| 108 |
|
|
|
|
| 109 |
with sync_playwright() as p:
|
| 110 |
browser = p.chromium.launch(headless=True)
|
| 111 |
-
context = browser.new_context(user_agent="Mozilla/5.0
|
| 112 |
page = context.new_page()
|
| 113 |
|
| 114 |
-
zona_url = zona.lower().replace(" ", "-")
|
| 115 |
-
resultados = []
|
| 116 |
-
|
| 117 |
for modo in ["arriendo", "venta"]:
|
| 118 |
-
|
| 119 |
try:
|
| 120 |
-
page.goto(
|
| 121 |
-
page.
|
| 122 |
-
cards = page.query_selector_all("article")[:3] # 3 de cada uno para no saturar
|
| 123 |
-
|
| 124 |
for card in cards:
|
| 125 |
link = "https://www.fincaraiz.com.co" + card.query_selector("a").get_attribute("href")
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
if precio > 0:
|
| 133 |
resultados.append({
|
| 134 |
"Tipo": modo.capitalize(),
|
| 135 |
"Precio": precio,
|
| 136 |
"Precio_M2": precio / area,
|
| 137 |
-
**
|
| 138 |
-
"URL": link
|
| 139 |
})
|
| 140 |
except: continue
|
| 141 |
-
|
| 142 |
browser.close()
|
| 143 |
|
| 144 |
if not resultados:
|
| 145 |
-
return "
|
| 146 |
|
| 147 |
df = pd.DataFrame(resultados)
|
| 148 |
-
pdf_final = generar_pdf_profesional(zona, area, df)
|
| 149 |
|
| 150 |
-
#
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
-
|
|
|
|
| 154 |
|
| 155 |
# --- INTERFAZ ---
|
| 156 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 157 |
-
gr.HTML("<h1 style='text-align:center
|
| 158 |
with gr.Row():
|
| 159 |
with gr.Column(scale=1):
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
btn = gr.Button("
|
| 164 |
with gr.Column(scale=2):
|
| 165 |
msg = gr.Markdown()
|
| 166 |
-
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
-
btn.click(
|
| 170 |
|
| 171 |
-
demo.launch()
|
|
|
|
| 1 |
import os
|
| 2 |
import subprocess
|
| 3 |
+
import sys
|
| 4 |
import pandas as pd
|
| 5 |
import datetime
|
| 6 |
import folium
|
| 7 |
+
import qrcode
|
|
|
|
| 8 |
import random
|
| 9 |
import time
|
| 10 |
+
from fpdf import FPDF
|
| 11 |
+
import gradio as gr
|
| 12 |
+
import plotly.express as px
|
| 13 |
+
|
| 14 |
+
# --- AUTO-INSTALL ---
# Best-effort bootstrap for environments where Playwright is not preinstalled.
# NOTE(review): the third-party imports at the top of the file run before this
# point, so this only helps when playwright itself is the missing package.
try:
    import playwright  # noqa: F401  (imported only to probe availability)
except ImportError:
    # Each requirement must be its own argv entry: a single space-separated
    # string is handed to pip as one (invalid) requirement specifier.
    subprocess.run(
        [
            sys.executable, "-m", "pip", "install",
            "playwright", "pandas", "fpdf", "qrcode", "folium", "plotly",
        ],
        check=False,
    )

try:
    # Run the CLI through the current interpreter so it is found even when the
    # `playwright` console script is not on PATH.
    subprocess.run(
        [sys.executable, "-m", "playwright", "install", "chromium"],
        check=True,
    )
except (OSError, subprocess.SubprocessError) as exc:
    # Deliberately non-fatal (the browser may already be installed), but make
    # the failure visible instead of swallowing it silently.
    print(f"playwright install chromium failed: {exc}", file=sys.stderr)

from playwright.sync_api import sync_playwright

# --- CACHE ---
# In-memory results cache; read and populated by analizar_completo.
cache_tramitia = {}
|
| 29 |
|
| 30 |
+
# --- GENERADOR DE PDF ---
|
| 31 |
+
def generar_pdf_tramitia(zona, area, df):
    """Build the PDF report for a zone and return the path of the written file.

    One entry is rendered per row of ``df``: a numbered heading with the
    listing type and price, a line with bathrooms / garages / age, and a QR
    code that encodes the listing URL.

    Args:
        zona: Zone name; upper-cased into the report title.
        area: Target area in m2. Accepted for call compatibility; it is not
            rendered in the report.
        df: DataFrame providing the columns ``URL``, ``Tipo``, ``Precio``,
            ``Baños``, ``Garajes`` and ``Antigüedad``.

    Returns:
        Path of the generated ``Reporte_TramitIA.pdf``.
    """
    import tempfile

    # Each report gets its own directory: a fixed path in the working
    # directory would be overwritten by the next report, so paths handed out
    # earlier would end up pointing at another zone's PDF.
    carpeta = tempfile.mkdtemp(prefix="tramitia_")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", 'B', 16)
    pdf.cell(0, 15, f"TRAMITIA PRO: REPORTE {zona.upper()}", ln=True, align='C')
    pdf.ln(5)

    # Number entries by position so the labels stay 1..n whatever the index is.
    for numero, (_, fila) in enumerate(df.iterrows(), start=1):
        qr_path = os.path.join(carpeta, f"qr_{numero}.png")
        try:
            qrcode.make(fila['URL']).save(qr_path)

            pdf.set_font("Arial", 'B', 10)
            pdf.cell(0, 10, f"{numero}. {fila['Tipo']} - ${fila['Precio']:,.0f}", ln=True)
            pdf.set_font("Arial", '', 9)
            pdf.multi_cell(
                140, 6,
                f"Baños: {fila['Baños']} | Garajes: {fila['Garajes']} | Antigüedad: {fila['Antigüedad']}",
            )
            # Place the QR to the right of the text lines just written.
            pdf.image(qr_path, x=160, y=pdf.get_y() - 12, w=25)
            pdf.ln(10)
        finally:
            # Remove the temporary QR image even when rendering fails.
            if os.path.exists(qr_path):
                os.remove(qr_path)

    path = os.path.join(carpeta, "Reporte_TramitIA.pdf")
    pdf.output(path)
    return path
|
| 54 |
|
| 55 |
+
# --- DEEP SCRAPER ---
|
| 56 |
+
def extraer_detalle(context, url):
    """Open a listing page and derive coarse attributes from its visible text.

    Detection is keyword based and best-effort: any failure while loading or
    reading the page leaves the remaining fields at their defaults.

    Args:
        context: Browser context used to open a new page (``new_page()``).
        url: Address of the listing to inspect.

    Returns:
        dict with the keys ``Baños``, ``Garajes``, ``Antigüedad`` and
        ``Conjunto``.
    """
    import re

    detalle = {"Baños": "N/A", "Garajes": "0", "Antigüedad": "N/A", "Conjunto": "No"}
    page = context.new_page()
    try:
        page.goto(url, wait_until="domcontentloaded", timeout=20000)
        texto = page.inner_text("body").lower()

        if "baño" in texto:
            detalle["Baños"] = "2+"  # coarse placeholder, kept simple for speed
        if "garaje" in texto or "parqueadero" in texto:
            detalle["Garajes"] = "1+"
        # Match "año"/"años" as a whole word: a plain substring test also
        # fires on "baño", flagging every listing that mentions a bathroom.
        if re.search(r"\baños?\b", texto):
            detalle["Antigüedad"] = "Usado"
        if "conjunto" in texto:
            detalle["Conjunto"] = "Sí"
    except Exception:
        # Best-effort scrape: keep whatever was detected before the failure.
        pass
    finally:
        page.close()
    return detalle
|
| 69 |
|
| 70 |
+
def analizar_completo(zona, area, tipo):
    """Scrape rental and sale listings for a zone and build the full analysis.

    For each mode ("arriendo", "venta") the first three ``article`` cards of
    the listing page are read; every card with a price contributes one row
    enriched with the attributes returned by ``extraer_detalle``.

    Args:
        zona: Zone / neighbourhood name typed by the user.
        area: Target area in m2; divides each price to obtain ``Precio_M2``.
        tipo: Property type, used as the first URL path segment.

    Returns:
        Tuple ``(markdown summary, DataFrame, pdf path, plotly figure)``.
        When the input is invalid or nothing was scraped, the first element
        is a message and the other three are ``None``.
    """
    from urllib.parse import urljoin

    zona = (zona or "").strip()
    # Reject unusable input up front; a zero/None area would otherwise raise
    # inside the scraping loop and be reported as "no data".
    if not zona or not tipo or area is None or area <= 0:
        return "Ingresa una zona, un tipo y un área mayor que 0", None, None, None

    # Precio_M2 depends on the area, so the area must be part of the key.
    clave = f"{zona}_{tipo}_{area}".lower()
    if clave in cache_tramitia:
        return cache_tramitia[clave]

    base = "https://www.fincaraiz.com.co"
    zona_url = zona.lower().replace(' ', '-')
    resultados = []
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(user_agent="Mozilla/5.0")
            page = context.new_page()

            for modo in ["arriendo", "venta"]:
                url = f"{base}/{tipo.lower()}/{modo}/{zona_url}"
                try:
                    page.goto(url, wait_until="domcontentloaded", timeout=30000)
                    cards = page.query_selector_all("article")[:3]
                except Exception:
                    # Best-effort: a listing page that fails to load only
                    # skips this mode.
                    continue

                for card in cards:
                    try:
                        enlace = card.query_selector("a")
                        href = enlace.get_attribute("href") if enlace else None
                        precio = _extraer_precio(card.inner_text())
                        if not href or precio is None:
                            continue
                        # urljoin keeps already-absolute hrefs intact.
                        link = urljoin(base, href)
                        # Open the detail page only for cards that will be kept.
                        det = extraer_detalle(context, link)
                    except Exception:
                        # One malformed card must not discard the others.
                        continue
                    resultados.append({
                        "Tipo": modo.capitalize(),
                        "Precio": precio,
                        "Precio_M2": precio / area,
                        **det, "URL": link
                    })
        finally:
            browser.close()

    if not resultados:
        return "No hay datos", None, None, None

    df = pd.DataFrame(resultados)

    # --- BUILD CHART ---
    fig = px.bar(df, x="Tipo", y="Precio_M2", color="Tipo",
                 title="Comparativa Valor M2: Arriendo vs Venta",
                 labels={"Precio_M2": "Precio por M2 (COP)"},
                 template="plotly_white")

    pdf_path = generar_pdf_tramitia(zona, area, df)
    resumen = f"### Análisis para {zona}\nComparativa de {len(df)} inmuebles detallados."

    cache_tramitia[clave] = (resumen, df, pdf_path, fig)
    return resumen, df, pdf_path, fig


def _extraer_precio(texto_card):
    """Return the largest number of six or more digits in a card's text, or None."""
    limpio = texto_card.replace('.', '').replace('$', '')
    precios = [int(s) for s in limpio.split() if s.isdigit() and len(s) >= 6]
    return max(precios) if precios else None
|
| 119 |
|
| 120 |
# --- INTERFAZ ---
|
| 121 |
# Two-column layout: inputs on the left, analysis outputs on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML("<h1 style='text-align:center'>🤖 TramitIA Pro</h1>")
    with gr.Row():
        with gr.Column(scale=1):
            zona_input = gr.Textbox(label="Zona/Barrio")
            area_input = gr.Number(label="Área M2", value=70)
            tipo_input = gr.Dropdown(
                ["Apartamento", "Casa"],
                label="Tipo",
                value="Apartamento",
            )
            run_button = gr.Button("EJECUTAR ANÁLISIS VISUAL", variant="primary")
        with gr.Column(scale=2):
            summary_md = gr.Markdown()
            market_plot = gr.Plot(label="Gráfica de Mercado")
            with gr.Tabs():
                with gr.TabItem("Referencias"):
                    references_table = gr.Dataframe()
                with gr.TabItem("PDF"):
                    report_file = gr.File()

    # Output order mirrors the tuple returned by analizar_completo:
    # (summary, table, pdf path, figure).
    run_button.click(
        fn=analizar_completo,
        inputs=[zona_input, area_input, tipo_input],
        outputs=[summary_md, references_table, report_file, market_plot],
    )

demo.launch(ssr_mode=False)
|