# ImmoGuardIA / app.py
# jcalbornoz's picture
# Update app.py
# 7625658 verified
import os
import json
import io
import datetime
import re
import traceback
import base64
import subprocess
import time
from flask import Flask, request, jsonify, render_template, Response
import requests
import PyPDF2
import fitz
from PIL import Image, ImageOps
import pytesseract
import docx
from geopy.geocoders import Nominatim
import folium
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
# Flask application object; template_folder='.' makes render_template look
# next to the application root instead of a "templates" sub-directory.
app = Flask(__name__, template_folder='.')
# Cap request bodies at 64 MiB.
app.config['MAX_CONTENT_LENGTH'] = 64 * 1024 * 1024
# Google Generative Language API key; None when the variable is not set.
API_KEY = os.environ.get("GOOGLE_API_KEY")

# Probe once, at import time, whether the tesseract binary can be executed.
# Only "binary missing / not executable / exited non-zero / hung" count as
# "no OCR"; anything else (e.g. KeyboardInterrupt) is allowed to propagate.
try:
    subprocess.check_output(["tesseract", "--version"], timeout=10)
    HAS_OCR = True
except (OSError, subprocess.SubprocessError):
    HAS_OCR = False
# --- 1. MODELO ---
def find_valid_model():
    """Pick a Gemini endpoint that supports ``generateContent``.

    The model list is fetched from the Generative Language API and the first
    model whose name contains "flash" is preferred; otherwise the first
    listed model that supports ``generateContent`` is used.

    Returns:
        tuple: ``(url, name)``. ``url`` is the full ``:generateContent``
        endpoint including the API key, or ``None`` when no API key is
        configured (``name`` then carries the message "Falta API Key").
        When the listing fails (non-200 status, network error, malformed
        or empty listing) the hard-coded ``gemini-1.5-flash`` endpoint is
        returned so the caller can still attempt the request.
    """
    if not API_KEY:
        return None, "Falta API Key"

    base = "https://generativelanguage.googleapis.com/v1beta/models"
    fallback_name = "gemini-1.5-flash"
    fallback = (
        f"{base}/{fallback_name}:generateContent?key={API_KEY}",
        fallback_name,
    )

    try:
        # A timeout keeps a stalled listing call from hanging the request.
        resp = requests.get(f"{base}?key={API_KEY}", timeout=15)
        if resp.status_code != 200:
            return fallback
        listed = resp.json().get('models') or []
    except (requests.RequestException, ValueError, AttributeError):
        # Network failure, non-JSON body, or a JSON body that is not an object.
        return fallback

    models = [
        m['name']
        for m in listed
        if isinstance(m, dict)
        and m.get('name')
        and "generateContent" in (m.get('supportedGenerationMethods') or [])
    ]
    if not models:
        # Nothing usable was listed: fall back instead of crashing on None.
        return fallback

    selected = next((m for m in models if "flash" in m), models[0])
    endpoint = f"{base}/{selected.replace('models/', '')}:generateContent?key={API_KEY}"
    return endpoint, selected
# --- 2. EXTRACCIÓN (20 PÁGINAS) ---
def ocr_proc(img_bytes):
    """Run Tesseract OCR (Spanish language data) over an encoded image.

    The image is converted to grayscale and auto-contrasted before being
    handed to Tesseract with page segmentation mode 1.

    Args:
        img_bytes: Raw bytes of an image file in a format PIL can open.

    Returns:
        str: The recognised text, or ``""`` when OCR is unavailable
        (``HAS_OCR`` is false) or the image cannot be processed.
    """
    if not HAS_OCR:
        return ""
    try:
        # The context manager releases the decoder as soon as OCR is done.
        with Image.open(io.BytesIO(img_bytes)) as img:
            prepared = ImageOps.autocontrast(ImageOps.grayscale(img))
            return pytesseract.image_to_string(prepared, lang='spa', config='--psm 1')
    except Exception as e:
        # OCR is best-effort: report the failure and carry on without text.
        print(f"Error OCR: {e}")
        return ""
def _extract_pdf(content, max_ocr_pages, max_page_images):
    """Return ``(text, images)`` for one PDF, OCR-ing pages when its text layer is thin."""
    chunks = []
    images = []
    try:
        for page in PyPDF2.PdfReader(io.BytesIO(content)).pages:
            chunks.append(page.extract_text() or "")
    except Exception as e:
        # Keep whatever pages were read before the failure.
        print(f"Error PDF (texto): {e}")

    # Decide on OCR from THIS document's text only, so a scanned PDF is still
    # OCR'd when earlier uploads already produced plenty of text.
    if len("".join(chunks).strip()) >= 200 or not HAS_OCR:
        return "".join(chunks), images

    try:
        doc = fitz.open(stream=content, filetype="pdf")
        try:
            for i in range(min(len(doc), max_ocr_pages)):
                jpeg = doc.load_page(i).get_pixmap(dpi=150).tobytes("jpeg")
                chunks.append(ocr_proc(jpeg) + "\n")
                # Only the first pages are also forwarded as images.
                if i < max_page_images:
                    images.append({
                        "mime_type": "image/jpeg",
                        "data": base64.b64encode(jpeg).decode('utf-8'),
                    })
        finally:
            doc.close()
    except Exception as e:
        print(f"Error PDF (OCR): {e}")
    return "".join(chunks), images


def extract_multimodal(files, max_ocr_pages=20, max_page_images=3):
    """Extract text and inline images from uploaded documents.

    Supported inputs are chosen by file extension: ``.pdf`` (embedded text,
    with an OCR fallback when fewer than 200 characters were found),
    ``.jpg``/``.jpeg``/``.png`` (OCR plus the image itself) and ``.docx``
    (paragraph text). Any other extension contributes only its header line.

    Args:
        files: Iterable of upload objects exposing ``filename``, ``read()``
            and ``seek()``.
        max_ocr_pages: Maximum number of PDF pages rendered and OCR'd.
        max_page_images: Number of leading PDF pages also returned as images.

    Returns:
        tuple: ``(text, images)`` where ``text`` concatenates a
        ``--- DOC: name ---`` header plus extracted text per file, and
        ``images`` is a list of ``{"mime_type", "data"}`` dicts with
        base64-encoded payloads.
    """
    text_parts = []
    images = []
    for f in files:
        try:
            fname = (f.filename or "").lower()
            content = f.read()
            f.seek(0)
            text_parts.append(f"\n\n--- DOC: {fname} ---\n")
            if fname.endswith('.pdf'):
                pdf_text, pdf_images = _extract_pdf(content, max_ocr_pages, max_page_images)
                text_parts.append(pdf_text)
                images.extend(pdf_images)
            elif fname.endswith(('.jpg', '.png', '.jpeg')):
                text_parts.append(ocr_proc(content))
                # Label the payload with its real type; PNG bytes are not JPEG.
                mime = "image/png" if fname.endswith('.png') else "image/jpeg"
                images.append({
                    "mime_type": mime,
                    "data": base64.b64encode(content).decode('utf-8'),
                })
            elif fname.endswith('.docx'):
                document = docx.Document(io.BytesIO(content))
                text_parts.append("\n".join(p.text for p in document.paragraphs))
        except Exception as e:
            # One unreadable upload must not abort the rest of the batch.
            print(f"Error extrayendo documento: {e}")
    return "".join(text_parts), images
# --- 3. MAPAS (ESTRATEGIA CASCADA) ---
# Noise removed from an address before geocoding: parenthesised notes,
# registry/unit keywords (whole words only, so "Localidad" survives) and
# number markers.
_MAP_NOISE_RE = re.compile(
    r'\(.*?\)|\b(?:Matrícula|FMI|Apartamento|Local|Oficina)\b|\bNo\.|#'
)


def _map_queries(loc):
    """Build geocoding queries from most to least specific, without duplicates."""
    base_clean = _MAP_NOISE_RE.sub('', loc).strip()
    candidates = [base_clean]
    # Assumes a "..., City" layout: also try "Neighbourhood, City" and "City".
    parts = [p.strip() for p in base_clean.split(',') if p.strip()]
    if len(parts) > 1:
        candidates.append(f"{parts[-2]}, {parts[-1]}")
        candidates.append(parts[-1])
    queries = []
    for q in candidates:
        if len(q) >= 3 and q not in queries:
            queries.append(q)
    return queries


def get_map(loc):
    """Geocode an address and render a folium map centred on it.

    Queries are tried in cascade (full address, then the last two
    comma-separated parts, then the last part alone), each suffixed with
    ", Colombia" unless it already mentions Colombia. A failure on one query
    no longer aborts the cascade; the next, more general query is tried.

    Args:
        loc: Free-text address. Values shorter than 4 characters, non-string
            values, or values containing "No detect" are rejected.

    Returns:
        str | None: The rendered map HTML for the first query that geocodes,
        or ``None`` when the input is rejected or nothing could be located.
    """
    if not isinstance(loc, str) or len(loc) < 4 or "No detect" in loc:
        return None
    try:
        geo = Nominatim(user_agent="inmoguard_v35_deep")
    except Exception as e:
        print(f"Error Mapa: {e}")
        return None

    for q in _map_queries(loc):
        search_q = q if "Colombia" in q else f"{q}, Colombia"
        print(f"🌍 Intentando Mapa: {search_q}")
        try:
            hit = geo.geocode(search_q, timeout=4)
            if not hit:
                continue
            m = folium.Map([hit.latitude, hit.longitude], zoom_start=14)
            folium.Marker(
                [hit.latitude, hit.longitude],
                popup=loc,
                icon=folium.Icon(color="red", icon="info-sign"),
            ).add_to(m)
            return m.get_root().render()
        except Exception as e:
            # Timeouts / service errors: fall through to the next query.
            print(f"Error Mapa: {e}")
    return None
# --- 4. IA CORE ---
def clean_json(text):
    """Extract the outermost ``{...}`` span from a model reply.

    Markdown code fences are stripped first, then everything from the first
    ``{`` to the last ``}`` is returned.

    Args:
        text: Raw reply text; non-string values are treated as empty.

    Returns:
        str: The candidate JSON object text, or ``"{}"`` when no well-ordered
        brace pair exists (including the case where the last ``}`` comes
        before the first ``{``).
    """
    if not isinstance(text, str):
        return "{}"
    text = text.replace("```json", "").replace("```", "")
    start = text.find('{')
    end = text.rfind('}')
    # end < start also covers end == -1 once start is known to be >= 0.
    if start == -1 or end < start:
        return "{}"
    return text[start:end + 1]
# Prompt V35 ("multiple FMI hunter"), sent verbatim to the model. The text is
# runtime data: it is reproduced exactly, including the JSON template layout.
_ANALYZE_PROMPT = (
    "Eres 'InmoGuard AI', Auditor Forense. Tu misión es detectar TODOS los activos."
    "\n\n--- INSTRUCCIÓN CRÍTICA DE FMI ---"
    "Un expediente puede tener MÚLTIPLES Matrículas Inmobiliarias (FMI). Ej: Apartamento + Garaje + Depósito."
    "NO TE DETENGAS EN EL PRIMERO. Escanea todo el texto y extrae CADA FMI que encuentres (formato 000-00000)."
    "En el campo 'fmi', ponlos todos separados por comas."
    "\n\n--- OTRAS INSTRUCCIONES ---"
    "1. SAE (Ley 1708): Aplica lógica FRISCO. Si es SAE y no hay lios, es viable."
    "2. UBICACIÓN: Extrae la dirección física más clara posible (Ciudad, Vereda) para el mapa."
    "3. HISTORIAL: Cadena de dueños hacia atrás."
    "4. CÉDULA CATASTRAL: Extrae el número tal cual."
    "\n\n--- JSON OBLIGATORIO ---"
    """
{
"meta": {
"fmi": "FMI 1, FMI 2, FMI 3...",
"cedula_catastral": "...",
"dir_legal": "...",
"tipo": "..."
},
"historial_propiedad": [
{"fecha": "...", "acto": "...", "detalles": "..."}
],
"propietarios_actuales": [{"nombre": "...", "id": "...", "pct": "..."}],
"analisis_sae_ley": {
"estado_proceso": "...",
"fundamento_legal": "...",
"viabilidad_comercializacion": "..."
},
"semaforo_riesgos": { "juridico": "BAJO/MEDIO/ALTO", "financiero": "...", "fisico": "..." },
"ambiental": { "restricciones": "...", "autoridad": "..." },
"fiscal_completo": { "municipal": "...", "departamental": "...", "avaluo": "..." },
"vur": {
"anotaciones_detalle": [{"nro": "...", "desc": "...", "estado": "..."}],
"falsa_tradicion": "..."
},
"laft": { "alertas": "..." },
"val": { "rango": "...", "just": "..." },
"dic": { "res": "VIABLE / NO VIABLE", "txt": "..." }
}
"""
)


def analyze(text, imgs):
    """Send the extracted documents to the model and parse its JSON verdict.

    Args:
        text: Concatenated document text; only the first 900000 characters
            are sent.
        imgs: List of ``{"mime_type", "data"}`` dicts (base64 payloads) that
            are attached as inline data.

    Returns:
        dict: The parsed model JSON with an extra ``"mapa"`` key (map HTML or
        ``None``), or ``{"error": True, "msg": ...}`` on any failure. This
        function does not raise.
    """
    url, model_info = find_valid_model()
    if not url:
        # Without an endpoint there is nothing to call; surface the reason
        # reported by find_valid_model instead of an "Invalid URL" error.
        return {"error": True, "msg": model_info or "Modelo no disponible"}

    parts = [{"text": _ANALYZE_PROMPT}, {"text": f"DOCS:\n{text[:900000]}"}]
    parts.extend(
        {"inlineData": {"mimeType": img["mime_type"], "data": img["data"]}}
        for img in imgs
    )
    safe = [{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}]
    payload = {"contents": [{"parts": parts}], "safetySettings": safe}

    try:
        res = None
        for attempt in range(2):
            if attempt:
                # Pause only between attempts, never after the last one.
                time.sleep(3)
            res = requests.post(
                url,
                json=payload,
                headers={'Content-Type': 'application/json'},
                timeout=120,
            )
            if res.status_code == 200:
                break
        if res.status_code != 200:
            return {"error": True, "msg": f"Google Error {res.status_code}"}

        try:
            raw = res.json()['candidates'][0]['content']['parts'][0]['text']
        except (KeyError, IndexError, TypeError):
            # A 200 reply without candidate text carries nothing to parse.
            return {"error": True, "msg": "Respuesta de Google sin contenido"}

        data = json.loads(clean_json(raw))
        meta = data.get('meta')
        address = meta.get('dir_legal', '') if isinstance(meta, dict) else ''
        data['mapa'] = get_map(address)
        return data
    except Exception as e:
        # requests error messages embed the request URL, which carries the
        # API key as a query parameter; redact it before returning the text.
        msg = re.sub(r"key=[^&\s'\"]+", "key=***", str(e))
        return {"error": True, "msg": msg}
# --- 5. PDF ---
def gen_pdf_from_data(d):
    """Render the analysis dictionary as a PDF report.

    Sections written: FMI and cadastral id (from ``meta``), the Ley 1708
    (SAE) analysis, the ownership history table and the final verdict text.

    Values are escaped before being placed inside ReportLab paragraph
    markup, so characters such as ``<`` or ``&`` in the data are printed
    literally instead of being parsed as tags. Missing key/value fields are
    shown as "N/D"; sections that are absent or not dictionaries are treated
    as empty.

    Args:
        d: Analysis dictionary as produced by the analysis step; any
            non-dict value is treated as an empty dictionary.

    Returns:
        io.BytesIO: Buffer holding the PDF, positioned at offset 0.
    """
    from xml.sax.saxutils import escape  # stdlib; local to keep the block self-contained

    def as_dict(value):
        # Sections can arrive as null or as an unexpected type.
        return value if isinstance(value, dict) else {}

    def markup_safe(value, missing=""):
        return missing if value is None else escape(str(value))

    d = as_dict(d)
    b = io.BytesIO()
    doc = SimpleDocTemplate(b, pagesize=letter, topMargin=0.5*inch)
    s = getSampleStyleSheet()
    story = [Paragraph("INFORME FORENSE V35", s['Heading1']), Spacer(1, 10)]

    def add_kv(k, v):
        story.append(Paragraph(f"<b>{k}:</b> {markup_safe(v, 'N/D')}", s['Normal']))
        story.append(Spacer(1, 3))

    m = as_dict(d.get('meta'))
    add_kv("FMI (Matrículas)", m.get('fmi'))
    add_kv("Cédula Catastral", m.get('cedula_catastral'))

    story.append(Paragraph("ANÁLISIS LEY 1708 (SAE)", s['Heading2']))
    sae = as_dict(d.get('analisis_sae_ley'))
    add_kv("Concepto", sae.get('viabilidad_comercializacion'))
    add_kv("Fundamento", sae.get('fundamento_legal'))

    story.append(Paragraph("HISTORIAL", s['Heading2']))
    hist = d.get('historial_propiedad')
    rows = [h for h in hist if isinstance(h, dict)] if isinstance(hist, list) else []
    if rows:
        dt = [['Fecha', 'Acto', 'Detalle']]
        for h in rows:
            dt.append([
                # Plain string cell: drawn as-is, so no markup escaping here.
                "" if h.get('fecha') is None else str(h.get('fecha')),
                Paragraph(markup_safe(h.get('acto')), s['Normal']),
                Paragraph(markup_safe(h.get('detalles')), s['Normal']),
            ])
        t = Table(dt, colWidths=[1*inch, 2*inch, 3.5*inch])
        t.setStyle(TableStyle([('GRID', (0,0), (-1,-1), 0.5, colors.grey)]))
        story.append(t)

    story.append(Paragraph("DICTAMEN FINAL", s['Heading2']))
    story.append(Paragraph(markup_safe(as_dict(d.get('dic')).get('txt')), s['Normal']))

    doc.build(story)
    b.seek(0)
    return b
# --- RUTAS ---
@app.route('/analyze', methods=['POST'])
def analyze_route():
    """POST /analyze: extract the uploaded ``files`` and return the analysis as JSON.

    Uploads without a filename (what a browser sends for an empty file input)
    are ignored. When nothing usable was uploaded, an error object in the same
    ``{"error", "msg"}`` shape used by the analysis step is returned instead
    of calling the model with an empty prompt.
    """
    files = [f for f in request.files.getlist('files') if f and f.filename]
    if not files:
        return jsonify({"error": True, "msg": "No se recibieron archivos"})
    txt, imgs = extract_multimodal(files)
    return jsonify(analyze(txt, imgs))
@app.route('/print-pdf', methods=['POST'])
def print_pdf_route():
    """POST /print-pdf: turn the posted analysis JSON into a downloadable PDF.

    A body that is missing, not valid JSON, or not a JSON object is answered
    with HTTP 400 and an ``{"error", "msg"}`` object rather than failing
    inside the PDF builder.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": True, "msg": "Se esperaba un objeto JSON"}), 400
    pdf = gen_pdf_from_data(payload)
    return Response(
        pdf.read(),
        mimetype='application/pdf',
        headers={'Content-Disposition': 'attachment;filename=Informe_InmoGuard.pdf'},
    )
@app.route('/download-json', methods=['POST'])
def dl_json_route():
    """POST /download-json: echo the posted JSON back as a pretty-printed attachment."""
    # ensure_ascii=False keeps accented characters readable in the download.
    body = json.dumps(request.json, ensure_ascii=False, indent=4)
    attachment = {'Content-Disposition': 'attachment;filename=data.json'}
    return Response(body, headers=attachment, mimetype='application/json')
@app.route('/')
def index():
    """GET /: render the ``index.html`` template."""
    page = render_template('index.html')
    return page
if __name__ == '__main__':
    # The server listens on every interface, so the interactive debugger must
    # not be on by default: it allows code execution from the browser. Opt in
    # explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)