# InmoGuard AI — Flask document-audit service (Hugging Face Space).
# --- Standard library ---
import os
import json
import io
import datetime
import re
import traceback
import base64
import subprocess
import time
# NOTE(review): datetime and traceback appear unused in this chunk — confirm before removing.

# --- Third-party ---
from flask import Flask, request, jsonify, render_template, Response
import requests
import PyPDF2
import fitz  # PyMuPDF — page rasterization for the OCR fallback
from PIL import Image, ImageOps
import pytesseract
import docx
from geopy.geocoders import Nominatim
import folium
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
# Application setup: serve templates from the repo root, cap uploads at 64 MB.
app = Flask(__name__, template_folder='.')
app.config['MAX_CONTENT_LENGTH'] = 64 * 1024 * 1024  # 64 MB upload limit

# Gemini API key comes from the environment; may be None (handled downstream).
API_KEY = os.environ.get("GOOGLE_API_KEY")

# Probe for a working tesseract binary once at startup; OCR is optional.
# Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not swallowed.
try:
    subprocess.check_output(["tesseract", "--version"])
    HAS_OCR = True
except (OSError, subprocess.SubprocessError):
    HAS_OCR = False
# --- 1. MODEL SELECTION ---
def find_valid_model():
    """Pick a usable Gemini generateContent endpoint, preferring a 'flash' model.

    Returns (url, model_name). url is None only when no API key is configured.
    Fixes vs. original: requests.get now has a timeout (it could hang forever),
    an empty model list no longer raises AttributeError on None.replace, and the
    exception path returns the usable fallback URL instead of None (the caller
    previously POSTed to None and crashed with a confusing error).
    """
    if not API_KEY:
        return None, "Falta API Key"
    fallback_url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={API_KEY}"
    try:
        url = f"https://generativelanguage.googleapis.com/v1beta/models?key={API_KEY}"
        resp = requests.get(url, timeout=15)
        if resp.status_code != 200:
            return fallback_url, "gemini-1.5-flash"
        models = [m['name'] for m in resp.json().get('models', [])
                  if "generateContent" in m.get('supportedGenerationMethods', [])]
        selected = next((m for m in models if "flash" in m), models[0] if models else None)
        if selected is None:
            # Listing succeeded but nothing supports generateContent — use the default.
            return fallback_url, "gemini-1.5-flash"
        return (f"https://generativelanguage.googleapis.com/v1beta/models/"
                f"{selected.replace('models/', '')}:generateContent?key={API_KEY}"), selected
    except Exception:
        # Network/JSON failure: degrade to the default flash endpoint.
        return fallback_url, "gemini-1.5-flash"
# --- 2. EXTRACTION (20 PAGES) ---
def ocr_proc(img_bytes):
    """Best-effort Spanish OCR of an image; returns "" when OCR is unavailable or fails.

    Preprocesses with grayscale + autocontrast before tesseract (--psm 1 = full
    automatic page segmentation). Narrowed from bare `except:` so interpreter
    shutdown signals are not swallowed; corrupt images still yield "".
    """
    if not HAS_OCR:
        return ""
    try:
        img = Image.open(io.BytesIO(img_bytes))
        prepared = ImageOps.autocontrast(ImageOps.grayscale(img))
        return pytesseract.image_to_string(prepared, lang='spa', config='--psm 1')
    except Exception:
        # Deliberate best-effort: a bad image must not abort the batch.
        return ""
def extract_multimodal(files):
    """Extract text (and preview images) from uploaded files.

    Supports PDF (native text layer first, OCR fallback for scans), JPG/PNG
    (OCR + image forwarded to the model) and DOCX (paragraph text).
    Returns (accumulated_text, images) where images is a list of
    {"mime_type", "data"} dicts with base64-encoded JPEG payloads.
    """
    text_accum = ""
    images = []
    for f in files:
        try:
            fname = f.filename.lower()
            content = f.read()
            f.seek(0)  # rewind so the upload stream stays reusable
            text_accum += f"\n\n--- DOC: {fname} ---\n"
            if fname.endswith('.pdf'):
                # Pass 1: native text layer via PyPDF2 (best-effort).
                try:
                    pdf = PyPDF2.PdfReader(io.BytesIO(content))
                    for p in pdf.pages: text_accum += p.extract_text() or ""
                except: pass
                # Pass 2: OCR fallback for scanned PDFs.
                # NOTE(review): the < 200 threshold is checked against the TOTAL
                # accumulator, not this file's own text, so OCR can be skipped once
                # earlier files produced text — confirm this is intended.
                if len(text_accum) < 200 and HAS_OCR:
                    doc = fitz.open(stream=content, filetype="pdf")
                    # Raised to 20 pages of OCR per document.
                    for i in range(min(len(doc), 20)):
                        ib = doc.load_page(i).get_pixmap(dpi=150).tobytes("jpeg")
                        text_accum += ocr_proc(ib) + "\n"
                        # Only the first 3 pages are also sent to the model as images.
                        if i < 3: images.append({"mime_type": "image/jpeg", "data": base64.b64encode(ib).decode('utf-8')})
            elif fname.endswith(('.jpg','.png','.jpeg')):
                text_accum += ocr_proc(content)
                # NOTE(review): PNG bytes are labeled image/jpeg here — confirm the API tolerates it.
                images.append({"mime_type": "image/jpeg", "data": base64.b64encode(content).decode('utf-8')})
            elif fname.endswith('.docx'):
                doc = docx.Document(io.BytesIO(content))
                text_accum += "\n".join([p.text for p in doc.paragraphs])
        except: pass  # best-effort per file: one bad upload must not abort the batch
    return text_accum, images
# --- 3. MAPS (CASCADE STRATEGY) ---
def get_map(loc):
    """Geocode a free-text property location and return a rendered folium map (HTML string).

    Cascade strategy: try the cleaned full address first, then progressively
    broader fragments ("Barrio, Ciudad", then just "Ciudad") until Nominatim
    returns a hit. Returns None when nothing geocodes or on any error.
    """
    if not loc or len(loc)<4 or "No detect" in loc: return None
    try:
        geo = Nominatim(user_agent="inmoguard_v35_deep")
        # 1. Initial cleanup: drop parentheticals and registry/unit noise words.
        base_clean = re.sub(r'\(.*?\)|Matrícula|FMI|No\.|#|Apartamento|Local|Oficina', '', loc).strip()
        # Build search attempts, most specific to most general.
        queries = []
        queries.append(base_clean) # e.g. "Calle 123, Vereda X, Municipio"
        # Try extracting just municipality/city (assumes "..., Ciudad" format).
        parts = base_clean.split(',')
        if len(parts) > 1:
            queries.append(f"{parts[-2]}, {parts[-1]}") # "Barrio, Ciudad"
            queries.append(parts[-1].strip()) # "Ciudad"
        for q in queries:
            if len(q) < 3: continue
            # Scope every search to Colombia unless it is already present.
            search_q = q if "Colombia" in q else f"{q}, Colombia"
            print(f"🌍 Intentando Mapa: {search_q}")
            l = geo.geocode(search_q, timeout=4)
            if l:
                # Success: render a marker map centred on the first hit.
                m = folium.Map([l.latitude, l.longitude], zoom_start=14)
                folium.Marker([l.latitude, l.longitude], popup=loc, icon=folium.Icon(color="red", icon="info-sign")).add_to(m)
                return m.get_root().render()
    except Exception as e: print(f"Error Mapa: {e}")
    return None
# --- 4. AI CORE ---
def clean_json(text):
    """Strip Markdown code fences and return the outermost {...} span of *text*.

    Returns "{}" when no braces are present so json.loads always has input.
    """
    stripped = text.replace("```json", "").replace("```", "")
    start, end = stripped.find('{'), stripped.rfind('}')
    if start == -1 or end == -1:
        return "{}"
    return stripped[start:end + 1]
def analyze(text, imgs):
    """Send extracted document text plus page images to Gemini and return the parsed analysis dict.

    On any failure returns {"error": True, "msg": ...} instead of raising.
    The returned dict is additionally enriched with a 'mapa' key (folium HTML or None).
    """
    url, _ = find_valid_model()
    # PROMPT V35: multiple-FMI hunter — forces the model to list every matricula found.
    # (Prompt text itself is runtime data and stays in Spanish.)
    prompt = (
        "Eres 'InmoGuard AI', Auditor Forense. Tu misión es detectar TODOS los activos."
        "\n\n--- INSTRUCCIÓN CRÍTICA DE FMI ---"
        "Un expediente puede tener MÚLTIPLES Matrículas Inmobiliarias (FMI). Ej: Apartamento + Garaje + Depósito."
        "NO TE DETENGAS EN EL PRIMERO. Escanea todo el texto y extrae CADA FMI que encuentres (formato 000-00000)."
        "En el campo 'fmi', ponlos todos separados por comas."
        "\n\n--- OTRAS INSTRUCCIONES ---"
        "1. SAE (Ley 1708): Aplica lógica FRISCO. Si es SAE y no hay lios, es viable."
        "2. UBICACIÓN: Extrae la dirección física más clara posible (Ciudad, Vereda) para el mapa."
        "3. HISTORIAL: Cadena de dueños hacia atrás."
        "4. CÉDULA CATASTRAL: Extrae el número tal cual."
        "\n\n--- JSON OBLIGATORIO ---"
        """
        {
          "meta": {
            "fmi": "FMI 1, FMI 2, FMI 3...",
            "cedula_catastral": "...",
            "dir_legal": "...",
            "tipo": "..."
          },
          "historial_propiedad": [
            {"fecha": "...", "acto": "...", "detalles": "..."}
          ],
          "propietarios_actuales": [{"nombre": "...", "id": "...", "pct": "..."}],
          "analisis_sae_ley": {
            "estado_proceso": "...",
            "fundamento_legal": "...",
            "viabilidad_comercializacion": "..."
          },
          "semaforo_riesgos": { "juridico": "BAJO/MEDIO/ALTO", "financiero": "...", "fisico": "..." },
          "ambiental": { "restricciones": "...", "autoridad": "..." },
          "fiscal_completo": { "municipal": "...", "departamental": "...", "avaluo": "..." },
          "vur": {
            "anotaciones_detalle": [{"nro": "...", "desc": "...", "estado": "..."}],
            "falsa_tradicion": "..."
          },
          "laft": { "alertas": "..." },
          "val": { "rango": "...", "just": "..." },
          "dic": { "res": "VIABLE / NO VIABLE", "txt": "..." }
        }
        """
    )
    # Request parts: instruction prompt, then document text capped at 900k chars, then inline images.
    parts = [{"text": prompt}, {"text": f"DOCS:\n{text[:900000]}"}]
    for i in imgs: parts.append({"inlineData": {"mimeType": i["mime_type"], "data": i["data"]}})
    # Disable blocking for the dangerous-content category.
    safe = [{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}]
    try:
        # Up to 2 attempts, 3 s apart, to ride out transient API errors.
        for i in range(2):
            res = requests.post(url, json={"contents": [{"parts": parts}], "safetySettings": safe}, headers={'Content-Type': 'application/json'}, timeout=120)
            if res.status_code == 200: break
            time.sleep(3)
        if res.status_code != 200: return {"error": True, "msg": f"Google Error {res.status_code}"}
        # Model output may be fenced/wrapped; clean_json isolates the JSON object.
        data = json.loads(clean_json(res.json()['candidates'][0]['content']['parts'][0]['text']))
        # Attach a rendered folium map for the detected legal address (None if geocoding fails).
        data['mapa'] = get_map(data.get('meta', {}).get('dir_legal', ''))
        return data
    except Exception as e: return {"error": True, "msg": str(e)}
# --- 5. PDF ---
def gen_pdf_from_data(d):
    """Build the forensic-report PDF from the analysis dict *d*; return a rewound BytesIO."""
    buffer = io.BytesIO()
    report = SimpleDocTemplate(buffer, pagesize=letter, topMargin=0.5*inch)
    styles = getSampleStyleSheet()
    story = [Paragraph("INFORME FORENSE V35", styles['Heading1']), Spacer(1, 10)]

    def add_kv(label, value):
        # One bold "label: value" line followed by a small vertical gap.
        story.append(Paragraph(f"<b>{label}:</b> {str(value)}", styles['Normal']))
        story.append(Spacer(1, 3))

    meta = d.get('meta', {})
    add_kv("FMI (Matrículas)", meta.get('fmi'))
    add_kv("Cédula Catastral", meta.get('cedula_catastral'))

    story.append(Paragraph("ANÁLISIS LEY 1708 (SAE)", styles['Heading2']))
    sae_info = d.get('analisis_sae_ley', {})
    add_kv("Concepto", sae_info.get('viabilidad_comercializacion'))
    add_kv("Fundamento", sae_info.get('fundamento_legal'))

    story.append(Paragraph("HISTORIAL", styles['Heading2']))
    history = d.get('historial_propiedad', [])
    if history:
        rows = [['Fecha', 'Acto', 'Detalle']]
        for entry in history:
            rows.append([
                str(entry.get('fecha', '')),
                Paragraph(str(entry.get('acto', '')), styles['Normal']),
                Paragraph(str(entry.get('detalles', '')), styles['Normal']),
            ])
        table = Table(rows, colWidths=[1*inch, 2*inch, 3.5*inch])
        table.setStyle(TableStyle([('GRID', (0, 0), (-1, -1), 0.5, colors.grey)]))
        story.append(table)

    story.append(Paragraph("DICTAMEN FINAL", styles['Heading2']))
    story.append(Paragraph(d.get('dic', {}).get('txt', ''), styles['Normal']))

    report.build(story)
    buffer.seek(0)
    return buffer
# --- ROUTES ---
def analyze_route():
    """Flask handler: run the full multimodal analysis over the uploaded files.

    NOTE(review): no @app.route decorator is visible in this chunk — confirm the
    route is registered elsewhere or was lost in extraction.
    """
    uploaded = request.files.getlist('files')
    extracted_text, extracted_images = extract_multimodal(uploaded)
    return jsonify(analyze(extracted_text, extracted_images))
def print_pdf_route():
    """Flask handler: render the posted analysis JSON as a downloadable PDF.

    NOTE(review): no @app.route decorator is visible in this chunk — confirm routing.
    """
    pdf_bytes = gen_pdf_from_data(request.json).read()
    headers = {'Content-Disposition': 'attachment;filename=Informe_InmoGuard.pdf'}
    return Response(pdf_bytes, mimetype='application/pdf', headers=headers)
def dl_json_route():
    """Flask handler: echo the posted analysis back as a pretty-printed JSON download.

    NOTE(review): no @app.route decorator is visible in this chunk — confirm routing.
    """
    payload = json.dumps(request.json, indent=4, ensure_ascii=False)
    headers = {'Content-Disposition': 'attachment;filename=data.json'}
    return Response(payload, mimetype='application/json', headers=headers)
def index():
    """Serve the single-page UI (index.html from the template folder)."""
    return render_template('index.html')
# NOTE(review): debug=True together with host='0.0.0.0' exposes the Werkzeug debugger
# to the network — acceptable only inside a sandboxed container (e.g. an HF Space); confirm.
if __name__ == '__main__': app.run(debug=True, host='0.0.0.0', port=7860)