import base64
import datetime
import io
import json
import os
import re
import subprocess
import time
import traceback

import docx
import fitz
import folium
import PyPDF2
import pytesseract
import requests
from flask import Flask, request, jsonify, render_template, Response
from geopy.geocoders import Nominatim
from PIL import Image, ImageOps
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle

app = Flask(__name__, template_folder='.')
app.config['MAX_CONTENT_LENGTH'] = 64 * 1024 * 1024  # 64 MiB
API_KEY = os.environ.get("GOOGLE_API_KEY")

# Probe for the tesseract binary once at import time; OCR is skipped when
# the executable is missing or exits with an error.
try:
    subprocess.check_output(["tesseract", "--version"])
    HAS_OCR = True
except (OSError, subprocess.SubprocessError):
    HAS_OCR = False


# --- 1. MODEL ---
_GEMINI_MODELS_URL = "https://generativelanguage.googleapis.com/v1beta/models"
_FALLBACK_MODEL = "gemini-1.5-flash"


def _model_endpoint(model_name):
    """Build the generateContent URL for *model_name* (with or without the 'models/' prefix)."""
    short_name = model_name.replace('models/', '')
    return f"{_GEMINI_MODELS_URL}/{short_name}:generateContent?key={API_KEY}"


def find_valid_model():
    """Pick a Gemini model that supports ``generateContent``.

    Returns:
        A ``(url, name)`` tuple. ``url`` is the generateContent endpoint
        (API key included) and ``name`` is the model name as reported by
        the listing endpoint. When no API key is configured the result is
        ``(None, "Falta API Key")``. When the listing request fails, is
        malformed, or reports no usable model, the ``gemini-1.5-flash``
        endpoint is returned as a fallback.
    """
    if not API_KEY:
        return None, "Falta API Key"

    fallback = (_model_endpoint(_FALLBACK_MODEL), _FALLBACK_MODEL)
    try:
        resp = requests.get(f"{_GEMINI_MODELS_URL}?key={API_KEY}", timeout=15)
        if resp.status_code != 200:
            return fallback
        models = [
            m['name']
            for m in resp.json().get('models', [])
            if "generateContent" in m.get('supportedGenerationMethods', [])
        ]
    except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError) as exc:
        # Log only the exception type: the message of a requests error can
        # contain the request URL, which carries the API key.
        print(f"Error Modelos: {type(exc).__name__}")
        return fallback

    # Prefer a "flash" model; otherwise take the first one listed.
    selected = next((m for m in models if "flash" in m), models[0] if models else None)
    if selected is None:
        return fallback
    return _model_endpoint(selected), selected


# --- 2.
# EXTRACTION (20 PAGES) ---
_OCR_MIN_TEXT_CHARS = 200   # below this much embedded text a PDF is OCR'd
_OCR_MAX_PAGES = 20         # pages rasterised per PDF for OCR
_PDF_PREVIEW_PAGES = 3      # leading PDF pages also forwarded as images
_IMAGE_MIME_TYPES = {'.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.png': 'image/png'}


def ocr_proc(img_bytes):
    """Run Spanish-language OCR over an encoded image.

    Args:
        img_bytes: Raw bytes of an image file readable by PIL.

    Returns:
        The recognised text, or ``""`` when OCR is unavailable or fails.
    """
    if not HAS_OCR:
        return ""
    try:
        img = Image.open(io.BytesIO(img_bytes))
        prepared = ImageOps.autocontrast(ImageOps.grayscale(img))
        return pytesseract.image_to_string(prepared, lang='spa', config='--psm 1')
    except Exception as exc:  # best-effort: one bad image must not abort the batch
        print(f"Error OCR: {exc}")
        return ""


def _extract_pdf(content, chunks, images):
    """Append the text of one PDF to *chunks*, falling back to OCR when it has little text."""
    embedded_chars = 0
    try:
        reader = PyPDF2.PdfReader(io.BytesIO(content))
        for page in reader.pages:
            page_text = page.extract_text() or ""
            embedded_chars += len(page_text)
            chunks.append(page_text)
    except Exception as exc:
        print(f"Error PDF: {exc}")

    # Decide per document: the text of previously processed files must not
    # prevent OCR of a scanned PDF.
    if embedded_chars >= _OCR_MIN_TEXT_CHARS or not HAS_OCR:
        return

    doc = fitz.open(stream=content, filetype="pdf")
    try:
        for i in range(min(len(doc), _OCR_MAX_PAGES)):
            jpeg = doc.load_page(i).get_pixmap(dpi=150).tobytes("jpeg")
            chunks.append(ocr_proc(jpeg) + "\n")
            if i < _PDF_PREVIEW_PAGES:
                images.append({
                    "mime_type": "image/jpeg",
                    "data": base64.b64encode(jpeg).decode('utf-8'),
                })
    finally:
        doc.close()


def extract_multimodal(files):
    """Extract text and preview images from uploaded documents.

    Supports ``.pdf``, ``.jpg``/``.jpeg``/``.png`` and ``.docx`` (chosen by
    file-name suffix); other files only contribute their header line.

    Args:
        files: Iterable of upload objects exposing ``filename``, ``read()``
            and ``seek()``.

    Returns:
        ``(text, images)`` where ``text`` is the concatenated text of all
        documents (each preceded by a ``--- DOC: name ---`` header) and
        ``images`` is a list of ``{"mime_type", "data"}`` dicts with
        base64-encoded image data.
    """
    chunks = []
    images = []
    for f in files:
        try:
            fname = (f.filename or "").lower()
            content = f.read()
            f.seek(0)
            chunks.append(f"\n\n--- DOC: {fname} ---\n")
            if fname.endswith('.pdf'):
                _extract_pdf(content, chunks, images)
            elif fname.endswith(('.jpg', '.png', '.jpeg')):
                chunks.append(ocr_proc(content))
                suffix = fname[fname.rfind('.'):]
                images.append({
                    "mime_type": _IMAGE_MIME_TYPES[suffix],
                    "data": base64.b64encode(content).decode('utf-8'),
                })
            elif fname.endswith('.docx'):
                document = docx.Document(io.BytesIO(content))
                chunks.append("\n".join(p.text for p in document.paragraphs))
        except Exception as exc:  # best-effort: skip the broken file, keep the rest
            print(f"Error Extracción: {exc}")
    return "".join(chunks), images


# --- 3. MAPS (CASCADE STRATEGY) ---
_MAP_NOISE_RE = re.compile(r'\(.*?\)|Matrícula|FMI|No\.|#|Apartamento|Local|Oficina')


def get_map(loc):
    """Geocode *loc* and render a folium map with a marker on it.

    The address is tried from most to least specific: the cleaned full
    string, then its last two comma-separated parts, then the last part
    alone. ", Colombia" is appended to any query that lacks it.

    Args:
        loc: Free-text address.

    Returns:
        The rendered map HTML, or ``None`` when *loc* is unusable or no
        query could be geocoded.
    """
    if not isinstance(loc, str) or len(loc) < 4 or "No detect" in loc:
        return None

    from html import escape  # local import: keeps this block self-contained

    # 1. Initial clean-up: drop parentheticals and registry/unit keywords.
    base_clean = _MAP_NOISE_RE.sub('', loc).strip()

    # 2. Build the attempts, most specific first.
    candidates = [base_clean]
    parts = [p.strip() for p in base_clean.split(',')]
    if len(parts) > 1:
        candidates.append(f"{parts[-2]}, {parts[-1]}")
        candidates.append(parts[-1])
    # De-duplicate (order preserved) so the geocoder is not hit twice with
    # the same query.
    queries = list(dict.fromkeys(q for q in candidates if len(q) >= 3))

    try:
        geo = Nominatim(user_agent="inmoguard_v35_deep")
    except Exception as e:
        print(f"Error Mapa: {e}")
        return None

    for q in queries:
        search_q = q if "Colombia" in q else f"{q}, Colombia"
        print(f"🌍 Intentando Mapa: {search_q}")
        try:
            hit = geo.geocode(search_q, timeout=4)
            if not hit:
                continue
            m = folium.Map([hit.latitude, hit.longitude], zoom_start=14)
            folium.Marker(
                [hit.latitude, hit.longitude],
                # The popup is emitted as HTML; loc is model-extracted text.
                popup=escape(loc),
                icon=folium.Icon(color="red", icon="info-sign"),
            ).add_to(m)
            return m.get_root().render()
        except Exception as e:
            # A failure on one query must not cancel the remaining fallbacks.
            print(f"Error Mapa: {e}")
    return None


# --- 4. AI CORE ---
def clean_json(text):
    """Extract the outermost ``{...}`` span from a model reply.

    Markdown code fences are removed first.

    Returns:
        The substring from the first ``{`` to the last ``}``, or ``"{}"``
        when no such well-ordered pair exists.
    """
    text = text.replace("```json", "").replace("```", "")
    start = text.find('{')
    end = text.rfind('}')
    if start == -1 or end < start:
        return "{}"
    return text[start:end + 1]


# PROMPT V35: multi-FMI hunter. This text is sent to the model verbatim.
# The JSON template is kept on logical single-line segments whose
# concatenation matches the original literal.
_ANALYSIS_PROMPT = (
    "Eres 'InmoGuard AI', Auditor Forense. Tu misión es detectar TODOS los activos."
    "\n\n--- INSTRUCCIÓN CRÍTICA DE FMI ---"
    "Un expediente puede tener MÚLTIPLES Matrículas Inmobiliarias (FMI). Ej: Apartamento + Garaje + Depósito."
    "NO TE DETENGAS EN EL PRIMERO. Escanea todo el texto y extrae CADA FMI que encuentres (formato 000-00000)."
    "En el campo 'fmi', ponlos todos separados por comas."
    "\n\n--- OTRAS INSTRUCCIONES ---"
    "1. SAE (Ley 1708): Aplica lógica FRISCO. Si es SAE y no hay lios, es viable."
    "2. UBICACIÓN: Extrae la dirección física más clara posible (Ciudad, Vereda) para el mapa."
    "3. HISTORIAL: Cadena de dueños hacia atrás."
    "4. CÉDULA CATASTRAL: Extrae el número tal cual."
    "\n\n--- JSON OBLIGATORIO ---"
    ' { '
    '"meta": { "fmi": "FMI 1, FMI 2, FMI 3...", "cedula_catastral": "...", "dir_legal": "...", "tipo": "..." }, '
    '"historial_propiedad": [ {"fecha": "...", "acto": "...", "detalles": "..."} ], '
    '"propietarios_actuales": [{"nombre": "...", "id": "...", "pct": "..."}], '
    '"analisis_sae_ley": { "estado_proceso": "...", "fundamento_legal": "...", "viabilidad_comercializacion": "..." }, '
    '"semaforo_riesgos": { "juridico": "BAJO/MEDIO/ALTO", "financiero": "...", "fisico": "..." }, '
    '"ambiental": { "restricciones": "...", "autoridad": "..." }, '
    '"fiscal_completo": { "municipal": "...", "departamental": "...", "avaluo": "..." }, '
    '"vur": { "anotaciones_detalle": [{"nro": "...", "desc": "...", "estado": "..."}], "falsa_tradicion": "..." }, '
    '"laft": { "alertas": "..." }, '
    '"val": { "rango": "...", "just": "..." }, '
    '"dic": { "res": "VIABLE / NO VIABLE", "txt": "..." } '
    '} '
)
_GEMINI_ATTEMPTS = 2


def analyze(text, imgs):
    """Send the extracted documents to Gemini and return the parsed report.

    Args:
        text: Concatenated document text (truncated to 900000 characters).
        imgs: List of ``{"mime_type", "data"}`` dicts with base64 images.

    Returns:
        The JSON object produced by the model, with an added ``"mapa"`` key
        holding the rendered map HTML (or ``None``). On any failure the
        result is ``{"error": True, "msg": <description>}``; this function
        does not raise for request or parsing errors.
    """
    url, model_info = find_valid_model()
    if not url:
        return {"error": True, "msg": f"Modelo no disponible: {model_info}"}

    parts = [{"text": _ANALYSIS_PROMPT}, {"text": f"DOCS:\n{text[:900000]}"}]
    parts.extend(
        {"inlineData": {"mimeType": img["mime_type"], "data": img["data"]}}
        for img in imgs
    )
    payload = {
        "contents": [{"parts": parts}],
        "safetySettings": [
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}
        ],
    }

    try:
        res = None
        conn_error = ""
        for attempt in range(_GEMINI_ATTEMPTS):
            if attempt:
                time.sleep(3)  # brief pause before retrying, never after the last try
            try:
                res = requests.post(
                    url,
                    json=payload,
                    headers={'Content-Type': 'application/json'},
                    timeout=120,
                )
            except requests.RequestException as exc:
                # Keep only the type: the message may embed the URL and
                # with it the API key.
                res = None
                conn_error = type(exc).__name__
                continue
            if res.status_code == 200:
                break

        if res is None:
            return {"error": True, "msg": f"Error de conexión: {conn_error}"}
        if res.status_code != 200:
            return {"error": True, "msg": f"Google Error {res.status_code}"}

        raw = res.json()['candidates'][0]['content']['parts'][0]['text']
        data = json.loads(clean_json(raw))
        # The model may return "meta" as null or a non-object; do not let
        # that discard the rest of the analysis.
        meta = data.get('meta')
        location = meta.get('dir_legal', '') if isinstance(meta, dict) else ''
        data['mapa'] = get_map(location)
        return data
    except Exception as e:  # API boundary: always answer with an error payload
        return {"error": True, "msg": str(e)}


# --- 5.
# PDF ---
def gen_pdf_from_data(d):
    """Render the analysis dictionary as a PDF report.

    Args:
        d: Analysis result. The keys read are ``meta``,
            ``analisis_sae_ley``, ``historial_propiedad`` and ``dic``;
            missing, null or wrongly-typed sections are treated as empty.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 containing the PDF.
    """
    # Local import: Paragraph text is parsed as XML-like markup, so values
    # must be escaped before being embedded.
    from xml.sax.saxutils import escape

    def as_dict(value):
        return value if isinstance(value, dict) else {}

    d = as_dict(d)
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter, topMargin=0.5 * inch)
    styles = getSampleStyleSheet()
    normal = styles['Normal']
    story = [Paragraph("INFORME FORENSE V35", styles['Heading1']), Spacer(1, 10)]

    def add_kv(k, v):
        story.append(Paragraph(f"{escape(str(k))}: {escape(str(v))}", normal))
        story.append(Spacer(1, 3))

    meta = as_dict(d.get('meta'))
    add_kv("FMI (Matrículas)", meta.get('fmi'))
    add_kv("Cédula Catastral", meta.get('cedula_catastral'))

    story.append(Paragraph("ANÁLISIS LEY 1708 (SAE)", styles['Heading2']))
    sae = as_dict(d.get('analisis_sae_ley'))
    add_kv("Concepto", sae.get('viabilidad_comercializacion'))
    add_kv("Fundamento", sae.get('fundamento_legal'))

    story.append(Paragraph("HISTORIAL", styles['Heading2']))
    hist = d.get('historial_propiedad')
    entries = [h for h in hist if isinstance(h, dict)] if isinstance(hist, list) else []
    if entries:
        rows = [['Fecha', 'Acto', 'Detalle']]
        for h in entries:
            rows.append([
                str(h.get('fecha', '')),
                Paragraph(escape(str(h.get('acto', ''))), normal),
                Paragraph(escape(str(h.get('detalles', ''))), normal),
            ])
        table = Table(rows, colWidths=[1 * inch, 2 * inch, 3.5 * inch])
        table.setStyle(TableStyle([('GRID', (0, 0), (-1, -1), 0.5, colors.grey)]))
        story.append(table)

    story.append(Paragraph("DICTAMEN FINAL", styles['Heading2']))
    verdict = as_dict(d.get('dic')).get('txt')
    story.append(Paragraph(escape(str(verdict or '')), normal))

    doc.build(story)
    buffer.seek(0)
    return buffer


# --- ROUTES ---
@app.route('/analyze', methods=['POST'])
def analyze_route():
    """Analyse the uploaded ``files`` and return the report as JSON."""
    # Ignore empty file parts (no filename) before doing any work.
    files = [f for f in request.files.getlist('files') if f and f.filename]
    if not files:
        return jsonify({"error": True, "msg": "No se recibieron archivos"}), 400
    txt, imgs = extract_multimodal(files)
    return jsonify(analyze(txt, imgs))


@app.route('/print-pdf', methods=['POST'])
def print_pdf_route():
    """Turn a posted analysis JSON object into a downloadable PDF."""
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": True, "msg": "JSON inválido"}), 400
    return Response(
        gen_pdf_from_data(payload).read(),
        mimetype='application/pdf',
        headers={'Content-Disposition': 'attachment;filename=Informe_InmoGuard.pdf'},
    )


@app.route('/download-json', methods=['POST'])
def dl_json_route():
    """Echo the posted JSON back as a pretty-printed file download."""
    payload = request.get_json(silent=True)
    if payload is None:
        return jsonify({"error": True, "msg": "JSON inválido"}), 400
    return Response(
        json.dumps(payload, indent=4, ensure_ascii=False),
        mimetype='application/json',
        headers={'Content-Disposition': 'attachment;filename=data.json'},
    )


@app.route('/')
def index():
    """Serve the single-page front end."""
    return render_template('index.html')


if __name__ == '__main__':
    # The interactive debugger allows arbitrary code execution, so it must
    # not be on by default while listening on every interface. Opt in with
    # FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)