# InmoGuard AI — Flask document-audit service (Hugging Face Space).
# --- Standard library ---
import os
import json
import io
import datetime
import re
import traceback
import base64
import subprocess
import time
# NOTE(review): datetime and traceback appear unused in this chunk — confirm before removing.

# --- Third-party ---
from flask import Flask, request, jsonify, render_template, Response
import requests
import PyPDF2
import fitz  # PyMuPDF — page rasterization for the OCR fallback
from PIL import Image, ImageOps
import pytesseract
import docx
from geopy.geocoders import Nominatim
import folium
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
# Application setup: serve templates from the repo root, cap uploads at 64 MB.
app = Flask(__name__, template_folder='.')
app.config['MAX_CONTENT_LENGTH'] = 64 * 1024 * 1024  # 64 MB upload limit

# Gemini API key comes from the environment; may be None (handled downstream).
API_KEY = os.environ.get("GOOGLE_API_KEY")

# Probe for a working tesseract binary once at startup; OCR is optional.
# Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not swallowed.
try:
    subprocess.check_output(["tesseract", "--version"])
    HAS_OCR = True
except (OSError, subprocess.SubprocessError):
    HAS_OCR = False
# --- 1. MODEL SELECTION ---
def find_valid_model():
    """Pick a usable Gemini generateContent endpoint, preferring a 'flash' model.

    Returns (url, model_name). url is None only when no API key is configured.
    Fixes vs. original: requests.get now has a timeout (it could hang forever),
    an empty model list no longer raises AttributeError on None.replace, and the
    exception path returns the usable fallback URL instead of None (the caller
    previously POSTed to None and crashed with a confusing error).
    """
    if not API_KEY:
        return None, "Falta API Key"
    fallback_url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={API_KEY}"
    try:
        url = f"https://generativelanguage.googleapis.com/v1beta/models?key={API_KEY}"
        resp = requests.get(url, timeout=15)
        if resp.status_code != 200:
            return fallback_url, "gemini-1.5-flash"
        models = [m['name'] for m in resp.json().get('models', [])
                  if "generateContent" in m.get('supportedGenerationMethods', [])]
        selected = next((m for m in models if "flash" in m), models[0] if models else None)
        if selected is None:
            # Listing succeeded but nothing supports generateContent — use the default.
            return fallback_url, "gemini-1.5-flash"
        return (f"https://generativelanguage.googleapis.com/v1beta/models/"
                f"{selected.replace('models/', '')}:generateContent?key={API_KEY}"), selected
    except Exception:
        # Network/JSON failure: degrade to the default flash endpoint.
        return fallback_url, "gemini-1.5-flash"
# --- 2. EXTRACTION (20 PAGES) ---
def ocr_proc(img_bytes):
    """Best-effort Spanish OCR of an image; returns "" when OCR is unavailable or fails.

    Preprocesses with grayscale + autocontrast before tesseract (--psm 1 = full
    automatic page segmentation). Narrowed from bare `except:` so interpreter
    shutdown signals are not swallowed; corrupt images still yield "".
    """
    if not HAS_OCR:
        return ""
    try:
        img = Image.open(io.BytesIO(img_bytes))
        prepared = ImageOps.autocontrast(ImageOps.grayscale(img))
        return pytesseract.image_to_string(prepared, lang='spa', config='--psm 1')
    except Exception:
        # Deliberate best-effort: a bad image must not abort the batch.
        return ""
def extract_multimodal(files):
    """Extract text (and preview images) from uploaded files.

    Supports PDF (native text layer first, OCR fallback for scans), JPG/PNG
    (OCR + image forwarded to the model) and DOCX (paragraph text).
    Returns (accumulated_text, images) where images is a list of
    {"mime_type", "data"} dicts with base64-encoded JPEG payloads.
    """
    text_accum = ""
    images = []
    for f in files:
        try:
            fname = f.filename.lower()
            content = f.read()
            f.seek(0)  # rewind so the upload stream stays reusable
            text_accum += f"\n\n--- DOC: {fname} ---\n"
            if fname.endswith('.pdf'):
                # Pass 1: native text layer via PyPDF2 (best-effort).
                try:
                    pdf = PyPDF2.PdfReader(io.BytesIO(content))
                    for p in pdf.pages: text_accum += p.extract_text() or ""
                except: pass
                # Pass 2: OCR fallback for scanned PDFs.
                # NOTE(review): the < 200 threshold is checked against the TOTAL
                # accumulator, not this file's own text, so OCR can be skipped once
                # earlier files produced text — confirm this is intended.
                if len(text_accum) < 200 and HAS_OCR:
                    doc = fitz.open(stream=content, filetype="pdf")
                    # Raised to 20 pages of OCR per document.
                    for i in range(min(len(doc), 20)):
                        ib = doc.load_page(i).get_pixmap(dpi=150).tobytes("jpeg")
                        text_accum += ocr_proc(ib) + "\n"
                        # Only the first 3 pages are also sent to the model as images.
                        if i < 3: images.append({"mime_type": "image/jpeg", "data": base64.b64encode(ib).decode('utf-8')})
            elif fname.endswith(('.jpg','.png','.jpeg')):
                text_accum += ocr_proc(content)
                # NOTE(review): PNG bytes are labeled image/jpeg here — confirm the API tolerates it.
                images.append({"mime_type": "image/jpeg", "data": base64.b64encode(content).decode('utf-8')})
            elif fname.endswith('.docx'):
                doc = docx.Document(io.BytesIO(content))
                text_accum += "\n".join([p.text for p in doc.paragraphs])
        except: pass  # best-effort per file: one bad upload must not abort the batch
    return text_accum, images
# --- 3. MAPS (CASCADE STRATEGY) ---
def get_map(loc):
    """Geocode a free-text property location and return a rendered folium map (HTML string).

    Cascade strategy: try the cleaned full address first, then progressively
    broader fragments ("Barrio, Ciudad", then just "Ciudad") until Nominatim
    returns a hit. Returns None when nothing geocodes or on any error.
    """
    if not loc or len(loc)<4 or "No detect" in loc: return None
    try:
        geo = Nominatim(user_agent="inmoguard_v35_deep")
        # 1. Initial cleanup: drop parentheticals and registry/unit noise words.
        base_clean = re.sub(r'\(.*?\)|Matrícula|FMI|No\.|#|Apartamento|Local|Oficina', '', loc).strip()
        # Build search attempts, most specific to most general.
        queries = []
        queries.append(base_clean) # e.g. "Calle 123, Vereda X, Municipio"
        # Try extracting just municipality/city (assumes "..., Ciudad" format).
        parts = base_clean.split(',')
        if len(parts) > 1:
            queries.append(f"{parts[-2]}, {parts[-1]}") # "Barrio, Ciudad"
            queries.append(parts[-1].strip()) # "Ciudad"
        for q in queries:
            if len(q) < 3: continue
            # Scope every search to Colombia unless it is already present.
            search_q = q if "Colombia" in q else f"{q}, Colombia"
            print(f"🌍 Intentando Mapa: {search_q}")
            l = geo.geocode(search_q, timeout=4)
            if l:
                # Success: render a marker map centred on the first hit.
                m = folium.Map([l.latitude, l.longitude], zoom_start=14)
                folium.Marker([l.latitude, l.longitude], popup=loc, icon=folium.Icon(color="red", icon="info-sign")).add_to(m)
                return m.get_root().render()
    except Exception as e: print(f"Error Mapa: {e}")
    return None
# --- 4. AI CORE ---
def clean_json(text):
    """Strip Markdown code fences and return the outermost {...} span of *text*.

    Returns "{}" when no braces are present so json.loads always has input.
    """
    stripped = text.replace("```json", "").replace("```", "")
    start, end = stripped.find('{'), stripped.rfind('}')
    if start == -1 or end == -1:
        return "{}"
    return stripped[start:end + 1]
def analyze(text, imgs):
    """Send extracted document text plus page images to Gemini and return the parsed analysis dict.

    On any failure returns {"error": True, "msg": ...} instead of raising.
    The returned dict is additionally enriched with a 'mapa' key (folium HTML or None).
    """
    url, _ = find_valid_model()
    # PROMPT V35: multiple-FMI hunter — forces the model to list every matricula found.
    # (Prompt text itself is runtime data and stays in Spanish.)
    prompt = (
        "Eres 'InmoGuard AI', Auditor Forense. Tu misión es detectar TODOS los activos."
        "\n\n--- INSTRUCCIÓN CRÍTICA DE FMI ---"
        "Un expediente puede tener MÚLTIPLES Matrículas Inmobiliarias (FMI). Ej: Apartamento + Garaje + Depósito."
        "NO TE DETENGAS EN EL PRIMERO. Escanea todo el texto y extrae CADA FMI que encuentres (formato 000-00000)."
        "En el campo 'fmi', ponlos todos separados por comas."
        "\n\n--- OTRAS INSTRUCCIONES ---"
        "1. SAE (Ley 1708): Aplica lógica FRISCO. Si es SAE y no hay lios, es viable."
        "2. UBICACIÓN: Extrae la dirección física más clara posible (Ciudad, Vereda) para el mapa."
        "3. HISTORIAL: Cadena de dueños hacia atrás."
        "4. CÉDULA CATASTRAL: Extrae el número tal cual."
        "\n\n--- JSON OBLIGATORIO ---"
        """
        {
          "meta": {
            "fmi": "FMI 1, FMI 2, FMI 3...",
            "cedula_catastral": "...",
            "dir_legal": "...",
            "tipo": "..."
          },
          "historial_propiedad": [
            {"fecha": "...", "acto": "...", "detalles": "..."}
          ],
          "propietarios_actuales": [{"nombre": "...", "id": "...", "pct": "..."}],
          "analisis_sae_ley": {
            "estado_proceso": "...",
            "fundamento_legal": "...",
            "viabilidad_comercializacion": "..."
          },
          "semaforo_riesgos": { "juridico": "BAJO/MEDIO/ALTO", "financiero": "...", "fisico": "..." },
          "ambiental": { "restricciones": "...", "autoridad": "..." },
          "fiscal_completo": { "municipal": "...", "departamental": "...", "avaluo": "..." },
          "vur": {
            "anotaciones_detalle": [{"nro": "...", "desc": "...", "estado": "..."}],
            "falsa_tradicion": "..."
          },
          "laft": { "alertas": "..." },
          "val": { "rango": "...", "just": "..." },
          "dic": { "res": "VIABLE / NO VIABLE", "txt": "..." }
        }
        """
    )
    # Request parts: instruction prompt, then document text capped at 900k chars, then inline images.
    parts = [{"text": prompt}, {"text": f"DOCS:\n{text[:900000]}"}]
    for i in imgs: parts.append({"inlineData": {"mimeType": i["mime_type"], "data": i["data"]}})
    # Disable blocking for the dangerous-content category.
    safe = [{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}]
    try:
        # Up to 2 attempts, 3 s apart, to ride out transient API errors.
        for i in range(2):
            res = requests.post(url, json={"contents": [{"parts": parts}], "safetySettings": safe}, headers={'Content-Type': 'application/json'}, timeout=120)
            if res.status_code == 200: break
            time.sleep(3)
        if res.status_code != 200: return {"error": True, "msg": f"Google Error {res.status_code}"}
        # Model output may be fenced/wrapped; clean_json isolates the JSON object.
        data = json.loads(clean_json(res.json()['candidates'][0]['content']['parts'][0]['text']))
        # Attach a rendered folium map for the detected legal address (None if geocoding fails).
        data['mapa'] = get_map(data.get('meta', {}).get('dir_legal', ''))
        return data
    except Exception as e: return {"error": True, "msg": str(e)}
# --- 5. PDF ---
def gen_pdf_from_data(d):
    """Build the forensic-report PDF from the analysis dict *d*; return a rewound BytesIO."""
    buffer = io.BytesIO()
    report = SimpleDocTemplate(buffer, pagesize=letter, topMargin=0.5*inch)
    styles = getSampleStyleSheet()
    story = [Paragraph("INFORME FORENSE V35", styles['Heading1']), Spacer(1, 10)]

    def add_kv(label, value):
        # One bold "label: value" line followed by a small vertical gap.
        story.append(Paragraph(f"<b>{label}:</b> {str(value)}", styles['Normal']))
        story.append(Spacer(1, 3))

    meta = d.get('meta', {})
    add_kv("FMI (Matrículas)", meta.get('fmi'))
    add_kv("Cédula Catastral", meta.get('cedula_catastral'))

    story.append(Paragraph("ANÁLISIS LEY 1708 (SAE)", styles['Heading2']))
    sae_info = d.get('analisis_sae_ley', {})
    add_kv("Concepto", sae_info.get('viabilidad_comercializacion'))
    add_kv("Fundamento", sae_info.get('fundamento_legal'))

    story.append(Paragraph("HISTORIAL", styles['Heading2']))
    history = d.get('historial_propiedad', [])
    if history:
        rows = [['Fecha', 'Acto', 'Detalle']]
        for entry in history:
            rows.append([
                str(entry.get('fecha', '')),
                Paragraph(str(entry.get('acto', '')), styles['Normal']),
                Paragraph(str(entry.get('detalles', '')), styles['Normal']),
            ])
        table = Table(rows, colWidths=[1*inch, 2*inch, 3.5*inch])
        table.setStyle(TableStyle([('GRID', (0, 0), (-1, -1), 0.5, colors.grey)]))
        story.append(table)

    story.append(Paragraph("DICTAMEN FINAL", styles['Heading2']))
    story.append(Paragraph(d.get('dic', {}).get('txt', ''), styles['Normal']))

    report.build(story)
    buffer.seek(0)
    return buffer
# --- ROUTES ---
def analyze_route():
    """Flask handler: run the full multimodal analysis over the uploaded files.

    NOTE(review): no @app.route decorator is visible in this chunk — confirm the
    route is registered elsewhere or was lost in extraction.
    """
    uploaded = request.files.getlist('files')
    extracted_text, extracted_images = extract_multimodal(uploaded)
    return jsonify(analyze(extracted_text, extracted_images))
def print_pdf_route():
    """Flask handler: render the posted analysis JSON as a downloadable PDF.

    NOTE(review): no @app.route decorator is visible in this chunk — confirm routing.
    """
    pdf_bytes = gen_pdf_from_data(request.json).read()
    headers = {'Content-Disposition': 'attachment;filename=Informe_InmoGuard.pdf'}
    return Response(pdf_bytes, mimetype='application/pdf', headers=headers)
def dl_json_route():
    """Flask handler: echo the posted analysis back as a pretty-printed JSON download.

    NOTE(review): no @app.route decorator is visible in this chunk — confirm routing.
    """
    payload = json.dumps(request.json, indent=4, ensure_ascii=False)
    headers = {'Content-Disposition': 'attachment;filename=data.json'}
    return Response(payload, mimetype='application/json', headers=headers)
def index():
    """Serve the single-page UI (index.html from the template folder)."""
    return render_template('index.html')
# NOTE(review): debug=True together with host='0.0.0.0' exposes the Werkzeug debugger
# to the network — acceptable only inside a sandboxed container (e.g. an HF Space); confirm.
if __name__ == '__main__': app.run(debug=True, host='0.0.0.0', port=7860)