Spaces:

AKMESSI
/

archive-explorer

Sleeping

App Files Files Community

AKMESSI committed on Dec 20, 2025

Commit

d0a567e

1 Parent(s): 5c0acd3

initial commit

Browse files

Files changed (10) hide show

.gitattributes +4 -0
.gitignore +14 -0
Dockerfile +33 -0
app.py +294 -0
ingest.py +152 -0
ingest_visual.py +87 -0
requirements.txt +0 -0
templates/index.html +88 -0
templates/partials/results.html +28 -0
templates/viewer.html +134 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,4 @@

+*.pdf filter=lfs diff=lfs merge=lfs -text
+*.db filter=lfs diff=lfs merge=lfs -text
+*.lancedb filter=lfs diff=lfs merge=lfs -text
+data/**/* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,14 @@

+# --- IGNORE HUGE DATA ---
+data/
+*.zip
+*.db
+*.lancedb
+*.pdf
+# --- IGNORE SYSTEM JUNK ---
+__pycache__/
+*.pyc
+venv/
+.venv/
+.DS_Store
+.env

Dockerfile ADDED Viewed

	@@ -0,0 +1,33 @@

+# Use Python 3.10 as base
+FROM python:3.10
+# 1. Install System Dependencies (Poppler for images)
+# NOTE(review): the Python code in this commit renders PDFs with PyMuPDF (fitz);
+# confirm poppler-utils / ffmpeg are still required before keeping them.
+USER root
+RUN apt-get update && apt-get install -y \
+    poppler-utils \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    && rm -rf /var/lib/apt/lists/*
+# 2. Set up a new user "user" (Security requirement for HF Spaces)
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# 3. Set Working Directory
+WORKDIR $HOME/app
+# 4. Copy the dependency list on its own so the pip layer below is only
+#    rebuilt when requirements.txt changes
+COPY --chown=user requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# 5. Copy the Application Code
+#    (data/ is git-ignored; app.py downloads data.zip and extracts it at startup)
+COPY --chown=user . .
+# 6. Expose the Port (Hugging Face expects port 7860)
+EXPOSE 7860
+# 7. Start the App
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,294 @@

+import os
+import sqlite3
+import lancedb
+from fastapi import FastAPI, Request, HTTPException
+from fastapi.responses import HTMLResponse, Response
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from sentence_transformers import SentenceTransformer
+import uvicorn
+import fitz # PyMuPDF
+from PIL import Image, ImageDraw, ImageFont
+import io
+import zipfile
+from huggingface_hub import hf_hub_download
+app = FastAPI()
+# --- CONFIGURATION & UNZIPPING ---
+# Everything below runs at import time, i.e. before the server starts serving.
+print("📥 Downloading Data from Hugging Face Dataset...")
+# 1. Download the ZIP file
+zip_path = hf_hub_download(
+    repo_id="AKMESSI/epstein-data",
+    filename="data.zip",
+    repo_type="dataset"
+)
+# 2. Extract it (if not already extracted)
+# Only the existence of the "data" directory is checked, so a partially
+# extracted folder from an interrupted run is treated as complete.
+DATA_DIR = "data"
+if not os.path.exists(DATA_DIR):
+    print("📦 Extracting data.zip... (This takes a moment)")
+    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+        zip_ref.extractall(".") # Extracts to current folder
+    print("✅ Extraction Complete!")
+else:
+    print("✅ Data already extracted.")
+# 3. Set DB Paths
+# The zip contains "data/", so we look inside it
+# DB_NAME is resolved relative to the working directory, not DATA_DIR.
+DB_NAME = "epstein.db" # This should ideally be uploaded separately if it's not in the zip
+# If your DB is inside the data folder, update this path:
+# DB_NAME = os.path.join(DATA_DIR, "epstein.db")
+VECTOR_DB_DIR = os.path.join(DATA_DIR, "lancedb")
+# --- DATABASE INITIALIZATION ---
+def init_db():
+    conn = sqlite3.connect(DB_NAME)
+    cursor = conn.cursor()
+    # 1. Main Pages
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS pages (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            filename TEXT,
+            filepath TEXT,
+            page_number INTEGER,
+            text_content TEXT
+        )
+    """)
+    # 2. FTS Virtual Table
+    cursor.execute("""
+        CREATE VIRTUAL TABLE IF NOT EXISTS pages_fts USING fts5(
+            filename,
+            text_content,
+            content='pages',
+            content_rowid='id'
+        )
+    """)
+    # 3. Triggers
+    cursor.execute("""
+        CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
+            INSERT INTO pages_fts(rowid, filename, text_content) VALUES (new.id, new.filename, new.text_content);
+        END;
+    """)
+    # 4. Analytics
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS search_analytics (
+            term TEXT PRIMARY KEY,
+            count INTEGER DEFAULT 1,
+            last_searched TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        )
+    """)
+    conn.commit()
+    conn.close()
+init_db()
+# --- CONNECT TO DB HELPERS ---
+def get_db_connection():
+    conn = sqlite3.connect(DB_NAME)
+    conn.row_factory = sqlite3.Row
+    return conn
+# --- LOAD AI MODELS ---
+print("Loading Text AI Model...")
+text_model = SentenceTransformer('all-MiniLM-L6-v2')
+print("Loading Visual AI Model (CLIP)...")
+visual_model = SentenceTransformer('clip-ViT-B-32')
+# Connect to LanceDB
+ldb = lancedb.connect(VECTOR_DB_DIR)
+# Open Tables
+try:
+    tbl = ldb.open_table("pages") # Text Vectors
+except:
+    tbl = None
+try:
+    visual_tbl = ldb.open_table("visuals") # Visual Vectors
+except:
+    visual_tbl = None
+# --- TEMPLATES ---
+templates = Jinja2Templates(directory="templates")
+app.mount("/files", StaticFiles(directory=DATA_DIR), name="files")
+# --- ROUTES ---
+@app.get("/", response_class=HTMLResponse)
+async def home(request: Request):
+    conn = get_db_connection()
+    c = conn.cursor()
+    try:
+        c.execute("SELECT term, count FROM search_analytics ORDER BY count DESC LIMIT 5")
+        trends = c.fetchall()
+    except:
+        trends = []
+    conn.close()
+    return templates.TemplateResponse("index.html", {"request": request, "trends": trends})
+@app.get("/search", response_class=HTMLResponse)
+async def search(request: Request, q: str, searchmode: str = "text"):
+    if not q: return ""
+    # 1. ANALYTICS
+    try:
+        conn = get_db_connection()
+        c = conn.cursor()
+        c.execute("""
+            INSERT INTO search_analytics (term, count, last_searched)
+            VALUES (?, 1, CURRENT_TIMESTAMP)
+            ON CONFLICT(term) DO UPDATE SET count = count + 1, last_searched = CURRENT_TIMESTAMP
+        """, (q.lower().strip(),))
+        conn.commit()
+        conn.close()
+    except Exception as e:
+        print(f"Analytics error: {e}")
+    results = []
+    seen_files = set()
+    # --- MODE 1: VISUAL SEARCH ---
+    if searchmode == "visual" and visual_tbl:
+        try:
+            # Encode text query to Visual Vector Space
+            query_vec = visual_model.encode(q)
+            vec_results = visual_tbl.search(query_vec).limit(20).to_list()
+            for res in vec_results:
+                results.append({
+                    "type": "Visual Match",
+                    "filename": res['filename'],
+                    "page": res['page'],
+                    "text": f"Image match for '{q}'",
+                    "score": 1.0 - res['_distance']
+                })
+        except Exception as e:
+            print(f"Visual search error: {e}")
+        return templates.TemplateResponse("partials/results.html", {"request": request, "results": results})
+    # --- MODE 2: TEXT/HYBRID SEARCH ---
+    # A. SQLite Keyword Search
+    conn = get_db_connection()
+    cursor = conn.cursor()
+    cursor.execute("""
+        SELECT p.filename, p.page_number, snippet(pages_fts, 1, '<b>', '</b>', '...', 20) as snippet
+        FROM pages_fts
+        JOIN pages p ON pages_fts.rowid = p.id
+        WHERE pages_fts MATCH ?
+        ORDER BY rank LIMIT 10
+    """, (q,))
+    rows = cursor.fetchall()
+    conn.close()
+    for row in rows:
+        results.append({
+            "type": "Exact Match",
+            "filename": row['filename'],
+            "page": row['page_number'],
+            "text": row['snippet'],
+            "score": 1.0
+        })
+        seen_files.add(f"{row['filename']}-{row['page_number']}")
+    # B. LanceDB Text Concept Search
+    if tbl:
+        try:
+            vector_query = text_model.encode(q)
+            vec_results = tbl.search(vector_query).limit(10).to_list()
+            for res in vec_results:
+                unique_id = f"{res['filename']}-{res['page_number']}"
+                if unique_id not in seen_files:
+                    snippet = res['text'][:200] + "..."
+                    results.append({
+                        "type": "Concept Match",
+                        "filename": res['filename'],
+                        "page": res['page_number'],
+                        "text": snippet,
+                        "score": 1.0 - res['_distance']
+                    })
+        except:
+            pass
+    return templates.TemplateResponse("partials/results.html", {"request": request, "results": results})
+@app.get("/view/{filename}", response_class=HTMLResponse)
+async def view_document(request: Request, filename: str, page: int = 1):
+    filepath = None
+    for root, dirs, files in os.walk(DATA_DIR):
+        if filename in files:
+            rel_path = os.path.relpath(os.path.join(root, filename), DATA_DIR)
+            filepath = f"/files/{rel_path.replace(os.sep, '/')}"
+            break
+    if not filepath: raise HTTPException(status_code=404, detail="File not found")
+    return templates.TemplateResponse("viewer.html", {"request": request, "filename": filename, "filepath": filepath, "page": page})
+# --- API ENDPOINTS ---
+@app.get("/api/snap/{filename}/{page}")
+async def snap_evidence(filename: str, page: int):
+    # Find file
+    filepath = None
+    for root, dirs, files in os.walk(DATA_DIR):
+        if filename in files:
+            filepath = os.path.join(root, filename)
+            break
+    if not filepath: raise HTTPException(status_code=404, detail="File not found")
+    try:
+        # Render
+        doc = fitz.open(filepath)
+        pdf_page = doc.load_page(page - 1)
+        pix = pdf_page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
+        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+        doc.close()
+        # Watermark
+        draw = ImageDraw.Draw(img)
+        width, height = img.size
+        footer_h = 60
+        draw.rectangle([(0, height - footer_h), (width, height)], fill="#000000")
+        try: font = ImageFont.truetype("arial.ttf", 24)
+        except: font = ImageFont.load_default()
+        text = f"EVIDENCE: {filename} | PG {page} | SOURCE: EPSTEIN ARCHIVE"
+        draw.text((20, height - 40), text, fill="white", font=font)
+        # Return
+        img_byteyb = io.BytesIO()
+        img.save(img_byteyb, format='PNG')
+        img_byteyb.seek(0)
+        return Response(content=img_byteyb.getvalue(), media_type="image/png")
+    except Exception as e:
+        print(f"Snap error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/api/similar/{filename}/{page}")
+async def similar_evidence(filename: str, page: int):
+    if not tbl: return []
+    try:
+        current_page = tbl.search().where(f"filename = '{filename}' AND page_number = {page}").limit(1).to_list()
+        if not current_page: return []
+        vector = current_page[0]['vector']
+        results = tbl.search(vector).limit(6).to_list()
+        similar = []
+        for res in results:
+            if res['filename'] == filename and res['page_number'] == page: continue
+            similar.append({
+                "filename": res['filename'],
+                "page": res['page_number'],
+                "snippet": res['text'][:150] + "..."
+            })
+        return similar
+    except:
+        return []
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)

ingest.py ADDED Viewed

	@@ -0,0 +1,152 @@

+import os
+import sqlite3
+import lancedb
+import PyPDF2
+from sentence_transformers import SentenceTransformer
+from lancedb.pydantic import LanceModel, Vector
+import warnings
+# Suppress warnings
+warnings.filterwarnings("ignore")
+# CONFIGURATION
+# Paths are relative to the working directory the script is started from.
+DATA_DIR = "data"
+DB_NAME = "epstein.db"
+VECTOR_DB_DIR = "data/lancedb"
+print("Initializing models and databases...")
+# 1. Setup SQLite (For Keyword Search)
+# The connection and cursor are module-level and shared by process_pdf().
+conn = sqlite3.connect(DB_NAME)
+cursor = conn.cursor()
+# Create main table and FTS (Full Text Search) virtual table
+cursor.execute("""
+    CREATE TABLE IF NOT EXISTS pages (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        filename TEXT,
+        filepath TEXT,
+        page_number INTEGER,
+        text_content TEXT
+    )
+""")
+# External-content FTS5 index: it stores no copy of the text and reads it from
+# "pages" by rowid.
+cursor.execute("""
+    CREATE VIRTUAL TABLE IF NOT EXISTS pages_fts USING fts5(
+        filename,
+        text_content,
+        content='pages',
+        content_rowid='id'
+    )
+""")
+# Keep the FTS index in sync on insert. Only inserts are mirrored; there is no
+# trigger for updates or deletes.
+cursor.execute("""
+    CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
+        INSERT INTO pages_fts(rowid, filename, text_content) VALUES (new.id, new.filename, new.text_content);
+    END;
+""")
+conn.commit()
+# 2. Setup LanceDB (For Vector/AI Search)
+model = SentenceTransformer('all-MiniLM-L6-v2')
+ldb = lancedb.connect(VECTOR_DB_DIR)
+# --- THE FIX: Use Pydantic to define the Schema ---
+class PageSchema(LanceModel):
+    vector: Vector(384) # 384 is the dimension of all-MiniLM-L6-v2
+    text: str
+    filename: str
+    page_number: int
+    filepath: str
+# Create or Open the table using the Class Schema
+try:
+    tbl = ldb.open_table("pages")
+except:
+    tbl = ldb.create_table("pages", schema=PageSchema)
+# --------------------------------------------------
+def chunk_text(text, chunk_size=500):
+    """Split long page text into smaller chunks for better vector search"""
+    words = text.split()
+    chunks = []
+    current_chunk = []
+    current_length = 0
+    for word in words:
+        current_length += len(word) + 1
+        current_chunk.append(word)
+        if current_length >= chunk_size:
+            chunks.append(" ".join(current_chunk))
+            current_chunk = []
+            current_length = 0
+    if current_chunk:
+        chunks.append(" ".join(current_chunk))
+    return chunks
+def process_pdf(filepath):
+    filename = os.path.basename(filepath)
+    print(f"Processing: {filename}...")
+    try:
+        with open(filepath, 'rb') as f:
+            reader = PyPDF2.PdfReader(f)
+            num_pages = len(reader.pages)
+            for i in range(num_pages):
+                try:
+                    page = reader.pages[i]
+                    text = page.extract_text()
+                    # Junk Filter: Skip pages with too little text
+                    if not text or len(text.strip()) < 50:
+                        continue
+                    clean_text = text.replace('\x00', '') # Remove null bytes
+                    # A. Insert into SQLite (Keyword Search)
+                    cursor.execute(
+                        "INSERT INTO pages (filename, filepath, page_number, text_content) VALUES (?, ?, ?, ?)",
+                        (filename, filepath, i + 1, clean_text)
+                    )
+                    # B. Insert into LanceDB (Vector Search)
+                    chunks = chunk_text(clean_text)
+                    vectors = model.encode(chunks)
+                    data_to_add = []
+                    for chunk, vector in zip(chunks, vectors):
+                        data_to_add.append({
+                            "vector": vector,
+                            "text": chunk,
+                            "filename": filename,
+                            "page_number": i + 1,
+                            "filepath": filepath
+                        })
+                    if data_to_add:
+                        tbl.add(data_to_add)
+                except Exception as e:
+                    print(f"  Error on page {i+1}: {e}")
+            conn.commit()
+    except Exception as e:
+        print(f"Failed to read {filename}: {e}")
+def main():
+    print(f"Scanning directory: {DATA_DIR}")
+    pdf_count = 0
+    for root, dirs, files in os.walk(DATA_DIR):
+        for file in files:
+            if file.lower().endswith('.pdf'):
+                full_path = os.path.join(root, file)
+                process_pdf(full_path)
+                pdf_count += 1
+    print(f"Done! Processed {pdf_count} PDF files.")
+    print("Run 'python app.py' next to start the server.")
+if __name__ == "__main__":
+    main()

ingest_visual.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import os
+import lancedb
+import fitz # PyMuPDF (The replacement for Poppler)
+from sentence_transformers import SentenceTransformer
+from lancedb.pydantic import LanceModel, Vector
+from PIL import Image
+import warnings
+# Suppress warnings
+warnings.filterwarnings("ignore")
+# --- CONFIGURATION ---
+DATA_DIR = "data"
+VECTOR_DB_DIR = "data/lancedb"
+print("Loading CLIP Model (Visual Intelligence)...")
+model = SentenceTransformer('clip-ViT-B-32')
+# Connect to DB
+ldb = lancedb.connect(VECTOR_DB_DIR)
+class VisualSchema(LanceModel):
+    vector: Vector(512)
+    filename: str
+    page: int
+    filepath: str
+# Create or Open the table
+try:
+    tbl = ldb.open_table("visuals")
+except:
+    tbl = ldb.create_table("visuals", schema=VisualSchema)
+def process_pdf_visuals(filepath):
+    filename = os.path.basename(filepath)
+    print(f"👀 Scanning visuals: {filename}...")
+    try:
+        # OPEN PDF WITH PYMUPDF (No Poppler needed)
+        doc = fitz.open(filepath)
+        data_to_add = []
+        for i, page in enumerate(doc):
+            try:
+                # Render page to image (RGB)
+                # matrix=fitz.Matrix(0.5, 0.5) scales it down for speed (approx 72-100 DPI)
+                pix = page.get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
+                # Convert to PIL Image
+                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+                # VISUAL EMBEDDING
+                vector = model.encode(img)
+                data_to_add.append({
+                    "vector": vector,
+                    "filename": filename,
+                    "page": i + 1,
+                    "filepath": filepath
+                })
+                if len(data_to_add) >= 10:
+                    tbl.add(data_to_add)
+                    data_to_add = []
+            except Exception as e:
+                # Skip pages that fail to render
+                continue
+        if data_to_add:
+            tbl.add(data_to_add)
+        doc.close()
+    except Exception as e:
+        print(f"Skipping {filename}: {e}")
+def main():
+    print("Starting Visual Ingestion...")
+    for root, dirs, files in os.walk(DATA_DIR):
+        for file in files:
+            if file.lower().endswith('.pdf'):
+                process_pdf_visuals(os.path.join(root, file))
+    print("Visual Indexing Complete!")
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

Binary file (2.39 kB). View file

templates/index.html ADDED Viewed

	@@ -0,0 +1,88 @@

+<!DOCTYPE html>
+<html lang="en" class="dark">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Epstein Archive Explorer</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+    <script src="https://unpkg.com/htmx.org@1.9.10"></script>
+    <style>
+        .highlight { background-color: #fde047; color: black; padding: 2px 4px; border-radius: 2px; }
+        b { background-color: #fde047; color: black; font-weight: normal; }
+        /* Custom Radio Button Styling */
+        .mode-radio:checked + div {
+            background-color: #2563eb;
+            color: white;
+            border-color: #2563eb;
+        }
+        .mode-radio:checked + div.visual-mode {
+            background-color: #9333ea; /* Purple for Visual */
+            border-color: #9333ea;
+        }
+    </style>
+</head>
+<body class="bg-slate-900 text-slate-100 min-h-screen font-sans">
+    <div class="max-w-4xl mx-auto pt-16 px-4">
+        <h1 class="text-5xl font-bold text-center mb-2 bg-gradient-to-r from-red-500 to-orange-500 bg-clip-text text-transparent">
+            ARCHIVE EXPLORER
+        </h1>
+        <p class="text-center text-slate-400 mb-8">
+            Indexed <span class="text-white font-mono">4,085</span> Documents • Visual AI Active
+        </p>
+        <div class="flex justify-center gap-4 mb-6">
+            <label class="cursor-pointer">
+                <input type="radio" name="searchmode" value="text" class="mode-radio sr-only" checked
+                       onchange="htmx.trigger('#search-input', 'search')">
+                <div class="px-6 py-2 bg-slate-800 border border-slate-700 rounded-full transition text-slate-400 font-medium hover:border-blue-500">
+                    📄 Text Search
+                </div>
+            </label>
+            <label class="cursor-pointer">
+                <input type="radio" name="searchmode" value="visual" class="mode-radio sr-only"
+                       onchange="htmx.trigger('#search-input', 'search')">
+                <div class="visual-mode px-6 py-2 bg-slate-800 border border-slate-700 rounded-full transition text-slate-400 font-medium hover:border-purple-500">
+                    👁️ Visual AI
+                </div>
+            </label>
+        </div>
+        <div class="relative group z-10">
+            <div class="absolute -inset-1 bg-gradient-to-r from-red-600 to-orange-600 rounded-lg blur opacity-25 group-hover:opacity-75 transition duration-1000 group-hover:duration-200"></div>
+            <input
+                id="search-input"
+                type="text"
+                name="q"
+                class="relative w-full bg-slate-800 text-white text-xl p-4 rounded-lg border border-slate-700 focus:outline-none focus:border-red-500 placeholder-slate-500 shadow-xl"
+                placeholder="Search evidence..."
+                hx-get="/search"
+                hx-include="[name='searchmode']"
+                hx-trigger="keyup changed delay:300ms, search"
+                hx-target="#results-area"
+                autocomplete="off"
+            >
+        </div>
+        {% if trends %}
+        <div class="flex flex-wrap justify-center gap-2 mt-6 text-sm">
+            <span class="text-xs text-slate-500 uppercase font-bold tracking-widest mr-2 pt-1">Trending:</span>
+            {% for trend in trends %}
+            {# Terms come from user searches. Keep the term in a data attribute and read it from there: inside the inline script string, a quote in the term would end the JS literal and run as code. #}
+            <span class="px-3 py-1 bg-slate-800 rounded-full border border-slate-700 text-xs text-red-400 cursor-pointer hover:bg-slate-700 hover:text-white transition"
+                  data-term="{{ trend.term }}"
+                  onclick="document.getElementById('search-input').value=this.dataset.term; htmx.trigger('#search-input', 'search')">
+                🔥 {{ trend.term }}
+            </span>
+            {% endfor %}
+        </div>
+        {% endif %}
+    </div>
+    <div id="results-area" class="max-w-4xl mx-auto mt-10 px-4 pb-20 space-y-4">
+        <div class="text-center text-slate-600 italic mt-20">
+            Select a mode and start typing to uncover evidence...
+        </div>
+    </div>
+</body>
+</html>

templates/partials/results.html ADDED Viewed

	@@ -0,0 +1,28 @@

+{% if not results %}
+    <div class="text-center text-slate-500 py-10">No documents found matching that query.</div>
+{% endif %}
+{% for result in results %}
+{# File names may contain spaces, '#', '?' or '%', so percent-encode the path segment. #}
+<a href="/view/{{ result.filename|urlencode }}?page={{ result.page }}" target="_blank" class="block group">
+    <div class="bg-slate-800 p-5 rounded-lg border border-slate-700 hover:border-red-500 transition shadow-lg relative overflow-hidden">
+        <div class="absolute top-0 right-0 p-2">
+            {% if result.type == 'Exact Match' %}
+                <span class="bg-blue-900 text-blue-200 text-xs font-bold px-2 py-1 rounded uppercase tracking-wider">Exact Keyword</span>
+            {% else %}
+                <span class="bg-purple-900 text-purple-200 text-xs font-bold px-2 py-1 rounded uppercase tracking-wider">AI Concept</span>
+            {% endif %}
+        </div>
+        <div class="flex items-center gap-3 mb-2">
+            <svg class="w-5 h-5 text-red-500" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"></path></svg>
+            <h3 class="font-bold text-lg text-slate-200 group-hover:text-red-400 transition">{{ result.filename }}</h3>
+            <span class="text-slate-500 text-sm">Page {{ result.page }}</span>
+        </div>
+        <p class="text-slate-400 text-sm leading-relaxed pl-8 border-l-2 border-slate-700">
+            {# Rendered with |safe so <b> keyword highlights work; any untrusted text placed in result.text must be HTML-escaped by the /search handler. #}
+            ...{{ result.text|safe }}...
+        </p>
+    </div>
+</a>
+{% endfor %}

templates/viewer.html ADDED Viewed

	@@ -0,0 +1,134 @@

+<!DOCTYPE html>
+<html lang="en" class="bg-slate-900 h-screen">
+<head>
+    <meta charset="UTF-8">
+    <title>{{ filename }} - Page {{ page }}</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+    <style>
+    .detective-active {
+        filter: contrast(175%) brightness(90%) grayscale(100%) invert(0%);
+    }
+    .iframe-container {
+        transition: filter 0.3s ease;
+    }
+    </style>
+</head>
+<body class="h-screen flex flex-col overflow-hidden">
+    <div class="h-14 bg-slate-800 border-b border-slate-700 flex items-center justify-between px-4 shrink-0 z-10">
+        <div class="flex items-center gap-4">
+            <a href="/" class="text-slate-400 hover:text-white transition">← Back to Search</a>
+            <h1 class="font-bold text-white truncate max-w-md">{{ filename }}</h1>
+            <span class="bg-red-600 text-white text-xs px-2 py-1 rounded">Page {{ page }}</span>
+        </div>
+        <div>
+            <!-- Toolbar actions. The Detective Mode button used to sit inside <head>, which is invalid markup; it lives here next to Snap Evidence. -->
+            <div class="flex items-center gap-3">
+                <button onclick="snapEvidence()"
+                    class="bg-red-600 hover:bg-red-700 text-white px-4 py-2 rounded shadow-lg font-bold flex items-center gap-2 transition">
+                    <svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M3 9a2 2 0 012-2h.93a2 2 0 001.664-.89l.812-1.22A2 2 0 0110.07 4h3.86a2 2 0 011.664.89l.812 1.22A2 2 0 0018.07 7H19a2 2 0 012 2v9a2 2 0 01-2 2H5a2 2 0 01-2-2V9z"></path><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 13a3 3 0 11-6 0 3 3 0 016 0z"></path></svg>
+                    Snap Evidence
+                </button>
+                <button onclick="toggleDetective()"
+                    class="bg-yellow-600 hover:bg-yellow-700 text-white px-3 py-1.5 rounded text-sm font-bold flex items-center gap-2 transition">
+                    <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z"></path></svg>
+                    Detective Mode
+                </button>
+            </div>
+<div id="snapModal" class="fixed inset-0 bg-black/90 hidden items-center justify-center z-50 p-4" onclick="closeModal()">
+    <div class="max-w-4xl w-full bg-slate-900 rounded-lg overflow-hidden shadow-2xl border border-slate-700" onclick="event.stopPropagation()">
+        <div class="p-4 border-b border-slate-700 flex justify-between items-center">
+            <h3 class="text-white font-bold">Evidence Receipt Generated</h3>
+            <button onclick="closeModal()" class="text-slate-400 hover:text-white">✕</button>
+        </div>
+        <div class="p-4 bg-black flex justify-center">
+            <img id="evidenceImg" src="" class="max-h-[70vh] object-contain border border-slate-800" />
+        </div>
+        <div class="p-4 flex gap-3 justify-end bg-slate-800">
+            <a id="downloadLink" href="#" download="evidence.png" class="bg-slate-700 hover:bg-slate-600 text-white px-4 py-2 rounded">Download</a>
+            <a id="twitterLink" href="#" target="_blank" class="bg-[#1DA1F2] hover:bg-[#1a91da] text-white px-4 py-2 rounded font-bold">Post to X</a>
+        </div>
+    </div>
+</div>
+<div class="absolute right-0 top-14 bottom-0 w-64 bg-slate-900 border-l border-slate-700 p-4 overflow-y-auto" id="related-panel">
+    <h3 class="text-xs font-bold text-slate-500 uppercase mb-4">Related Evidence (AI)</h3>
+    <div id="similar-results">
+        <div class="animate-pulse text-xs text-slate-600">Loading AI analysis...</div>
+    </div>
+</div>
+<script>
+// Load similar pages automatically
+fetch(`/api/similar/{{ filename }}/{{ page }}`)
+    .then(r => r.json())
+    .then(data => {
+        const container = document.getElementById('similar-results');
+        if(data.length === 0) {
+            container.innerHTML = '<div class="text-xs text-slate-600">No related links found.</div>';
+            return;
+        }
+        container.innerHTML = data.map(item => `
+            <a href="/view/${item.filename}?page=${item.page}" class="block mb-3 p-3 bg-slate-800 rounded hover:bg-slate-700 border border-slate-700 hover:border-blue-500 transition">
+                <div class="text-xs font-bold text-slate-300 truncate">${item.filename}</div>
+                <div class="text-[10px] text-blue-400 mb-1">Page ${item.page}</div>
+                <div class="text-[10px] text-slate-500 leading-tight">${item.snippet}</div>
+            </a>
+        `).join('');
+    });
+</script>
+<script>
+async function snapEvidence() {
+    const btn = document.querySelector('button');
+    const originalText = btn.innerHTML;
+    btn.innerHTML = "Generating...";
+    btn.disabled = true;
+    try {
+        // Fetch the image from our new Python endpoint
+        const response = await fetch(`/api/snap/{{ filename }}/{{ page }}`);
+        const blob = await response.blob();
+        const url = URL.createObjectURL(blob);
+        // Setup Modal
+        const img = document.getElementById('evidenceImg');
+        img.src = url;
+        const dl = document.getElementById('downloadLink');
+        dl.href = url;
+        dl.download = `Epstein_Evidence_{{ filename }}_Pg{{ page }}.png`;
+        const tw = document.getElementById('twitterLink');
+        const text = encodeURIComponent(`Found in the Epstein Archive: "{{ filename }}" Page {{ page }}. \n\nIndexed via Archive Explorer.`);
+        tw.href = `https://twitter.com/intent/tweet?text=${text}`;
+        // Show Modal
+        document.getElementById('snapModal').classList.remove('hidden');
+        document.getElementById('snapModal').classList.add('flex');
+    } catch (e) {
+        alert("Error generating snap. Is Poppler installed?");
+    } finally {
+        btn.innerHTML = originalText;
+        btn.disabled = false;
+    }
+}
+function closeModal() {
+    document.getElementById('snapModal').classList.add('hidden');
+    document.getElementById('snapModal').classList.remove('flex');
+}
+function toggleDetective() {
+    const iframe = document.querySelector('iframe');
+    iframe.classList.toggle('detective-active');
+}
+</script>
+        </div>
+    </div>
+    <iframe src="{{ filepath }}#page={{ page }}" class="w-full h-full border-none bg-slate-500"></iframe>
+</body>
+</html>