Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
# --- CONFIGURATION ---

# Path inside the Space repo
FILE_PATH = Path("EPS_FILES_20K_NOV2025.csv")


def load_documents(path):
    """Load the document CSV at *path* and choose its name/text columns.

    Column labels are lower-cased. The text column is ``"text"`` when present,
    otherwise the last column; the name column is ``"filename"`` when present,
    otherwise the first column.

    Loading is best-effort so the app can still start: on any failure a
    one-row frame with a single ``"error"`` column is returned instead, and
    both column names are ``"error"``.

    Args:
        path: Location of the CSV file (anything ``pandas.read_csv`` accepts).

    Returns:
        A ``(frame, text_col, name_col)`` tuple.
    """
    try:
        frame = pd.read_csv(path, on_bad_lines="skip")
        frame.columns = [c.lower() for c in frame.columns]
        text = "text" if "text" in frame.columns else frame.columns[-1]
        name = "filename" if "filename" in frame.columns else frame.columns[0]
    except Exception as e:
        # Top-level boundary: keep the UI alive and surface the problem in it.
        print(f"❌ Error loading file: {e}")
        if isinstance(e, FileNotFoundError):
            message = "File not found. Check path in app.py and that the CSV is in the repo."
        else:
            # Do not claim the file is missing when it exists but is unreadable
            # (empty file, parse failure, ...): report the actual reason.
            message = f"Could not load {path}: {e}"
        return pd.DataFrame({"error": [message]}), "error", "error"

    print(f"✅ Success! Loaded {len(frame)} documents.")
    return frame, text, name


print("⏳ Loading data... (This may take 10–20 seconds)")

df, text_col, name_col = load_documents(FILE_PATH)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# --- APP LOGIC ---
|
| 29 |
+
|
| 30 |
+
def search_documents(query):
    """Return the rows of the loaded documents that contain *query*.

    The query is matched as a literal, case-insensitive substring against
    every column of the module-level ``df`` (each cell is converted to
    ``str`` before matching).

    Args:
        query: Text typed into the search box. An empty or missing query
            means "no filter".

    Returns:
        The first 50 rows of ``df`` when *query* is empty; otherwise at most
        the first 1000 rows in which any column contains *query*.
    """
    if not query:
        return df.head(50)

    # regex=False: the query is user-typed plain text. Left as a regex,
    # input such as "(" or "c++" raises re.error and "." / "?" match
    # things the user did not ask for.
    mask = df.astype(str).apply(
        lambda col: col.str.contains(query, case=False, na=False, regex=False)
    ).any(axis=1)

    return df[mask].head(1000)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def display_document(evt: gr.SelectData, current_data):
    """Build the viewer text for the table row that was selected.

    Args:
        evt: Selection event; the first element of ``evt.index`` is used as
            the positional row index.
        current_data: The table contents currently shown, indexed with
            ``.iloc`` (presumably a pandas DataFrame supplied by Gradio;
            verify against the Dataframe component's configuration).

    Returns:
        A string with the document name on the first line followed by its
        text, or an error message string if the lookup fails for any reason.
    """
    try:
        selected = current_data.iloc[evt.index[0]]
        # Look up the text first, then the name.
        body, title = selected[text_col], selected[name_col]
        return f"📄 File: {title}\n\n{body}"
    except Exception as err:
        # Show the failure in the viewer instead of raising into the UI.
        return f"Error retrieving document text: {err}"
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# --- USER INTERFACE ---

with gr.Blocks(title="Epstein Docs Browser", theme=gr.themes.Soft()) as demo:
    # Page heading followed by a one-line usage hint.
    gr.Markdown("# 📂 Epstein Estate Document Browser")
    gr.Markdown("Search 20,000+ documents. **Click on any row in the table to read the full file below.**")

    # The query box and its button sit side by side; `scale` sets their
    # relative sizes.
    with gr.Row():
        search_box = gr.Textbox(
            scale=3,
            label="Search (Keywords, Names, Flight Logs)",
            placeholder="Type here...",
        )
        search_btn = gr.Button("Search", scale=1, variant="primary")

    # Read-only results table whose headers mirror the loaded frame's columns.
    results_table = gr.Dataframe(
        label="Search Results (Click a row to view)",
        headers=list(df.columns),
        datatype="str",
        wrap=True,
        interactive=False,
    )

    # Viewer for the document picked from the table.
    doc_viewer = gr.TextArea(
        label="Document Content",
        placeholder="Select a document above to read it here...",
        lines=20,
    )

    # The button click and the textbox submit both run the same search.
    search_btn.click(fn=search_documents, inputs=[search_box], outputs=[results_table])
    search_box.submit(fn=search_documents, inputs=[search_box], outputs=[results_table])

    # A selection in the table sends the table's current contents to
    # display_document, whose result is written to the viewer.
    results_table.select(fn=display_document, inputs=[results_table], outputs=[doc_viewer])

# On Spaces, just launch; no share=True
if __name__ == "__main__":
    demo.launch(server_port=7860, server_name="0.0.0.0")
|