theelderemo committed on
Commit
a3b92cd
·
verified ·
1 Parent(s): 92c7fde

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from pathlib import Path
4
+
5
# --- CONFIGURATION ---

# Path inside the Space repo
FILE_PATH = Path("EPS_FILES_20K_NOV2025.csv")

print("⏳ Loading data... (This may take 10–20 seconds)")

# Load the corpus once at import time.
#   df       -- the documents on success, otherwise a one-row placeholder frame
#   text_col -- column whose value is shown as the document body
#   name_col -- column whose value is shown as the document name
# A failed load must not stop the app from starting, so every error is turned
# into the placeholder frame and the UI displays it instead.
try:
    df = pd.read_csv(FILE_PATH, on_bad_lines="skip")
    # Lower-case the headers so the lookups below ignore header casing.
    df.columns = [c.lower() for c in df.columns]

    # Prefer the expected headers; otherwise fall back to the last / first column.
    text_col = "text" if "text" in df.columns else df.columns[-1]
    name_col = "filename" if "filename" in df.columns else df.columns[0]

    print(f"✅ Success! Loaded {len(df)} documents.")

except Exception as e:  # top-level boundary: log the failure and degrade gracefully
    print(f"❌ Error loading file: {e}")
    if isinstance(e, FileNotFoundError):
        _load_error = "File not found. Check path in app.py and that the CSV is in the repo."
    else:
        # Anything else (parse error, bad encoding, empty file, permissions...)
        # is not a missing file, so report the real cause rather than
        # sending the maintainer to check the path.
        _load_error = f"Could not load {FILE_PATH}: {type(e).__name__}: {e}"
    df = pd.DataFrame({"error": [_load_error]})
    text_col = "error"
    name_col = "error"
26
+
27
+
28
+ # --- APP LOGIC ---
29
+
30
def search_documents(query):
    """Return the rows of the module-level ``df`` that mention *query*.

    Every cell is converted to ``str`` and tested for a case-insensitive,
    literal substring match; a row is kept when any of its cells match.

    Args:
        query: Text typed by the user. ``None``, an empty string and a
            whitespace-only string all count as "no query". Surrounding
            whitespace is ignored.

    Returns:
        The first 50 rows of ``df`` when there is no query, otherwise at most
        the first 1000 matching rows.
    """
    needle = (query or "").strip()
    if not needle:
        return df.head(50)

    # regex=False: the query is raw user input, so characters such as "(",
    # "[" or "+" must be matched literally. With the default regex=True an
    # unbalanced "(" raises re.error and "." / "?" match unintended text.
    mask = df.astype(str).apply(
        lambda col: col.str.contains(needle, case=False, na=False, regex=False)
    ).any(axis=1)

    return df[mask].head(1000)
41
+
42
+
43
def display_document(evt: gr.SelectData, current_data):
    """Build the viewer text for the table row that was just selected.

    Args:
        evt: Selection event; ``evt.index[0]`` is used as the positional row
            index into ``current_data``.
        current_data: The frame currently shown in the results table.

    Returns:
        A string with the value of the ``name_col`` column as a header line
        followed by the value of the ``text_col`` column. If anything goes
        wrong while reading the row, an error message string is returned
        instead of raising.
    """
    try:
        # Fetch the selected row once, then read the body and the name from it.
        clicked = current_data.iloc[evt.index[0]]
        body, title = clicked[text_col], clicked[name_col]
        return f"📄 File: {title}\n\n{body}"
    except Exception as e:
        return f"Error retrieving document text: {e}"
52
+
53
+
54
# --- USER INTERFACE ---

with gr.Blocks(title="Epstein Docs Browser", theme=gr.themes.Soft()) as demo:
    # Page heading and a one-line usage hint.
    gr.Markdown("# 📂 Epstein Estate Document Browser")
    gr.Markdown(
        "Search 20,000+ documents. **Click on any row in the table to read the full file below.**"
    )

    # Query row: wide text box with the search button beside it.
    with gr.Row():
        search_box = gr.Textbox(
            label="Search (Keywords, Names, Flight Logs)",
            placeholder="Type here...",
            scale=3,
        )
        search_btn = gr.Button("Search", variant="primary", scale=1)

    # Read-only results grid; its headers mirror the loaded frame's columns.
    results_table = gr.Dataframe(
        headers=df.columns.tolist(),
        datatype="str",
        label="Search Results (Click a row to view)",
        interactive=False,
        wrap=True,
    )

    # Text area that shows the document belonging to the selected row.
    doc_viewer = gr.TextArea(
        label="Document Content",
        lines=20,
        placeholder="Select a document above to read it here...",
    )

    # Clicking the button and pressing Enter in the box run the same search.
    search_wiring = dict(fn=search_documents, inputs=search_box, outputs=results_table)
    search_btn.click(**search_wiring)
    search_box.submit(**search_wiring)

    # Selecting a row sends the table's current contents to the viewer callback.
    results_table.select(fn=display_document, inputs=results_table, outputs=doc_viewer)

# On Spaces, just launch; no share=True
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)