ohmygaugh committed on
Commit
d4ee34f
·
1 Parent(s): 1e03b0a

Fix: Add missing session state logic and file upload processing

Browse files
Files changed (1) hide show
  1. app.py +101 -4
app.py CHANGED
@@ -34,7 +34,102 @@ st.title("Entity Resolution on CSV (Network Graph)")
34
  # SIDEBAR: CSV UPLOAD
35
  # ----------------------
36
  st.sidebar.header("Upload CSV for Entity Resolution")
37
- uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type=["csv"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  similarity_threshold = st.sidebar.slider(
40
  "Similarity Threshold",
@@ -148,10 +243,12 @@ def find_connected_components_manual(nodes, edges):
148
  # ----------------------
149
  # LOAD CSV & PROCESS
150
  # ----------------------
151
- if uploaded_file is not None:
152
- st.markdown("### Preview of Uploaded CSV Data")
153
- df = pd.read_csv(uploaded_file)
 
154
  st.dataframe(df.head(10))
 
155
 
156
  # Provide a "Run Entity Resolution" button
157
  if st.button("Run Entity Resolution"):
 
34
  # SIDEBAR: CSV UPLOAD
35
  # ----------------------
36
  st.sidebar.header("Upload CSV for Entity Resolution")
37
+ uploaded_file = st.sidebar.file_uploader(
38
+ "📁 Choose a CSV file",
39
+ type=["csv"],
40
+ help="Drag and drop your CSV file here or click to browse",
41
+ accept_multiple_files=False,
42
+ key="csv_uploader",
43
+ label_visibility="visible"
44
+ )
45
+
46
+ # File upload status info
47
+ if uploaded_file is None:
48
+ st.sidebar.info("👆 **Drag & drop** your CSV file above or click to browse")
49
+ st.sidebar.markdown("**Supported formats:** `.csv` files")
50
+ st.sidebar.markdown("**Max size:** 200MB")
51
+
52
+ # Initialize session state for data
53
+ if 'uploaded_data_df' not in st.session_state:
54
+ st.session_state.uploaded_data_df = None
55
+ if 'last_uploaded_file' not in st.session_state:
56
+ st.session_state.last_uploaded_file = None
57
+
58
+ # Enhanced file upload processing
59
+ if uploaded_file is not None:
60
+ try:
61
+ # Get file info
62
+ file_details = {
63
+ "filename": uploaded_file.name,
64
+ "size": uploaded_file.size,
65
+ "type": uploaded_file.type
66
+ }
67
+
68
+ st.sidebar.success(f"📁 **{file_details['filename']}** ({file_details['size']} bytes)")
69
+
70
+ # Process immediately if it's a new file
71
+ file_id = f"{file_details['filename']}_{file_details['size']}"
72
+
73
+ if 'current_file_id' not in st.session_state:
74
+ st.session_state.current_file_id = None
75
+
76
+ if st.session_state.current_file_id != file_id:
77
+ st.sidebar.info("🔄 Processing uploaded file...")
78
+
79
+ try:
80
+ # Read the file
81
+ if uploaded_file.type == "text/csv" or uploaded_file.name.endswith('.csv'):
82
+ df = pd.read_csv(uploaded_file)
83
+ else:
84
+ # Try reading as CSV anyway
85
+ df = pd.read_csv(uploaded_file)
86
+
87
+ # Store in session state
88
+ st.session_state.uploaded_data_df = df
89
+ st.session_state.current_file_id = file_id
90
+ st.session_state.last_uploaded_file = uploaded_file.name
91
+
92
+ st.sidebar.success(f"✅ **Successfully loaded!**")
93
+ st.sidebar.metric("📊 Dataset", f"{len(df)} rows × {len(df.columns)} cols")
94
+
95
+ except Exception as e:
96
+ st.sidebar.error(f"❌ **Error:** {str(e)}")
97
+ st.sidebar.info("💡 Make sure your file is a valid CSV format")
98
+ st.session_state.uploaded_data_df = None
99
+ else:
100
+ st.sidebar.success("✅ **File already loaded**")
101
+ if st.session_state.uploaded_data_df is not None:
102
+ df = st.session_state.uploaded_data_df
103
+ st.sidebar.metric("📊 Current Dataset", f"{len(df)} rows × {len(df.columns)} cols")
104
+
105
+ except Exception as e:
106
+ st.sidebar.error(f"❌ **Upload Error:** {str(e)}")
107
+ st.session_state.uploaded_data_df = None
108
+
109
+ # Show current data status
110
+ if st.session_state.uploaded_data_df is not None:
111
+ st.sidebar.write(f"**Current Data:** {len(st.session_state.uploaded_data_df)} rows loaded")
112
+ else:
113
+ st.sidebar.write("**Current Data:** None")
114
+
115
+ # Clear data button
116
+ if st.sidebar.button("🗑️ Clear All Data"):
117
+ st.session_state.uploaded_data_df = None
118
+ st.session_state.last_uploaded_file = None
119
+ st.sidebar.success("Data cleared!")
120
+
121
+ # Generate sample data option
122
+ st.sidebar.markdown("**Or use sample data:**")
123
+ if st.sidebar.button("Use Sample Data"):
124
+ # Create simple sample data for testing
125
+ st.session_state.uploaded_data_df = pd.DataFrame({
126
+ 'first_name': ['John', 'Jon', 'Jane', 'Jain', 'Mike', 'Michael'],
127
+ 'last_name': ['Smith', 'Smith', 'Doe', 'Doe', 'Johnson', 'Johnson'],
128
+ 'email_address': ['john.smith@email.com', 'j.smith@gmail.com', 'jane.doe@company.com', 'jdoe@company.com', 'mike.j@work.com', 'michael.johnson@work.com'],
129
+ 'phone_number': ['555-0123', '555-0123', '555-0456', '(555) 456-0000', '555-0789', '5550789']
130
+ })
131
+ st.session_state.last_uploaded_file = "sample_data"
132
+ st.sidebar.success("Sample data loaded!")
133
 
134
  similarity_threshold = st.sidebar.slider(
135
  "Similarity Threshold",
 
243
  # ----------------------
244
  # LOAD CSV & PROCESS
245
  # ----------------------
246
+ # Use the unified session state data
247
+ if st.session_state.uploaded_data_df is not None:
248
+ st.markdown("### Preview of Data")
249
+ df = st.session_state.uploaded_data_df
250
  st.dataframe(df.head(10))
251
+ st.info(f"📊 Dataset contains {len(df)} rows and {len(df.columns)} columns")
252
 
253
  # Provide a "Run Entity Resolution" button
254
  if st.button("Run Entity Resolution"):