BulatF committed on
Commit
4d924ac
·
1 Parent(s): be9422b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -14
app.py CHANGED
@@ -96,20 +96,24 @@ def main():
96
 
97
  if file is not None:
98
  try:
99
- df = pd.read_excel(file)
100
- # Drop rows where all columns are NaN
101
- df = df.dropna(how='all')
102
- # Replace blank spaces with NaN, then drop rows where all columns are NaN again
103
- df = df.replace(r'^\s*$', np.nan, regex=True)
104
- df = df.dropna(how='all')
105
- review_column = st.selectbox('Select the column from your excel file containing text', df.columns)
106
- df[review_column] = df[review_column].astype(str)
107
-
 
 
 
 
 
 
 
108
 
109
- filter_words_input = st.text_input('Enter words to filter the data by, separated by comma (or leave empty)') # New input field for filter words
110
- filter_words = [] if filter_words_input.strip() == "" else process_filter_words(filter_words_input) # Process the filter words
111
- class_names = st.text_input('Enter the possible class names separated by comma') # New input field for class names
112
- df = filter_dataframe(df, review_column, filter_words) # Filter the DataFrame
113
  except Exception as e:
114
  st.write("An error occurred while reading the uploaded file. Please make sure it's a valid Excel file.")
115
  return
@@ -118,7 +122,6 @@ def main():
118
 
119
 
120
  if start_button and df is not None:
121
- # Drop rows with NaN or blank values in the review_column
122
  df = df[df[review_column].notna()]
123
  df = df[df[review_column].str.strip() != '']
124
 
 
96
 
97
  if file is not None:
98
  try:
99
+ # Reading Excel file in chunks
100
+ chunk_size = 1000 # you can adjust this value according to your memory
101
+ chunks = []
102
+ for chunk in pd.read_excel(file, chunksize=chunk_size):
103
+ chunk = chunk.dropna(how='all')
104
+ chunk = chunk.replace(r'^\s*$', np.nan, regex=True)
105
+ chunk = chunk.dropna(how='all')
106
+ review_column = st.selectbox('Select the column from your excel file containing text', chunk.columns)
107
+ chunk[review_column] = chunk[review_column].astype(str)
108
+
109
+ filter_words_input = st.text_input('Enter words to filter the data by, separated by comma (or leave empty)') # New input field for filter words
110
+ filter_words = [] if filter_words_input.strip() == "" else process_filter_words(filter_words_input) # Process the filter words
111
+ class_names = st.text_input('Enter the possible class names separated by comma') # New input field for class names
112
+ chunk = filter_dataframe(chunk, review_column, filter_words) # Filter the DataFrame
113
+
114
+ chunks.append(chunk)
115
 
116
+ df = pd.concat(chunks, ignore_index=True)
 
 
 
117
  except Exception as e:
118
  st.write("An error occurred while reading the uploaded file. Please make sure it's a valid Excel file.")
119
  return
 
122
 
123
 
124
  if start_button and df is not None:
 
125
  df = df[df[review_column].notna()]
126
  df = df[df[review_column].str.strip() != '']
127