Spaces:

BulatF
/

StreamlitSentiment

Runtime error

App Files Files Community

BulatF committed on Jul 6, 2023

Commit

4d924ac

1 Parent(s): be9422b

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -14

app.py CHANGED Viewed

@@ -96,20 +96,24 @@ def main():
     if file is not None:
         try:
-            df = pd.read_excel(file)
-            # Drop rows where all columns are NaN
-            df = df.dropna(how='all')
-            # Replace blank spaces with NaN, then drop rows where all columns are NaN again
-            df = df.replace(r'^\s*$', np.nan, regex=True)
-            df = df.dropna(how='all')
-            review_column = st.selectbox('Select the column from your excel file containing text', df.columns)
-            df[review_column] = df[review_column].astype(str)
-            filter_words_input = st.text_input('Enter words to filter the data by, separated by comma (or leave empty)')  # New input field for filter words
-            filter_words = [] if filter_words_input.strip() == "" else process_filter_words(filter_words_input)  # Process the filter words
-            class_names = st.text_input('Enter the possible class names separated by comma')  # New input field for class names
-            df = filter_dataframe(df, review_column, filter_words)  # Filter the DataFrame
         except Exception as e:
             st.write("An error occurred while reading the uploaded file. Please make sure it's a valid Excel file.")
             return
@@ -118,7 +122,6 @@ def main():
     if start_button and df is not None:
-        # Drop rows with NaN or blank values in the review_column
         df = df[df[review_column].notna()]
         df = df[df[review_column].str.strip() != '']

     if file is not None:
         try:
+            # Reading Excel file in chunks
+            chunk_size = 1000  # you can adjust this value according to your memory
+            chunks = []
+            for chunk in pd.read_excel(file, chunksize=chunk_size):
+                chunk = chunk.dropna(how='all')
+                chunk = chunk.replace(r'^\s*$', np.nan, regex=True)
+                chunk = chunk.dropna(how='all')
+                review_column = st.selectbox('Select the column from your excel file containing text', chunk.columns)
+                chunk[review_column] = chunk[review_column].astype(str)
+                filter_words_input = st.text_input('Enter words to filter the data by, separated by comma (or leave empty)')  # New input field for filter words
+                filter_words = [] if filter_words_input.strip() == "" else process_filter_words(filter_words_input)  # Process the filter words
+                class_names = st.text_input('Enter the possible class names separated by comma')  # New input field for class names
+                chunk = filter_dataframe(chunk, review_column, filter_words)  # Filter the DataFrame
+                chunks.append(chunk)
+            df = pd.concat(chunks, ignore_index=True)
         except Exception as e:
             st.write("An error occurred while reading the uploaded file. Please make sure it's a valid Excel file.")
             return
     if start_button and df is not None:
         df = df[df[review_column].notna()]
         df = df[df[review_column].str.strip() != '']