Haseeb-001 committed on
Commit
605e4d8
·
verified ·
1 Parent(s): 26895e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -46
app.py CHANGED
@@ -27,7 +27,7 @@ def clean_with_llama(text):
27
 
28
  # Function to clean and process data
29
  def process_data(df):
30
- # Remove unnecessary data like completely empty rows and columns
31
  df.dropna(how="all", inplace=True)
32
  df.dropna(axis=1, how="all", inplace=True)
33
 
@@ -80,54 +80,52 @@ def main():
80
  dataset_url = st.text_input("Paste the URL of the dataset")
81
 
82
  if uploaded_file or dataset_url:
 
 
 
 
 
 
83
  try:
84
- if dataset_url:
85
- st.info("Downloading dataset from URL...")
86
- uploaded_file = download_dataset(dataset_url)
87
-
88
- if uploaded_file:
89
- st.success("File uploaded successfully!")
90
- st.write("Processing the file...")
91
-
92
- # Load dataset
93
- if hasattr(uploaded_file, 'read'): # Streamlit file upload
94
- df = pd.read_csv(uploaded_file)
95
- else: # File from URL download or local path
96
- with open(uploaded_file, 'r') as file:
97
- df = pd.read_csv(file)
98
-
99
- st.write("**Original Dataset**")
100
- st.dataframe(df)
101
-
102
- # Process data
103
- st.info("Cleaning and simplifying the dataset...")
104
- df_cleaned = process_data(df)
105
-
106
- # Display cleaned data
107
- st.write("**Cleaned Dataset**")
108
- st.dataframe(df_cleaned)
109
-
110
- # Chunk data
111
- st.info("Creating chunks for AI models...")
112
- processed_files = chunk_dataset(df_cleaned)
113
-
114
- # Allow download of processed chunks
115
- st.success(f"Processing complete! {len(processed_files)} chunk(s) created.")
116
- for file_name in processed_files:
117
- with open(file_name, 'rb') as file:
118
- st.download_button(
119
- label=f"Download {file_name}",
120
- data=file,
121
- file_name=file_name,
122
- mime="text/csv",
123
- )
124
-
125
- # Cleanup generated files
126
- for file_name in processed_files:
127
- os.remove(file_name)
128
 
129
  except Exception as e:
130
- st.error(f"An error occurred: {e}")
131
 
132
  if __name__ == "__main__":
133
  main()
 
27
 
28
  # Function to clean and process data
29
  def process_data(df):
30
+ # Remove completely empty rows and columns
31
  df.dropna(how="all", inplace=True)
32
  df.dropna(axis=1, how="all", inplace=True)
33
 
 
80
  dataset_url = st.text_input("Paste the URL of the dataset")
81
 
82
  if uploaded_file or dataset_url:
83
+ if dataset_url:
84
+ st.info("Downloading dataset from URL...")
85
+ file_path = download_dataset(dataset_url)
86
+ else:
87
+ file_path = uploaded_file # Use uploaded file
88
+
89
  try:
90
+ # Load dataset
91
+ if hasattr(file_path, 'read'): # For Streamlit uploads
92
+ df = pd.read_csv(file_path)
93
+ else: # For downloaded or local files
94
+ df = pd.read_csv(file_path)
95
+
96
+ st.success("File uploaded successfully!")
97
+ st.write("**Original Dataset**")
98
+ st.dataframe(df)
99
+
100
+ # Process data
101
+ st.info("Cleaning and simplifying the dataset...")
102
+ df_cleaned = process_data(df)
103
+
104
+ # Display cleaned data
105
+ st.write("**Cleaned Dataset**")
106
+ st.dataframe(df_cleaned)
107
+
108
+ # Chunk data
109
+ st.info("Creating chunks for AI models...")
110
+ processed_files = chunk_dataset(df_cleaned)
111
+
112
+ # Allow download of processed chunks
113
+ st.success(f"Processing complete! {len(processed_files)} chunk(s) created.")
114
+ for file_name in processed_files:
115
+ with open(file_name, 'rb') as file:
116
+ st.download_button(
117
+ label=f"Download {file_name}",
118
+ data=file,
119
+ file_name=file_name,
120
+ mime="text/csv",
121
+ )
122
+
123
+ # Cleanup generated files
124
+ for file_name in processed_files:
125
+ os.remove(file_name)
 
 
 
 
 
 
 
 
126
 
127
  except Exception as e:
128
+ st.error(f"An error occurred while processing the dataset: {e}")
129
 
130
  if __name__ == "__main__":
131
  main()