Spaces:

insly
/

policyimport

Sleeping

App Files Files Community

kivilaid commited on Oct 9, 2024

Commit

bd06620

verified ·

1 Parent(s): c92b498

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -27

app.py CHANGED Viewed

@@ -4,13 +4,14 @@ import json
 from io import BytesIO
 import chardet
 import pandas as pd
 # Set page configuration to wide mode by default
 st.set_page_config(layout="wide")
 # Function to extract and combine JSON files from a ZIP file
 def extract_and_combine_zip(zip_file):
-    combined_data = []
     with zipfile.ZipFile(zip_file) as z:
         # Extract all JSON files, ignoring macOS-specific hidden files
         json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')]
@@ -21,44 +22,52 @@ def extract_and_combine_zip(zip_file):
                 try:
                     decoded_content = content.decode(encoding)
                     data = json.loads(decoded_content)
-                    combined_data.append(data)
                 except (UnicodeDecodeError, json.JSONDecodeError) as e:
                     st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}")
     return combined_data
-# Function to flatten JSON data
-def flatten_json(data, prefix=''):
-    flattened = {}
     if isinstance(data, dict):
         for key, value in data.items():
-            new_key = f"{prefix}_{key}" if prefix else key
             if isinstance(value, (dict, list)):
-                flattened.update(flatten_json(value, new_key))
-            else:
                 flattened[new_key] = value
     elif isinstance(data, list):
         for i, item in enumerate(data):
-            new_key = f"{prefix}_{i}" if prefix else str(i)
-            flattened.update(flatten_json(item, new_key))
-    else:
         flattened[prefix] = data
     return flattened
 # Streamlit app setup
-st.title("ZIP JSON Extractor & Combiner")
 # File uploader widget
 uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip")
 if uploaded_zip:
-    # Combine JSON data
-    combined_json = extract_and_combine_zip(uploaded_zip)
-    # Flatten the combined JSON data
-    flattened_json = [flatten_json(item) for item in combined_json]
     # Create a DataFrame from the flattened JSON data
-    df = pd.DataFrame(flattened_json)
     # Convert all object columns to string to avoid Arrow conversion issues
     for col in df.select_dtypes(include=['object']).columns:
@@ -68,16 +77,27 @@ if uploaded_zip:
     flattened_json_str = json.dumps(flattened_json, indent=4)
     json_bytes = flattened_json_str.encode()
     # Button to download the flattened JSON data
-    st.download_button(
-        label="Download Flattened JSON",
-        data=BytesIO(json_bytes),
-        file_name='flattened_json.json',
-        mime='application/json'
-    )
-    # Remove the table display
-    # st.dataframe(df)  # This line has been removed
     # Add a success message
-    st.success("JSON data has been successfully processed. You can now download the flattened JSON file.")

 from io import BytesIO
 import chardet
 import pandas as pd
+# import openpyxl
 # Set page configuration to wide mode by default
 st.set_page_config(layout="wide")
 # Function to extract and combine JSON files from a ZIP file
 def extract_and_combine_zip(zip_file):
+    combined_data = {}
     with zipfile.ZipFile(zip_file) as z:
         # Extract all JSON files, ignoring macOS-specific hidden files
         json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')]
                 try:
                     decoded_content = content.decode(encoding)
                     data = json.loads(decoded_content)
+                    combined_data = flatten_json(data, combined_data)
                 except (UnicodeDecodeError, json.JSONDecodeError) as e:
                     st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}")
     return combined_data
+# Improved function to flatten and merge JSON data
+def flatten_json(data, flattened=None, prefix=''):
+    if flattened is None:
+        flattened = {}
     if isinstance(data, dict):
         for key, value in data.items():
+            new_key = f"{prefix}.{key}" if prefix else key
             if isinstance(value, (dict, list)):
+                flatten_json(value, flattened, new_key)
+            elif value is not None and value != "":
                 flattened[new_key] = value
     elif isinstance(data, list):
         for i, item in enumerate(data):
+            new_key = f"{prefix}[{i}]" if prefix else str(i)
+            flatten_json(item, flattened, new_key)
+    elif data is not None and data != "":
         flattened[prefix] = data
     return flattened
+# Function to convert DataFrame to Excel
+def to_excel(df):
+    output = BytesIO()
+    with pd.ExcelWriter(output, engine='openpyxl') as writer:
+        df.to_excel(writer, index=False, sheet_name='Sheet1')
+    processed_data = output.getvalue()
+    return processed_data
 # Streamlit app setup
+st.title("ZIP JSON Extractor & Flattener")
 # File uploader widget
 uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip")
 if uploaded_zip:
+    # Combine and flatten JSON data
+    flattened_json = extract_and_combine_zip(uploaded_zip)
     # Create a DataFrame from the flattened JSON data
+    df = pd.DataFrame([flattened_json])
     # Convert all object columns to string to avoid Arrow conversion issues
     for col in df.select_dtypes(include=['object']).columns:
     flattened_json_str = json.dumps(flattened_json, indent=4)
     json_bytes = flattened_json_str.encode()
+    # Create columns for download buttons
+    col1, col2 = st.columns(2)
     # Button to download the flattened JSON data
+    with col1:
+        st.download_button(
+            label="Download Flattened JSON",
+            data=BytesIO(json_bytes),
+            file_name='flattened_json.json',
+            mime='application/json'
+        )
+    # Button to download the Excel file
+    with col2:
+        excel_data = to_excel(df)
+        st.download_button(
+            label="Download Excel File",
+            data=excel_data,
+            file_name='flattened_data.xlsx',
+            mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
+        )
     # Add a success message
+    st.success("JSON data has been successfully processed and flattened into a single object. You can now download the flattened JSON file or the Excel file.")