Spaces:

insly
/

policyimport

Sleeping

App Files Files Community

kivilaid commited on Oct 9, 2024

Commit

33541c0

verified ·

1 Parent(s): 97c4faa

Create app.py

Browse files

Files changed (1) hide show

app.py +103 -0

app.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import streamlit as st
+import zipfile
+import json
+from io import BytesIO
+import chardet
+import pandas as pd
+import openpyxl
+# Set page configuration to wide mode by default
+st.set_page_config(layout="wide")
+# Function to extract and combine JSON files from a ZIP file
+def extract_and_combine_zip(zip_file):
+    combined_data = {}
+    with zipfile.ZipFile(zip_file) as z:
+        # Extract all JSON files, ignoring macOS-specific hidden files
+        json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')]
+        for json_file in json_files:
+            with z.open(json_file) as f:
+                content = f.read()
+                encoding = chardet.detect(content)['encoding']
+                try:
+                    decoded_content = content.decode(encoding)
+                    data = json.loads(decoded_content)
+                    combined_data = flatten_json(data, combined_data)
+                except (UnicodeDecodeError, json.JSONDecodeError) as e:
+                    st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}")
+    return combined_data
+# Improved function to flatten and merge JSON data
+def flatten_json(data, flattened=None, prefix=''):
+    if flattened is None:
+        flattened = {}
+    if isinstance(data, dict):
+        for key, value in data.items():
+            new_key = f"{prefix}.{key}" if prefix else key
+            if isinstance(value, (dict, list)):
+                flatten_json(value, flattened, new_key)
+            elif value is not None and value != "":
+                flattened[new_key] = value
+    elif isinstance(data, list):
+        for i, item in enumerate(data):
+            new_key = f"{prefix}[{i}]" if prefix else str(i)
+            flatten_json(item, flattened, new_key)
+    elif data is not None and data != "":
+        flattened[prefix] = data
+    return flattened
+# Function to convert DataFrame to Excel
+def to_excel(df):
+    output = BytesIO()
+    with pd.ExcelWriter(output, engine='openpyxl') as writer:
+        df.to_excel(writer, index=False, sheet_name='Sheet1')
+    processed_data = output.getvalue()
+    return processed_data
+# Streamlit app setup
+st.title("ZIP JSON Extractor & Flattener")
+# File uploader widget
+uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip")
+if uploaded_zip:
+    # Combine and flatten JSON data
+    flattened_json = extract_and_combine_zip(uploaded_zip)
+    # Create a DataFrame from the flattened JSON data
+    df = pd.DataFrame([flattened_json])
+    # Convert all object columns to string to avoid Arrow conversion issues
+    for col in df.select_dtypes(include=['object']).columns:
+        df[col] = df[col].astype(str)
+    # Create a downloadable JSON
+    flattened_json_str = json.dumps(flattened_json, indent=4)
+    json_bytes = flattened_json_str.encode()
+    # Create columns for download buttons
+    col1, col2 = st.columns(2)
+    # Button to download the flattened JSON data
+    with col1:
+        st.download_button(
+            label="Download Flattened JSON",
+            data=BytesIO(json_bytes),
+            file_name='flattened_json.json',
+            mime='application/json'
+        )
+    # Button to download the Excel file
+    with col2:
+        excel_data = to_excel(df)
+        st.download_button(
+            label="Download Excel File",
+            data=excel_data,
+            file_name='flattened_data.xlsx',
+            mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
+        )
+    # Add a success message
+    st.success("JSON data has been successfully processed and flattened into a single object. You can now download the flattened JSON file or the Excel file.")