File size: 4,346 Bytes
33541c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86cc0b2
33541c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4e1ff9
33541c0
 
 
 
 
 
 
 
 
 
 
 
86cc0b2
33541c0
86cc0b2
 
33541c0
86cc0b2
 
c4e1ff9
86cc0b2
 
 
c4e1ff9
 
33541c0
 
c4e1ff9
33541c0
 
 
 
 
 
86cc0b2
33541c0
 
86cc0b2
c4e1ff9
 
86cc0b2
33541c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4e1ff9
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import streamlit as st
import zipfile
import json
from io import BytesIO
import chardet
import pandas as pd
import openpyxl

# Set page configuration to wide mode by default
st.set_page_config(layout="wide")

# Function to extract and combine JSON files from a ZIP file
def extract_and_combine_zip(zip_file):
    combined_data = {}
    with zipfile.ZipFile(zip_file) as z:
        # Extract all JSON files, ignoring macOS-specific hidden files
        json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')]
        for json_file in json_files:
            with z.open(json_file) as f:
                content = f.read()
                encoding = chardet.detect(content)['encoding']
                try:
                    decoded_content = content.decode(encoding)
                    data = json.loads(decoded_content)
                    combined_data = flatten_json(data, combined_data)
                except (UnicodeDecodeError, json.JSONDecodeError) as e:
                    st.warning(f"Warning: Could not decode {json_file} in {zip_file.name}. Error: {str(e)}")
    return combined_data

# Improved function to flatten and merge JSON data
def flatten_json(data, flattened=None, prefix=''):
    if flattened is None:
        flattened = {}
    
    if isinstance(data, dict):
        for key, value in data.items():
            new_key = f"{prefix}.{key}" if prefix else key
            if isinstance(value, (dict, list)):
                flatten_json(value, flattened, new_key)
            elif value is not None and value != "":
                flattened[new_key] = value
    elif isinstance(data, list):
        for i, item in enumerate(data):
            new_key = f"{prefix}[{i}]" if prefix else str(i)
            flatten_json(item, flattened, new_key)
    elif data is not None and data != "":
        flattened[prefix] = value
    
    return flattened

# Function to convert DataFrame to Excel
def to_excel(df):
    output = BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')
    processed_data = output.getvalue()
    return processed_data

# Streamlit app setup
st.title("Multi-ZIP JSON Extractor & Flattener")

# File uploader widget for multiple files
uploaded_zips = st.file_uploader("Upload ZIP files containing JSON files:", type="zip", accept_multiple_files=True)

if uploaded_zips:
    # Process all uploaded ZIP files
    all_flattened_json = []
    
    for zip_file in uploaded_zips:
        flattened_json = extract_and_combine_zip(zip_file)
        flattened_json['zip_file_name'] = zip_file.name  # Add ZIP file name to the flattened JSON
        all_flattened_json.append(flattened_json)

    # Create a DataFrame from the flattened JSON data
    df = pd.DataFrame(all_flattened_json)

    # Convert all object columns to string to avoid Arrow conversion issues
    for col in df.select_dtypes(include=['object']).columns:
        df[col] = df[col].astype(str)

    # Create a downloadable JSON
    flattened_json_str = json.dumps(all_flattened_json, indent=4)
    json_bytes = flattened_json_str.encode()

    # Display processing summary
    st.write(f"Processed {len(uploaded_zips)} ZIP file(s)")
    st.write(f"Total rows in the output: {len(all_flattened_json)}")

    # Create columns for download buttons
    col1, col2 = st.columns(2)

    # Button to download the flattened JSON data
    with col1:
        st.download_button(
            label="Download Flattened JSON",
            data=BytesIO(json_bytes),
            file_name='flattened_json.json',
            mime='application/json'
        )

    # Button to download the Excel file
    with col2:
        excel_data = to_excel(df)
        st.download_button(
            label="Download Excel File",
            data=excel_data,
            file_name='flattened_data.xlsx',
            mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )

    # Add a success message
    st.success(f"All ZIP files have been successfully processed. Each ZIP file is represented as a separate row in the output. You can now download the flattened JSON file or the Excel file.")

    # Display a preview of the DataFrame
    st.write("Preview of the processed data:")
    st.dataframe(df.head())