File size: 3,733 Bytes
e874d5a
 
 
 
 
 
dfaa109
e874d5a
 
 
 
 
 
bd06620
e874d5a
 
 
 
 
 
 
 
 
 
bd06620
e874d5a
 
 
 
bd06620
 
 
 
 
e874d5a
 
bd06620
e874d5a
bd06620
 
e874d5a
 
 
bd06620
 
 
e874d5a
bd06620
e874d5a
 
bd06620
 
 
 
 
 
 
 
e874d5a
bd06620
e874d5a
 
 
 
 
bd06620
 
e874d5a
 
bd06620
e874d5a
 
 
 
 
 
 
 
 
bd06620
 
 
e874d5a
bd06620
 
 
 
 
 
 
e874d5a
bd06620
 
 
 
 
 
 
 
 
e874d5a
 
bd06620
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import streamlit as st
import zipfile
import json
from io import BytesIO
import chardet
import pandas as pd
import openpyxl

# Set page configuration to wide mode by default
st.set_page_config(layout="wide")

# Function to extract and combine JSON files from a ZIP file
def extract_and_combine_zip(zip_file):
    combined_data = {}
    with zipfile.ZipFile(zip_file) as z:
        # Extract all JSON files, ignoring macOS-specific hidden files
        json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')]
        for json_file in json_files:
            with z.open(json_file) as f:
                content = f.read()
                encoding = chardet.detect(content)['encoding']
                try:
                    decoded_content = content.decode(encoding)
                    data = json.loads(decoded_content)
                    combined_data = flatten_json(data, combined_data)
                except (UnicodeDecodeError, json.JSONDecodeError) as e:
                    st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}")
    return combined_data

# Improved function to flatten and merge JSON data
def flatten_json(data, flattened=None, prefix=''):
    if flattened is None:
        flattened = {}
    
    if isinstance(data, dict):
        for key, value in data.items():
            new_key = f"{prefix}.{key}" if prefix else key
            if isinstance(value, (dict, list)):
                flatten_json(value, flattened, new_key)
            elif value is not None and value != "":
                flattened[new_key] = value
    elif isinstance(data, list):
        for i, item in enumerate(data):
            new_key = f"{prefix}[{i}]" if prefix else str(i)
            flatten_json(item, flattened, new_key)
    elif data is not None and data != "":
        flattened[prefix] = data
    
    return flattened

# Function to convert DataFrame to Excel
def to_excel(df):
    output = BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')
    processed_data = output.getvalue()
    return processed_data

# Streamlit app setup
st.title("ZIP JSON Extractor & Flattener")

# File uploader widget
uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip")

if uploaded_zip:
    # Combine and flatten JSON data
    flattened_json = extract_and_combine_zip(uploaded_zip)

    # Create a DataFrame from the flattened JSON data
    df = pd.DataFrame([flattened_json])

    # Convert all object columns to string to avoid Arrow conversion issues
    for col in df.select_dtypes(include=['object']).columns:
        df[col] = df[col].astype(str)

    # Create a downloadable JSON
    flattened_json_str = json.dumps(flattened_json, indent=4)
    json_bytes = flattened_json_str.encode()

    # Create columns for download buttons
    col1, col2 = st.columns(2)

    # Button to download the flattened JSON data
    with col1:
        st.download_button(
            label="Download Flattened JSON",
            data=BytesIO(json_bytes),
            file_name='flattened_json.json',
            mime='application/json'
        )

    # Button to download the Excel file
    with col2:
        excel_data = to_excel(df)
        st.download_button(
            label="Download Excel File",
            data=excel_data,
            file_name='flattened_data.xlsx',
            mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )

    # Add a success message
    st.success("JSON data has been successfully processed and flattened into a single object. You can now download the flattened JSON file or the Excel file.")