kivilaid committed on
Commit
33541c0
·
verified ·
1 Parent(s): 97c4faa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import zipfile
3
+ import json
4
+ from io import BytesIO
5
+ import chardet
6
+ import pandas as pd
7
+ import openpyxl
8
+
9
# Configure the page to use the wide layout before anything else is rendered
st.set_page_config(layout='wide')
11
+
12
+ # Function to extract and combine JSON files from a ZIP file
13
def extract_and_combine_zip(zip_file):
    """Flatten every JSON file inside a ZIP archive into one combined dict.

    Args:
        zip_file: Anything ``zipfile.ZipFile`` accepts (a path or a binary
            file-like object such as an uploaded file).

    Returns:
        dict: Flattened key paths mapped to leaf values, merged across all
        JSON members in the order ``namelist()`` reports them. When two
        members produce the same key path, the later one overwrites the
        earlier one.

    Raises:
        zipfile.BadZipFile: If ``zip_file`` is not a valid ZIP archive.

    Members that cannot be decoded or parsed are skipped and reported with
    ``st.warning`` instead of aborting the whole run.
    """
    combined_data = {}
    with zipfile.ZipFile(zip_file) as z:
        # Keep only JSON members, ignoring macOS-specific hidden files
        json_files = [
            name
            for name in z.namelist()
            if name.endswith('.json') and not name.startswith('__MACOSX')
        ]
        for json_file in json_files:
            content = z.read(json_file)
            # chardet reports an encoding of None when it cannot guess
            # (e.g. for an empty file); fall back to UTF-8 so that decode()
            # is never called with None, which would raise TypeError.
            encoding = chardet.detect(content)['encoding'] or 'utf-8'
            try:
                data = json.loads(content.decode(encoding))
            except (UnicodeDecodeError, LookupError, json.JSONDecodeError) as e:
                # LookupError covers a detected codec name Python does not know
                st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}")
                continue
            combined_data = flatten_json(data, combined_data)
    return combined_data
29
+
30
+ # Improved function to flatten and merge JSON data
31
def flatten_json(data, flattened=None, prefix=''):
    """Flatten nested JSON-like data into a single-level dict.

    Dict keys are joined with ``.`` and list positions are appended as
    ``[i]`` (or used as the bare index string when there is no prefix yet).
    Leaves that are ``None`` or the empty string are dropped, and empty
    containers contribute nothing.

    Args:
        data: A dict, list, or scalar leaf value.
        flattened: Optional dict to merge into; it is mutated in place and
            existing keys are overwritten. A new dict is created when None.
        prefix: Key path accumulated so far.

    Returns:
        dict: The (possibly pre-populated) flattened mapping.
    """
    result = {} if flattened is None else flattened

    if isinstance(data, dict):
        children = (
            (f"{prefix}.{name}" if prefix else name, child)
            for name, child in data.items()
        )
    elif isinstance(data, list):
        children = (
            (f"{prefix}[{index}]" if prefix else str(index), child)
            for index, child in enumerate(data)
        )
    else:
        # Scalar leaf: store it under the accumulated path unless it is empty
        if data is not None and data != "":
            result[prefix] = data
        return result

    # Containers: descend into each child with its extended path
    for path, child in children:
        flatten_json(child, result, path)
    return result
50
+
51
+ # Function to convert DataFrame to Excel
52
def to_excel(df):
    """Serialize a DataFrame to an in-memory Excel workbook.

    Args:
        df: The pandas DataFrame to export.

    Returns:
        bytes: The workbook content, with the data written to a sheet named
        ``Sheet1`` and the index column omitted.
    """
    buffer = BytesIO()
    # The workbook is finalized into the buffer when the writer context exits
    with pd.ExcelWriter(buffer, engine='openpyxl') as workbook:
        df.to_excel(workbook, sheet_name='Sheet1', index=False)
    return buffer.getvalue()
58
+
59
# Streamlit app setup
st.title("ZIP JSON Extractor & Flattener")

# File uploader widget
uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip")

if uploaded_zip:
    # Combine and flatten JSON data. The upload may carry a .zip name without
    # being a valid archive, so report that instead of showing a traceback.
    flattened_json = None
    try:
        flattened_json = extract_and_combine_zip(uploaded_zip)
    except zipfile.BadZipFile as exc:
        st.error(f"Error: The uploaded file is not a valid ZIP archive. Details: {exc}")

    if flattened_json is not None and not flattened_json:
        # Nothing was extracted (no JSON members, or none could be parsed):
        # do not claim success or offer empty downloads.
        st.warning("No usable JSON data was found in the uploaded ZIP file.")
    elif flattened_json:
        # Create a single-row DataFrame from the flattened JSON data
        df = pd.DataFrame([flattened_json])

        # Convert all object columns to string to avoid Arrow conversion issues
        for col in df.select_dtypes(include=['object']).columns:
            df[col] = df[col].astype(str)

        # Create a downloadable JSON
        flattened_json_str = json.dumps(flattened_json, indent=4)
        json_bytes = flattened_json_str.encode()

        # Create columns for download buttons
        col1, col2 = st.columns(2)

        # Button to download the flattened JSON data
        with col1:
            st.download_button(
                label="Download Flattened JSON",
                data=BytesIO(json_bytes),
                file_name='flattened_json.json',
                mime='application/json'
            )

        # Button to download the Excel file
        with col2:
            excel_data = to_excel(df)
            st.download_button(
                label="Download Excel File",
                data=excel_data,
                file_name='flattened_data.xlsx',
                mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
            )

        # Add a success message
        st.success("JSON data has been successfully processed and flattened into a single object. You can now download the flattened JSON file or the Excel file.")