kivilaid committed on
Commit
bd06620
·
verified ·
1 Parent(s): c92b498

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -27
app.py CHANGED
@@ -4,13 +4,14 @@ import json
4
  from io import BytesIO
5
  import chardet
6
  import pandas as pd
 
7
 
8
  # Set page configuration to wide mode by default
9
  st.set_page_config(layout="wide")
10
 
11
  # Function to extract and combine JSON files from a ZIP file
12
  def extract_and_combine_zip(zip_file):
13
- combined_data = []
14
  with zipfile.ZipFile(zip_file) as z:
15
  # Extract all JSON files, ignoring macOS-specific hidden files
16
  json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')]
@@ -21,44 +22,52 @@ def extract_and_combine_zip(zip_file):
21
  try:
22
  decoded_content = content.decode(encoding)
23
  data = json.loads(decoded_content)
24
- combined_data.append(data)
25
  except (UnicodeDecodeError, json.JSONDecodeError) as e:
26
  st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}")
27
  return combined_data
28
 
29
- # Function to flatten JSON data
30
- def flatten_json(data, prefix=''):
31
- flattened = {}
 
 
32
  if isinstance(data, dict):
33
  for key, value in data.items():
34
- new_key = f"{prefix}_{key}" if prefix else key
35
  if isinstance(value, (dict, list)):
36
- flattened.update(flatten_json(value, new_key))
37
- else:
38
  flattened[new_key] = value
39
  elif isinstance(data, list):
40
  for i, item in enumerate(data):
41
- new_key = f"{prefix}_{i}" if prefix else str(i)
42
- flattened.update(flatten_json(item, new_key))
43
- else:
44
  flattened[prefix] = data
 
45
  return flattened
46
 
 
 
 
 
 
 
 
 
47
  # Streamlit app setup
48
- st.title("ZIP JSON Extractor & Combiner")
49
 
50
  # File uploader widget
51
  uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip")
52
 
53
  if uploaded_zip:
54
- # Combine JSON data
55
- combined_json = extract_and_combine_zip(uploaded_zip)
56
-
57
- # Flatten the combined JSON data
58
- flattened_json = [flatten_json(item) for item in combined_json]
59
 
60
  # Create a DataFrame from the flattened JSON data
61
- df = pd.DataFrame(flattened_json)
62
 
63
  # Convert all object columns to string to avoid Arrow conversion issues
64
  for col in df.select_dtypes(include=['object']).columns:
@@ -68,16 +77,27 @@ if uploaded_zip:
68
  flattened_json_str = json.dumps(flattened_json, indent=4)
69
  json_bytes = flattened_json_str.encode()
70
 
 
 
 
71
  # Button to download the flattened JSON data
72
- st.download_button(
73
- label="Download Flattened JSON",
74
- data=BytesIO(json_bytes),
75
- file_name='flattened_json.json',
76
- mime='application/json'
77
- )
 
78
 
79
- # Remove the table display
80
- # st.dataframe(df) # This line has been removed
 
 
 
 
 
 
 
81
 
82
  # Add a success message
83
- st.success("JSON data has been successfully processed. You can now download the flattened JSON file.")
 
4
  from io import BytesIO
5
  import chardet
6
  import pandas as pd
7
+ # import openpyxl
8
 
9
  # Set page configuration to wide mode by default
10
  st.set_page_config(layout="wide")
11
 
12
  # Function to extract and combine JSON files from a ZIP file
13
  def extract_and_combine_zip(zip_file):
14
+ combined_data = {}
15
  with zipfile.ZipFile(zip_file) as z:
16
  # Extract all JSON files, ignoring macOS-specific hidden files
17
  json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')]
 
22
  try:
23
  decoded_content = content.decode(encoding)
24
  data = json.loads(decoded_content)
25
+ combined_data = flatten_json(data, combined_data)
26
  except (UnicodeDecodeError, json.JSONDecodeError) as e:
27
  st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}")
28
  return combined_data
29
 
30
+ # Improved function to flatten and merge JSON data
31
def flatten_json(data, flattened=None, prefix=''):
    """Flatten nested JSON-like data into a single-level dict.

    Nested dict keys are joined with ``.`` and list positions are written
    as ``[i]`` appended to the parent path. At the top level (empty
    ``prefix``) a dict key is used as-is and a list position becomes the
    bare string ``"i"``.

    Args:
        data: A dict, list, or scalar value to flatten.
        flattened: Optional dict to write into. When given, it is mutated
            in place and existing keys with the same path are overwritten,
            which is how several documents get merged into one mapping.
        prefix: Path accumulated so far; empty for the root call.

    Returns:
        The dict that received the flattened entries (``flattened`` itself
        when one was supplied, otherwise a new dict).
    """
    result = {} if flattened is None else flattened

    if isinstance(data, dict):
        for name, child in data.items():
            path = f"{prefix}.{name}" if prefix else name
            # Scalars are handled by the leaf branch of the recursive call.
            flatten_json(child, result, path)
        return result

    if isinstance(data, list):
        for position, child in enumerate(data):
            path = f"{prefix}[{position}]" if prefix else str(position)
            flatten_json(child, result, path)
        return result

    # Leaf value: None and empty strings are dropped rather than stored.
    if data is not None and data != "":
        result[prefix] = data
    return result
50
 
51
+ # Function to convert DataFrame to Excel
52
def to_excel(df):
    """Render a DataFrame as an in-memory Excel workbook.

    The frame is written without its index to a sheet named ``Sheet1``
    using the ``openpyxl`` engine.

    Args:
        df: The pandas DataFrame to export.

    Returns:
        The bytes of the generated ``.xlsx`` file.
    """
    buffer = BytesIO()
    # Leaving the ``with`` block finalizes the workbook into ``buffer``.
    with pd.ExcelWriter(buffer, engine='openpyxl') as workbook:
        df.to_excel(workbook, sheet_name='Sheet1', index=False)
    return buffer.getvalue()
58
+
59
  # Streamlit app setup
60
+ st.title("ZIP JSON Extractor & Flattener")
61
 
62
  # File uploader widget
63
  uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip")
64
 
65
  if uploaded_zip:
66
+ # Combine and flatten JSON data
67
+ flattened_json = extract_and_combine_zip(uploaded_zip)
 
 
 
68
 
69
  # Create a DataFrame from the flattened JSON data
70
+ df = pd.DataFrame([flattened_json])
71
 
72
  # Convert all object columns to string to avoid Arrow conversion issues
73
  for col in df.select_dtypes(include=['object']).columns:
 
77
  flattened_json_str = json.dumps(flattened_json, indent=4)
78
  json_bytes = flattened_json_str.encode()
79
 
80
+ # Create columns for download buttons
81
+ col1, col2 = st.columns(2)
82
+
83
  # Button to download the flattened JSON data
84
+ with col1:
85
+ st.download_button(
86
+ label="Download Flattened JSON",
87
+ data=BytesIO(json_bytes),
88
+ file_name='flattened_json.json',
89
+ mime='application/json'
90
+ )
91
 
92
+ # Button to download the Excel file
93
+ with col2:
94
+ excel_data = to_excel(df)
95
+ st.download_button(
96
+ label="Download Excel File",
97
+ data=excel_data,
98
+ file_name='flattened_data.xlsx',
99
+ mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
100
+ )
101
 
102
  # Add a success message
103
+ st.success("JSON data has been successfully processed and flattened into a single object. You can now download the flattened JSON file or the Excel file.")