Spaces:
Sleeping
Sleeping
File size: 3,733 Bytes
e874d5a dfaa109 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 e874d5a bd06620 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import streamlit as st
import zipfile
import json
from io import BytesIO
import chardet
import pandas as pd
import openpyxl
# Set page configuration to wide mode by default so the app lays out across
# the full browser width instead of the narrow centered column.
# NOTE(review): Streamlit has historically required set_page_config to run
# before any other st.* command -- keep this call first; confirm against the
# installed Streamlit version.
st.set_page_config(layout="wide")
# Function to extract and combine JSON files from a ZIP file
def extract_and_combine_zip(zip_file):
    """Flatten every JSON member of a ZIP archive into one merged dictionary.

    Args:
        zip_file: Anything ``zipfile.ZipFile`` accepts (a path or a binary
            file-like object such as a Streamlit upload).

    Returns:
        dict: Flattened key/value pairs collected from all JSON members that
        could be decoded and parsed. Every member is flattened into the same
        dictionary, so a key produced by a later member overwrites the same
        key from an earlier one.

    Raises:
        zipfile.BadZipFile: Propagated from ``zipfile.ZipFile`` when the
            input is not a readable ZIP archive.
    """
    combined_data = {}
    with zipfile.ZipFile(zip_file) as z:
        # Extract all JSON files, ignoring macOS-specific hidden files
        json_files = [
            name
            for name in z.namelist()
            if name.endswith('.json') and not name.startswith('__MACOSX')
        ]
        for json_file in json_files:
            content = z.read(json_file)
            # chardet reports an encoding of None when it cannot make a guess
            # (for example on an empty member); decoding with None would raise
            # an uncaught TypeError, so fall back to UTF-8 instead.
            encoding = chardet.detect(content)['encoding'] or 'utf-8'
            try:
                data = json.loads(content.decode(encoding))
            except (UnicodeDecodeError, LookupError, json.JSONDecodeError) as e:
                # LookupError covers a detected encoding name for which Python
                # has no codec; treat it like any other undecodable member.
                st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}")
                continue
            combined_data = flatten_json(data, combined_data)
    return combined_data
# Flatten nested JSON into a single-level dict, merging into an accumulator
def flatten_json(data, flattened=None, prefix=''):
    """Flatten nested dicts/lists into a one-level dict keyed by path.

    Dict keys are joined with ``.`` and list positions are written as
    ``[i]`` (or the bare index when there is no prefix yet). Leaves that are
    ``None`` or the empty string are skipped.

    Args:
        data: The value to flatten (dict, list, or scalar leaf).
        flattened: Optional dict to write into; a new one is created when
            omitted. It is mutated in place.
        prefix: Path accumulated so far for ``data``.

    Returns:
        dict: The accumulator (the same object as ``flattened`` if given).
    """
    result = {} if flattened is None else flattened

    if isinstance(data, dict):
        children = (
            (f"{prefix}.{name}" if prefix else name, child)
            for name, child in data.items()
        )
    elif isinstance(data, list):
        children = (
            (f"{prefix}[{position}]" if prefix else str(position), child)
            for position, child in enumerate(data)
        )
    else:
        # Scalar leaf: keep it unless it carries no information.
        if data is None or data == "":
            return result
        result[prefix] = data
        return result

    for path, child in children:
        flatten_json(child, result, path)
    return result
# Serialize a DataFrame to an in-memory .xlsx workbook
def to_excel(df):
    """Return ``df`` rendered as the raw bytes of an Excel workbook.

    The frame is written without its index to a sheet named ``Sheet1``
    using the openpyxl engine.

    Args:
        df: The pandas DataFrame to export.

    Returns:
        bytes: Contents of the generated .xlsx file.
    """
    buffer = BytesIO()
    # The workbook is only written to the buffer when the writer closes, so
    # read the bytes after leaving the ``with`` block.
    with pd.ExcelWriter(buffer, engine='openpyxl') as xlsx_writer:
        df.to_excel(xlsx_writer, index=False, sheet_name='Sheet1')
    return buffer.getvalue()
# Streamlit app setup
st.title("ZIP JSON Extractor & Flattener")

# File uploader widget
uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip")

if uploaded_zip:
    # Combine and flatten JSON data. A file named *.zip may still not be a
    # readable archive, so report that to the user instead of letting the
    # exception surface as a traceback.
    try:
        flattened_json = extract_and_combine_zip(uploaded_zip)
    except zipfile.BadZipFile:
        flattened_json = {}
        st.error("The uploaded file is not a valid ZIP archive.")
    else:
        if not flattened_json:
            # Nothing was extracted (no JSON members, or none were usable):
            # say so rather than offering empty downloads with a success note.
            st.warning("No JSON data could be extracted from the uploaded ZIP file.")

    if flattened_json:
        # Create a single-row DataFrame from the flattened JSON data
        df = pd.DataFrame([flattened_json])

        # Convert all object columns to string to avoid Arrow conversion issues
        for col in df.select_dtypes(include=['object']).columns:
            df[col] = df[col].astype(str)

        # Create a downloadable JSON
        flattened_json_str = json.dumps(flattened_json, indent=4)
        json_bytes = flattened_json_str.encode()

        # Create columns for download buttons
        col1, col2 = st.columns(2)

        # Button to download the flattened JSON data
        with col1:
            st.download_button(
                label="Download Flattened JSON",
                data=BytesIO(json_bytes),
                file_name='flattened_json.json',
                mime='application/json'
            )

        # Button to download the Excel file
        with col2:
            excel_data = to_excel(df)
            st.download_button(
                label="Download Excel File",
                data=excel_data,
                file_name='flattened_data.xlsx',
                mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
            )

        # Add a success message
        st.success("JSON data has been successfully processed and flattened into a single object. You can now download the flattened JSON file or the Excel file.")