Spaces:

insly
/

policyimport

Sleeping

App Files Files Community

policyimport / backup_app.py

kivilaid

Rename app.py to backup_app.py

97c4faa verified about 1 year ago

raw

history blame contribute delete

3.73 kB

	import streamlit as st
	import zipfile
	import json
	from io import BytesIO
	import chardet
	import pandas as pd
	import openpyxl

	# Set page configuration to wide mode by default
	st.set_page_config(layout="wide")

	# Function to extract and combine JSON files from a ZIP file
	def extract_and_combine_zip(zip_file):
	combined_data = {}
	with zipfile.ZipFile(zip_file) as z:
	# Extract all JSON files, ignoring macOS-specific hidden files
	json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')]
	for json_file in json_files:
	with z.open(json_file) as f:
	content = f.read()
	encoding = chardet.detect(content)['encoding']
	try:
	decoded_content = content.decode(encoding)
	data = json.loads(decoded_content)
	combined_data = flatten_json(data, combined_data)
	except (UnicodeDecodeError, json.JSONDecodeError) as e:
	st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}")
	return combined_data

	# Improved function to flatten and merge JSON data
	def flatten_json(data, flattened=None, prefix=''):
	if flattened is None:
	flattened = {}

	if isinstance(data, dict):
	for key, value in data.items():
	new_key = f"{prefix}.{key}" if prefix else key
	if isinstance(value, (dict, list)):
	flatten_json(value, flattened, new_key)
	elif value is not None and value != "":
	flattened[new_key] = value
	elif isinstance(data, list):
	for i, item in enumerate(data):
	new_key = f"{prefix}[{i}]" if prefix else str(i)
	flatten_json(item, flattened, new_key)
	elif data is not None and data != "":
	flattened[prefix] = data

	return flattened

	# Function to convert DataFrame to Excel
	def to_excel(df):
	output = BytesIO()
	with pd.ExcelWriter(output, engine='openpyxl') as writer:
	df.to_excel(writer, index=False, sheet_name='Sheet1')
	processed_data = output.getvalue()
	return processed_data

	# Streamlit app setup
	st.title("ZIP JSON Extractor & Flattener")

	# File uploader widget
	uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip")

	if uploaded_zip:
	# Combine and flatten JSON data
	flattened_json = extract_and_combine_zip(uploaded_zip)

	# Create a DataFrame from the flattened JSON data
	df = pd.DataFrame([flattened_json])

	# Convert all object columns to string to avoid Arrow conversion issues
	for col in df.select_dtypes(include=['object']).columns:
	df[col] = df[col].astype(str)

	# Create a downloadable JSON
	flattened_json_str = json.dumps(flattened_json, indent=4)
	json_bytes = flattened_json_str.encode()

	# Create columns for download buttons
	col1, col2 = st.columns(2)

	# Button to download the flattened JSON data
	with col1:
	st.download_button(
	label="Download Flattened JSON",
	data=BytesIO(json_bytes),
	file_name='flattened_json.json',
	mime='application/json'
	)

	# Button to download the Excel file
	with col2:
	excel_data = to_excel(df)
	st.download_button(
	label="Download Excel File",
	data=excel_data,
	file_name='flattened_data.xlsx',
	mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
	)

	# Add a success message
	st.success("JSON data has been successfully processed and flattened into a single object. You can now download the flattened JSON file or the Excel file.")