Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import zipfile | |
| import json | |
| from io import BytesIO | |
| import chardet | |
| import pandas as pd | |
| import openpyxl | |
| # Set page configuration to wide mode by default | |
| st.set_page_config(layout="wide") | |
| # Function to extract and combine JSON files from a ZIP file | |
| def extract_and_combine_zip(zip_file): | |
| combined_data = {} | |
| with zipfile.ZipFile(zip_file) as z: | |
| # Extract all JSON files, ignoring macOS-specific hidden files | |
| json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')] | |
| for json_file in json_files: | |
| with z.open(json_file) as f: | |
| content = f.read() | |
| encoding = chardet.detect(content)['encoding'] | |
| try: | |
| decoded_content = content.decode(encoding) | |
| data = json.loads(decoded_content) | |
| combined_data = flatten_json(data, combined_data) | |
| except (UnicodeDecodeError, json.JSONDecodeError) as e: | |
| st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}") | |
| return combined_data | |
| # Improved function to flatten and merge JSON data | |
| def flatten_json(data, flattened=None, prefix=''): | |
| if flattened is None: | |
| flattened = {} | |
| if isinstance(data, dict): | |
| for key, value in data.items(): | |
| new_key = f"{prefix}.{key}" if prefix else key | |
| if isinstance(value, (dict, list)): | |
| flatten_json(value, flattened, new_key) | |
| elif value is not None and value != "": | |
| flattened[new_key] = value | |
| elif isinstance(data, list): | |
| for i, item in enumerate(data): | |
| new_key = f"{prefix}[{i}]" if prefix else str(i) | |
| flatten_json(item, flattened, new_key) | |
| elif data is not None and data != "": | |
| flattened[prefix] = data | |
| return flattened | |
| # Function to convert DataFrame to Excel | |
| def to_excel(df): | |
| output = BytesIO() | |
| with pd.ExcelWriter(output, engine='openpyxl') as writer: | |
| df.to_excel(writer, index=False, sheet_name='Sheet1') | |
| processed_data = output.getvalue() | |
| return processed_data | |
| # Streamlit app setup | |
| st.title("ZIP JSON Extractor & Flattener") | |
| # File uploader widget | |
| uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip") | |
| if uploaded_zip: | |
| # Combine and flatten JSON data | |
| flattened_json = extract_and_combine_zip(uploaded_zip) | |
| # Create a DataFrame from the flattened JSON data | |
| df = pd.DataFrame([flattened_json]) | |
| # Convert all object columns to string to avoid Arrow conversion issues | |
| for col in df.select_dtypes(include=['object']).columns: | |
| df[col] = df[col].astype(str) | |
| # Create a downloadable JSON | |
| flattened_json_str = json.dumps(flattened_json, indent=4) | |
| json_bytes = flattened_json_str.encode() | |
| # Create columns for download buttons | |
| col1, col2 = st.columns(2) | |
| # Button to download the flattened JSON data | |
| with col1: | |
| st.download_button( | |
| label="Download Flattened JSON", | |
| data=BytesIO(json_bytes), | |
| file_name='flattened_json.json', | |
| mime='application/json' | |
| ) | |
| # Button to download the Excel file | |
| with col2: | |
| excel_data = to_excel(df) | |
| st.download_button( | |
| label="Download Excel File", | |
| data=excel_data, | |
| file_name='flattened_data.xlsx', | |
| mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' | |
| ) | |
| # Add a success message | |
| st.success("JSON data has been successfully processed and flattened into a single object. You can now download the flattened JSON file or the Excel file.") |