Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import zipfile | |
| import json | |
| from io import BytesIO | |
| import chardet | |
| import pandas as pd | |
| import openpyxl | |
| # Set page configuration to wide mode by default | |
| st.set_page_config(layout="wide") | |
| # Function to extract and combine JSON files from a ZIP file | |
| def extract_and_combine_zip(zip_file): | |
| combined_data = {} | |
| with zipfile.ZipFile(zip_file) as z: | |
| # Extract all JSON files, ignoring macOS-specific hidden files | |
| json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')] | |
| for json_file in json_files: | |
| with z.open(json_file) as f: | |
| content = f.read() | |
| encoding = chardet.detect(content)['encoding'] | |
| try: | |
| decoded_content = content.decode(encoding) | |
| data = json.loads(decoded_content) | |
| combined_data = flatten_json(data, combined_data) | |
| except (UnicodeDecodeError, json.JSONDecodeError) as e: | |
| st.warning(f"Warning: Could not decode {json_file} in {zip_file.name}. Error: {str(e)}") | |
| return combined_data | |
| # Improved function to flatten and merge JSON data | |
| def flatten_json(data, flattened=None, prefix=''): | |
| if flattened is None: | |
| flattened = {} | |
| if isinstance(data, dict): | |
| for key, value in data.items(): | |
| new_key = f"{prefix}.{key}" if prefix else key | |
| if isinstance(value, (dict, list)): | |
| flatten_json(value, flattened, new_key) | |
| elif value is not None and value != "": | |
| flattened[new_key] = value | |
| elif isinstance(data, list): | |
| for i, item in enumerate(data): | |
| new_key = f"{prefix}[{i}]" if prefix else str(i) | |
| flatten_json(item, flattened, new_key) | |
| elif data is not None and data != "": | |
| flattened[prefix] = value | |
| return flattened | |
| # Function to convert DataFrame to Excel | |
| def to_excel(df): | |
| output = BytesIO() | |
| with pd.ExcelWriter(output, engine='openpyxl') as writer: | |
| df.to_excel(writer, index=False, sheet_name='Sheet1') | |
| processed_data = output.getvalue() | |
| return processed_data | |
| # Streamlit app setup | |
| st.title("Multi-ZIP JSON Extractor & Flattener") | |
| # File uploader widget for multiple files | |
| uploaded_zips = st.file_uploader("Upload ZIP files containing JSON files:", type="zip", accept_multiple_files=True) | |
| if uploaded_zips: | |
| # Process all uploaded ZIP files | |
| all_flattened_json = [] | |
| for zip_file in uploaded_zips: | |
| flattened_json = extract_and_combine_zip(zip_file) | |
| flattened_json['zip_file_name'] = zip_file.name # Add ZIP file name to the flattened JSON | |
| all_flattened_json.append(flattened_json) | |
| # Create a DataFrame from the flattened JSON data | |
| df = pd.DataFrame(all_flattened_json) | |
| # Convert all object columns to string to avoid Arrow conversion issues | |
| for col in df.select_dtypes(include=['object']).columns: | |
| df[col] = df[col].astype(str) | |
| # Create a downloadable JSON | |
| flattened_json_str = json.dumps(all_flattened_json, indent=4) | |
| json_bytes = flattened_json_str.encode() | |
| # Display processing summary | |
| st.write(f"Processed {len(uploaded_zips)} ZIP file(s)") | |
| st.write(f"Total rows in the output: {len(all_flattened_json)}") | |
| # Create columns for download buttons | |
| col1, col2 = st.columns(2) | |
| # Button to download the flattened JSON data | |
| with col1: | |
| st.download_button( | |
| label="Download Flattened JSON", | |
| data=BytesIO(json_bytes), | |
| file_name='flattened_json.json', | |
| mime='application/json' | |
| ) | |
| # Button to download the Excel file | |
| with col2: | |
| excel_data = to_excel(df) | |
| st.download_button( | |
| label="Download Excel File", | |
| data=excel_data, | |
| file_name='flattened_data.xlsx', | |
| mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' | |
| ) | |
| # Add a success message | |
| st.success(f"All ZIP files have been successfully processed. Each ZIP file is represented as a separate row in the output. You can now download the flattened JSON file or the Excel file.") | |
| # Display a preview of the DataFrame | |
| st.write("Preview of the processed data:") | |
| st.dataframe(df.head()) |