Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import os | |
def _parse_record(text, line_number):
    """Parse one non-empty JSONL line into a Python object without executing it.

    Strict JSON is tried first. Lines written as Python literals (for example
    single-quoted dict reprs) are still accepted through ``ast.literal_eval``,
    which only evaluates literals and never runs code.

    Raises:
        ValueError: If the line is neither valid JSON nor a Python literal.
    """
    import ast
    import json

    try:
        return json.loads(text)
    except ValueError:
        # Not strict JSON; fall through to the literal parser below.
        pass
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError) as err:
        raise ValueError(
            f"Line {line_number} is neither valid JSON nor a Python literal"
        ) from err


def _as_row(value):
    """Turn one cell of a per-message column into a dict of sub-columns."""
    if isinstance(value, dict):
        return value
    if value is None or (isinstance(value, float) and value != value):
        # Missing message (the record had fewer messages): contributes no keys,
        # so no spurious all-NaN column is created for it.
        return {}
    # A real non-dict value in a column that otherwise holds dicts: keep it
    # under the sub-key 0 rather than silently dropping data.
    return {0: value}


def _expand_messages(messages):
    """Expand a 'messages' Series into a list of prefixed column pieces.

    Each entry of ``messages`` is first spread into one column per message
    position. Columns holding dicts are expanded again into
    ``message_<position>_<key>`` columns; all other columns are kept as a
    single ``message_<position>`` column.
    """
    per_message = messages.apply(pd.Series)
    pieces = []
    for col in per_message.columns:
        values = per_message[col]
        if any(isinstance(value, dict) for value in values):
            rows = [_as_row(value) for value in values]
            expanded = pd.DataFrame(rows, index=values.index)
            pieces.append(expanded.add_prefix(f'message_{col}_'))
        else:
            pieces.append(values.rename(f'message_{col}'))
    return pieces


def process_jsonl(file, output_csv_path="expanded_messages_data.csv"):
    """
    Processes an uploaded JSONL file to expand the 'messages' column and saves the result as a CSV.

    Lines are parsed as data only (JSON, with a Python-literal fallback); they
    are never evaluated as code, because the file comes from an upload. Blank
    lines are skipped, and byte lines are decoded as UTF-8.

    Args:
        file: Uploaded JSONL file; any object whose ``readlines()`` yields
            ``str`` or ``bytes`` lines.
        output_csv_path: Where to write the CSV. Defaults to
            "expanded_messages_data.csv" in the current working directory.

    Returns:
        str: Path to the expanded CSV file.

    Raises:
        ValueError: If a non-empty line cannot be parsed.
    """
    # Read the JSONL file
    records = []
    for line_number, raw_line in enumerate(file.readlines(), start=1):
        # "utf-8-sig" also strips a byte-order mark on the first line.
        text = raw_line.decode("utf-8-sig") if isinstance(raw_line, bytes) else raw_line
        text = text.strip()
        if not text:
            continue
        records.append(_parse_record(text, line_number))
    df = pd.DataFrame(records)

    # Expand the 'messages' column if it exists and merge the pieces back
    if 'messages' in df.columns:
        pieces = _expand_messages(df['messages'])
        df = pd.concat([df.drop(columns=['messages']), *pieces], axis=1)

    # Save the expanded DataFrame to a CSV file
    df.to_csv(output_csv_path, index=False)
    return output_csv_path
# Streamlit app: upload a JSONL file, convert it, and offer the CSV for download.
st.title("JSONL to CSV Converter with Message Expansion")
st.write("Upload a JSONL file, and download the processed CSV file with the `messages` column expanded.")

uploaded_file = st.file_uploader("Upload your JSONL file", type=["jsonl"])

if uploaded_file is not None:
    st.success("File uploaded successfully!")

    # Process the uploaded file
    output_csv_path = process_jsonl(uploaded_file)

    # Load the CSV into memory and close the handle before deleting the file,
    # so no open file object is leaked and the removal cannot race a reader.
    try:
        with open(output_csv_path, "rb") as csv_file:
            csv_bytes = csv_file.read()
    finally:
        # Ensure cleanup of the temporary file even if reading it failed.
        try:
            os.remove(output_csv_path)
        except FileNotFoundError:
            # Already gone; nothing left to clean up.
            pass

    # Provide download link (the button is given the in-memory bytes)
    st.download_button(
        label="Download Expanded CSV",
        data=csv_bytes,
        file_name="expanded_messages_data.csv",
        mime="text/csv",
    )