# Spaces: Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import json | |
| import plotly.express as px | |
| from datetime import datetime | |
| import io | |
# Page-level configuration; this is the first Streamlit call in the script.
st.set_page_config(
    page_title="ChatGPT Conversation Analyzer",
    layout="wide",
)

# NOTE(review): the leading "π" looks like a mis-decoded emoji — confirm
# against the original file before changing it.
st.title("π ChatGPT Conversation Analyzer")

# File picker restricted to .json uploads; the result is falsy until the
# user has provided a file.
uploaded_file = st.file_uploader(
    "Upload your chat-history.json file",
    type="json",
)
def safe_timestamp(ts):
    """Convert a Unix timestamp to a ``datetime``, or ``None`` if unusable.

    Args:
        ts: Seconds since the epoch as a number or numeric string. ``None``,
            non-numeric and out-of-range values are tolerated.

    Returns:
        ``datetime.fromtimestamp(float(ts))`` on success, otherwise ``None``.
    """
    try:
        return datetime.fromtimestamp(float(ts))
    except (TypeError, ValueError, OverflowError, OSError):
        # TypeError/ValueError: ts is None or not numeric (also NaN).
        # OverflowError/OSError: ts is numeric but outside the range the
        # platform time functions can represent (e.g. inf). Treating these
        # as "no timestamp" lets the caller skip one bad message instead of
        # failing the whole upload.
        return None
def _extract_chats(data):
    """Return the conversation list from a parsed export, or an empty list.

    Accepts either a bare list of conversations or a dict that stores the
    list under ``"chats"`` or ``"conversations"``.
    """
    if isinstance(data, dict):
        return data.get("chats") or data.get("conversations") or []
    if isinstance(data, list):
        return data
    return []


def _message_rows(chats):
    """Flatten conversations into one row dict per timestamped message.

    Only chats that are dicts with both ``"title"`` and a dict ``"mapping"``
    are read. Messages without a usable ``create_time`` are skipped.
    """
    rows = []
    for chat in chats:
        if not isinstance(chat, dict) or "title" not in chat:
            continue
        mapping = chat.get("mapping")
        if not isinstance(mapping, dict):
            continue

        # A null or non-string title would make the sorted() call that
        # builds the title filter raise TypeError, so normalise it here.
        title = chat["title"]
        if title is None:
            title = "Untitled"
        elif not isinstance(title, str):
            title = str(title)

        for node in mapping.values():
            message = node.get("message") if isinstance(node, dict) else None
            if not message:
                continue
            created_at = safe_timestamp(message.get("create_time"))
            if created_at is None:
                continue
            # `or {}` / `or [""]` also cover keys that are present but null,
            # and an empty `parts` list (which used to raise IndexError).
            author = (message.get("author") or {}).get("role", "")
            parts = (message.get("content") or {}).get("parts") or [""]
            rows.append({
                "Title": title,
                "Author": author,
                "Content": parts[0],
                "Time": created_at,
            })
    return rows


def _apply_sidebar_filters(df):
    """Render the sidebar filter widgets and return the filtered frame."""
    st.sidebar.header("Filters")
    authors = df["Author"].unique()
    first_day = df["Time"].min().date()
    last_day = df["Time"].max().date()

    author_filter = st.sidebar.multiselect("Author", authors, default=list(authors))
    picked = st.sidebar.date_input("Date Range", [first_day, last_day])
    title_filter = st.sidebar.selectbox(
        "Conversation Title", ["All"] + sorted(df["Title"].unique().tolist())
    )
    keyword_search = st.sidebar.text_input("Search in messages")

    # While the user is still choosing a range the widget can return fewer
    # than two dates; fall back to the data's own bounds for the missing end.
    if not isinstance(picked, (list, tuple)):
        picked = (picked,)
    start_date = picked[0] if len(picked) >= 1 else first_day
    end_date = picked[1] if len(picked) >= 2 else last_day

    filtered = df[df["Author"].isin(author_filter)]
    days = filtered["Time"].dt.date
    filtered = filtered[(days >= start_date) & (days <= end_date)]
    if title_filter != "All":
        filtered = filtered[filtered["Title"] == title_filter]
    if keyword_search:
        # regex=False: the box is a plain-text search, and characters such
        # as "(" or "[" must not be parsed as a regular expression.
        matches = filtered["Content"].str.contains(
            keyword_search, case=False, na=False, regex=False
        )
        filtered = filtered[matches]
    return filtered


def _render_export(filtered_df):
    """Render the export format selector and the matching download button."""
    # Heading emoji restored from mojibake ("π₯" is 📥 read through a Greek
    # single-byte code page).
    st.subheader("📥 Export Options")
    export_format = st.selectbox("Choose export format", ["CSV", "Excel"])
    if export_format == "CSV":
        st.download_button(
            label="Download CSV",
            data=filtered_df.to_csv(index=False),
            file_name="chat_export.csv",
            mime="text/csv",
        )
        return

    excel_buffer = io.BytesIO()
    # The writer must be closed (end of the `with`) before reading the buffer.
    with pd.ExcelWriter(excel_buffer, engine="openpyxl") as writer:
        filtered_df.to_excel(writer, index=False)
    st.download_button(
        label="Download Excel",
        data=excel_buffer.getvalue(),
        file_name="chat_export.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


def _render_dashboard(df):
    """Render filters, the message table, the activity chart and exports."""
    filtered_df = _apply_sidebar_filters(df)

    # Heading emoji restored from mojibake ("π§Ύ" is 🧾).
    st.subheader("🧾 Conversation Records")
    st.dataframe(filtered_df.sort_values(by="Time"), use_container_width=True)

    # NOTE(review): the leading "π" below looks like a mis-decoded emoji whose
    # original glyph cannot be recovered from here — restore it from the
    # original file.
    st.subheader("π Activity Over Time")
    time_series = (
        filtered_df.groupby(filtered_df["Time"].dt.date)
        .size()
        .reset_index(name="Messages")
    )
    fig = px.line(time_series, x="Time", y="Messages", markers=True)
    st.plotly_chart(fig, use_container_width=True)

    _render_export(filtered_df)


if uploaded_file:
    try:
        chats = _extract_chats(json.load(uploaded_file))
        if not chats:
            st.error("No valid 'chats' or 'conversations' data found in the uploaded JSON file.")
        else:
            # Normalize to DataFrame
            df = pd.DataFrame(_message_rows(chats))
            if df.empty:
                st.warning("No valid conversation data found in the uploaded file.")
            else:
                _render_dashboard(df)
    except Exception as e:
        # Top-level boundary of the app: report any failure in the page
        # rather than showing a raw traceback.
        st.error(f"Failed to process the uploaded file: {e}")
else:
    # NOTE(review): the leading "π" looks like a mis-decoded emoji — restore
    # it from the original file.
    st.info("π Upload your exported ChatGPT JSON file to begin analysis.")