# ChatGPT Conversation Analyzer (Streamlit app).
#
# Lets the user upload an exported ChatGPT JSON file, flattens every message
# into a table (Title / Author / Content / Time), and offers sidebar filters,
# a messages-per-day chart, and CSV / Excel export of the filtered rows.

import io
import json
from datetime import datetime

import pandas as pd
import plotly.express as px
import streamlit as st


def safe_timestamp(ts):
    """Convert a Unix timestamp to a naive local ``datetime``.

    Args:
        ts: Anything ``float()`` accepts (number or numeric string).

    Returns:
        The corresponding ``datetime``, or ``None`` when *ts* is missing,
        non-numeric, or outside the range the platform can represent.
    """
    try:
        return datetime.fromtimestamp(float(ts))
    except (TypeError, ValueError, OverflowError, OSError):
        # OverflowError / OSError: value out of range for the platform's
        # localtime() (e.g. inf or absurdly large numbers).
        return None


def _extract_chats(data):
    """Return the list of conversations found in the decoded JSON, or ``[]``."""
    if isinstance(data, dict):
        chats = data.get("chats") or data.get("conversations")
    else:
        chats = data
    return chats if isinstance(chats, list) else []


def _message_text(message):
    """Return the first text part of *message*, or ``""`` if it has none."""
    content = message.get("content")
    if not isinstance(content, dict):
        return ""
    parts = content.get("parts")
    if not isinstance(parts, list):
        return ""
    # Parts may hold non-text payloads (dicts); only strings are searchable
    # and exportable, so take the first string part.
    return next((part for part in parts if isinstance(part, str)), "")


def _build_rows(chats):
    """Flatten conversations into one dict per timestamped message."""
    rows = []
    for chat in chats:
        if not isinstance(chat, dict) or "title" not in chat:
            continue
        mapping = chat.get("mapping")
        if not isinstance(mapping, dict):
            continue

        # Null titles would break sorting of the title selector later on.
        raw_title = chat["title"]
        title = "Untitled" if raw_title is None else str(raw_title)

        for node in mapping.values():
            message = node.get("message") if isinstance(node, dict) else None
            if not isinstance(message, dict):
                continue
            created_at = safe_timestamp(message.get("create_time"))
            if created_at is None:
                continue
            author = message.get("author")
            role = (author.get("role") or "") if isinstance(author, dict) else ""
            rows.append({
                "Title": title,
                "Author": role,
                "Content": _message_text(message),
                "Time": created_at,
            })
    return rows


def _date_bounds(selection, default_start, default_end):
    """Turn a ``date_input`` value into a ``(start, end)`` pair.

    In range mode the widget returns fewer than two dates while the user is
    still picking, so missing bounds fall back to the supplied defaults.
    """
    if not isinstance(selection, (list, tuple)):
        return selection, selection
    if len(selection) >= 2:
        return selection[0], selection[1]
    if len(selection) == 1:
        return selection[0], default_end
    return default_start, default_end


def _apply_filters(df, authors, start, end, title, keyword):
    """Return the rows of *df* matching every active sidebar filter."""
    message_dates = df["Time"].dt.date
    mask = (
        df["Author"].isin(authors)
        & (message_dates >= start)
        & (message_dates <= end)
    )
    if title != "All":
        mask &= df["Title"] == title
    if keyword:
        # regex=False: the keyword is user input and must be matched
        # literally; characters such as "(" would otherwise raise re.error.
        mask &= df["Content"].str.contains(
            keyword, case=False, na=False, regex=False
        )
    return df[mask]


def _render_export(filtered_df):
    """Render the export-format selector and the matching download button."""
    st.subheader("📥 Export Options")
    export_format = st.selectbox("Choose export format", ["CSV", "Excel"])

    if export_format == "CSV":
        st.download_button(
            label="Download CSV",
            data=filtered_df.to_csv(index=False),
            file_name="chat_export.csv",
            mime="text/csv",
        )
        return

    excel_buffer = io.BytesIO()
    with pd.ExcelWriter(excel_buffer, engine="openpyxl") as writer:
        filtered_df.to_excel(writer, index=False)
    st.download_button(
        label="Download Excel",
        data=excel_buffer.getvalue(),
        file_name="chat_export.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


def _render_dashboard(df):
    """Render sidebar filters, the record table, the chart and the export UI."""
    first_day = df["Time"].min().date()
    last_day = df["Time"].max().date()

    # Sidebar filters
    st.sidebar.header("Filters")
    author_filter = st.sidebar.multiselect(
        "Author", df["Author"].unique(), default=list(df["Author"].unique())
    )
    date_range = st.sidebar.date_input("Date Range", [first_day, last_day])
    title_filter = st.sidebar.selectbox(
        "Conversation Title", ["All"] + sorted(df["Title"].unique().tolist())
    )
    keyword_search = st.sidebar.text_input("Search in messages")

    start, end = _date_bounds(date_range, first_day, last_day)
    filtered_df = _apply_filters(
        df, author_filter, start, end, title_filter, keyword_search
    )

    st.subheader("🧾 Conversation Records")
    st.dataframe(filtered_df.sort_values(by="Time"), use_container_width=True)

    st.subheader("📈 Activity Over Time")
    time_series = (
        filtered_df.groupby(filtered_df["Time"].dt.date)
        .size()
        .reset_index(name="Messages")
    )
    fig = px.line(time_series, x="Time", y="Messages", markers=True)
    st.plotly_chart(fig, use_container_width=True)

    _render_export(filtered_df)


st.set_page_config(page_title="ChatGPT Conversation Analyzer", layout="wide")
st.title("📊 ChatGPT Conversation Analyzer")

# Upload JSON file
uploaded_file = st.file_uploader("Upload your chat-history.json file", type="json")

if uploaded_file:
    # Top-level UI boundary: any failure is reported to the user as a
    # message instead of a raw traceback.
    try:
        chats = _extract_chats(json.load(uploaded_file))
        if not chats:
            st.error("No valid 'chats' or 'conversations' data found in the uploaded JSON file.")
        else:
            df = pd.DataFrame(_build_rows(chats))
            if df.empty:
                st.warning("No valid conversation data found in the uploaded file.")
            else:
                _render_dashboard(df)
    except Exception as e:
        st.error(f"Failed to process the uploaded file: {e}")
else:
    st.info("👈 Upload your exported ChatGPT JSON file to begin analysis.")