Spaces:

jasvir-singh1021
/

chat-analyzer

Sleeping

File size: 4,685 Bytes

8acd8ac

import streamlit as st
import pandas as pd
import json
import plotly.express as px
from datetime import datetime
import io

# Page-level settings first, then the heading shown at the top of the app.
st.set_page_config(
    layout="wide",
    page_title="ChatGPT Conversation Analyzer",
)
st.title("📊 ChatGPT Conversation Analyzer")

# File picker for the exported chat history; the result is tested for
# truthiness further down to decide whether there is anything to analyze.
uploaded_file = st.file_uploader(
    label="Upload your chat-history.json file",
    type="json",
)

def safe_timestamp(ts):
    """Convert a Unix timestamp to a naive local ``datetime``, or ``None``.

    Args:
        ts: Seconds since the epoch, as a number or a numeric string.

    Returns:
        ``datetime.fromtimestamp(float(ts))`` when the conversion succeeds,
        otherwise ``None``. This function never raises for bad input, so a
        single malformed value cannot abort the caller's loop.
    """
    try:
        return datetime.fromtimestamp(float(ts))
    except (TypeError, ValueError, OverflowError, OSError):
        # TypeError / ValueError: the value is not a usable number
        # (None, non-numeric text, NaN).
        # OverflowError / OSError: the value is numeric but outside the range
        # the platform can turn into a date (infinity, absurdly large values).
        return None

def _extract_chats(data):
    """Return the list of conversations found in the decoded JSON *data*.

    A dict is searched under "chats" and then "conversations"; a list is
    taken to be the conversations themselves. Anything else yields ``[]``.
    """
    if isinstance(data, dict):
        chats = data.get("chats") or data.get("conversations")
    else:
        chats = data
    # Only a list can be walked as a sequence of conversations.
    return chats if isinstance(chats, list) else []


def _message_text(message):
    """Return the first content part of *message* as a string ("" if none)."""
    content = message.get("content")
    parts = content.get("parts") if isinstance(content, dict) else None
    if not isinstance(parts, list) or not parts:
        return ""
    first = parts[0]
    if first is None:
        return ""
    # Non-text parts are stringified so the keyword search and the Excel
    # export always operate on plain text.
    return first if isinstance(first, str) else str(first)


def _build_rows(chats):
    """Flatten conversations into one row dict per timestamped message.

    Conversations lacking "title" or a dict "mapping", nodes without a
    message, and messages whose "create_time" cannot be converted are
    skipped rather than aborting the whole upload.
    """
    rows = []
    for chat in chats:
        if not isinstance(chat, dict) or "title" not in chat:
            continue
        mapping = chat.get("mapping")
        if not isinstance(mapping, dict):
            continue

        # A missing (null) title would make the sorted title list below mix
        # None with strings, which cannot be ordered.
        title = chat["title"]
        title = "(untitled)" if title is None else str(title)

        for node in mapping.values():
            message = node.get("message") if isinstance(node, dict) else None
            if not isinstance(message, dict):
                continue
            created_at = safe_timestamp(message.get("create_time"))
            if created_at is None:
                continue
            author = message.get("author")
            role = author.get("role", "") if isinstance(author, dict) else ""
            rows.append({
                "Title": title,
                "Author": role if isinstance(role, str) else "",
                "Content": _message_text(message),
                "Time": created_at,
            })
    return rows


def _resolve_date_range(selection, earliest, latest):
    """Return a ``(start, end)`` pair from a date-range widget value.

    The widget can hand back fewer than two dates while the user is still
    choosing (only the start picked), so missing ends fall back to the
    bounds of the data instead of raising ``IndexError``.
    """
    if not isinstance(selection, (list, tuple)):
        return selection, selection
    if len(selection) >= 2:
        return selection[0], selection[1]
    if len(selection) == 1:
        return selection[0], latest
    return earliest, latest


# Main flow: parse the upload, filter it via sidebar widgets, then show a
# table, a messages-per-day chart and CSV/Excel download buttons.
if uploaded_file:
    try:
        chats = _extract_chats(json.load(uploaded_file))
        df = pd.DataFrame(_build_rows(chats))

        if not chats:
            st.error("No valid 'chats' or 'conversations' data found in the uploaded JSON file.")
        elif df.empty:
            st.warning("No valid conversation data found in the uploaded file.")
        else:
            first_day = df["Time"].min().date()
            last_day = df["Time"].max().date()
            authors = df["Author"].unique().tolist()
            titles = sorted(df["Title"].unique().tolist())

            # Sidebar filters
            st.sidebar.header("Filters")
            author_filter = st.sidebar.multiselect("Author", authors, default=authors)
            date_range = st.sidebar.date_input("Date Range", [first_day, last_day])
            title_filter = st.sidebar.selectbox("Conversation Title", ["All"] + titles)
            keyword_search = st.sidebar.text_input("Search in messages")

            start_day, end_day = _resolve_date_range(date_range, first_day, last_day)
            days = df["Time"].dt.date
            selected = df["Author"].isin(author_filter) & (days >= start_day) & (days <= end_day)

            if title_filter != "All":
                selected &= df["Title"] == title_filter

            if keyword_search:
                # regex=False: the search box holds literal text, so input
                # such as "(" or "c++" must not be parsed as a pattern.
                selected &= df["Content"].str.contains(
                    keyword_search, case=False, na=False, regex=False
                )

            filtered_df = df[selected]

            st.subheader("🧾 Conversation Records")
            st.dataframe(filtered_df.sort_values(by="Time"), use_container_width=True)

            st.subheader("📈 Activity Over Time")
            # One point per calendar day: number of messages that day.
            time_series = (
                filtered_df.groupby(filtered_df["Time"].dt.date)
                .size()
                .reset_index(name="Messages")
            )
            fig = px.line(time_series, x="Time", y="Messages", markers=True)
            st.plotly_chart(fig, use_container_width=True)

            st.subheader("📥 Export Options")
            export_format = st.selectbox("Choose export format", ["CSV", "Excel"])

            if export_format == "CSV":
                st.download_button(
                    label="Download CSV",
                    data=filtered_df.to_csv(index=False),
                    file_name="chat_export.csv",
                    mime="text/csv",
                )
            else:
                # Write the workbook into memory so its bytes can be handed
                # straight to the download button.
                excel_buffer = io.BytesIO()
                with pd.ExcelWriter(excel_buffer, engine="openpyxl") as writer:
                    filtered_df.to_excel(writer, index=False)
                st.download_button(
                    label="Download Excel",
                    data=excel_buffer.getvalue(),
                    file_name="chat_export.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                )
    except Exception as e:
        # Top-level boundary of the app: report any failure in the UI.
        st.error(f"Failed to process the uploaded file: {e}")
else:
    st.info("👈 Upload your exported ChatGPT JSON file to begin analysis.")