Spaces:
Sleeping
Sleeping
File size: 4,685 Bytes
8acd8ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import streamlit as st
import pandas as pd
import json
import plotly.express as px
from datetime import datetime
import io
# Page chrome: browser-tab title, wide layout, and the visible heading.
st.set_page_config(
    layout="wide",
    page_title="ChatGPT Conversation Analyzer",
)
st.title("๐ ChatGPT Conversation Analyzer")

# File picker limited to JSON uploads; the rest of the script branches on
# whether `uploaded_file` is truthy.
uploaded_file = st.file_uploader(
    label="Upload your chat-history.json file",
    type="json",
)
def safe_timestamp(ts):
    """Convert a Unix timestamp to a naive local ``datetime``, or return ``None``.

    Args:
        ts: Seconds since the epoch as a number or numeric string. ``None``
            and other non-numeric values are tolerated.

    Returns:
        The ``datetime`` produced by ``datetime.fromtimestamp``, or ``None``
        when ``ts`` cannot be converted.
    """
    try:
        return datetime.fromtimestamp(float(ts))
    except (TypeError, ValueError, OverflowError, OSError):
        # TypeError/ValueError: ts is None, non-numeric, or NaN.
        # OverflowError/OSError: ts is infinite or outside the range the
        # platform's time functions support.
        return None
def _first_part(message):
    """Return the first entry of ``message["content"]["parts"]``, or ``""``.

    Falls back to an empty string when ``content`` is missing, null, or not a
    dict, or when ``parts`` is missing, null, or empty, so that one odd
    message cannot abort processing of the whole upload.
    """
    content = message.get("content")
    if not isinstance(content, dict):
        return ""
    parts = content.get("parts")
    if not isinstance(parts, (list, tuple)) or not parts:
        return ""
    return parts[0]


def _collect_rows(chats):
    """Flatten chats into one row dict per timestamped message.

    Only chat dicts that carry both a "title" and a dict-valued "mapping" are
    used. Mapping nodes without a message, and messages whose "create_time"
    cannot be parsed by ``safe_timestamp``, are skipped.

    Returns:
        A list of dicts with keys "Title", "Author", "Content" and "Time".
    """
    rows = []
    for chat in chats:
        if not isinstance(chat, dict) or "title" not in chat or "mapping" not in chat:
            continue
        mapping = chat["mapping"]
        if not isinstance(mapping, dict):
            continue

        title = chat["title"]
        if title is None:
            # A null title mixed with string titles would make the sorted()
            # call that builds the title selector raise TypeError.
            title = "Untitled"

        for node in mapping.values():
            message = node.get("message") if isinstance(node, dict) else None
            if not isinstance(message, dict) or not message:
                continue
            created_at = safe_timestamp(message.get("create_time"))
            if created_at is None:
                continue
            author_info = message.get("author")
            author = author_info.get("role", "") if isinstance(author_info, dict) else ""
            rows.append({
                "Title": title,
                "Author": author,
                "Content": _first_part(message),
                "Time": created_at,
            })
    return rows


def _resolve_date_range(selection, default_start, default_end):
    """Return ``(start, end)`` from a date-range widget value.

    While the user is still picking a range the widget can hand back fewer
    than two dates; any missing bound falls back to the supplied default
    instead of raising IndexError.
    """
    if not isinstance(selection, (list, tuple)):
        selection = (selection,)
    start = selection[0] if len(selection) >= 1 else default_start
    end = selection[1] if len(selection) >= 2 else default_end
    return start, end


# Main flow: parse the upload, flatten it to a DataFrame, apply the sidebar
# filters, then render the table, the activity chart and the export controls.
if uploaded_file:
    try:
        data = json.load(uploaded_file)

        # The export may be a dict wrapping the chat list or the bare list.
        if isinstance(data, dict):
            chats = data.get("chats") or data.get("conversations")
        elif isinstance(data, list):
            chats = data
        else:
            chats = []

        if not chats:
            st.error("No valid 'chats' or 'conversations' data found in the uploaded JSON file.")
        else:
            df = pd.DataFrame(_collect_rows(chats))

            if df.empty:
                st.warning("No valid conversation data found in the uploaded file.")
            else:
                # Sidebar filters
                st.sidebar.header("Filters")
                authors = df["Author"].unique()
                author_filter = st.sidebar.multiselect("Author", authors, default=list(authors))

                first_day = df["Time"].min().date()
                last_day = df["Time"].max().date()
                date_range = st.sidebar.date_input("Date Range", [first_day, last_day])
                start_day, end_day = _resolve_date_range(date_range, first_day, last_day)

                title_filter = st.sidebar.selectbox(
                    "Conversation Title",
                    ["All"] + sorted(df["Title"].unique().tolist()),
                )
                keyword_search = st.sidebar.text_input("Search in messages")

                # Combine every active filter into one boolean mask.
                message_days = df["Time"].dt.date
                mask = (
                    df["Author"].isin(author_filter)
                    & (message_days >= start_day)
                    & (message_days <= end_day)
                )
                if title_filter != "All":
                    mask &= df["Title"] == title_filter
                if keyword_search:
                    # regex=False: treat the query as literal text so input
                    # such as "(" or "c++" neither raises nor mis-matches.
                    mask &= df["Content"].str.contains(
                        keyword_search, case=False, na=False, regex=False
                    )
                filtered_df = df[mask]

                st.subheader("๐งพ Conversation Records")
                st.dataframe(filtered_df.sort_values(by="Time"), use_container_width=True)

                st.subheader("๐ Activity Over Time")
                # Messages per calendar day; the grouped key keeps the name "Time".
                time_series = (
                    filtered_df.groupby(filtered_df["Time"].dt.date)
                    .size()
                    .reset_index(name="Messages")
                )
                fig = px.line(time_series, x="Time", y="Messages", markers=True)
                st.plotly_chart(fig, use_container_width=True)

                st.subheader("๐ฅ Export Options")
                export_format = st.selectbox("Choose export format", ["CSV", "Excel"])
                if export_format == "CSV":
                    csv_data = filtered_df.to_csv(index=False)
                    st.download_button(
                        label="Download CSV",
                        data=csv_data,
                        file_name="chat_export.csv",
                        mime="text/csv",
                    )
                else:
                    # Build the workbook in memory; the writer must be closed
                    # (end of the with-block) before the bytes are read.
                    excel_buffer = io.BytesIO()
                    with pd.ExcelWriter(excel_buffer, engine="openpyxl") as writer:
                        filtered_df.to_excel(writer, index=False)
                    st.download_button(
                        label="Download Excel",
                        data=excel_buffer.getvalue(),
                        file_name="chat_export.xlsx",
                        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    )
    except Exception as e:
        # Top-level boundary: show any parsing/rendering failure in the UI
        # instead of a raw traceback.
        st.error(f"Failed to process the uploaded file: {e}")
else:
    st.info("๐ Upload your exported ChatGPT JSON file to begin analysis.")
|