Spaces:

jasvir-singh1021
/

chat-analyzer

Sleeping

App Files Files Community

chat-analyzer / app.py

jasvir-singh1021

Create app.py

8acd8ac verified 7 months ago

raw

history blame contribute delete

4.69 kB

	import streamlit as st
	import pandas as pd
	import json
	import plotly.express as px
	from datetime import datetime
	import io

	# Page chrome. The wide layout lets the full-width table and chart rendered
	# further down (both use use_container_width=True) span the browser window.
	st.set_page_config(
	    layout="wide",
	    page_title="ChatGPT Conversation Analyzer",
	)
	st.title("📊 ChatGPT Conversation Analyzer")

	# JSON-only file picker. The rest of the script branches on the truthiness
	# of this value to decide between analysing a file and showing the hint.
	uploaded_file = st.file_uploader(
	    label="Upload your chat-history.json file",
	    type="json",
	)

	def safe_timestamp(ts):
	    """Convert a Unix timestamp to a naive local ``datetime``, or ``None``.

	    Args:
	        ts: Seconds since the epoch as a number or numeric string. Anything
	            ``float()`` accepts is allowed.

	    Returns:
	        ``datetime.fromtimestamp(float(ts))`` on success, otherwise ``None``.
	        ``None`` is returned when ``ts`` is missing or non-numeric
	        (``TypeError`` / ``ValueError``, which also covers NaN), infinite or
	        too large (``OverflowError``), or rejected by the platform's
	        ``localtime`` (``OSError``).
	    """
	    try:
	        return datetime.fromtimestamp(float(ts))
	    except (TypeError, ValueError, OverflowError, OSError):
	        # One malformed timestamp must not abort processing of the whole
	        # upload; callers skip messages for which this returns None.
	        return None

	def _first_text_part(message):
	    """Return the first entry of ``message["content"]["parts"]`` as a string.

	    A missing or ``null`` ``content``, a missing or empty ``parts`` list and a
	    ``null`` first part all yield ``""``. A non-string first part is passed
	    through ``str`` so every value in the "Content" column is text, which
	    the keyword search and the Excel export both rely on.
	    """
	    content = message.get("content")
	    parts = content.get("parts") if isinstance(content, dict) else None
	    if not isinstance(parts, list) or not parts:
	        return ""
	    first = parts[0]
	    if first is None:
	        return ""
	    return first if isinstance(first, str) else str(first)


	def _extract_rows(chats):
	    """Flatten conversations into one row dict per timestamped message.

	    Only dict entries that have both a "title" key and a dict "mapping" are
	    considered. Nodes without a dict "message", and messages whose
	    "create_time" cannot be converted by ``safe_timestamp``, are skipped.

	    Returns:
	        A list of dicts with keys "Title", "Author", "Content" and "Time".
	    """
	    rows = []
	    for chat in chats:
	        if not isinstance(chat, dict) or "title" not in chat:
	            continue
	        mapping = chat.get("mapping")
	        if not isinstance(mapping, dict):
	            continue
	        # A null title would make sorted() over the title list fail when it
	        # is mixed with string titles, so normalise it to text up front.
	        raw_title = chat["title"]
	        title = "Untitled" if raw_title is None else str(raw_title)
	        for node in mapping.values():
	            message = node.get("message") if isinstance(node, dict) else None
	            if not isinstance(message, dict):
	                continue
	            created_at = safe_timestamp(message.get("create_time"))
	            if created_at is None:
	                continue
	            author = message.get("author")
	            role = author.get("role") if isinstance(author, dict) else None
	            rows.append({
	                "Title": title,
	                "Author": role or "",
	                "Content": _first_text_part(message),
	                "Time": created_at,
	            })
	    return rows


	if uploaded_file:
	    # UI boundary: any failure while parsing or rendering is shown to the
	    # user as an error message instead of crashing the app.
	    try:
	        data = json.load(uploaded_file)
	        # The conversation list is either the top-level array or stored
	        # under a "chats" / "conversations" key.
	        if isinstance(data, dict):
	            chats = data.get("chats") or data.get("conversations")
	        elif isinstance(data, list):
	            chats = data
	        else:
	            chats = []
	        if not isinstance(chats, list):
	            chats = []

	        if not chats:
	            st.error("No valid 'chats' or 'conversations' data found in the uploaded JSON file.")
	        else:
	            # Normalize to DataFrame
	            df = pd.DataFrame(_extract_rows(chats))

	            if not df.empty:
	                # Sidebar filters
	                st.sidebar.header("Filters")
	                authors = df["Author"].unique().tolist()
	                author_filter = st.sidebar.multiselect("Author", authors, default=authors)
	                min_date = df["Time"].min().date()
	                max_date = df["Time"].max().date()
	                date_range = st.sidebar.date_input("Date Range", [min_date, max_date])
	                title_filter = st.sidebar.selectbox("Conversation Title", ["All"] + sorted(df["Title"].unique().tolist()))
	                keyword_search = st.sidebar.text_input("Search in messages")

	                # While the user is half-way through picking a range the
	                # widget yields fewer than two dates; fall back to the data's
	                # own bounds for whichever end is still missing.
	                if not isinstance(date_range, (list, tuple)):
	                    date_range = (date_range,)
	                start_date = date_range[0] if len(date_range) >= 1 else min_date
	                end_date = date_range[1] if len(date_range) >= 2 else max_date

	                filtered_df = df[df["Author"].isin(author_filter)]
	                message_dates = filtered_df["Time"].dt.date
	                filtered_df = filtered_df[(message_dates >= start_date) & (message_dates <= end_date)]

	                if title_filter != "All":
	                    filtered_df = filtered_df[filtered_df["Title"] == title_filter]

	                if keyword_search:
	                    # regex=False: treat the input as literal text so that
	                    # characters such as "(" or "*" cannot raise re.error.
	                    filtered_df = filtered_df[
	                        filtered_df["Content"].str.contains(keyword_search, case=False, na=False, regex=False)
	                    ]

	                st.subheader("🧾 Conversation Records")
	                st.dataframe(filtered_df.sort_values(by="Time"), use_container_width=True)

	                st.subheader("📈 Activity Over Time")
	                # Messages per calendar day; the grouped column keeps the name "Time".
	                time_series = filtered_df.groupby(filtered_df["Time"].dt.date).size().reset_index(name="Messages")
	                fig = px.line(time_series, x="Time", y="Messages", markers=True)
	                st.plotly_chart(fig, use_container_width=True)

	                st.subheader("📥 Export Options")
	                export_format = st.selectbox("Choose export format", ["CSV", "Excel"])

	                if export_format == "CSV":
	                    csv_data = filtered_df.to_csv(index=False)
	                    st.download_button(
	                        label="Download CSV",
	                        data=csv_data,
	                        file_name="chat_export.csv",
	                        mime="text/csv"
	                    )
	                else:
	                    excel_buffer = io.BytesIO()
	                    with pd.ExcelWriter(excel_buffer, engine="openpyxl") as writer:
	                        filtered_df.to_excel(writer, index=False)
	                    # Read the buffer only after the writer has been closed,
	                    # so the workbook is complete.
	                    st.download_button(
	                        label="Download Excel",
	                        data=excel_buffer.getvalue(),
	                        file_name="chat_export.xlsx",
	                        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	                    )
	            else:
	                st.warning("No valid conversation data found in the uploaded file.")
	    except Exception as e:
	        st.error(f"Failed to process the uploaded file: {e}")
	else:
	    st.info("👈 Upload your exported ChatGPT JSON file to begin analysis.")