nice-bill's picture
initial commit
cdb73a8
import logging
import os
import pandas as pd
import plotly.express as px
import streamlit as st
import sys
import os
# Add the project root to the Python path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")))
from src.book_recommender.core.logging_config import configure_logging
from src.book_recommender.ml.feedback import get_all_feedback
configure_logging(log_file="analytics.log", log_level=os.getenv("LOG_LEVEL", "INFO"))
logger = logging.getLogger(__name__)
st.set_page_config(
page_title="DeepShelf Analytics Dashboard", page_icon="📊", layout="wide", initial_sidebar_state="collapsed"
)
st.title("DeepShelf Analytics Dashboard")
@st.cache_data
def load_feedback_data():
"""Load and process feedback data."""
logger.info("Loading feedback data for analytics...")
feedback_entries = get_all_feedback()
if not feedback_entries:
logger.warning("No feedback data found.")
return pd.DataFrame()
df = pd.DataFrame(feedback_entries)
df["timestamp"] = pd.to_datetime(df["timestamp"])
df["date"] = df["timestamp"].dt.date
logger.info(f"Loaded {len(df)} feedback entries.")
return df
feedback_df = load_feedback_data()
if feedback_df.empty:
st.info("No feedback data available yet to display analytics.")
else:
st.header("Key Metrics")
total_feedback = len(feedback_df)
positive_feedback = len(feedback_df[feedback_df["feedback"] == "positive"])
negative_feedback = len(feedback_df[feedback_df["feedback"] == "negative"])
satisfaction_percentage = (positive_feedback / total_feedback * 100) if total_feedback > 0 else 0
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric("Total Feedback", total_feedback)
with col2:
st.metric("Positive Feedback", positive_feedback)
with col3:
st.metric("Negative Feedback", negative_feedback)
with col4:
st.metric("Satisfaction Rate", f"{satisfaction_percentage:.1f}%")
st.markdown("---")
st.header("Feedback Over Time")
feedback_by_date = feedback_df.groupby(["date", "feedback"]).size().unstack(fill_value=0)
fig_time = px.line(
feedback_by_date,
x=feedback_by_date.index,
y=feedback_by_date.columns,
title="Feedback Count Over Time",
labels={"value": "Count", "date": "Date", "variable": "Feedback Type"},
color_discrete_map={"positive": "green", "negative": "red"},
)
st.plotly_chart(fig_time, use_container_width=True)
st.markdown("---")
st.header("Top Queries")
top_queries = feedback_df["query"].value_counts().reset_index()
top_queries.columns = ["Query", "Count"]
fig_queries = px.bar(
top_queries.head(10),
x="Query",
y="Count",
title="Top 10 Most Frequent Queries",
labels={"Count": "Number of Times Queried"},
color_discrete_sequence=px.colors.qualitative.Pastel,
)
st.plotly_chart(fig_queries, use_container_width=True)
st.markdown("---")
st.header("Most Liked / Disliked Books")
book_feedback_counts = feedback_df.groupby(["book_title", "feedback"]).size().unstack(fill_value=0)
book_feedback_counts["net_positive"] = book_feedback_counts.get("positive", 0) - book_feedback_counts.get(
"negative", 0
)
most_liked_books = book_feedback_counts.sort_values(by="net_positive", ascending=False).head(5)
most_disliked_books = book_feedback_counts.sort_values(by="net_positive", ascending=True).head(5)
col_liked, col_disliked = st.columns(2)
with col_liked:
st.subheader("Top 5 Most Liked Books (Net Positive)")
if not most_liked_books.empty:
st.dataframe(
most_liked_books[["positive", "negative", "net_positive"]].rename(
columns={"positive": "👍", "negative": "👎", "net_positive": "Net Score"}
)
)
else:
st.info("No liked book data.")
with col_disliked:
st.subheader("Top 5 Most Disliked Books (Net Negative)")
if not most_disliked_books.empty:
st.dataframe(
most_disliked_books[["positive", "negative", "net_positive"]].rename(
columns={"positive": "👍", "negative": "👎", "net_positive": "Net Score"}
)
)
else:
st.info("No disliked book data.")
st.markdown("---")
if st.checkbox("Show Raw Feedback Data"):
st.subheader("Raw Feedback Data")
st.dataframe(feedback_df)
def main():
"""Entry point for analytics dashboard"""
pass
if __name__ == "__main__":
main()