# Streamlit dashboard: per-feature and per-client LLM token usage with an
# estimated cost in INR, read from the "agent_assistance" MongoDB database.
#
# NOTE(review): the HTML/CSS markup inside the st.markdown() strings appears to
# have been stripped from this copy of the file. The literals below reproduce
# the remaining text (including blank lines) exactly as found.

import html
import io
import os
from datetime import date, datetime

import pandas as pd
import pytz
import streamlit as st
from dotenv import load_dotenv
from pymongo import MongoClient

load_dotenv()

st.set_page_config(
    page_title="Agent Assistance Usage",
    page_icon=None,
    layout="wide",
    initial_sidebar_state="expanded",
)

st.markdown(" ", unsafe_allow_html=True)

# Fixed conversion rate applied to every USD amount shown in INR.
USD_TO_INR = 92.0

# Price table used by calc_cost(); values are divided by 1_000_000 tokens there.
MODEL_PRICING = {
    "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
    "gpt-4o": {"input": 2.50, "output": 10.00},
    "gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
    "gpt-4-turbo": {"input": 10.00, "output": 30.00},
    "claude-3-haiku": {"input": 0.25, "output": 1.25},
    "claude-3-sonnet": {"input": 3.00, "output": 15.00},
    "claude-3-opus": {"input": 15.00, "output": 75.00},
}

# (collection name, display label, ticket filter or None)
FEATURE_REGISTRY = [
    ("entity_extraction_logs", "Entity Extraction", None),
    ("generate_nps_questions_logs", "NPS Question Generation", None),
    ("grammer_correction_logs", "Grammar Correction", None),
    ("recommended_template_logs", "Recommended Templates", None),
    ("summarization_logs", "Summarization", None),
    ("template_recommendation_logs", "Template Recommendation (Chat)", "chat"),
    ("template_recommendation_logs", "Template Recommendation (Email)", "email"),
    ("text_expansion_logs", "Text Expansion", None),
    ("tone_change_logs", "Tone Change", None),
]

FEATURE_DEFAULT_MODELS = {
    "Summarization": "gpt-4.1-nano",
    "Grammar Correction": "gpt-4.1-nano",
}

# Model pre-selected for any feature without an entry in FEATURE_DEFAULT_MODELS.
FALLBACK_MODEL = "gpt-4o-mini"

# Collections whose usage is aggregated with the push + double-unwind pipeline.
NESTED_COLLECTIONS = {"template_recommendation_logs", "tone_change_logs"}

# Named aggregations shared by every per-feature / per-client roll-up.
USAGE_AGGREGATIONS = {
    "input_tokens": ("input_tokens", "sum"),
    "output_tokens": ("output_tokens", "sum"),
    "total_tokens": ("total_tokens", "sum"),
    "input_cost_inr": ("input_cost_inr", "sum"),
    "output_cost_inr": ("output_cost_inr", "sum"),
    "total_cost_inr": ("total_cost_inr", "sum"),
}


def fmt_inr(usd_val):
    """Format a USD amount as an INR string using USD_TO_INR."""
    return f"₹{usd_val * USD_TO_INR:,.2f}"


def build_excel(df):
    """Build an .xlsx workbook with one sheet per feature and return its bytes.

    Each sheet lists one row per ``client_id`` (summed tokens and INR costs,
    first model seen), sorted by total cost descending, followed by a TOTAL
    row. If ``df`` has no ``feature`` column or no rows, the workbook contains
    a single empty placeholder sheet, because a workbook without sheets cannot
    be saved.

    Args:
        df: Usage frame with the columns produced by ``fetch_tokens``.

    Returns:
        The workbook serialized as ``bytes``.
    """
    from openpyxl import Workbook
    from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
    from openpyxl.utils import get_column_letter

    header_fill = PatternFill("solid", fgColor="2563EB")
    header_font = Font(name="Calibri", bold=True, color="FFFFFF", size=10)
    total_fill = PatternFill("solid", fgColor="EFF4FF")
    total_font = Font(name="Calibri", bold=True, color="111827", size=10)
    cell_font = Font(name="Calibri", size=10)
    border_side = Side(style="thin", color="E2E5EF")
    thin_border = Border(
        left=border_side, right=border_side, top=border_side, bottom=border_side
    )
    header_alignment = Alignment(horizontal="center", vertical="center")
    left_alignment = Alignment(horizontal="left", vertical="center")
    right_alignment = Alignment(horizontal="right", vertical="center")

    # (header text, source column, column width)
    columns = [
        ("Client ID", "client_id", 18),
        ("Input Tokens", "input_tokens", 14),
        ("Output Tokens", "output_tokens", 14),
        ("Total Tokens", "total_tokens", 14),
        ("Input Cost (INR)", "input_cost_inr", 16),
        ("Output Cost (INR)", "output_cost_inr", 17),
        ("Total Cost (INR)", "total_cost_inr", 16),
        ("Model", "model", 16),
    ]

    def body_alignment(col_idx):
        # First column (client id / "TOTAL") is left-aligned, the rest right.
        return right_alignment if col_idx > 1 else left_alignment

    wb = Workbook()
    wb.remove(wb.active)

    features = df["feature"].unique() if "feature" in df.columns else []
    for feature in features:
        # Excel limits sheet titles to 31 characters.
        ws = wb.create_sheet(title=feature[:31])

        ws.append([header for header, _, _ in columns])
        for col_idx, (_, _, width) in enumerate(columns, start=1):
            cell = ws.cell(row=1, column=col_idx)
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = header_alignment
            cell.border = thin_border
            ws.column_dimensions[get_column_letter(col_idx)].width = width
        ws.row_dimensions[1].height = 20

        client_data = (
            df[df["feature"] == feature]
            .groupby("client_id")
            .agg(**USAGE_AGGREGATIONS, model=("model", "first"))
            .reset_index()
            .sort_values("total_cost_inr", ascending=False)
        )

        for r_idx, row in enumerate(client_data.itertuples(index=False), start=2):
            values = [
                row.client_id,
                row.input_tokens,
                row.output_tokens,
                row.total_tokens,
                round(row.input_cost_inr, 4),
                round(row.output_cost_inr, 4),
                round(row.total_cost_inr, 4),
                row.model,
            ]
            for c_idx, val in enumerate(values, start=1):
                cell = ws.cell(row=r_idx, column=c_idx, value=val)
                cell.font = cell_font
                cell.border = thin_border
                cell.alignment = body_alignment(c_idx)

        total_row = len(client_data) + 2
        totals = [
            "TOTAL",
            int(client_data["input_tokens"].sum()),
            int(client_data["output_tokens"].sum()),
            int(client_data["total_tokens"].sum()),
            round(client_data["input_cost_inr"].sum(), 4),
            round(client_data["output_cost_inr"].sum(), 4),
            round(client_data["total_cost_inr"].sum(), 4),
            "",
        ]
        for c_idx, val in enumerate(totals, start=1):
            cell = ws.cell(row=total_row, column=c_idx, value=val)
            cell.fill = total_fill
            cell.font = total_font
            cell.border = thin_border
            cell.alignment = body_alignment(c_idx)

        # Keep the header row visible while scrolling.
        ws.freeze_panes = "A2"

    if not wb.worksheets:
        wb.create_sheet(title="No Data")

    buf = io.BytesIO()
    wb.save(buf)
    buf.seek(0)
    return buf.getvalue()


def calc_cost(input_tokens, output_tokens, model):
    """Return ``(input_cost, output_cost, total_cost)`` in USD.

    Works element-wise, so scalars and pandas Series are both accepted.

    Raises:
        KeyError: If ``model`` is not a key of ``MODEL_PRICING``.
    """
    p = MODEL_PRICING[model]
    ic = (input_tokens / 1_000_000) * p["input"]
    oc = (output_tokens / 1_000_000) * p["output"]
    return ic, oc, ic + oc


@st.cache_resource
def get_mongo_client():
    """Return a MongoClient for DEV_MONGO_CONNECTION, or None if it is unset."""
    conn = os.getenv("DEV_MONGO_CONNECTION")
    return MongoClient(conn) if conn else None


def _build_match(collection_name, ticket_filter, start_dt, end_dt):
    """Build the $match filter for one feature over [start_dt, end_dt)."""
    # Start bound is inclusive for every collection so all features cover the
    # same time window.
    match = {"created_at": {"$gte": start_dt, "$lt": end_dt}}

    if collection_name == "template_recommendation_logs" and ticket_filter == "email":
        # NOTE(review): unlike every other query, this one does not require a
        # client_id; rows grouped under a null client are dropped by the
        # per-client groupby later. Confirm whether that is intended.
        match["ticket_type"] = "email"
        match["template"] = {"$ne": []}
        return match

    match["client_id"] = {"$exists": True, "$ne": None}
    if ticket_filter == "email":
        match["ticket_type"] = "email"
    elif ticket_filter == "chat":
        match["ticket_type"] = {"$ne": "email"}
    return match


def _build_pipeline(collection_name, match):
    """Build the aggregation pipeline summing token usage per client_id."""
    if collection_name in NESTED_COLLECTIONS:
        pipeline = [
            {"$match": match},
            {"$group": {"_id": "$client_id", "usage_arrays": {"$push": "$usage"}}},
            {"$unwind": "$usage_arrays"},
            {"$unwind": "$usage_arrays"},
            {"$group": {
                "_id": "$_id",
                "total_tokens": {"$sum": "$usage_arrays.total_tokens"},
                "input_tokens": {"$sum": "$usage_arrays.prompt_tokens"},
                "output_tokens": {"$sum": "$usage_arrays.completion_tokens"},
            }},
        ]
    else:
        pipeline = [
            {"$match": match},
            {"$unwind": "$usage"},
            {"$group": {
                "_id": "$client_id",
                "total_tokens": {"$sum": "$usage.total_tokens"},
                "input_tokens": {"$sum": "$usage.prompt_tokens"},
                "output_tokens": {"$sum": "$usage.completion_tokens"},
            }},
        ]

    pipeline.append({
        "$project": {
            "_id": 0,
            "client_id": "$_id",
            "total_tokens": 1,
            "input_tokens": 1,
            "output_tokens": 1,
        }
    })
    return pipeline


def fetch_tokens(collection_name, label, ticket_filter, start_dt, end_dt, model):
    """Fetch per-client token totals for one feature and attach cost columns.

    Args:
        collection_name: Collection in the ``agent_assistance`` database.
        label: Display name stored in the ``feature`` column.
        ticket_filter: ``"email"``, ``"chat"`` or ``None`` (no ticket filter).
        start_dt: Inclusive lower bound on ``created_at``.
        end_dt: Exclusive upper bound on ``created_at``.
        model: Key of ``MODEL_PRICING`` used to price the tokens.

    Returns:
        A DataFrame with one row per client, or an empty DataFrame when there
        is no Mongo connection, no matching data, or the query fails (the
        failure is reported with ``st.error``).
    """
    mongo = get_mongo_client()
    if mongo is None:
        return pd.DataFrame()

    col = mongo["agent_assistance"][collection_name]
    match = _build_match(collection_name, ticket_filter, start_dt, end_dt)
    pipeline = _build_pipeline(collection_name, match)

    try:
        results = list(col.aggregate(pipeline))
        if not results:
            return pd.DataFrame()

        df = pd.DataFrame(results)
        df["model"] = model
        df["collection"] = collection_name
        df["feature"] = label
        df["ticket_filter"] = ticket_filter or "all"

        # calc_cost is plain arithmetic, so it can price whole columns at once.
        ic, oc, tc = calc_cost(df["input_tokens"], df["output_tokens"], model)
        df["input_cost_usd"] = ic
        df["output_cost_usd"] = oc
        df["total_cost_usd"] = tc
        df["input_cost_inr"] = df["input_cost_usd"] * USD_TO_INR
        df["output_cost_inr"] = df["output_cost_usd"] * USD_TO_INR
        df["total_cost_inr"] = df["total_cost_usd"] * USD_TO_INR
        return df

    except Exception as e:  # UI boundary: report the failure and keep going.
        st.error(f"Error fetching `{collection_name}` ({label}): {e}")
        return pd.DataFrame()


# --------------------------------------------------------------------------
# Sidebar: date range, per-feature model selection, run button
# --------------------------------------------------------------------------
with st.sidebar:
    st.markdown("Agent Assistance", unsafe_allow_html=True)
    st.markdown("\n\nDate Range\n\n", unsafe_allow_html=True)

    start_date = st.date_input("From", value=date(2026, 3, 29))
    end_date = st.date_input("To", value=date(2026, 4, 1))

    st.markdown("\n\nModel Assignment\n\n", unsafe_allow_html=True)
    st.caption("Assign a model to each feature.")

    model_options = list(MODEL_PRICING)
    feature_models = {}
    for _coll, label, _ in FEATURE_REGISTRY:
        if label in feature_models:
            # One selectbox per label; widget keys must be unique.
            continue
        default_model = FEATURE_DEFAULT_MODELS.get(label, FALLBACK_MODEL)
        feature_models[label] = st.selectbox(
            label,
            options=model_options,
            index=model_options.index(default_model),
            key=f"model_{label}",
        )

    st.markdown("\n\n", unsafe_allow_html=True)
    run = st.button("Run Analysis", use_container_width=True)


# --------------------------------------------------------------------------
# Main area
# --------------------------------------------------------------------------
if run:
    if get_mongo_client() is None:
        st.error("MongoDB connection not found. Set DEV_MONGO_CONNECTION in your .env file.")
        st.stop()

    if start_date > end_date:
        st.error("The 'From' date must not be after the 'To' date.")
        st.stop()

    # NOTE(review): end_dt is midnight at the start of the "To" date and the
    # queries use $lt, so the "To" day itself is excluded. Confirm intended.
    start_dt = datetime(start_date.year, start_date.month, start_date.day, tzinfo=pytz.UTC)
    end_dt = datetime(end_date.year, end_date.month, end_date.day, tzinfo=pytz.UTC)

    all_dfs = []
    bar = st.progress(0, text="Fetching data…")
    for i, (coll, label, ticket_filter) in enumerate(FEATURE_REGISTRY):
        bar.progress((i + 1) / len(FEATURE_REGISTRY), text=f"Fetching {label}…")
        df = fetch_tokens(coll, label, ticket_filter, start_dt, end_dt, feature_models[label])
        if not df.empty:
            all_dfs.append(df)
    bar.empty()

    if not all_dfs:
        st.info("No data found for the selected date range.")
        st.stop()

    full_df = pd.concat(all_dfs, ignore_index=True)

    total_input = int(full_df["input_tokens"].sum())
    total_output = int(full_df["output_tokens"].sum())
    total_tokens = int(full_df["total_tokens"].sum())
    pct_in = total_input / total_tokens * 100 if total_tokens else 0
    pct_out = total_output / total_tokens * 100 if total_tokens else 0

    input_cost_usd = full_df["input_cost_usd"].sum()
    output_cost_usd = full_df["output_cost_usd"].sum()
    total_cost_usd = full_df["total_cost_usd"].sum()

    st.markdown("\n\n", unsafe_allow_html=True)

    c1, c2, c3, c4 = st.columns(4, gap="medium")
    with c1:
        st.markdown(
            f"\n\nTotal Tokens\n\n{total_tokens:,}\n\nInput + Output combined\n\n",
            unsafe_allow_html=True,
        )
    with c2:
        st.markdown(
            f"\n\nInput Tokens\n\n{total_input:,}\n\n"
            f"{pct_in:.1f}% of total • {fmt_inr(input_cost_usd)}\n\n",
            unsafe_allow_html=True,
        )
    with c3:
        st.markdown(
            f"\n\nOutput Tokens\n\n{total_output:,}\n\n"
            f"{pct_out:.1f}% of total • {fmt_inr(output_cost_usd)}\n\n",
            unsafe_allow_html=True,
        )
    with c4:
        st.markdown(
            f"\n\nApproximate Cost\n\n{fmt_inr(total_cost_usd)}\n\n"
            f"{len(FEATURE_REGISTRY)} features • ${total_cost_usd:.4f} USD\n\n",
            unsafe_allow_html=True,
        )

    st.markdown("\n\n", unsafe_allow_html=True)

    tab1, tab2 = st.tabs(["By Feature", "By Client"])

    with tab1:
        feat_df = (
            full_df.groupby(["feature", "model", "ticket_filter"])
            .agg(
                **USAGE_AGGREGATIONS,
                total_cost_usd=("total_cost_usd", "sum"),
                client_count=("client_id", "nunique"),
            )
            .reset_index()
            .sort_values("total_cost_inr", ascending=False)
        )

        st.markdown(
            "\n\nFeature\n\nTokens (Total / Input / Output)\n\n"
            "Input Cost\n\nOutput Cost\n\nTotal Cost\n\n",
            unsafe_allow_html=True,
        )

        for row in feat_df.itertuples(index=False):
            pct = row.total_tokens / total_tokens * 100 if total_tokens else 0
            type_badge = ""
            if row.ticket_filter == "email":
                type_badge = "email"
            elif row.ticket_filter == "chat":
                type_badge = "chat"

            st.markdown(
                f"\n\n{row.feature}\n\n"
                f"{row.model}{type_badge} {row.client_count} client(s) • {pct:.1f}% of tokens\n\n"
                f"{row.total_tokens:,}\n"
                f"IN: {row.input_tokens:,} | OUT: {row.output_tokens:,}\n\n"
                f"₹{row.input_cost_inr:,.4f}\n\n"
                f"₹{row.output_cost_inr:,.4f}\n\n"
                f"₹{row.total_cost_inr:,.4f}\n\n",
                unsafe_allow_html=True,
            )

        st.markdown("\n\n", unsafe_allow_html=True)
        st.markdown("\n\nCost Distribution by Feature\n\n", unsafe_allow_html=True)

        chart_df = feat_df.set_index("feature")[["input_cost_inr", "output_cost_inr"]]
        chart_df.columns = ["Input Cost (INR)", "Output Cost (INR)"]
        st.bar_chart(chart_df, color=["#2563eb", "#0ea5e9"])

    with tab2:
        client_df = (
            full_df.groupby("client_id")
            .agg(
                **USAGE_AGGREGATIONS,
                features_used=("feature", lambda x: ", ".join(sorted(set(x)))),
            )
            .reset_index()
            .sort_values("total_cost_inr", ascending=False)
        )

        st.markdown(
            "\n\nClient ID\n\nTokens (Total / Input / Output)\n\n"
            "Input Cost\n\nOutput Cost\n\nTotal Cost\n\n",
            unsafe_allow_html=True,
        )

        for row in client_df.itertuples(index=False):
            pct = row.total_tokens / total_tokens * 100 if total_tokens else 0
            # client_id comes from the database and is rendered with
            # unsafe_allow_html=True, so it must be escaped.
            safe_client_id = html.escape(str(row.client_id))

            st.markdown(
                f"\n\n                        {safe_client_id}\n"
                f"                    \n\n"
                f"{row.features_used} • {pct:.1f}% of tokens\n\n"
                f"{row.total_tokens:,}\n"
                f"IN: {row.input_tokens:,} | OUT: {row.output_tokens:,}\n\n"
                f"₹{row.input_cost_inr:,.4f}\n\n"
                f"₹{row.output_cost_inr:,.4f}\n\n"
                f"₹{row.total_cost_inr:,.4f}\n\n",
                unsafe_allow_html=True,
            )

        st.markdown("\n\n", unsafe_allow_html=True)
        st.markdown("\n\nTop 10 Clients by Cost\n\n", unsafe_allow_html=True)

        top_c = client_df.head(10).set_index("client_id")[["input_cost_inr", "output_cost_inr"]]
        top_c.columns = ["Input Cost (INR)", "Output Cost (INR)"]
        st.bar_chart(top_c, color=["#16a34a", "#d97706"])

    st.markdown("\n\n", unsafe_allow_html=True)
    st.markdown("\n\nExport\n\n", unsafe_allow_html=True)

    fname = f"token_usage_{start_date.strftime('%Y%m%d')}_to_{end_date.strftime('%Y%m%d')}.xlsx"
    excel_bytes = build_excel(full_df)
    st.download_button(
        label="⬇ Download",
        data=excel_bytes,
        file_name=fname,
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        use_container_width=False,
    )

else:
    st.markdown(
        "\n\nSet the date range and assign a model to each feature in the sidebar, "
        "then click Run Analysis to calculate token usage and estimated costs "
        "across all features in INR.\n\n",
        unsafe_allow_html=True,
    )
    st.markdown("\n\nFeatures Included in Analysis\n\n", unsafe_allow_html=True)

    # NOTE(review): not referenced by the visible text below; presumably it was
    # used inside the stripped card markup. Kept so restoring it still works.
    colors = ["c-blue", "c-sky", "c-green", "c-amber", "c-blue", "c-sky", "c-green", "c-amber", "c-blue"]

    cols = st.columns(4, gap="medium")
    for i, (_coll, label, tfilter) in enumerate(FEATURE_REGISTRY):
        if tfilter == "email":
            type_tag_html = "email"
        elif tfilter == "chat":
            type_tag_html = "chat"
        else:
            type_tag_html = ""

        default_model = FEATURE_DEFAULT_MODELS.get(label, FALLBACK_MODEL)
        card_html = (
            "\n\n"
            "\n\nFeature\n\n"
            f"\n\n{label}\n\n"
            "\n\n"
            f"{default_model}"
            + (f" {type_tag_html}" if type_tag_html else "")
            + "\n\n"
            "\n\n"
        )
        with cols[i % 4]:
            st.markdown(card_html, unsafe_allow_html=True)