ash27kh committed on
Commit
72a282a
·
verified ·
1 Parent(s): ab587ae

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +205 -0
app.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from io import BytesIO
from datetime import datetime
import opik
import warnings

# Silence library warning chatter so the dashboard's console stays readable.
# NOTE(review): this is a blanket filter — it also hides genuine deprecation
# warnings from pandas/matplotlib/opik; consider narrowing it.
warnings.filterwarnings('ignore')
10
+
11
# --------------------------------------------------------
# CONFIG
# --------------------------------------------------------
import os

# Opik project/workspace the dashboard reads traces from.
OPIK_PROJECT_NAME = 'production-vf-ai'
OPIK_WORKSPACE_NAME = 'verba-tech-ninja'

# SECURITY FIX: read the key from the environment instead of committing a
# secret to source control. The placeholder fallback keeps the original
# "paste your key here" workflow working unchanged.
OPIK_API_KEY = os.environ.get("OPIK_API_KEY", "YOUR_OPIK_API_KEY_HERE")
17
+
18
+
19
# --------------------------------------------------------
# INIT OPIK CLIENT
# --------------------------------------------------------
# Module-level Opik client shared by all helpers below.
# NOTE(review): constructed at import time — a bad key/workspace fails the
# whole app at startup rather than per-request; confirm that is acceptable.
client = opik.Opik(
    api_key=OPIK_API_KEY,
    workspace=OPIK_WORKSPACE_NAME,
    project_name=OPIK_PROJECT_NAME
)
27
+
28
+
29
# --------------------------------------------------------
# FETCH TRACES
# --------------------------------------------------------
def fetch_traces(client_name, start_iso, end_iso):
    """Query Opik for "analyse_transcript" traces tagged with *client_name*
    whose start/end times fall inside the given ISO window.

    Returns the matching traces as a list (up to 50 000 results).
    """
    clauses = [
        'name contains "analyse_transcript"',
        f'start_time >= "{start_iso}"',
        f'end_time <= "{end_iso}"',
        f'tags contains "{client_name}"',
    ]
    query = " AND ".join(clauses)

    matches = client.search_traces(
        project_name=OPIK_PROJECT_NAME,
        filter_string=query,
        max_results=50000,
    )
    return list(matches)
46
+
47
+
48
# --------------------------------------------------------
# FILTER TRACES
# --------------------------------------------------------
def filter_traces(traces):
    """Keep only the traces relevant to the dashboard.

    A trace survives when ALL of the following hold:
      * its tags contain neither "_call_" nor "[CAMPAIGN_CONVERSATION]",
      * it has a non-empty output,
      * output["category"] != "customer" AND no campaign use_case is set.

    Returns the surviving traces in their original order.
    """
    final = []
    for trace in traces:
        tags = trace.tags or []

        if "_call_" in tags or "[CAMPAIGN_CONVERSATION]" in tags:
            continue

        output = trace.output
        if not output:
            continue

        category = output.get("category")
        # BUG FIX: ``output.get("campaign_payload", {})`` returns None (not
        # the {} default) when the key is present with an explicit None value,
        # which then crashed on ``.get("use_case")``. ``or {}`` covers both
        # the missing-key and present-but-None cases.
        use_case = (output.get("campaign_payload") or {}).get("use_case")

        if category != "customer" and use_case is None:
            final.append(trace)

    return final
70
+
71
+
72
# --------------------------------------------------------
# PARSE SPANS
# --------------------------------------------------------
def extract_meta(trace):
    """Fetch the spans of *trace* and flatten each "chat_completion_parse"
    span into a metadata dict: duration (seconds), tier, model, completion
    token count, and an error flag.
    """
    spans = client.search_spans(project_name=OPIK_PROJECT_NAME, trace_id=trace.id)
    out = []

    for s in spans:
        if s.name != "chat_completion_parse":
            continue

        # ROBUSTNESS FIX: ``metadata`` can be missing/None on a span, and
        # ``usage`` can be present with an explicit None value — either case
        # crashed the original ``.get`` chain.
        metadata = s.metadata or {}
        usage = metadata.get("usage") or {}

        out.append({
            # duration appears to be reported in milliseconds (charts label
            # the /1000 result as seconds) — TODO confirm against the SDK.
            "duration": s.duration / 1000,
            "tier": metadata.get("service_tier", "default"),
            "model": metadata.get("model"),
            "tokens": usage.get("completion_tokens", 0),
            "error": bool(s.error_info),
        })
    return out
92
+
93
+
94
# --------------------------------------------------------
# RUN MAIN PIPELINE
# --------------------------------------------------------
def run_pipeline(client_name, start_dt, end_dt, metadata_fields):
    """End-to-end dashboard pipeline for one client and date window.

    Parameters
    ----------
    client_name : str
        Tag used to select the client's traces.
    start_dt, end_dt : str
        Naive ISO timestamps from the UI textboxes, treated as UTC.
    metadata_fields : list[str]
        Columns to keep in the table / CSV outputs.

    Returns exactly FIVE values — one per wired Gradio output component:
    (stats, filtered DataFrame, latency PNG buffer, token PNG buffer,
    CSV file path).
    """
    import tempfile

    # The textboxes collect naive timestamps; suffix "Z" marks them as UTC
    # for the Opik filter string.
    start_iso = start_dt + "Z"
    end_iso = end_dt + "Z"

    traces = fetch_traces(client_name, start_iso, end_iso)
    traces = filter_traces(traces)

    rows = []
    for t in traces:
        rows.extend(extract_meta(t))

    # BUG FIX: the empty-data branch previously returned only 4 values while
    # the button wires 5 output components — Gradio would fail to unpack.
    if not rows:
        return "No data", None, None, None, None

    df = pd.DataFrame(rows)
    # Keep only the metadata columns the user ticked in the UI.
    df_filtered = df[metadata_fields]

    # ---------------- Stats -----------------
    # Latency stats exclude errored spans; token stats include everything.
    durations = df.loc[~df["error"], "duration"]
    tokens = df["tokens"]
    has_latency = len(durations) > 0

    stats = {
        "total_spans": len(df),
        "errors": int(df["error"].sum()),
        "error_rate_%": round(100 * df["error"].mean(), 2),

        "mean_latency_sec": round(durations.mean(), 3) if has_latency else None,
        "median_latency_sec": round(durations.median(), 3) if has_latency else None,
        "p90_latency_sec": round(durations.quantile(0.9), 3) if has_latency else None,
        "p95_latency_sec": round(durations.quantile(0.95), 3) if has_latency else None,
        "min_latency": round(durations.min(), 3) if has_latency else None,
        "max_latency": round(durations.max(), 3) if has_latency else None,

        "avg_tokens": round(tokens.mean(), 2),
        "max_tokens": int(tokens.max()),
    }

    # ---------------- Charts -----------------
    fig1, ax1 = plt.subplots()
    ax1.hist(df["duration"], bins=30)
    ax1.set_title("Latency Distribution (seconds)")
    ax1.set_xlabel("Seconds")
    ax1.set_ylabel("Frequency")

    fig2, ax2 = plt.subplots()
    ax2.hist(df["tokens"], bins=25)
    ax2.set_title("Completion Token Distribution")
    ax2.set_xlabel("Tokens")
    ax2.set_ylabel("Frequency")

    # Render both figures to in-memory PNGs, then release the figures so
    # repeated runs don't leak matplotlib state.
    # NOTE(review): some Gradio versions require a filepath/ndarray/PIL image
    # for gr.Image rather than a BytesIO — confirm against the pinned version.
    buf1, buf2 = BytesIO(), BytesIO()
    fig1.savefig(buf1, format="png")
    fig2.savefig(buf2, format="png")
    buf1.seek(0)
    buf2.seek(0)
    plt.close(fig1)
    plt.close(fig2)

    # BUG FIX: gr.File expects a path on disk; the original handed it the raw
    # CSV text. Write to a temp file and return its path instead.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, newline=""
    ) as f:
        df_filtered.to_csv(f, index=False)
        csv_path = f.name

    return stats, df_filtered, buf1, buf2, csv_path
162
+
163
+
164
# --------------------------------------------------------
# GRADIO UI
# --------------------------------------------------------
with gr.Blocks(title="Opik Analytics Dashboard") as demo:

    # Header / usage hint.
    gr.Markdown("# 📊 **Opik Analytics Dashboard** (Gradio)")
    gr.Markdown("Analyze traces by client, date range, and metadata fields.")

    # --- Inputs (creation order defines layout order) ---
    with gr.Row():
        client_name = gr.Dropdown(
            ["fusiongroup", "vita", "staragent", "testclient", "other"],
            label="Select Client",
            value="fusiongroup",
        )

    with gr.Row():
        start_dt = gr.Textbox(
            label="Start DateTime UTC (YYYY-MM-DDTHH:MM:SS)",
            value="2025-11-17T00:00:00",
        )
        end_dt = gr.Textbox(
            label="End DateTime UTC (YYYY-MM-DDTHH:MM:SS)",
            value="2025-11-17T12:00:00",
        )

    metadata_fields = gr.CheckboxGroup(
        ["duration", "tier", "tokens", "model", "error"],
        label="Select Metadata Fields",
        value=["duration", "tier", "tokens"],
    )

    run_btn = gr.Button("Run Analysis")

    # --- Outputs: one component per value run_pipeline returns ---
    stats_output = gr.JSON(label="📈 Summary Statistics")
    table_output = gr.DataFrame(label="📄 Raw Data")
    plot_latency = gr.Image(label="⏱ Latency Distribution")
    plot_tokens = gr.Image(label="🔢 Token Distribution")
    csv_download = gr.File(label="⬇ Download CSV")

    pipeline_inputs = [client_name, start_dt, end_dt, metadata_fields]
    pipeline_outputs = [
        stats_output,
        table_output,
        plot_latency,
        plot_tokens,
        csv_download,
    ]
    run_btn.click(fn=run_pipeline, inputs=pipeline_inputs, outputs=pipeline_outputs)


demo.launch()