llm-brand-tracker / metrics.py
Marek4321's picture
Upload 7 files
98306c5 verified
"""
Metrics calculation for brand visibility analysis
"""
from typing import List, Dict, Optional
from collections import defaultdict
import pandas as pd
class BrandMetrics:
    """Calculate brand visibility metrics from LLM responses.

    Each response is an ordered list of brand names. From these the class
    derives, per brand: how many responses mention it (appearances), how
    often it is listed first, its average first-occurrence position, and
    its share of all mentions.
    """

    # Columns returned to callers, in display order.
    _DISPLAY_COLUMNS = [
        "Brand", "Visibility Rate", "Top-1 Share", "Avg Position",
        "Mention Share", "Total Mentions", "Appearances",
    ]

    # Public ``metric`` names accepted by get_top_brands -> raw numeric
    # column used for sorting (the display columns are formatted strings
    # and would sort lexicographically, e.g. "9.0%" > "10.0%").
    _SORT_COLUMNS = {
        "visibility": "_visibility_rate",
        "top1": "_top1_share",
        "mentions": "_total_mentions",
    }

    def __init__(self, responses: List[List[str]]):
        """
        Initialize metrics calculator

        Args:
            responses: List of brand lists (each inner list is one LLM response)
        """
        self.responses = responses
        self.total_responses = len(responses)

        # Dictionaries for counting
        self.brand_counts = defaultdict(int)         # Total mentions
        self.brand_first_counts = defaultdict(int)   # First position mentions
        self.brand_positions_sum = defaultdict(int)  # Sum of positions
        self.brand_positions_n = defaultdict(int)    # Count for average calculation

        self._calculate_raw_metrics()

    def _calculate_raw_metrics(self):
        """Populate the raw counters from ``self.responses``.

        The counters are cleared first so that calling this method again
        recomputes the totals instead of double counting.
        """
        for counter in (
            self.brand_counts,
            self.brand_first_counts,
            self.brand_positions_sum,
            self.brand_positions_n,
        ):
            counter.clear()

        for response in self.responses:
            # Track unique brands in this response
            seen_brands = set()
            for position, brand in enumerate(response, start=1):
                # Every occurrence counts as a mention.
                self.brand_counts[brand] += 1

                if position == 1:
                    self.brand_first_counts[brand] += 1

                # Only the first occurrence in a response contributes to
                # appearances and to the average position.
                if brand not in seen_brands:
                    self.brand_positions_sum[brand] += position
                    self.brand_positions_n[brand] += 1
                    seen_brands.add(brand)

    def get_brand_metrics(self, brand_name: str) -> Optional[Dict]:
        """
        Get metrics for a specific brand

        Args:
            brand_name: Brand to look up (exact match against the response entries)

        Returns:
            Dictionary with metrics or None if brand not found
        """
        if brand_name not in self.brand_counts:
            return None

        total_mentions = sum(self.brand_counts.values())
        mentions = self.brand_counts[brand_name]
        # .get() keeps this read-only: indexing a defaultdict would insert
        # a zero entry for brands that were never listed first.
        appearances = self.brand_positions_n.get(brand_name, 0)
        first_count = self.brand_first_counts.get(brand_name, 0)

        return {
            "brand": brand_name,
            "visibility_rate": appearances / self.total_responses,
            "top1_share": first_count / self.total_responses,
            "avg_position": (
                self.brand_positions_sum[brand_name] / appearances
                if appearances > 0 else None
            ),
            "mention_share": mentions / total_mentions if total_mentions > 0 else 0,
            "total_mentions": mentions,
            "appearances": appearances,
        }

    def _build_ranking(self, sort_column: str) -> pd.DataFrame:
        """Build the display ranking sorted descending by a raw column.

        Args:
            sort_column: One of the values of ``_SORT_COLUMNS``.

        Returns:
            DataFrame restricted to the display columns with a fresh
            0..n-1 index. Empty (but with all display columns present)
            when no brand was seen.
        """
        total_mentions = sum(self.brand_counts.values())
        rows = []
        for brand, mentions in self.brand_counts.items():
            appearances = self.brand_positions_n.get(brand, 0)
            first_count = self.brand_first_counts.get(brand, 0)
            visibility = appearances / self.total_responses
            top1 = first_count / self.total_responses
            mention_share = mentions / total_mentions if total_mentions > 0 else 0
            rows.append({
                "Brand": brand,
                "Visibility Rate": f"{(visibility * 100):.1f}%",
                "Top-1 Share": f"{(top1 * 100):.1f}%",
                "Avg Position": (
                    f"{(self.brand_positions_sum[brand] / appearances):.2f}"
                    if appearances > 0 else "N/A"
                ),
                "Mention Share": f"{(mention_share * 100):.1f}%",
                "Total Mentions": mentions,
                "Appearances": appearances,
                # Raw values for sorting
                "_visibility_rate": visibility,
                "_top1_share": top1,
                "_total_mentions": mentions,
            })

        # Passing ``columns`` explicitly guarantees the sort column exists
        # even when ``rows`` is empty, so sort_values cannot raise KeyError.
        all_columns = self._DISPLAY_COLUMNS + list(self._SORT_COLUMNS.values())
        df = pd.DataFrame(rows, columns=all_columns)

        # Stable sort: tied brands keep their first-seen order.
        df = df.sort_values(sort_column, ascending=False, kind="stable")

        # Drop raw columns used for sorting
        return df[self._DISPLAY_COLUMNS].reset_index(drop=True)

    def get_all_brands_ranking(self) -> pd.DataFrame:
        """
        Get ranking of all brands

        Returns:
            DataFrame with all brands and their metrics, sorted by visibility_rate
            (descending). Empty DataFrame with the same columns if there are no brands.
        """
        return self._build_ranking(self._SORT_COLUMNS["visibility"])

    def get_top_brands(self, n: int = 10, metric: str = "visibility") -> pd.DataFrame:
        """
        Get top N brands by specified metric

        Args:
            n: Number of brands to return
            metric: Metric to sort by ('visibility', 'top1', 'mentions');
                any other value falls back to 'visibility'

        Returns:
            DataFrame with top brands, sorted descending by the chosen metric
        """
        sort_column = self._SORT_COLUMNS.get(metric, self._SORT_COLUMNS["visibility"])
        return self._build_ranking(sort_column).head(n)

    def get_summary_stats(self) -> Dict:
        """Get overall summary statistics

        Returns:
            Dictionary with the number of responses, total brand mentions,
            number of distinct brands and mean mentions per response
            (0 when there are no responses).
        """
        total_mentions = sum(self.brand_counts.values())
        unique_brands = len(self.brand_counts)

        return {
            "total_responses": self.total_responses,
            "total_mentions": total_mentions,
            "unique_brands": unique_brands,
            "avg_brands_per_response": (
                total_mentions / self.total_responses if self.total_responses > 0 else 0
            ),
        }