Spaces:

Marek4321
/

llm-brand-tracker

Running

App Files Files Community

llm-brand-tracker / metrics.py

Marek4321

Upload 7 files

98306c5 verified 5 months ago

raw

history blame contribute delete

5.95 kB

	"""
	Metrics calculation for brand visibility analysis
	"""

	from typing import List, Dict, Optional
	from collections import defaultdict
	import pandas as pd


	class BrandMetrics:
	    """Calculate brand visibility metrics from LLM responses.

	    Each response is an ordered list of brand names. A brand may occur more
	    than once inside a single response: every occurrence counts as a
	    *mention*, but only the first occurrence in a response counts as an
	    *appearance* and contributes to the average position.
	    """

	    # Columns (and their order) of the ranking tables handed back to callers.
	    _DISPLAY_COLUMNS = [
	        "Brand",
	        "Visibility Rate",
	        "Top-1 Share",
	        "Avg Position",
	        "Mention Share",
	        "Total Mentions",
	        "Appearances",
	    ]

	    # Raw numeric column used when no (or an unknown) metric is requested.
	    _DEFAULT_SORT_COLUMN = "_visibility_rate"

	    # Public ``metric`` names accepted by get_top_brands -> numeric sort column.
	    # Sorting must use numeric columns: the formatted percentage strings would
	    # sort lexicographically ("9.0%" > "10.0%").
	    _SORT_COLUMNS = {
	        "visibility": "_visibility_rate",
	        "top1": "_top1_share",
	        "mentions": "Total Mentions",
	    }

	    def __init__(self, responses: List[List[str]]):
	        """
	        Initialize metrics calculator

	        Args:
	            responses: List of brand lists (each inner list is one LLM response,
	                brands in the order they were mentioned)
	        """
	        self.responses = responses
	        self.total_responses = len(responses)

	        # Dictionaries for counting
	        self.brand_counts = defaultdict(int)  # Total mentions
	        self.brand_first_counts = defaultdict(int)  # First position mentions
	        self.brand_positions_sum = defaultdict(int)  # Sum of first-occurrence positions
	        self.brand_positions_n = defaultdict(int)  # Responses the brand appears in

	        self._calculate_raw_metrics()

	    def _calculate_raw_metrics(self):
	        """Fill the counting dictionaries from ``self.responses``."""
	        for response in self.responses:
	            # Brands already seen in this response, so repeated occurrences
	            # do not inflate the appearance count or the position sum.
	            seen_brands = set()

	            for position, brand in enumerate(response, start=1):
	                # Every occurrence is a mention.
	                self.brand_counts[brand] += 1

	                if position == 1:
	                    self.brand_first_counts[brand] += 1

	                # Only the first occurrence per response feeds avg position.
	                if brand not in seen_brands:
	                    seen_brands.add(brand)
	                    self.brand_positions_sum[brand] += position
	                    self.brand_positions_n[brand] += 1

	    def _raw_metrics(self, brand: str, total_mentions: int) -> Dict:
	        """
	        Compute the unformatted metrics of one known brand

	        Args:
	            brand: Brand name that is present in ``self.brand_counts``
	            total_mentions: Sum of all mentions over all brands

	        Returns:
	            Dictionary with the same keys as ``get_brand_metrics`` returns
	        """
	        mentions = self.brand_counts[brand]
	        appearances = self.brand_positions_n[brand]
	        # .get() keeps this a pure read: indexing the defaultdict would insert
	        # a zero entry for brands that never came first.
	        first_places = self.brand_first_counts.get(brand, 0)

	        return {
	            "brand": brand,
	            "visibility_rate": appearances / self.total_responses,
	            "top1_share": first_places / self.total_responses,
	            "avg_position": (
	                self.brand_positions_sum[brand] / appearances
	                if appearances > 0 else None
	            ),
	            "mention_share": mentions / total_mentions if total_mentions > 0 else 0,
	            "total_mentions": mentions,
	            "appearances": appearances,
	        }

	    def _build_ranking(self, sort_column: str) -> pd.DataFrame:
	        """
	        Build the display table of all brands ordered by a raw metric

	        Args:
	            sort_column: Numeric column to sort by, descending (one of the
	                values of ``_SORT_COLUMNS``)

	        Returns:
	            DataFrame restricted to the display columns with a fresh 0..n-1
	            index; an empty DataFrame with those columns if no brand was seen
	        """
	        if not self.brand_counts:
	            # Sorting an empty, column-less frame would raise KeyError.
	            return pd.DataFrame(columns=self._DISPLAY_COLUMNS)

	        total_mentions = sum(self.brand_counts.values())
	        rows = []
	        for brand in self.brand_counts:
	            raw = self._raw_metrics(brand, total_mentions)
	            avg_position = raw["avg_position"]
	            rows.append({
	                "Brand": brand,
	                "Visibility Rate": f"{(raw['visibility_rate'] * 100):.1f}%",
	                "Top-1 Share": f"{(raw['top1_share'] * 100):.1f}%",
	                "Avg Position": (
	                    f"{avg_position:.2f}" if avg_position is not None else "N/A"
	                ),
	                "Mention Share": f"{(raw['mention_share'] * 100):.1f}%",
	                "Total Mentions": raw["total_mentions"],
	                "Appearances": raw["appearances"],
	                # Raw values for sorting
	                "_visibility_rate": raw["visibility_rate"],
	                "_top1_share": raw["top1_share"],
	            })

	        # Stable sort: brands tied on the metric keep first-seen order.
	        table = pd.DataFrame(rows).sort_values(
	            sort_column, ascending=False, kind="stable"
	        )
	        return table[self._DISPLAY_COLUMNS].reset_index(drop=True)

	    def get_brand_metrics(self, brand_name: str) -> Optional[Dict]:
	        """
	        Get metrics for a specific brand

	        Args:
	            brand_name: Brand to look up (exact match on the stored name)

	        Returns:
	            Dictionary with keys ``brand``, ``visibility_rate``, ``top1_share``,
	            ``avg_position``, ``mention_share``, ``total_mentions`` and
	            ``appearances``, or None if brand not found
	        """
	        if brand_name not in self.brand_counts:
	            return None

	        return self._raw_metrics(brand_name, sum(self.brand_counts.values()))

	    def get_all_brands_ranking(self) -> pd.DataFrame:
	        """
	        Get ranking of all brands

	        Returns:
	            DataFrame with all brands and their formatted metrics, sorted by
	            visibility rate (highest first); empty if there are no brands
	        """
	        return self._build_ranking(self._DEFAULT_SORT_COLUMN)

	    def get_top_brands(self, n: int = 10, metric: str = "visibility") -> pd.DataFrame:
	        """
	        Get top N brands by specified metric

	        Args:
	            n: Number of brands to return
	            metric: Metric to sort by ('visibility', 'top1', 'mentions');
	                any other value falls back to 'visibility'

	        Returns:
	            DataFrame with the top brands, best first
	        """
	        sort_column = self._SORT_COLUMNS.get(metric, self._DEFAULT_SORT_COLUMN)
	        return self._build_ranking(sort_column).head(n)

	    def get_summary_stats(self) -> Dict:
	        """
	        Get overall summary statistics

	        Returns:
	            Dictionary with ``total_responses``, ``total_mentions``,
	            ``unique_brands`` and ``avg_brands_per_response`` (0 when there
	            are no responses)
	        """
	        total_mentions = sum(self.brand_counts.values())

	        return {
	            "total_responses": self.total_responses,
	            "total_mentions": total_mentions,
	            "unique_brands": len(self.brand_counts),
	            "avg_brands_per_response": (
	                total_mentions / self.total_responses
	                if self.total_responses > 0 else 0
	            ),
	        }