Spaces:
Running
Running
| """ | |
| Metrics calculation for brand visibility analysis | |
| """ | |
| from typing import List, Dict, Optional | |
| from collections import defaultdict | |
| import pandas as pd | |
class BrandMetrics:
    """Calculate brand visibility metrics from LLM responses.

    Each response is an ordered list of brand names. From these lists the
    class derives, per brand:

    * total mentions (every occurrence counts),
    * appearances (number of responses mentioning the brand at least once),
    * first-position count (responses where the brand is listed first),
    * average position (1-based, using the first occurrence per response).
    """

    # Columns exposed to callers, in display order.
    _DISPLAY_COLUMNS = [
        "Brand",
        "Visibility Rate",
        "Top-1 Share",
        "Avg Position",
        "Mention Share",
        "Total Mentions",
        "Appearances",
    ]

    # Public ``metric`` names accepted by ``get_top_brands`` mapped to the
    # raw numeric columns used for sorting. The display columns hold
    # formatted strings, which would sort lexicographically ("9.0%" after
    # "10.0%").
    _SORT_COLUMNS = {
        "visibility": "_visibility_rate",
        "top1": "_top1_share",
        "mentions": "_total_mentions",
    }

    def __init__(self, responses: List[List[str]]):
        """
        Initialize metrics calculator

        Args:
            responses: List of brand lists (each inner list is one LLM response)
        """
        self.responses = responses
        self.total_responses = len(responses)

        # Dictionaries for counting
        self.brand_counts = defaultdict(int)  # Total mentions
        self.brand_first_counts = defaultdict(int)  # First position mentions
        self.brand_positions_sum = defaultdict(int)  # Sum of positions
        self.brand_positions_n = defaultdict(int)  # Count for average calculation

        self._calculate_raw_metrics()

    def _calculate_raw_metrics(self):
        """Populate the raw counters from ``self.responses``.

        The counters are cleared first so that calling this method again
        recomputes the totals instead of double counting.
        """
        for counter in (
            self.brand_counts,
            self.brand_first_counts,
            self.brand_positions_sum,
            self.brand_positions_n,
        ):
            counter.clear()

        for response in self.responses:
            # Brands already seen in this response; position and appearance
            # are recorded only for a brand's first occurrence.
            seen_brands = set()
            for position, brand in enumerate(response, start=1):
                # Every occurrence counts as a mention.
                self.brand_counts[brand] += 1

                if position == 1:
                    self.brand_first_counts[brand] += 1

                if brand not in seen_brands:
                    self.brand_positions_sum[brand] += position
                    self.brand_positions_n[brand] += 1
                    seen_brands.add(brand)

    @staticmethod
    def _share(count, total):
        """Return ``count / total``, or ``0.0`` when ``total`` is zero."""
        return count / total if total else 0.0

    def _raw_metrics(self, brand: str, total_mentions: int) -> Dict:
        """Return the unformatted metrics for one brand.

        Counters are read with ``.get`` so that looking up a value never
        inserts a new zero entry into the ``defaultdict`` counters.
        """
        appearances = self.brand_positions_n.get(brand, 0)
        return {
            "visibility_rate": self._share(appearances, self.total_responses),
            "top1_share": self._share(
                self.brand_first_counts.get(brand, 0), self.total_responses
            ),
            "avg_position": (
                self.brand_positions_sum.get(brand, 0) / appearances
                if appearances > 0 else None
            ),
            "mention_share": self._share(
                self.brand_counts.get(brand, 0), total_mentions
            ),
            "total_mentions": self.brand_counts.get(brand, 0),
            "appearances": appearances,
        }

    def get_brand_metrics(self, brand_name: str) -> Optional[Dict]:
        """
        Get metrics for a specific brand

        Returns:
            Dictionary with metrics or None if brand not found
        """
        if brand_name not in self.brand_counts:
            return None

        total_mentions = sum(self.brand_counts.values())
        return {"brand": brand_name, **self._raw_metrics(brand_name, total_mentions)}

    def _ranking_frame(self, sort_column: str) -> pd.DataFrame:
        """Build the display table for all brands, sorted descending.

        Args:
            sort_column: Name of the raw numeric column to sort by (one of
                the values of ``_SORT_COLUMNS``).

        Returns:
            DataFrame restricted to the display columns with a fresh
            0-based index. Empty (but with the display columns) when no
            brand was observed.
        """
        total_mentions = sum(self.brand_counts.values())

        rows = []
        for brand in self.brand_counts:
            raw = self._raw_metrics(brand, total_mentions)
            avg_position = raw["avg_position"]
            rows.append({
                "Brand": brand,
                "Visibility Rate": f"{(raw['visibility_rate'] * 100):.1f}%",
                "Top-1 Share": f"{(raw['top1_share'] * 100):.1f}%",
                "Avg Position": (
                    f"{avg_position:.2f}" if avg_position is not None else "N/A"
                ),
                "Mention Share": f"{(raw['mention_share'] * 100):.1f}%",
                "Total Mentions": raw["total_mentions"],
                "Appearances": raw["appearances"],
                # Raw values for sorting
                "_visibility_rate": raw["visibility_rate"],
                "_top1_share": raw["top1_share"],
                "_total_mentions": raw["total_mentions"],
            })

        # pd.DataFrame([]) has no columns, so sorting it would raise
        # KeyError; return an empty table with the expected columns instead.
        if not rows:
            return pd.DataFrame(columns=self._DISPLAY_COLUMNS)

        # A stable sort keeps tied brands in first-seen order, which makes
        # the ranking deterministic.
        frame = pd.DataFrame(rows).sort_values(
            sort_column, ascending=False, kind="stable"
        )
        return frame[self._DISPLAY_COLUMNS].reset_index(drop=True)

    def get_all_brands_ranking(self) -> pd.DataFrame:
        """
        Get ranking of all brands

        Returns:
            DataFrame with all brands and their metrics, sorted by visibility_rate
        """
        return self._ranking_frame(self._SORT_COLUMNS["visibility"])

    def get_top_brands(self, n: int = 10, metric: str = "visibility") -> pd.DataFrame:
        """
        Get top N brands by specified metric

        Args:
            n: Number of brands to return
            metric: Metric to sort by ('visibility', 'top1', 'mentions').
                Any other value falls back to 'visibility'.

        Returns:
            DataFrame with top brands
        """
        sort_column = self._SORT_COLUMNS.get(
            metric, self._SORT_COLUMNS["visibility"]
        )
        return self._ranking_frame(sort_column).head(n)

    def get_summary_stats(self) -> Dict:
        """Get overall summary statistics"""
        total_mentions = sum(self.brand_counts.values())
        return {
            "total_responses": self.total_responses,
            "total_mentions": total_mentions,
            "unique_brands": len(self.brand_counts),
            # Guard against division by zero when there are no responses.
            "avg_brands_per_response": (
                total_mentions / self.total_responses
                if self.total_responses > 0 else 0
            ),
        }