File size: 7,276 Bytes
0aee734 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
import html
from collections import defaultdict

import gradio as gr
import pandas as pd
from tabulate import tabulate
from textblob import TextBlob
from transformers import pipeline
# Initialize the summarization pipeline (facebook/bart-large-cnn) once, at import time.
# NOTE(review): `summarizer` is not referenced by any other code visible in this
# file -- confirm it is still needed before paying the model-loading cost.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def generate_category_summaries(df):
    """Build the summary tables for every category that has enough data.

    A category is skipped when it has fewer than 10 rows, or when fewer than
    3 products remain after ``get_product_stats`` has filtered it.

    Args:
        df: DataFrame with a ``cluster_name`` column; the remaining columns
            are consumed by the helper functions.

    Returns:
        Dict mapping each kept category name to the list of table dicts
        produced by ``format_tables``.
    """
    per_category = {}
    for name in df['cluster_name'].unique():
        rows = df.loc[df['cluster_name'] == name]
        # Too few reviews in this category to say anything useful
        if rows.shape[0] < 10:
            continue

        stats = get_product_stats(rows)
        # Need at least three products to build a ranking
        if stats.shape[0] < 3:
            continue

        best, worst = get_top_and_worst_products(stats)
        details = analyze_top_products(best)
        per_category[name] = format_tables(name, details, worst)
    return per_category
def format_tables(category, product_details, worst_product):
    """Format the analysis of one category as a list of table descriptions.

    Args:
        category: Category name; upper-cased for the first section title.
        product_details: List of dicts with the keys ``name``, ``rating``,
            ``review_count``, ``pros`` and ``cons`` (the last two are lists
            of strings). May be empty.
        worst_product: DataFrame; when non-empty, its first row supplies
            ``reviews`` and ``avg_rating`` and its first index label is used
            as the product name.

    Returns:
        List of dicts, each with ``section``, ``headers`` and ``data`` keys.
        The "TOP PRODUCTS" table is always present; "KEY DIFFERENCES" is added
        only when some product has a pro the others lack, and
        "PRODUCT TO AVOID" only when ``worst_product`` is non-empty.
    """
    tables = []

    # Top Products Table
    top_rows = [
        [
            item['name'],
            f"★{item['rating']:.1f}",
            item['review_count'],
            "\n".join(item['pros']),
            "\n".join(item['cons']),
        ]
        for item in product_details
    ]
    tables.append({
        'section': f"TOP PRODUCTS IN {category.upper()}",
        'headers': ["Product", "Rating", "Reviews", "Pros", "Cons"],
        'data': top_rows,
    })

    # Key Differences Table
    # Pros shared by every product are not differentiators. With no products
    # there is nothing to intersect, so fall back to an empty set instead of
    # indexing into an empty list.
    if product_details:
        shared_pros = set.intersection(
            *(set(item['pros']) for item in product_details)
        )
    else:
        shared_pros = set()

    diff_rows = []
    for item in product_details:
        unique_pros = [pro for pro in item['pros'] if pro not in shared_pros]
        if unique_pros:
            diff_rows.append([item['name'], ", ".join(unique_pros)])
    if diff_rows:
        tables.append({
            'section': "KEY DIFFERENCES",
            'headers': ["Product", "Unique Features"],
            'data': diff_rows,
        })

    # Worst Product Table
    if not worst_product.empty:
        worst = worst_product.iloc[0]
        _, cons = analyze_sentiment(worst['reviews'])
        tables.append({
            'section': "PRODUCT TO AVOID",
            'headers': ["Product", "Rating", "Reasons to Avoid"],
            'data': [[
                worst_product.index[0],
                f"★{worst['avg_rating']:.1f}",
                # Fall back to a generic reason when no negative terms were found
                ", ".join(cons[:3]) if cons else "Consistently poor ratings",
            ]],
        })

    return tables
def get_product_stats(category_df):
    """Aggregate per-product review statistics for one category.

    Args:
        category_df: DataFrame with ``name``, ``rating`` and ``text`` columns.

    Returns:
        DataFrame indexed by product name with the columns ``avg_rating``
        (mean rating), ``review_count`` (number of ratings) and ``reviews``
        (list of review texts), restricted to products with at least 5 reviews.
    """
    aggregations = {
        'rating': ['mean', 'count'],
        'text': list
    }
    per_product = category_df.groupby('name').agg(aggregations)
    # Replace the two-level column index produced by the aggregation with flat names
    per_product.columns = ['avg_rating', 'review_count', 'reviews']
    has_enough_reviews = per_product['review_count'] >= 5
    return per_product[has_enough_reviews]
def get_top_and_worst_products(product_stats):
    """Identify the best and the worst performing products.

    Args:
        product_stats: DataFrame indexed by product name with an
            ``avg_rating`` column.

    Returns:
        Tuple ``(top, worst)``: ``top`` holds up to 3 rows with the highest
        ``avg_rating``; ``worst`` holds the single lowest-rated row among the
        products that are not in ``top``, and is empty when no such product
        exists.
    """
    top = product_stats.nlargest(3, 'avg_rating')
    # A product must never be both recommended and flagged to avoid, so the
    # worst candidate is chosen only from products outside the top list.
    remaining = product_stats.drop(index=top.index)
    worst = remaining.nsmallest(1, 'avg_rating')
    return top, worst
def analyze_top_products(top_products):
    """Collect rating figures and the leading pros/cons for each top product.

    Args:
        top_products: DataFrame indexed by product name with the columns
            ``avg_rating``, ``review_count`` and ``reviews``.

    Returns:
        List of dicts (one per row, in row order) with the keys ``name``,
        ``rating``, ``review_count``, ``pros`` and ``cons``. ``pros`` and
        ``cons`` hold at most three terms, or a placeholder sentence when
        sentiment analysis found none.
    """
    def _describe(name, stats_row):
        # Summarize one product row into the dict shape used by the tables
        positives, negatives = analyze_sentiment(stats_row['reviews'])
        return {
            'name': name,
            'rating': stats_row['avg_rating'],
            'review_count': stats_row['review_count'],
            'pros': positives[:3] or ["no significant positive feedback"],
            'cons': negatives[:3] or ["no major complaints"],
        }

    return [
        _describe(name, stats_row)
        for name, stats_row in top_products.iterrows()
    ]
def analyze_sentiment(reviews):
    """Extract the nouns and adjectives most associated with praise and complaints.

    Every review is split into sentences. Nouns and adjectives of a sentence
    whose polarity is above 0.3 are counted as pros; those of a sentence whose
    polarity is below -0.3 are counted as cons. Sentences in between are ignored.

    Args:
        reviews: Iterable of review texts. Entries that are not ``str``
            (for example NaN for a missing value) are skipped.

    Returns:
        Tuple ``(pros, cons)`` of word lists, each ordered from most to least
        frequent; ties keep first-seen order.
    """
    descriptive_tags = ('NN', 'NNS', 'JJ', 'JJR', 'JJS')
    pros = defaultdict(int)
    cons = defaultdict(int)

    for review in reviews:
        # TextBlob requires a str; skip anything else instead of crashing
        if not isinstance(review, str):
            continue
        for sentence in TextBlob(review).sentences:
            polarity = sentence.sentiment.polarity
            if polarity > 0.3:  # Positive
                bucket = pros
            elif polarity < -0.3:  # Negative
                bucket = cons
            else:
                continue
            # Use this sentence's own tags: tagging the whole review here would
            # credit words from unrelated sentences to this sentence's polarity.
            for word, tag in sentence.tags:
                if tag in descriptive_tags:
                    bucket[word] += 1

    def _by_frequency(counts):
        # Most frequent first; the stable sort keeps insertion order on ties
        return [word for word, _ in sorted(counts.items(), key=lambda kv: -kv[1])]

    return _by_frequency(pros), _by_frequency(cons)
def format_for_gradio(summaries):
    """Convert summary tables to HTML for Gradio display.

    Args:
        summaries: Dict mapping a category name to a list of table dicts with
            ``section``, ``headers`` and ``data`` keys.

    Returns:
        One HTML string: per category an ``<h2>`` heading followed by an
        ``<h3>`` heading and a styled table for each section, with categories
        separated by ``<hr>``. Empty string when ``summaries`` is empty.
    """
    table_tag = '<table style="width:100%; border-collapse: collapse; margin-bottom: 20px;">'
    th_tag = '<th style="background-color: #f2f2f2; padding: 8px; text-align: left; border: 1px solid #ddd;">'
    td_tag = '<td style="padding: 8px; border: 1px solid #ddd;">'

    outputs = []
    for category, tables in summaries.items():
        # Category and section titles come from the data, so escape them
        # before placing them in markup.
        parts = [f"<h2 style='color: #4a6baf;'>{html.escape(category.upper())}</h2>"]
        for table in tables:
            heading = f"<h3 style='color: #3a5a8a;'>{html.escape(table['section'])}</h3>"
            table_html = tabulate(
                table['data'],
                headers=table['headers'],
                tablefmt="html",
                stralign="left",
                numalign="center"
            )
            # Inject inline styles by rewriting the bare tags tabulate emits.
            # NOTE(review): cells that tabulate renders with an alignment
            # attribute (possibly the centered numeric columns) would not match
            # these literal tags -- confirm against the rendered output.
            table_html = table_html.replace('<table>', table_tag)
            table_html = table_html.replace('<th>', th_tag)
            table_html = table_html.replace('<td>', td_tag)
            parts.append(heading + table_html)
        outputs.append("".join(parts))
    return "<hr>".join(outputs)
def analyze_reviews(df):
    """Run the whole pipeline: summarize *df* per category and render it as HTML.

    Args:
        df: Review DataFrame passed to ``generate_category_summaries``.

    Returns:
        The HTML string produced by ``format_for_gradio``.
    """
    return format_for_gradio(generate_category_summaries(df))
# Create Gradio interface
# NOTE(review): `df` is read below (dropdown choices and both click handlers)
# but is not defined anywhere in the visible source. It must be loaded before
# this point, otherwise building the interface raises NameError.
with gr.Blocks(title="Amazon Product Review Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Amazon Product Review Analyzer")
    gr.Markdown("Analyzing top products and reviews across categories")

    with gr.Row():
        # Left column: analyze a single, user-selected category
        with gr.Column():
            gr.Markdown("### Product Categories Found")
            category_dropdown = gr.Dropdown(
                choices=df['cluster_name'].unique().tolist(),
                label="Select a Category",
                interactive=True
            )
            analyze_btn = gr.Button("Analyze Selected Category", variant="primary")

        # Right column: analyze every category at once
        with gr.Column():
            gr.Markdown("### All Categories Summary")
            all_categories_btn = gr.Button("Analyze All Categories", variant="secondary")

    output_html = gr.HTML(label="Analysis Results")

    # Button actions
    # Enable the analyze button only while a category is selected
    category_dropdown.change(
        fn=lambda x: gr.update(interactive=bool(x)),
        inputs=category_dropdown,
        outputs=analyze_btn
    )

    analyze_btn.click(
        fn=lambda cat: analyze_reviews(df[df['cluster_name'] == cat]),
        inputs=category_dropdown,
        outputs=output_html
    )

    all_categories_btn.click(
        fn=lambda: analyze_reviews(df),
        outputs=output_html
    )

# Launch the interface
demo.launch()