# Source: Hugging Face Space "mfoud444/tt" (status: Paused)
# File size: 7,276 bytes; revision 0aee734

import gradio as gr
from transformers import pipeline
from textblob import TextBlob
from collections import defaultdict
import pandas as pd
from tabulate import tabulate

# Build the Hugging Face summarization pipeline (facebook/bart-large-cnn) once,
# at module import.
# NOTE(review): `summarizer` is not referenced by any function visible in this
# file — confirm it is still needed before keeping this import-time setup.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def generate_category_summaries(df):
    """Build the summary tables for every category that has enough data.

    Categories are taken from the 'cluster_name' column in order of first
    appearance. A category is skipped when it has fewer than 10 rows, or
    fewer than 3 products left after `get_product_stats` filtering.

    Returns:
        dict mapping each kept category to the list of table specs returned
        by `format_tables`.
    """
    min_rows = 10
    min_products = 3

    cluster_column = df['cluster_name']
    tables_by_category = {}

    for cluster in cluster_column.unique():
        rows = df[cluster_column == cluster]
        if len(rows) < min_rows:
            continue

        stats = get_product_stats(rows)
        if len(stats) < min_products:
            continue

        best, worst = get_top_and_worst_products(stats)
        details = analyze_top_products(best)
        tables_by_category[cluster] = format_tables(cluster, details, worst)

    return tables_by_category

def format_tables(category, product_details, worst_product):
    """Build the table specifications for one category.

    Args:
        category: Category label; upper-cased into the first section title.
        product_details: List of dicts with the keys 'name', 'rating',
            'review_count', 'pros' and 'cons' (the shape produced by
            `analyze_top_products`). May be empty.
        worst_product: DataFrame whose first row describes the product to
            avoid (index = product name, columns 'avg_rating' and
            'reviews'). May be empty, in which case no such table is added.

    Returns:
        A list of dicts, each with 'section' (title), 'headers' (column
        names) and 'data' (list of rows). The "top products" table is always
        present; "key differences" and "product to avoid" are added only
        when they have content.
    """
    tables = []

    # Top Products Table
    top_rows = [
        [
            product['name'],
            f"★{product['rating']:.1f}",
            product['review_count'],
            "\n".join(product['pros']),
            "\n".join(product['cons']),
        ]
        for product in product_details
    ]
    tables.append({
        'section': f"TOP PRODUCTS IN {category.upper()}",
        'headers': ["Product", "Rating", "Reviews", "Pros", "Cons"],
        'data': top_rows
    })

    # Key Differences Table
    # Pros shared by every product do not differentiate them. Guard the empty
    # case: indexing product_details[0] would raise IndexError.
    if product_details:
        common_pros = set(product_details[0]['pros']).intersection(
            *(product['pros'] for product in product_details[1:])
        )
    else:
        common_pros = set()

    diff_rows = []
    for product in product_details:
        unique_pros = [p for p in product['pros'] if p not in common_pros]
        if unique_pros:
            diff_rows.append([product['name'], ", ".join(unique_pros)])

    if diff_rows:
        tables.append({
            'section': "KEY DIFFERENCES",
            'headers': ["Product", "Unique Features"],
            'data': diff_rows
        })

    # Worst Product Table
    if not worst_product.empty:
        worst = worst_product.iloc[0]
        # Only the negative terms matter for the "reasons to avoid" column.
        _, cons = analyze_sentiment(worst['reviews'])
        tables.append({
            'section': "PRODUCT TO AVOID",
            'headers': ["Product", "Rating", "Reasons to Avoid"],
            'data': [[
                worst_product.index[0],
                f"★{worst['avg_rating']:.1f}",
                ", ".join(cons[:3]) if cons else "Consistently poor ratings"
            ]]
        })

    return tables

def get_product_stats(category_df):
    """Aggregate per-product review statistics.

    Groups the rows by 'name' and returns a frame indexed by product name
    with the columns 'avg_rating' (mean of 'rating'), 'review_count' (count
    of 'rating') and 'reviews' (list of 'text' values). Products with fewer
    than 5 counted ratings are dropped.
    """
    per_product = category_df.groupby('name').agg(
        avg_rating=('rating', 'mean'),
        review_count=('rating', 'count'),
        reviews=('text', list),
    )
    has_enough_reviews = per_product['review_count'] >= 5
    return per_product[has_enough_reviews]

def get_top_and_worst_products(product_stats):
    """Identify the best and worst performing products.

    Args:
        product_stats: DataFrame indexed by product name with an
            'avg_rating' column.

    Returns:
        A `(top_products, worst_product)` tuple of DataFrames:
        `top_products` holds up to 3 rows with the highest 'avg_rating';
        `worst_product` holds the single lowest-rated row among the products
        that are NOT in `top_products`, or is empty when no such product
        exists.
    """
    top_products = product_stats.nlargest(3, 'avg_rating')

    # Choose the worst product only from those outside the top list, so one
    # product is never reported both as a top pick and as one to avoid
    # (which happens when there are three or fewer products).
    remaining = product_stats.loc[~product_stats.index.isin(top_products.index)]
    if remaining.empty:
        worst_product = remaining
    else:
        worst_product = remaining.nsmallest(1, 'avg_rating')

    return top_products, worst_product

def analyze_top_products(top_products):
    """Summarise each top product with its rating and leading pros/cons.

    For every row of `top_products` (index = product name) the review list
    is passed to `analyze_sentiment`; at most three pros and three cons are
    kept, and a placeholder entry is used when a side is empty.

    Returns:
        list of dicts with keys 'name', 'rating', 'review_count', 'pros'
        and 'cons', in row order.
    """
    def describe(name, stats):
        positives, negatives = analyze_sentiment(stats['reviews'])
        return {
            'name': name,
            'rating': stats['avg_rating'],
            'review_count': stats['review_count'],
            'pros': positives[:3] or ["no significant positive feedback"],
            'cons': negatives[:3] or ["no major complaints"],
        }

    return [describe(name, stats) for name, stats in top_products.iterrows()]

def analyze_sentiment(reviews):
    """Extract frequently mentioned terms from positive and negative sentences.

    Each review is split into sentences with TextBlob. For a sentence whose
    polarity is above 0.3 (positive) or below -0.3 (negative), its nouns and
    adjectives (POS tags NN, NNS, JJ, JJR, JJS) are counted as pros or cons
    respectively. Sentences in between are ignored.

    Args:
        reviews: Iterable of review texts. Entries that are not strings
            (e.g. None or NaN for a missing value) or are blank are skipped.

    Returns:
        A `(pros, cons)` tuple of word lists, each ordered from most to
        least frequent; ties keep first-seen order.
    """
    term_tags = ('NN', 'NNS', 'JJ', 'JJR', 'JJS')
    pros = defaultdict(int)
    cons = defaultdict(int)

    for review in reviews:
        # TextBlob only accepts str; skip missing or blank entries instead of
        # failing the whole analysis.
        if not isinstance(review, str) or not review.strip():
            continue

        for sentence in TextBlob(review).sentences:
            polarity = sentence.sentiment.polarity
            if polarity > 0.3:  # Positive
                bucket = pros
            elif polarity < -0.3:  # Negative
                bucket = cons
            else:
                continue

            # Tag this sentence only. Using the whole review's tags here
            # would credit every word of the review to each polar sentence.
            for word, tag in sentence.tags:
                if tag in term_tags:
                    bucket[word] += 1

    def by_frequency(counts):
        # sorted() is stable, so equally frequent words keep insertion order.
        return [word for word, _ in sorted(counts.items(), key=lambda item: -item[1])]

    return by_frequency(pros), by_frequency(cons)

def format_for_gradio(summaries):
    """Convert summary tables to a single HTML string for Gradio display.

    Args:
        summaries: dict mapping a category name to a list of table specs,
            each a dict with 'section', 'headers' and 'data'.

    Returns:
        One HTML fragment per category (an <h2> heading followed by an <h3>
        heading and a styled table per spec), joined with "<hr>". Returns an
        empty string when `summaries` is empty.
    """
    import html  # local import: keeps this function's new dependency self-contained

    table_style = '<table style="width:100%; border-collapse: collapse; margin-bottom: 20px;">'
    header_style = '<th style="background-color: #f2f2f2; padding: 8px; text-align: left; border: 1px solid #ddd;">'
    cell_style = '<td style="padding: 8px; border: 1px solid #ddd;">'

    outputs = []
    for category, tables in summaries.items():
        # Category and section titles come from the data, so escape them
        # before embedding in markup. Escape AFTER upper-casing, otherwise
        # entities such as "&amp;" would be upper-cased too.
        parts = [f"<h2 style='color: #4a6baf;'>{html.escape(category.upper())}</h2>"]

        for table in tables:
            heading = f"<h3 style='color: #3a5a8a;'>{html.escape(table['section'])}</h3>"
            table_html = tabulate(
                table['data'],
                headers=table['headers'],
                tablefmt="html",
                stralign="left",
                numalign="center"
            )
            # Inject inline styles into the bare tags tabulate emits.
            # NOTE(review): tags that tabulate emits with attributes of their
            # own would not match these exact-string replacements — confirm
            # numeric columns render with the intended padding/borders.
            table_html = table_html.replace('<table>', table_style)
            table_html = table_html.replace('<th>', header_style)
            table_html = table_html.replace('<td>', cell_style)
            parts.append(heading + table_html)

        outputs.append("".join(parts))

    return "<hr>".join(outputs)

def analyze_reviews(df):
    """Run the full pipeline on `df` and return the report as HTML.

    The per-category summaries from `generate_category_summaries` are passed
    straight to `format_for_gradio`.
    """
    return format_for_gradio(generate_category_summaries(df))

# Create Gradio interface
# NOTE(review): `df` is read below (dropdown choices and both click handlers)
# but is not defined or imported anywhere in the visible file — confirm where
# the review DataFrame is loaded; without it this block raises NameError.
with gr.Blocks(title="Amazon Product Review Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Amazon Product Review Analyzer")
    gr.Markdown("Analyzing top products and reviews across categories")
    
    with gr.Row():
        # Left column: pick one category and analyze it.
        with gr.Column():
            gr.Markdown("### Product Categories Found")
            category_dropdown = gr.Dropdown(
                choices=df['cluster_name'].unique().tolist(),
                label="Select a Category",
                interactive=True
            )
            analyze_btn = gr.Button("Analyze Selected Category", variant="primary")
        
        # Right column: analyze every category at once.
        with gr.Column():
            gr.Markdown("### All Categories Summary")
            all_categories_btn = gr.Button("Analyze All Categories", variant="secondary")
    
    # Both buttons write their HTML report into this component.
    output_html = gr.HTML(label="Analysis Results")
    
    # Button actions
    # Enable the analyze button only while the dropdown holds a truthy value.
    # NOTE(review): the button is created without an explicit `interactive`
    # setting, so it may be clickable before any category is chosen — confirm.
    category_dropdown.change(
        fn=lambda x: gr.update(interactive=bool(x)),
        inputs=category_dropdown,
        outputs=analyze_btn
    )
    
    # Analyze only the rows whose 'cluster_name' equals the selected category.
    analyze_btn.click(
        fn=lambda cat: analyze_reviews(df[df['cluster_name'] == cat]),
        inputs=category_dropdown,
        outputs=output_html
    )
    
    # Analyze the whole DataFrame; takes no inputs.
    all_categories_btn.click(
        fn=lambda: analyze_reviews(df),
        outputs=output_html
    )

# Launch the interface
# Runs unconditionally at module level (no `if __name__ == "__main__"` guard).
demo.launch()