Spaces:

mfoud444
/

tt

Paused

App Files Files Community

Mohammed Foud committed on Apr 14, 2025

Commit

0aee734

1 Parent(s): 06e1fb2

Add application file

Browse files

Files changed (1) hide show

app.py +213 -0

app.py CHANGED Viewed

	@@ -0,0 +1,213 @@

+import gradio as gr
+from transformers import pipeline
+from textblob import TextBlob
+from collections import defaultdict
+import pandas as pd
+from tabulate import tabulate
# Module-level summarization pipeline, created once when the module is loaded.
# NOTE(review): `summarizer` is not referenced anywhere else in the visible
# code — confirm it is still needed before keeping this start-up cost.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
def generate_category_summaries(df, min_reviews=10, min_products=3):
    """Build the summary tables for every category in a review dataframe.

    Args:
        df: Review dataframe. This function reads the ``cluster_name`` column;
            the helpers it calls additionally read ``name``, ``rating`` and
            ``text``.
        min_reviews: Categories with fewer review rows than this are skipped.
        min_products: Categories with fewer qualifying products (rows returned
            by ``get_product_stats``) than this are skipped.

    Returns:
        A dict mapping each retained category name to the list of table
        dicts produced by ``format_tables``. Skipped categories are absent.
    """
    summaries = {}
    for category in df['cluster_name'].unique():
        category_df = df[df['cluster_name'] == category]
        # Too little data to say anything meaningful about this category.
        if len(category_df) < min_reviews:
            continue

        product_stats = get_product_stats(category_df)
        # A ranking needs enough distinct products to compare.
        if len(product_stats) < min_products:
            continue

        top_products, worst_product = get_top_and_worst_products(product_stats)
        product_details = analyze_top_products(top_products)
        summaries[category] = format_tables(
            category, product_details, worst_product
        )
    return summaries
def format_tables(category, product_details, worst_product):
    """Arrange one category's analysis results into table dicts.

    Args:
        category: Category name; upper-cased into the first section title.
        product_details: List of dicts with the keys ``name``, ``rating``,
            ``review_count``, ``pros`` and ``cons`` (the shape produced by
            ``analyze_top_products``). May be empty.
        worst_product: DataFrame holding at most one row with the columns
            ``avg_rating`` and ``reviews``, indexed by product name. May be
            empty.

    Returns:
        A list of dicts, each with the keys ``section`` (title), ``headers``
        (column names) and ``data`` (list of rows). Sections with nothing to
        show are omitted, so the list can be empty.
    """
    tables = []

    # Guarding on an empty list avoids the IndexError that
    # ``product_details[0]`` would otherwise raise.
    if product_details:
        top_rows = [
            [
                product['name'],
                f"★{product['rating']:.1f}",
                product['review_count'],
                "\n".join(product['pros']),
                "\n".join(product['cons']),
            ]
            for product in product_details
        ]
        tables.append({
            'section': f"TOP PRODUCTS IN {category.upper()}",
            'headers': ["Product", "Rating", "Reviews", "Pros", "Cons"],
            'data': top_rows,
        })

        # Pros shared by every listed product are not differentiators.
        common_pros = set(product_details[0]['pros']).intersection(
            *(product['pros'] for product in product_details[1:])
        )
        diff_rows = []
        for product in product_details:
            unique_pros = [p for p in product['pros'] if p not in common_pros]
            if unique_pros:
                diff_rows.append([product['name'], ", ".join(unique_pros)])
        if diff_rows:
            tables.append({
                'section': "KEY DIFFERENCES",
                'headers': ["Product", "Unique Features"],
                'data': diff_rows,
            })

    if not worst_product.empty:
        worst = worst_product.iloc[0]
        # Only the negative keywords matter for the "avoid" section.
        _, cons = analyze_sentiment(worst['reviews'])
        tables.append({
            'section': "PRODUCT TO AVOID",
            'headers': ["Product", "Rating", "Reasons to Avoid"],
            'data': [[
                worst_product.index[0],
                f"★{worst['avg_rating']:.1f}",
                ", ".join(cons[:3]) if cons else "Consistently poor ratings",
            ]],
        })
    return tables
def get_product_stats(category_df, min_reviews=5):
    """Aggregate per-product rating statistics for one category.

    Args:
        category_df: Review rows with the columns ``name`` (product name),
            ``rating`` and ``text`` (review body).
        min_reviews: Minimum number of ratings a product needs in order to be
            kept. Defaults to 5.

    Returns:
        A DataFrame indexed by product name with the columns ``avg_rating``
        (mean rating), ``review_count`` (number of non-null ratings) and
        ``reviews`` (list of that product's review texts), restricted to
        products with at least ``min_reviews`` ratings.
    """
    # Named aggregation yields the final column names directly instead of
    # relying on the positional order of a flattened MultiIndex.
    stats = category_df.groupby('name').agg(
        avg_rating=('rating', 'mean'),
        review_count=('rating', 'count'),
        reviews=('text', list),
    )
    return stats[stats['review_count'] >= min_reviews]
def get_top_and_worst_products(product_stats):
    """Pick the three best-rated products and the single worst-rated one.

    Args:
        product_stats: DataFrame indexed by product name with an
            ``avg_rating`` column.

    Returns:
        A ``(top_products, worst_product)`` tuple of DataFrames.
        ``top_products`` holds up to three rows with the highest
        ``avg_rating``. ``worst_product`` holds the lowest-rated row among
        the remaining products and is empty when every product is already in
        the top list.
    """
    top_products = product_stats.nlargest(3, 'avg_rating')
    # Exclude the top products first: with three or fewer candidates the
    # lowest-rated one would otherwise be reported both as a top pick and as
    # the product to avoid.
    remaining = product_stats.drop(index=top_products.index)
    worst_product = remaining.nsmallest(1, 'avg_rating')
    return top_products, worst_product
def analyze_top_products(top_products):
    """Summarise each top product as a dict with its leading pros and cons.

    Iterates the rows of ``top_products`` (indexed by product name, with the
    columns ``avg_rating``, ``review_count`` and ``reviews``) and runs
    ``analyze_sentiment`` on each product's reviews. At most three pros and
    three cons are kept; a placeholder entry is used when a list is empty.
    """
    details = []
    for name, stats in top_products.iterrows():
        positives, negatives = analyze_sentiment(stats['reviews'])
        top_pros = positives[:3]
        top_cons = negatives[:3]
        entry = dict(
            name=name,
            rating=stats['avg_rating'],
            review_count=stats['review_count'],
            pros=top_pros if top_pros else ["no significant positive feedback"],
            cons=top_cons if top_cons else ["no major complaints"],
        )
        details.append(entry)
    return details
def analyze_sentiment(reviews):
    """Extract the most frequent positive and negative keywords from reviews.

    Each review is split into sentences. Nouns and adjectives from sentences
    whose polarity is above 0.3 are counted as pros, and those from sentences
    below -0.3 as cons. Sentences in between are ignored.

    Args:
        reviews: Iterable of review texts. Entries that are not strings
            (e.g. NaN/None from missing data) or that are blank are skipped.

    Returns:
        A ``(pros, cons)`` tuple of word lists, each ordered from most to
        least frequent. Ties keep first-seen order.
    """
    keyword_tags = ('NN', 'NNS', 'JJ', 'JJR', 'JJS')
    pros = defaultdict(int)
    cons = defaultdict(int)

    for review in reviews:
        # Missing or empty review bodies carry no sentiment and would make
        # TextBlob raise on non-string input.
        if not isinstance(review, str) or not review.strip():
            continue
        for sentence in TextBlob(review).sentences:
            polarity = sentence.sentiment.polarity
            if polarity > 0.3:
                bucket = pros
            elif polarity < -0.3:
                bucket = cons
            else:
                continue  # neutral sentence: skip the tagging work entirely
            # Tag the sentence itself, not the whole review, so that words
            # are only credited to the sentiment of the sentence they occur in.
            for word, tag in sentence.tags:
                if tag in keyword_tags:
                    bucket[word] += 1

    # sorted() is stable, also with reverse=True, so ties keep insertion order.
    pros_sorted = sorted(pros, key=pros.get, reverse=True)
    cons_sorted = sorted(cons, key=cons.get, reverse=True)
    return pros_sorted, cons_sorted
def format_for_gradio(summaries):
    """Render the summary tables as one HTML string for a Gradio HTML widget.

    Args:
        summaries: Dict mapping category name to a list of table dicts with
            the keys ``section``, ``headers`` and ``data`` (the shape
            produced by ``format_tables``).

    Returns:
        The HTML for all categories joined with ``<hr>``; an empty string
        when ``summaries`` is empty.
    """
    # Function-scope imports keep this block self-contained.
    import re
    from html import escape

    base_styles = {
        'th': "background-color: #f2f2f2; padding: 8px; text-align: left; border: 1px solid #ddd;",
        # pre-line keeps the newline-separated pros/cons on separate lines.
        'td': "padding: 8px; border: 1px solid #ddd; white-space: pre-line;",
    }
    # tabulate's HTML output puts a text-align style attribute on aligned
    # (e.g. numeric) cells; matching it as well ensures those cells are
    # styled too instead of being skipped by a plain '<td>' replacement.
    cell_open_tag = re.compile(r'<(th|td)(?: style="([^"]*)")?>')

    def restyle_cell(match):
        tag, own_style = match.group(1), match.group(2)
        # The cell's own alignment goes last so it overrides the default.
        style = base_styles[tag] + (f" {own_style}" if own_style else "")
        return f'<{tag} style="{style}">'

    outputs = []
    for category, tables in summaries.items():
        # Category and section names originate from the data set, so they
        # are escaped before being placed into markup.
        parts = [f"<h2 style='color: #4a6baf;'>{escape(category.upper())}</h2>"]
        for table in tables:
            table_html = tabulate(
                table['data'],
                headers=table['headers'],
                tablefmt="html",
                stralign="left",
                numalign="center"
            )
            table_html = table_html.replace(
                '<table>',
                '<table style="width:100%; border-collapse: collapse; margin-bottom: 20px;">'
            )
            table_html = cell_open_tag.sub(restyle_cell, table_html)
            parts.append(
                f"<h3 style='color: #3a5a8a;'>{escape(table['section'])}</h3>"
                + table_html
            )
        outputs.append("".join(parts))
    return "<hr>".join(outputs)
def analyze_reviews(df):
    """Run the full pipeline: summarise ``df`` per category and return HTML."""
    return format_for_gradio(generate_category_summaries(df))
# Gradio front end: a category picker with two buttons feeding one HTML panel.
# NOTE(review): `df` is read below (dropdown choices and both click handlers)
# but is never assigned anywhere in the visible code — confirm where the
# review dataframe is supposed to be loaded.


def _toggle_analyze_button(selected):
    """Make the per-category button clickable only while a category is chosen."""
    return gr.update(interactive=bool(selected))


def _analyze_selected_category(cat):
    """Analyze only the rows belonging to the chosen category."""
    return analyze_reviews(df[df['cluster_name'] == cat])


def _analyze_all_categories():
    """Analyze the complete dataframe."""
    return analyze_reviews(df)


with gr.Blocks(title="Amazon Product Review Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Amazon Product Review Analyzer")
    gr.Markdown("Analyzing top products and reviews across categories")

    with gr.Row():
        # Left column: single-category analysis.
        with gr.Column():
            gr.Markdown("### Product Categories Found")
            category_dropdown = gr.Dropdown(
                choices=df['cluster_name'].unique().tolist(),
                label="Select a Category",
                interactive=True,
            )
            analyze_btn = gr.Button("Analyze Selected Category", variant="primary")
        # Right column: whole-dataset analysis.
        with gr.Column():
            gr.Markdown("### All Categories Summary")
            all_categories_btn = gr.Button("Analyze All Categories", variant="secondary")

    output_html = gr.HTML(label="Analysis Results")

    # Event wiring
    category_dropdown.change(
        fn=_toggle_analyze_button,
        inputs=category_dropdown,
        outputs=analyze_btn,
    )
    analyze_btn.click(
        fn=_analyze_selected_category,
        inputs=category_dropdown,
        outputs=output_html,
    )
    all_categories_btn.click(
        fn=_analyze_all_categories,
        outputs=output_html,
    )

# Start the web server
demo.launch()