Mohammed Foud committed on
Commit
0aee734
·
1 Parent(s): 06e1fb2

Add application file

Browse files
Files changed (1) hide show
  1. app.py +213 -0
app.py CHANGED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from textblob import TextBlob
4
+ from collections import defaultdict
5
+ import pandas as pd
6
+ from tabulate import tabulate
7
+
8
# Initialize summarization pipeline
# This runs at import time and builds a "summarization" pipeline for the
# facebook/bart-large-cnn checkpoint.
# NOTE(review): `summarizer` is not referenced by any other code visible in
# this file - confirm it is still needed before paying for the model load.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
10
+
11
def generate_category_summaries(df):
    """Build the summary tables for every category that has enough data.

    Rows are grouped by their ``cluster_name`` value. A category is left out
    of the result when it has fewer than 10 rows, or when fewer than 3 of its
    products are returned by ``get_product_stats``.

    Args:
        df: DataFrame of reviews. The ``cluster_name`` column is read here;
            the remaining columns are consumed by the helper functions.

    Returns:
        dict mapping each kept category name to the list of table
        dictionaries produced by ``format_tables``.
    """
    results = {}

    for category_name in df['cluster_name'].unique():
        rows = df[df['cluster_name'] == category_name]
        if len(rows) < 10:
            continue

        stats = get_product_stats(rows)
        if len(stats) < 3:
            continue

        best, worst = get_top_and_worst_products(stats)
        details = analyze_top_products(best)

        # One list of table dictionaries per category.
        results[category_name] = format_tables(category_name, details, worst)

    return results
33
+
34
def format_tables(category, product_details, worst_product):
    """Build the display tables for one product category.

    Up to three tables are produced, in this order:

    1. "TOP PRODUCTS IN <CATEGORY>" - one row per entry of
       ``product_details``.
    2. "KEY DIFFERENCES" - for each product, the pros that are not shared by
       every product; omitted when no product has such a pro.
    3. "PRODUCT TO AVOID" - the first row of ``worst_product``; omitted when
       ``worst_product`` is ``None`` or empty.

    Args:
        category: Category name; upper-cased for the first section title.
        product_details: List of dicts with the keys ``name``, ``rating``,
            ``review_count``, ``pros`` and ``cons`` (the last two are lists
            of strings). May be empty.
        worst_product: DataFrame indexed by product name with ``avg_rating``
            and ``reviews`` columns, or ``None``.

    Returns:
        list of dicts, each with the keys ``section``, ``headers`` and
        ``data``.
    """
    tables = []

    # Top Products Table
    top_rows = [
        [
            product['name'],
            f"★{product['rating']:.1f}",
            product['review_count'],
            "\n".join(product['pros']),
            "\n".join(product['cons']),
        ]
        for product in product_details
    ]
    tables.append({
        'section': f"TOP PRODUCTS IN {category.upper()}",
        'headers': ["Product", "Rating", "Reviews", "Pros", "Cons"],
        'data': top_rows,
    })

    # Key Differences Table
    # Pros present for every product are not a differentiator. With no
    # products there is nothing to intersect, so fall back to an empty set
    # instead of indexing into an empty list.
    if product_details:
        common_pros = set.intersection(
            *(set(product['pros']) for product in product_details)
        )
    else:
        common_pros = set()

    diff_rows = []
    for product in product_details:
        unique_pros = [p for p in product['pros'] if p not in common_pros]
        if unique_pros:
            diff_rows.append([product['name'], ", ".join(unique_pros)])

    if diff_rows:
        tables.append({
            'section': "KEY DIFFERENCES",
            'headers': ["Product", "Unique Features"],
            'data': diff_rows,
        })

    # Worst Product Table
    if worst_product is not None and not worst_product.empty:
        worst = worst_product.iloc[0]
        # Only the negative terms matter for the "avoid" table.
        _, cons = analyze_sentiment(worst['reviews'])
        tables.append({
            'section': "PRODUCT TO AVOID",
            'headers': ["Product", "Rating", "Reasons to Avoid"],
            'data': [[
                worst_product.index[0],
                f"★{worst['avg_rating']:.1f}",
                ", ".join(cons[:3]) if cons else "Consistently poor ratings",
            ]],
        })

    return tables
88
+
89
def get_product_stats(category_df):
    """Aggregate per-product review statistics for one category.

    Rows are grouped by the ``name`` column. For every product the mean and
    count of ``rating`` and the list of ``text`` values are collected.

    Args:
        category_df: DataFrame with ``name``, ``rating`` and ``text`` columns.

    Returns:
        DataFrame indexed by product name with the columns ``avg_rating``,
        ``review_count`` and ``reviews``, restricted to products whose
        ``review_count`` is at least 5.
    """
    per_product = category_df.groupby('name')
    stats = per_product.agg({
        'rating': ['mean', 'count'],
        'text': list,
    })

    # Flatten the two-level column index produced by the aggregation.
    stats.columns = ['avg_rating', 'review_count', 'reviews']

    has_enough_reviews = stats['review_count'] >= 5
    return stats.loc[has_enough_reviews]
97
+
98
def get_top_and_worst_products(product_stats):
    """Pick the best-rated products and the single worst-rated one.

    The worst product is chosen only among products that are not already in
    the top selection. Otherwise, with three or fewer products, the same
    product would be reported both as a top pick and as the one to avoid.

    Args:
        product_stats: DataFrame indexed by product name with an
            ``avg_rating`` column.

    Returns:
        Tuple ``(top_products, worst_product)`` of DataFrames:
        ``top_products`` holds up to three rows with the highest
        ``avg_rating``; ``worst_product`` holds the lowest-rated remaining
        row, or is empty when every product is already a top pick.
    """
    top_products = product_stats.nlargest(3, 'avg_rating')

    # Candidates for "worst" are whatever is left after removing the top picks.
    remaining = product_stats.drop(index=top_products.index)
    worst_product = remaining.nsmallest(1, 'avg_rating')

    return top_products, worst_product
104
+
105
def analyze_top_products(top_products):
    """Describe each top product together with its main pros and cons.

    Args:
        top_products: DataFrame indexed by product name with ``avg_rating``,
            ``review_count`` and ``reviews`` columns.

    Returns:
        list of dicts, one per row, with the keys ``name``, ``rating``,
        ``review_count``, ``pros`` and ``cons``. At most three pros and three
        cons are kept; a placeholder entry is used when a list is empty.
    """
    details = []

    for name, stats_row in top_products.iterrows():
        positives, negatives = analyze_sentiment(stats_row['reviews'])

        top_pros = positives[:3]
        if not top_pros:
            top_pros = ["no significant positive feedback"]

        top_cons = negatives[:3]
        if not top_cons:
            top_cons = ["no major complaints"]

        details.append({
            'name': name,
            'rating': stats_row['avg_rating'],
            'review_count': stats_row['review_count'],
            'pros': top_pros,
            'cons': top_cons,
        })

    return details
118
+
119
def analyze_sentiment(reviews):
    """Collect the nouns and adjectives used in clearly polar sentences.

    Each review is split into sentences. For a sentence whose polarity is
    above 0.3 the nouns and adjectives of *that sentence* are counted as
    pros; below -0.3 they are counted as cons. Sentences in between are
    ignored.

    Args:
        reviews: Iterable of review texts. Entries that are not strings
            (for example missing values) are skipped.

    Returns:
        Tuple ``(pros, cons)`` of word lists, each ordered from the most to
        the least frequently counted word.
    """
    wanted_tags = ('NN', 'NNS', 'JJ', 'JJR', 'JJS')
    pros = defaultdict(int)
    cons = defaultdict(int)

    for review in reviews:
        # Missing or non-text reviews cannot be analyzed; skip them rather
        # than letting one bad entry abort the whole analysis.
        if not isinstance(review, str):
            continue

        for sentence in TextBlob(review).sentences:
            polarity = sentence.sentiment.polarity
            if polarity > 0.3:  # Positive
                counts = pros
            elif polarity < -0.3:  # Negative
                counts = cons
            else:
                continue

            # Tag the sentence itself, not the whole review, so that words
            # from other sentences are not credited with this polarity.
            for word, tag in sentence.tags:
                if tag in wanted_tags:
                    counts[word] += 1

    # Most frequent first; the sort is stable, so ties keep first-seen order.
    pros_sorted = sorted(pros, key=pros.get, reverse=True)
    cons_sorted = sorted(cons, key=cons.get, reverse=True)

    return pros_sorted, cons_sorted
143
+
144
def format_for_gradio(summaries):
    """Render the per-category summary tables as one HTML string.

    Every category becomes an ``<h2>`` heading followed by its tables; each
    table is an ``<h3>`` heading plus the HTML produced by ``tabulate``.
    Categories are separated by ``<hr>``.

    Category and section titles are HTML-escaped because they are
    interpolated directly into the markup.

    Args:
        summaries: Mapping of category name to a list of table dicts with
            the keys ``section``, ``headers`` and ``data``.

    Returns:
        The HTML string; empty when ``summaries`` is empty.
    """
    # Local import: the standard-library ``html`` module is only needed here.
    from html import escape

    table_tag = '<table style="width:100%; border-collapse: collapse; margin-bottom: 20px;">'
    th_tag = '<th style="background-color: #f2f2f2; padding: 8px; text-align: left; border: 1px solid #ddd;">'
    td_tag = '<td style="padding: 8px; border: 1px solid #ddd;">'

    outputs = []
    for category, tables in summaries.items():
        parts = [f"<h2 style='color: #4a6baf;'>{escape(str(category).upper())}</h2>"]

        for table in tables:
            table_html = tabulate(
                table['data'],
                headers=table['headers'],
                tablefmt="html",
                stralign="left",
                numalign="center",
            )
            # Inline styles are injected by replacing the bare opening tags.
            # NOTE(review): cells emitted with attributes (e.g. an inline
            # alignment style) would not match these literal tags - confirm
            # against tabulate's HTML output.
            table_html = table_html.replace('<table>', table_tag)
            table_html = table_html.replace('<th>', th_tag)
            table_html = table_html.replace('<td>', td_tag)

            parts.append(f"<h3 style='color: #3a5a8a;'>{escape(str(table['section']))}</h3>")
            parts.append(table_html)

        outputs.append("".join(parts))

    return "<hr>".join(outputs)
167
+
168
def analyze_reviews(df):
    """Run the full analysis on ``df`` and return the result as HTML.

    Args:
        df: Review DataFrame handed to ``generate_category_summaries``.

    Returns:
        The HTML string produced by ``format_for_gradio`` for the generated
        summaries.
    """
    return format_for_gradio(generate_category_summaries(df))
172
+
173
# Create Gradio interface
# NOTE(review): `df` is read below (dropdown choices and both click handlers)
# but is not assigned anywhere in the visible source - confirm where the
# review DataFrame is loaded, otherwise this block fails with a NameError.
with gr.Blocks(title="Amazon Product Review Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Amazon Product Review Analyzer")
    gr.Markdown("Analyzing top products and reviews across categories")

    with gr.Row():
        # Left column: pick one category and analyze it.
        with gr.Column():
            gr.Markdown("### Product Categories Found")
            category_dropdown = gr.Dropdown(
                choices=df['cluster_name'].unique().tolist(),
                label="Select a Category",
                interactive=True
            )
            analyze_btn = gr.Button("Analyze Selected Category", variant="primary")

        # Right column: analyze every category at once.
        with gr.Column():
            gr.Markdown("### All Categories Summary")
            all_categories_btn = gr.Button("Analyze All Categories", variant="secondary")

    # Shared output area; both buttons write their HTML result here.
    output_html = gr.HTML(label="Analysis Results")

    # Button actions
    # Make the analyze button interactive only while the dropdown has a
    # truthy value.
    category_dropdown.change(
        fn=lambda x: gr.update(interactive=bool(x)),
        inputs=category_dropdown,
        outputs=analyze_btn
    )

    # Analyze only the rows whose cluster_name equals the selected category.
    analyze_btn.click(
        fn=lambda cat: analyze_reviews(df[df['cluster_name'] == cat]),
        inputs=category_dropdown,
        outputs=output_html
    )

    # Analyze the whole DataFrame.
    all_categories_btn.click(
        fn=lambda: analyze_reviews(df),
        outputs=output_html
    )

# Launch the interface
demo.launch()