new new graphs and summaries
Browse files
app.py
CHANGED
|
@@ -49,59 +49,106 @@ def process_single_sheet(df, product_name):
|
|
| 49 |
|
| 50 |
def create_comparison_charts(sentiment_results):
|
| 51 |
"""
|
| 52 |
-
Create comparison charts for different products
|
| 53 |
-
Returns two plotly figures: bar chart and pie chart
|
| 54 |
"""
|
| 55 |
# Prepare data for plotting
|
| 56 |
-
|
| 57 |
-
sentiments = []
|
| 58 |
-
counts = []
|
| 59 |
-
|
| 60 |
for product, sentiment_counts in sentiment_results.items():
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
})
|
| 71 |
|
| 72 |
-
#
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
# Create pie chart for overall sentiment distribution
|
| 82 |
-
pie_fig = px.pie(plot_df,
|
| 83 |
-
values='Count',
|
| 84 |
-
names='Sentiment',
|
| 85 |
-
title='Overall Sentiment Distribution',
|
| 86 |
-
color_discrete_sequence=px.colors.qualitative.Set3)
|
| 87 |
-
|
| 88 |
-
# Create summary table
|
| 89 |
-
summary_df = plot_df.pivot_table(
|
| 90 |
-
values='Count',
|
| 91 |
-
index='Product',
|
| 92 |
-
columns='Sentiment',
|
| 93 |
-
fill_value=0
|
| 94 |
-
).round(2)
|
| 95 |
-
|
| 96 |
-
# Add total reviews column
|
| 97 |
-
summary_df['Total Reviews'] = summary_df.sum(axis=1)
|
| 98 |
-
|
| 99 |
-
# Calculate percentage of positive reviews (Positive + Very Positive)
|
| 100 |
-
positive_cols = ['Positive', 'Very Positive']
|
| 101 |
-
positive_cols = [col for col in positive_cols if col in summary_df.columns]
|
| 102 |
-
summary_df['Positive Ratio'] = (summary_df[positive_cols].sum(axis=1) / summary_df['Total Reviews'] * 100).round(2)
|
| 103 |
-
|
| 104 |
-
return bar_fig, pie_fig, summary_df
|
| 105 |
|
| 106 |
|
| 107 |
def process_file(file_obj):
|
|
@@ -114,7 +161,6 @@ def process_file(file_obj):
|
|
| 114 |
all_processed_dfs = {}
|
| 115 |
|
| 116 |
if file_path.endswith('.csv'):
|
| 117 |
-
# Process single CSV file
|
| 118 |
df = pd.read_csv(file_path)
|
| 119 |
product_name = "Product" # Default name for CSV
|
| 120 |
processed_df, sentiment_counts = process_single_sheet(df, product_name)
|
|
@@ -122,9 +168,7 @@ def process_file(file_obj):
|
|
| 122 |
sentiment_results[product_name] = sentiment_counts
|
| 123 |
|
| 124 |
elif file_path.endswith(('.xlsx', '.xls')):
|
| 125 |
-
# Process multiple sheets in Excel file
|
| 126 |
excel_file = pd.ExcelFile(file_path)
|
| 127 |
-
|
| 128 |
for sheet_name in excel_file.sheet_names:
|
| 129 |
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
| 130 |
processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
|
|
@@ -134,21 +178,17 @@ def process_file(file_obj):
|
|
| 134 |
raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
|
| 135 |
|
| 136 |
# Create visualizations
|
| 137 |
-
|
| 138 |
|
| 139 |
-
# Save results
|
| 140 |
output_path = "sentiment_analysis_results.xlsx"
|
| 141 |
with pd.ExcelWriter(output_path) as writer:
|
| 142 |
-
# Save processed data
|
| 143 |
for sheet_name, df in all_processed_dfs.items():
|
| 144 |
df.to_excel(writer, sheet_name=sheet_name, index=False)
|
| 145 |
-
|
| 146 |
-
# Save summary
|
| 147 |
-
summary_table.to_excel(writer, sheet_name='Summary', index=True)
|
| 148 |
|
| 149 |
return (
|
| 150 |
-
|
| 151 |
-
pie_chart,
|
| 152 |
summary_table,
|
| 153 |
output_path
|
| 154 |
)
|
|
@@ -158,12 +198,86 @@ def process_file(file_obj):
|
|
| 158 |
|
| 159 |
|
| 160 |
# Create Gradio interface
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
with gr.Blocks() as interface:
|
| 162 |
-
gr.Markdown("#
|
|
|
|
| 163 |
gr.Markdown("""
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
""")
|
| 168 |
|
| 169 |
with gr.Row():
|
|
@@ -176,20 +290,19 @@ with gr.Blocks() as interface:
|
|
| 176 |
analyze_btn = gr.Button("Analyze Sentiments")
|
| 177 |
|
| 178 |
with gr.Row():
|
| 179 |
-
|
| 180 |
-
pie_plot = gr.Plot(label="Overall Sentiment Distribution")
|
| 181 |
|
| 182 |
with gr.Row():
|
| 183 |
-
summary_table = gr.Dataframe(label="Summary
|
| 184 |
|
| 185 |
with gr.Row():
|
| 186 |
-
output_file = gr.File(label="Download
|
| 187 |
|
| 188 |
analyze_btn.click(
|
| 189 |
fn=process_file,
|
| 190 |
inputs=[file_input],
|
| 191 |
-
outputs=[
|
| 192 |
)
|
| 193 |
|
| 194 |
-
#
|
| 195 |
-
interface.launch()
|
|
|
|
| 49 |
|
| 50 |
def create_comparison_charts(sentiment_results):
    """
    Create investment-focused comparison charts for different products.

    Args:
        sentiment_results: Mapping of product name to an object exposing
            ``to_dict()`` that yields ``{sentiment label: review count}``
            (presumably a pandas ``value_counts()`` Series -- confirm
            against ``process_single_sheet``).

    Returns:
        Tuple ``(score_fig, ratio_fig, summary_df)``:
        a bar chart of the weighted 0-100 sentiment score per product,
        a grouped bar chart of positive vs. negative percentages, and a
        summary DataFrame sorted by sentiment score (descending).

    NOTE(review): a second ``create_comparison_charts`` is defined later in
    this file and replaces this one at import time; ``process_file`` unpacks
    two return values, which matches that later definition, not this one.
    """
    sentiment_labels = ['Very Negative', 'Negative', 'Neutral', 'Positive', 'Very Positive']

    # Prepare data for plotting: one row per product, sentiments as percentages
    plot_data = []
    for product, sentiment_counts in sentiment_results.items():
        sentiment_dict = sentiment_counts.to_dict()
        total = sum(sentiment_dict.values())

        row = {
            'Product': product,
            'Total Reviews': total,
        }
        for sentiment, count in sentiment_dict.items():
            # Guard the percentage so a zero total cannot divide by zero
            row[sentiment] = (count / total) * 100 if total else 0.0
        plot_data.append(row)

    df = pd.DataFrame(plot_data)

    # Ensure all sentiment columns exist (in case no product has a given sentiment)
    for sentiment in sentiment_labels:
        if sentiment not in df.columns:
            df[sentiment] = 0.0
    # A product lacking a sentiment that another product has shows up as NaN
    # after DataFrame construction; treat it as 0% so scores stay numeric.
    df[sentiment_labels] = df[sentiment_labels].fillna(0.0)

    # Calculate weighted sentiment score (0 to 100)
    sentiment_weights = {
        'Very Negative': 0,
        'Negative': 25,
        'Neutral': 50,
        'Positive': 75,
        'Very Positive': 100,
    }
    # Column-wise multiply by the weights, then sum across each row
    df['Sentiment Score'] = (
        df[sentiment_labels]
        .mul(pd.Series(sentiment_weights))
        .div(100)
        .sum(axis=1)
        .round(2)
    )

    # Create sentiment score chart
    score_fig = go.Figure()
    score_fig.add_trace(go.Bar(
        x=df['Product'],
        y=df['Sentiment Score'],
        text=df['Sentiment Score'].round(1),
        textposition='auto',
        marker_color='rgb(65, 105, 225)'
    ))
    score_fig.update_layout(
        title='Overall Sentiment Score by Product (0-100)',
        yaxis_title='Weighted Sentiment Score',
        yaxis_range=[0, 100],
        showlegend=False
    )

    # Calculate Positive-Negative Ratios
    df['Positive Ratio'] = df[['Positive', 'Very Positive']].sum(axis=1)
    df['Negative Ratio'] = df[['Negative', 'Very Negative']].sum(axis=1)

    # Create Positive-Negative ratio chart
    ratio_fig = go.Figure()
    ratio_fig.add_trace(go.Bar(
        name='Positive',
        x=df['Product'],
        y=df['Positive Ratio'],
        marker_color='rgb(50, 205, 50)'
    ))
    ratio_fig.add_trace(go.Bar(
        name='Negative',
        x=df['Product'],
        y=df['Negative Ratio'],
        marker_color='rgb(220, 20, 60)'
    ))
    ratio_fig.update_layout(
        barmode='group',
        title='Positive vs Negative Sentiment Ratio by Product',
        yaxis_title='Percentage (%)'
    )

    # Create summary table with investment-relevant metrics
    summary_df = pd.DataFrame({
        'Product': df['Product'],
        'Total Reviews': df['Total Reviews'],
        'Sentiment Score (0-100)': df['Sentiment Score'],
        'Positive Ratio (%)': df['Positive Ratio'].round(2),
        'Negative Ratio (%)': df['Negative Ratio'].round(2),
        'Neutral Ratio (%)': df['Neutral'].round(2)
    })

    # Confidence Score = polarized share / neutral share; a 0% neutral share
    # is replaced by 0.001 so the division never hits zero.
    summary_df['Confidence Score'] = (
        (summary_df['Positive Ratio (%)'] + summary_df['Negative Ratio (%)'])
        / summary_df['Neutral Ratio (%)'].replace(0, 0.001)
    ).round(2)

    # Sort by Sentiment Score for easy comparison
    summary_df = summary_df.sort_values('Sentiment Score (0-100)', ascending=False)

    return score_fig, ratio_fig, summary_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
|
| 154 |
def process_file(file_obj):
|
|
|
|
| 161 |
all_processed_dfs = {}
|
| 162 |
|
| 163 |
if file_path.endswith('.csv'):
|
|
|
|
| 164 |
df = pd.read_csv(file_path)
|
| 165 |
product_name = "Product" # Default name for CSV
|
| 166 |
processed_df, sentiment_counts = process_single_sheet(df, product_name)
|
|
|
|
| 168 |
sentiment_results[product_name] = sentiment_counts
|
| 169 |
|
| 170 |
elif file_path.endswith(('.xlsx', '.xls')):
|
|
|
|
| 171 |
excel_file = pd.ExcelFile(file_path)
|
|
|
|
| 172 |
for sheet_name in excel_file.sheet_names:
|
| 173 |
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
| 174 |
processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
|
|
|
|
| 178 |
raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
|
| 179 |
|
| 180 |
# Create visualizations
|
| 181 |
+
distribution_plot, summary_table = create_comparison_charts(sentiment_results)
|
| 182 |
|
| 183 |
+
# Save results
|
| 184 |
output_path = "sentiment_analysis_results.xlsx"
|
| 185 |
with pd.ExcelWriter(output_path) as writer:
|
|
|
|
| 186 |
for sheet_name, df in all_processed_dfs.items():
|
| 187 |
df.to_excel(writer, sheet_name=sheet_name, index=False)
|
| 188 |
+
summary_table.to_excel(writer, sheet_name='Summary', index=False)
|
|
|
|
|
|
|
| 189 |
|
| 190 |
return (
|
| 191 |
+
distribution_plot,
|
|
|
|
| 192 |
summary_table,
|
| 193 |
output_path
|
| 194 |
)
|
|
|
|
| 198 |
|
| 199 |
|
| 200 |
# Create Gradio interface
|
| 201 |
+
# Chart helper called by process_file: sentiment distribution figure and summary table
|
| 202 |
+
def create_comparison_charts(sentiment_results):
    """
    Create simplified, investment-focused comparison charts.

    Args:
        sentiment_results: Mapping of product name to an object exposing
            ``to_dict()`` that yields ``{sentiment label: review count}``
            (presumably a pandas ``value_counts()`` Series -- confirm
            against ``process_single_sheet``).

    Returns:
        Tuple ``(stack_fig, summary_df)``: a stacked bar chart of the
        per-product sentiment distribution in percent, and a summary
        DataFrame of positive / neutral / negative percentages sorted by
        the positive percentage (descending).
    """
    sentiment_labels = ['Very Negative', 'Negative', 'Neutral', 'Positive', 'Very Positive']

    # Prepare data: one row per product, sentiments expressed as percentages
    plot_data = []
    for product, sentiment_counts in sentiment_results.items():
        sentiment_dict = sentiment_counts.to_dict()
        total = sum(sentiment_dict.values())

        row = {
            'Product': product,
            'Total Reviews': total,
        }
        for sentiment, count in sentiment_dict.items():
            # Guard the percentage so a zero total cannot divide by zero
            row[sentiment] = (count / total) * 100 if total else 0.0
        plot_data.append(row)

    df = pd.DataFrame(plot_data)

    # Ensure all sentiment columns exist
    for sentiment in sentiment_labels:
        if sentiment not in df.columns:
            df[sentiment] = 0.0
    # A product lacking a sentiment that another product has shows up as NaN
    # after DataFrame construction; treat it as 0% so bars and the summary
    # table never contain NaN.
    df[sentiment_labels] = df[sentiment_labels].fillna(0.0)

    # 1. Simple Stacked Bar Chart showing sentiment distribution
    stack_fig = go.Figure()
    # Trace order (most positive first) paired with a green -> red palette
    sentiments = ['Very Positive', 'Positive', 'Neutral', 'Negative', 'Very Negative']
    colors = ['rgb(39, 174, 96)', 'rgb(46, 204, 113)',
              'rgb(241, 196, 15)', 'rgb(231, 76, 60)',
              'rgb(192, 57, 43)']

    for sentiment, color in zip(sentiments, colors):
        stack_fig.add_trace(go.Bar(
            name=sentiment,
            x=df['Product'],
            y=df[sentiment],
            marker_color=color
        ))

    stack_fig.update_layout(
        barmode='stack',
        title='Sentiment Distribution by Product',
        yaxis_title='Percentage (%)'
    )

    # 2. Aggregated Sentiment Ratios for Quick Comparison
    df['Positive_Total'] = df[['Positive', 'Very Positive']].sum(axis=1)
    df['Negative_Total'] = df[['Negative', 'Very Negative']].sum(axis=1)

    summary_df = pd.DataFrame({
        'Product': df['Product'],
        'Total Reviews': df['Total Reviews'],
        'Positive (%)': df['Positive_Total'].round(2),
        'Neutral (%)': df['Neutral'].round(2),
        'Negative (%)': df['Negative_Total'].round(2)
    })

    # Sort by Positive percentage for easy comparison
    summary_df = summary_df.sort_values('Positive (%)', ascending=False)

    return stack_fig, summary_df
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
# Build the Gradio interface
|
| 267 |
with gr.Blocks() as interface:
|
| 268 |
+
gr.Markdown("# Product Review Sentiment Analysis")
|
| 269 |
+
|
| 270 |
gr.Markdown("""
|
| 271 |
+
### Quick Guide
|
| 272 |
+
1. **Excel File (Multiple Products)**:
|
| 273 |
+
- Create separate sheets for each product
|
| 274 |
+
- Name sheets with product/company names
|
| 275 |
+
- Include "Reviews" column in each sheet
|
| 276 |
+
|
| 277 |
+
2. **CSV File (Single Product)**:
|
| 278 |
+
- Include "Reviews" column
|
| 279 |
+
|
| 280 |
+
Upload your file and click Analyze to get started.
|
| 281 |
""")
|
| 282 |
|
| 283 |
with gr.Row():
|
|
|
|
| 290 |
analyze_btn = gr.Button("Analyze Sentiments")
|
| 291 |
|
| 292 |
with gr.Row():
|
| 293 |
+
distribution_plot = gr.Plot(label="Sentiment Distribution")
|
|
|
|
| 294 |
|
| 295 |
with gr.Row():
|
| 296 |
+
summary_table = gr.Dataframe(label="Summary Metrics")
|
| 297 |
|
| 298 |
with gr.Row():
|
| 299 |
+
output_file = gr.File(label="Download Full Report")
|
| 300 |
|
| 301 |
analyze_btn.click(
|
| 302 |
fn=process_file,
|
| 303 |
inputs=[file_input],
|
| 304 |
+
outputs=[distribution_plot, summary_table, output_file]
|
| 305 |
)
|
| 306 |
|
| 307 |
+
# launch interface
|
| 308 |
+
interface.launch()
|