Mohammed Foud
committed on
Commit
·
b3ed9e6
1
Parent(s):
7f87155
first commit
Browse files
app.py
CHANGED
|
@@ -11,6 +11,8 @@ import base64
|
|
| 11 |
from textblob import TextBlob
|
| 12 |
from collections import defaultdict
|
| 13 |
from tabulate import tabulate
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Load models and initialize components
|
| 16 |
model_path = "./final_model"
|
|
@@ -48,9 +50,51 @@ def get_initial_summary():
|
|
| 48 |
return "Error: Could not load dataset.csv"
|
| 49 |
|
| 50 |
try:
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
except Exception as e:
|
| 55 |
return f"Error generating initial summary: {str(e)}"
|
| 56 |
|
|
@@ -146,10 +190,34 @@ def analyze_reviews(reviews_text):
|
|
| 146 |
# Original sentiment analysis
|
| 147 |
df, plot_html = analyze_reviews_sentiment(reviews_text)
|
| 148 |
|
| 149 |
-
#
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
-
return df, plot_html,
|
| 153 |
|
| 154 |
def analyze_reviews_sentiment(reviews_text):
|
| 155 |
reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
|
|
@@ -193,7 +261,7 @@ def create_interface():
|
|
| 193 |
with gr.Tab("Review Analysis"):
|
| 194 |
# Add initial dataset summary
|
| 195 |
gr.Markdown("## Dataset Overview")
|
| 196 |
-
gr.
|
| 197 |
|
| 198 |
gr.Markdown("## Analyze New Reviews")
|
| 199 |
reviews_input = gr.Textbox(
|
|
@@ -211,9 +279,8 @@ def create_interface():
|
|
| 211 |
plot_output = gr.HTML(label="Sentiment Distribution")
|
| 212 |
|
| 213 |
with gr.Column():
|
| 214 |
-
summary_output = gr.Textbox
|
| 215 |
-
label="Review Summary"
|
| 216 |
-
lines=5
|
| 217 |
)
|
| 218 |
|
| 219 |
analyze_button.click(
|
|
@@ -224,6 +291,28 @@ def create_interface():
|
|
| 224 |
|
| 225 |
return demo
|
| 226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
# Create and launch the interface
|
| 228 |
if __name__ == "__main__":
|
| 229 |
demo = create_interface()
|
|
|
|
| 11 |
from textblob import TextBlob
|
| 12 |
from collections import defaultdict
|
| 13 |
from tabulate import tabulate
|
| 14 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 15 |
+
from sklearn.cluster import KMeans
|
| 16 |
|
| 17 |
# Load models and initialize components
|
| 18 |
model_path = "./final_model"
|
|
|
|
| 50 |
return "Error: Could not load dataset.csv"
|
| 51 |
|
| 52 |
try:
|
| 53 |
+
# Generate summaries for all categories
|
| 54 |
+
summaries = generate_category_summaries(df)
|
| 55 |
+
|
| 56 |
+
# Convert summaries to HTML format for Gradio
|
| 57 |
+
html_output = []
|
| 58 |
+
for category, tables in summaries.items():
|
| 59 |
+
html_output.append(f"<h2>CATEGORY: {category}</h2>")
|
| 60 |
+
|
| 61 |
+
for table in tables:
|
| 62 |
+
html_output.append(f"<h3>{table['section']}</h3>")
|
| 63 |
+
# Convert table to HTML using tabulate
|
| 64 |
+
table_html = tabulate(
|
| 65 |
+
table['data'],
|
| 66 |
+
headers=table['headers'],
|
| 67 |
+
tablefmt="html",
|
| 68 |
+
stralign="left",
|
| 69 |
+
numalign="center"
|
| 70 |
+
)
|
| 71 |
+
# Add some CSS styling
|
| 72 |
+
styled_table = f"""
|
| 73 |
+
<style>
|
| 74 |
+
table {{
|
| 75 |
+
border-collapse: collapse;
|
| 76 |
+
margin: 15px 0;
|
| 77 |
+
width: 100%;
|
| 78 |
+
}}
|
| 79 |
+
th, td {{
|
| 80 |
+
padding: 8px;
|
| 81 |
+
border: 1px solid #ddd;
|
| 82 |
+
text-align: left;
|
| 83 |
+
}}
|
| 84 |
+
th {{
|
| 85 |
+
background-color: #f5f5f5;
|
| 86 |
+
}}
|
| 87 |
+
tr:nth-child(even) {{
|
| 88 |
+
background-color: #f9f9f9;
|
| 89 |
+
}}
|
| 90 |
+
</style>
|
| 91 |
+
{table_html}
|
| 92 |
+
"""
|
| 93 |
+
html_output.append(styled_table)
|
| 94 |
+
|
| 95 |
+
html_output.append("<hr>") # Add separator between categories
|
| 96 |
+
|
| 97 |
+
return "\n".join(html_output)
|
| 98 |
except Exception as e:
|
| 99 |
return f"Error generating initial summary: {str(e)}"
|
| 100 |
|
|
|
|
| 190 |
# Original sentiment analysis
|
| 191 |
df, plot_html = analyze_reviews_sentiment(reviews_text)
|
| 192 |
|
| 193 |
+
# Create a temporary DataFrame with the new reviews
|
| 194 |
+
temp_df = pd.DataFrame({
|
| 195 |
+
'text': reviews_text.split('\n'),
|
| 196 |
+
'rating': [3] * len(reviews_text.split('\n')), # Default neutral rating
|
| 197 |
+
'name': ['New Review'] * len(reviews_text.split('\n')),
|
| 198 |
+
'cluster_name': ['New Reviews'] * len(reviews_text.split('\n'))
|
| 199 |
+
})
|
| 200 |
+
|
| 201 |
+
# Generate summary tables
|
| 202 |
+
summaries = generate_category_summaries(temp_df)
|
| 203 |
+
|
| 204 |
+
# Convert summaries to HTML
|
| 205 |
+
html_output = []
|
| 206 |
+
for category, tables in summaries.items():
|
| 207 |
+
for table in tables:
|
| 208 |
+
html_output.append(f"<h3>{table['section']}</h3>")
|
| 209 |
+
table_html = tabulate(
|
| 210 |
+
table['data'],
|
| 211 |
+
headers=table['headers'],
|
| 212 |
+
tablefmt="html",
|
| 213 |
+
stralign="left",
|
| 214 |
+
numalign="center"
|
| 215 |
+
)
|
| 216 |
+
html_output.append(table_html)
|
| 217 |
+
|
| 218 |
+
summary_html = "\n".join(html_output)
|
| 219 |
|
| 220 |
+
return df, plot_html, summary_html
|
| 221 |
|
| 222 |
def analyze_reviews_sentiment(reviews_text):
|
| 223 |
reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
|
|
|
|
| 261 |
with gr.Tab("Review Analysis"):
|
| 262 |
# Add initial dataset summary
|
| 263 |
gr.Markdown("## Dataset Overview")
|
| 264 |
+
gr.HTML(initial_summary) # Changed from gr.Markdown to gr.HTML
|
| 265 |
|
| 266 |
gr.Markdown("## Analyze New Reviews")
|
| 267 |
reviews_input = gr.Textbox(
|
|
|
|
| 279 |
plot_output = gr.HTML(label="Sentiment Distribution")
|
| 280 |
|
| 281 |
with gr.Column():
|
| 282 |
+
summary_output = gr.HTML( # Changed from gr.Textbox to gr.HTML
|
| 283 |
+
label="Review Summary"
|
|
|
|
| 284 |
)
|
| 285 |
|
| 286 |
analyze_button.click(
|
|
|
|
| 291 |
|
| 292 |
return demo
|
| 293 |
|
| 294 |
+
def add_clusters_to_df(df):
|
| 295 |
+
"""Add cluster names to the DataFrame if they don't exist"""
|
| 296 |
+
# Create text features
|
| 297 |
+
vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
|
| 298 |
+
text_features = vectorizer.fit_transform(df['text'])
|
| 299 |
+
|
| 300 |
+
# Perform clustering
|
| 301 |
+
n_clusters = 4 # You can adjust this
|
| 302 |
+
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
|
| 303 |
+
df['cluster_name'] = kmeans.fit_predict(text_features)
|
| 304 |
+
|
| 305 |
+
# Map cluster numbers to names
|
| 306 |
+
cluster_names = {
|
| 307 |
+
0: "Electronics",
|
| 308 |
+
1: "Home & Kitchen",
|
| 309 |
+
2: "Books & Media",
|
| 310 |
+
3: "Other Products"
|
| 311 |
+
}
|
| 312 |
+
df['cluster_name'] = df['cluster_name'].map(cluster_names)
|
| 313 |
+
|
| 314 |
+
return df
|
| 315 |
+
|
| 316 |
# Create and launch the interface
|
| 317 |
if __name__ == "__main__":
|
| 318 |
demo = create_interface()
|