Spaces:

mfoud444
/

oop

Paused

App Files Files Community

Mohammed Foud committed on Apr 14, 2025

Commit

b3ed9e6

1 Parent(s): 7f87155

first commit

Browse files

Files changed (1) hide show

app.py +99 -10

app.py CHANGED Viewed

@@ -11,6 +11,8 @@ import base64
 from textblob import TextBlob
 from collections import defaultdict
 from tabulate import tabulate
 # Load models and initialize components
 model_path = "./final_model"
@@ -48,9 +50,51 @@ def get_initial_summary():
         return "Error: Could not load dataset.csv"
     try:
-        sample_reviews = df['reviews.text'].sample(n=min(50, len(df))).fillna('').tolist()
-        sample_text = '\n'.join(sample_reviews)
-        return generate_category_summary(sample_text)
     except Exception as e:
         return f"Error generating initial summary: {str(e)}"
@@ -146,10 +190,34 @@ def analyze_reviews(reviews_text):
     # Original sentiment analysis
     df, plot_html = analyze_reviews_sentiment(reviews_text)
-    # Generate summary
-    summary = generate_category_summary(reviews_text)
-    return df, plot_html, summary
 def analyze_reviews_sentiment(reviews_text):
     reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
@@ -193,7 +261,7 @@ def create_interface():
         with gr.Tab("Review Analysis"):
             # Add initial dataset summary
             gr.Markdown("## Dataset Overview")
-            gr.Markdown(initial_summary)
             gr.Markdown("## Analyze New Reviews")
             reviews_input = gr.Textbox(
@@ -211,9 +279,8 @@ def create_interface():
                     plot_output = gr.HTML(label="Sentiment Distribution")
                 with gr.Column():
-                    summary_output = gr.Textbox(
-                        label="Review Summary",
-                        lines=5
                     )
         analyze_button.click(
@@ -224,6 +291,28 @@ def create_interface():
     return demo
 # Create and launch the interface
 if __name__ == "__main__":
     demo = create_interface()

 from textblob import TextBlob
 from collections import defaultdict
 from tabulate import tabulate
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.cluster import KMeans
 # Load models and initialize components
 model_path = "./final_model"
         return "Error: Could not load dataset.csv"
     try:
+        # Generate summaries for all categories
+        summaries = generate_category_summaries(df)
+        # Convert summaries to HTML format for Gradio
+        html_output = []
+        for category, tables in summaries.items():
+            html_output.append(f"<h2>CATEGORY: {category}</h2>")
+            for table in tables:
+                html_output.append(f"<h3>{table['section']}</h3>")
+                # Convert table to HTML using tabulate
+                table_html = tabulate(
+                    table['data'],
+                    headers=table['headers'],
+                    tablefmt="html",
+                    stralign="left",
+                    numalign="center"
+                )
+                # Add some CSS styling
+                styled_table = f"""
+                <style>
+                    table {{
+                        border-collapse: collapse;
+                        margin: 15px 0;
+                        width: 100%;
+                    }}
+                    th, td {{
+                        padding: 8px;
+                        border: 1px solid #ddd;
+                        text-align: left;
+                    }}
+                    th {{
+                        background-color: #f5f5f5;
+                    }}
+                    tr:nth-child(even) {{
+                        background-color: #f9f9f9;
+                    }}
+                </style>
+                {table_html}
+                """
+                html_output.append(styled_table)
+            html_output.append("<hr>")  # Add separator between categories
+        return "\n".join(html_output)
     except Exception as e:
         return f"Error generating initial summary: {str(e)}"
     # Original sentiment analysis
     df, plot_html = analyze_reviews_sentiment(reviews_text)
+    # Create a temporary DataFrame with the new reviews
+    temp_df = pd.DataFrame({
+        'text': reviews_text.split('\n'),
+        'rating': [3] * len(reviews_text.split('\n')),  # Default neutral rating
+        'name': ['New Review'] * len(reviews_text.split('\n')),
+        'cluster_name': ['New Reviews'] * len(reviews_text.split('\n'))
+    })
+    # Generate summary tables
+    summaries = generate_category_summaries(temp_df)
+    # Convert summaries to HTML
+    html_output = []
+    for category, tables in summaries.items():
+        for table in tables:
+            html_output.append(f"<h3>{table['section']}</h3>")
+            table_html = tabulate(
+                table['data'],
+                headers=table['headers'],
+                tablefmt="html",
+                stralign="left",
+                numalign="center"
+            )
+            html_output.append(table_html)
+    summary_html = "\n".join(html_output)
+    return df, plot_html, summary_html
 def analyze_reviews_sentiment(reviews_text):
     reviews = [r.strip() for r in reviews_text.split('\n') if r.strip()]
         with gr.Tab("Review Analysis"):
             # Add initial dataset summary
             gr.Markdown("## Dataset Overview")
+            gr.HTML(initial_summary)  # Changed from gr.Markdown to gr.HTML
             gr.Markdown("## Analyze New Reviews")
             reviews_input = gr.Textbox(
                     plot_output = gr.HTML(label="Sentiment Distribution")
                 with gr.Column():
+                    summary_output = gr.HTML(  # Changed from gr.Textbox to gr.HTML
+                        label="Review Summary"
                     )
         analyze_button.click(
     return demo
def add_clusters_to_df(df):
    """Add a ``cluster_name`` column to ``df`` if it does not already have one.

    The texts in ``df['text']`` are vectorized with TF-IDF, grouped with
    K-Means, and each numeric cluster id is mapped to a fixed display label.

    Args:
        df: DataFrame with a ``text`` column. It is modified in place.

    Returns:
        The same DataFrame object, carrying a ``cluster_name`` column.

    NOTE(review): the labels below are attached to arbitrary K-Means cluster
    ids; nothing in this function ties a label to what a cluster actually
    contains -- confirm that this is the intended behavior.
    """
    # Honor the documented contract: existing cluster names are left untouched.
    if 'cluster_name' in df.columns:
        return df

    # Nothing to cluster; still add the column so callers can rely on it.
    if df.empty:
        df['cluster_name'] = None
        return df

    cluster_names = {
        0: "Electronics",
        1: "Home & Kitchen",
        2: "Books & Media",
        3: "Other Products",
    }

    # TfidfVectorizer rejects NaN / non-string entries, so normalize first.
    texts = df['text'].fillna('').astype(str)
    vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
    text_features = vectorizer.fit_transform(texts)

    # KMeans requires n_samples >= n_clusters; shrink for tiny inputs.
    n_clusters = min(len(cluster_names), len(df))
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_ids = kmeans.fit_predict(text_features)

    # Map cluster numbers to names.
    df['cluster_name'] = [cluster_names[int(cid)] for cid in cluster_ids]
    return df
 # Create and launch the interface
 if __name__ == "__main__":
     demo = create_interface()