Slash-eg
/

Product-Summarizer

English

custom

Model card Files Files and versions

xet

Community

Wafaa22 committed on Oct 22, 2024

Commit

357d82f

verified ·

1 Parent(s): 9f97a14

Create app.py

Browse files

Files changed (1) hide show

app.py +217 -0

app.py ADDED Viewed

	@@ -0,0 +1,217 @@

+import streamlit as st
+import pandas as pd
+import google.generativeai as genai
+import os
+import json
+import re
# Gemini client configuration.
# The API key is read from the API_KEY environment variable rather than being
# embedded in the source: a key committed to a repository is readable by everyone
# with access to it and should be considered compromised (revoke and rotate it).
_api_key = os.environ.get("API_KEY")
if not _api_key:
    st.error("API_KEY environment variable is not set; cannot configure Gemini.")
    st.stop()  # Halt this script run; nothing below works without a configured client.
genai.configure(api_key=_api_key)
model = genai.GenerativeModel("gemini-1.5-flash")

# Load the schema that is serialized into the summarization and pros/cons prompts.
with open("./scheme.json", "r", encoding="utf-8") as f:
    gemini_flash_schema = json.load(f)
# Preprocess text function
# Stop words removed from review text; built once at import time instead of on
# every call.
_STOPWORDS = frozenset({
    "the", "is", "in", "at", "on", "a", "an", "and", "or", "for", "to", "of", "with", "that", "by", "it",
})
# Digit runs, single punctuation characters and whitespace runs are each replaced
# by one space.
_NOISE_PATTERN = re.compile(r"\d+|[^\w\s]|\s+")


def preprocess_text(text):
    """Normalize review text before it is sent to the model.

    The text is lower-cased, digits and punctuation are replaced by spaces,
    and the stop words in ``_STOPWORDS`` are dropped. Underscores count as
    word characters and are kept.

    Args:
        text: Raw review text. Anything that is not a ``str`` (for example
            ``None`` or the float NaN that pandas yields for an empty cell)
            is treated as an empty review.

    Returns:
        The remaining words joined by single spaces; ``""`` when nothing
        remains or when *text* is not a string.
    """
    if not isinstance(text, str):
        return ""
    cleaned = _NOISE_PATTERN.sub(" ", text.lower())
    # str.split() already discards leading/trailing whitespace, so no strip() is needed.
    return " ".join(word for word in cleaned.split() if word not in _STOPWORDS)
# Generate sentiment and grade using Gemini
# Explicitly labelled values ("Sentiment: positive", "**Grade:** 4.5") are preferred;
# the unlabelled patterns are a fallback for free-form answers.
_LABELLED_SENTIMENT = re.compile(r"sentiment\W*(positive|negative|neutral)\b", re.IGNORECASE)
_ANY_SENTIMENT = re.compile(r"\b(positive|negative|neutral)\b", re.IGNORECASE)
# Only numbers whose integer part is 1-5 qualify; the look-arounds stop the pattern
# from picking a single digit out of a longer number such as "10" or "0.5".
_LABELLED_GRADE = re.compile(r"grade\W*([1-5](?:\.\d+)?)(?!\d)", re.IGNORECASE)
_ANY_GRADE = re.compile(r"(?<![\d.])([1-5](?:\.\d+)?)(?!\d)")


def _parse_sentiment_and_grade(response_text):
    """Extract ``(SENTIMENT, grade)`` from model output, or ``("Unknown", None)``."""
    sentiment_match = (
        _LABELLED_SENTIMENT.search(response_text) or _ANY_SENTIMENT.search(response_text)
    )
    grade_match = _LABELLED_GRADE.search(response_text) or _ANY_GRADE.search(response_text)
    if sentiment_match is None or grade_match is None:
        return "Unknown", None
    grade = float(grade_match.group(1))
    if not 1 <= grade <= 5:
        # e.g. "5.5": the integer part is in range but the value is not a valid grade.
        return "Unknown", None
    return sentiment_match.group(1).upper(), grade


def generate_review_grade_with_sentiment(review_text):
    """Ask Gemini to classify a review and grade it from 1 to 5.

    Args:
        review_text: The review text to analyze; it is interpolated into the
            prompt as-is.

    Returns:
        A ``(sentiment, grade)`` tuple:

        * ``("POSITIVE" | "NEUTRAL" | "NEGATIVE", float)`` when both values
          could be read from the response;
        * ``("Unknown", None)`` when the response could not be parsed;
        * ``(None, None)`` when the request itself failed (the error is
          shown with ``st.error``).
    """
    try:
        prompt = f"""
        Analyze the following review: {review_text}.
        Determine its sentiment (positive, neutral, or negative) based on your analysis. You can use these examples as a reference, but the actual sentiment should be based on the review's content:
        - **Positive**: "The product was exactly as described, high quality, and arrived quickly." (Example grade: 4 or 5)
        - **Neutral**: "The product is okay, nothing special, but it works as expected." (Example grade: 3)
        - **Negative**: "The product was poorly made, broke easily, and did not meet expectations." (Example grade: 1 or 2)
        After analyzing the review, assign a grade from 1 to 5:
        - **4 or 5** for positive reviews.
        - **3** for neutral reviews.
        - **1 or 2** for negative reviews.
        Make sure the grade reflects the overall tone and content of the review.
        Respond with exactly two lines and nothing else:
        Sentiment: <positive|neutral|negative>
        Grade: <number from 1 to 5>
        """
        response = model.generate_content(prompt)
        return _parse_sentiment_and_grade(response.text)
    except Exception as e:
        # UI boundary: any client/network/safety-filter failure is reported to the
        # user and turned into a "no grade" result instead of aborting the page.
        st.error(f"Error generating sentiment and grade for review: {e}")
        return None, None
# Generate summary using Gemini
def generate_summary(text):
    """Request a summary of *text* from the Gemini model.

    The module-level schema is serialized to JSON and placed in the prompt as
    the constraints the summary should follow.

    Args:
        text: The text to summarize.

    Returns:
        The model's response text with surrounding whitespace removed, or the
        fixed string ``"Summary could not be generated."`` when the request
        raises (the error is also shown with ``st.error``).
    """
    try:
        constraints = json.dumps(gemini_flash_schema)
        reply = model.generate_content(
            f"Using the following constraints: {constraints}, summarize the following text: {text}"
        )
        return reply.text.strip()
    except Exception as err:
        st.error(f"Error generating summary: {err}")
        return "Summary could not be generated."
# Generate pros and cons using Gemini
# A "Pros:" / "Cons:" section header together with any Markdown emphasis or heading
# markers around it, so "**Pros:**", "**Cons**:", "## Pros:" and plain "Cons:" all
# match and none of the markers end up in the extracted section text.
_SECTION_HEADER = re.compile(
    r"[*_#]*[ \t]*(?<![A-Za-z])(Pros|Cons|PROS|CONS)(?![A-Za-z])[ \t]*[*_]*[ \t]*:[ \t]*[*_]*"
)


def _split_pros_and_cons(response_text):
    """Return the ``(pros, cons)`` section bodies found in *response_text*."""
    headers = list(_SECTION_HEADER.finditer(response_text))
    pros_header = next((m for m in headers if m.group(1).lower() == "pros"), None)
    cons_headers = [m for m in headers if m.group(1).lower() == "cons"]
    cons_header = None
    if cons_headers:
        # Prefer the first "Cons:" that follows "Pros:" so that an introductory
        # "Pros and Cons:" line is not mistaken for the cons section itself.
        cons_header = next(
            (
                m for m in cons_headers
                if pros_header is not None and m.start() > pros_header.start()
            ),
            cons_headers[0],
        )

    def section_after(header, other):
        if header is None:
            return ""
        end = len(response_text)
        # A section stops where the other section's header begins, if that comes later.
        if other is not None and other.start() > header.start():
            end = other.start()
        return response_text[header.end():end].strip()

    return section_after(pros_header, cons_header), section_after(cons_header, pros_header)


def generate_pros_and_cons(text):
    """Ask Gemini to extract pros and cons from *text*.

    The module-level schema is serialized to JSON and placed in the prompt as
    constraints. The response is split on its "Pros:" and "Cons:" headers,
    tolerating Markdown markers around them and either section order.

    Args:
        text: The review text to analyze.

    Returns:
        A ``(pros, cons)`` tuple of strings. A section that is absent from
        the response is returned as ``""``. When the request raises, the error
        is shown with ``st.error`` and the fixed pair
        ``("Pros could not be generated.", "Cons could not be generated.")``
        is returned.
    """
    try:
        schema_str = json.dumps(gemini_flash_schema)
        prompt = f"Using the following constraints: {schema_str}, extract pros and cons from the following text: {text}"
        response = model.generate_content(prompt)
        return _split_pros_and_cons(response.text.strip())
    except Exception as e:
        # UI boundary: report the failure and fall back to placeholder text.
        st.error(f"Error generating pros and cons: {e}")
        return "Pros could not be generated.", "Cons could not be generated."
# Calculate mean grades
def calculate_mean_grades(
    csv_path="/content/English_Reviews_WithNewDateISO&IDColumn-WhichIdon'tAgreeOn.csv",
    min_raters=35,
):
    """Grade every product in the reviews CSV and display the result.

    The CSV is expected to contain the columns ``product_name`` and
    ``product_rating`` plus whatever ``process_product_reviews`` reads.

    Args:
        csv_path: Location of the reviews CSV. Defaults to the path the app
            has always used.
        min_raters: Smoothing constant passed to ``process_product_reviews``.

    Returns:
        A dict mapping each distinct ``product_name`` value to the dict
        returned by ``process_product_reviews`` for that product's rows. The
        same dict is rendered with ``st.json``.

    Raises:
        OSError: If the file cannot be opened (not caught here).
    """
    # Strict encodings are tried first. latin1 assigns a character to every byte and
    # therefore never raises UnicodeDecodeError, so it is only useful as the final
    # fallback: listed first, it would silently mis-decode UTF-8 and cp1252 files.
    for enc in ("utf-8", "cp1252"):
        try:
            df = pd.read_csv(csv_path, encoding=enc)
        except UnicodeDecodeError:
            continue
        break
    else:
        df = pd.read_csv(csv_path, encoding="latin1")

    global_avg_rating = df["product_rating"].mean()
    result = {}
    for product in df["product_name"].unique():
        filtered_reviews = df[df["product_name"] == product]
        result[product] = process_product_reviews(filtered_reviews, global_avg_rating, min_raters)
    st.write("Product Grades:")
    st.json(result)
    return result
def process_product_reviews(filtered_reviews, global_avg_rating, min_raters):
    """Grade each review of one product and combine grades with its ratings.

    For every row the review text is preprocessed and sent to
    ``generate_review_grade_with_sentiment``. Rows that yield a grade also
    contribute a smoothed rating::

        (rating * n_ratings + global_avg_rating * min_raters) / (n_ratings + min_raters)

    Args:
        filtered_reviews: DataFrame with the columns ``product_review_name``,
            ``product_rating`` and ``product_number_of_rating``.
        global_avg_rating: Mean rating used as the prior in the smoothed rating.
        min_raters: Weight given to ``global_avg_rating`` in the smoothed rating.

    Returns:
        A dict with the keys ``mean_grade`` (mean model grade), ``final_rate``
        (average of the mean grade and the mean smoothed rating), ``grades``
        (list of grades) and ``review_output`` (one dict per graded review
        with ``review``, ``sentiment`` and ``grade``). When no review could
        be graded, the two numbers are ``None`` and the lists are empty.
    """
    grades = []
    weighted_ratings = []
    review_output = []
    for _, row in filtered_reviews.iterrows():
        raw_review = row["product_review_name"]
        # Empty CSV cells are loaded by pandas as NaN (a float), which has no text
        # to grade; skip them instead of failing inside the text preprocessing.
        if not isinstance(raw_review, str):
            continue
        sentiment_label, grade = generate_review_grade_with_sentiment(
            preprocess_text(raw_review)
        )
        if grade is None:
            continue
        grades.append(grade)
        review_output.append({
            "review": raw_review,
            "sentiment": sentiment_label,
            "grade": grade,
        })
        rating_count = row["product_number_of_rating"]
        weighted_ratings.append(
            (row["product_rating"] * rating_count + global_avg_rating * min_raters)
            / (rating_count + min_raters)
        )

    if not grades:
        return {
            "mean_grade": None,
            "final_rate": None,
            "grades": [],
            "review_output": [],
        }

    mean_grade = sum(grades) / len(grades)
    # Smoothed ratings are collected only for graded reviews, so they are averaged
    # over that same count; dividing by the number of all rows would pull the value
    # down whenever a review could not be graded.
    mean_weighted_rating = sum(weighted_ratings) / len(weighted_ratings)
    return {
        "mean_grade": mean_grade,
        "final_rate": (mean_grade + mean_weighted_rating) / 2,
        "grades": grades,
        "review_output": review_output,
    }
# Streamlit App Layout
st.title("Product Review Analyzer and Grader")
# Input product name for summarization, pros/cons extraction, and grading
product_name = st.text_input("Enter Product Name:")
if product_name:
    default_encoding = "latin1"
    st.subheader(f"Reviews for {product_name}")
    try:
        df = pd.read_csv("/content/English_Reviews_WithNewDateISO&IDColumn-WhichIdon'tAgreeOn.csv", encoding=default_encoding)
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:
        st.error(f"Error reading file: {e}")
        # Without the data nothing below can run; end this script run here rather
        # than continuing to an undefined `df`.
        st.stop()
    # regex=False: the product name is user input and must be matched literally
    # (characters such as "(" or "+" would otherwise be parsed as a pattern).
    # na=False: rows without a product name count as "no match" instead of putting
    # NaN into the boolean mask.
    name_matches = df["product_name"].str.contains(product_name, case=False, regex=False, na=False)
    # Work on a copy so that adding/overwriting the Date column below does not
    # write into a slice of `df`.
    filtered_reviews = df[name_matches].copy()
    if not filtered_reviews.empty:
        global_avg_rating = df["product_rating"].mean()
        min_raters = 35
        # Sort by Date and get the latest 5 reviews
        filtered_reviews['Date'] = pd.to_datetime(filtered_reviews['Date'])
        latest_reviews = filtered_reviews.sort_values(by='Date', ascending=False).head(5)
        # Rows with no review text are left out of the combined text; joining NaN
        # values would raise a TypeError.
        combined_reviews_text = " ".join(filtered_reviews["product_review_name"].dropna().astype(str))
        # Summarize reviews
        st.subheader("Summarization")
        summary = generate_summary(combined_reviews_text)
        st.write(f"Summary:\n{summary}")
        # Generate pros and cons
        st.subheader("Pros and Cons")
        pros, cons = generate_pros_and_cons(combined_reviews_text)
        st.write(f"**Pros:**\n{pros}")
        st.write(f"**Cons:**\n{cons}")
        # Calculate grades only for the latest 5 reviews
        st.subheader("Grades and Ratings for Latest Reviews")
        latest_result = process_product_reviews(latest_reviews, global_avg_rating, min_raters)
        # Display the latest reviews with sentiment and grade
        for review_info in latest_result["review_output"]:
            st.write(f"Review: {review_info['review']}")
            st.write(f"Sentiment: {review_info['sentiment']}")
            st.write(f"Grade: {review_info['grade']}")
        # Calculate overall grades for all reviews
        # NOTE(review): this grades the latest 5 reviews a second time, i.e. one extra
        # model call per review already shown above.
        overall_result = process_product_reviews(filtered_reviews, global_avg_rating, min_raters)
        # Show all grades in JSON format
        all_grades_json = {
            "product_name": product_name,
            "grades": overall_result["grades"],
            "mean_grade": overall_result["mean_grade"],
            "final_rate": overall_result["final_rate"],
        }
        st.json(all_grades_json)
    else:
        st.error(f"No reviews found for product: {product_name}")