Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import google.generativeai as genai
|
| 4 |
+
import os
|
| 5 |
+
import json
|
| 6 |
+
import re
|
| 7 |
+
|
| 8 |
+
# Read the Gemini API key from the environment instead of embedding it in the
# source: a key committed to a repository is readable by anyone with access to
# it and must be treated as compromised (revoke and rotate it).
api_key = os.environ.get("API_KEY")
if not api_key:
    raise RuntimeError(
        "The API_KEY environment variable is not set; export your Gemini API key "
        "before starting the app."
    )
genai.configure(api_key=api_key)
model = genai.GenerativeModel("gemini-1.5-flash")

# Load schema for Gemini model (if needed for your specific summarization task).
# The schema is serialized into the summary and pros/cons prompts below.
with open("./scheme.json", "r", encoding="utf-8") as f:
    gemini_flash_schema = json.load(f)
|
| 16 |
+
|
| 17 |
+
# Preprocess text function
|
| 18 |
+
def preprocess_text(text):
    """Normalize review text before it is sent for grading.

    The text is lower-cased; every run of digits, every punctuation character
    and every run of whitespace is replaced by a single space; finally a small
    fixed set of English stopwords is dropped.

    Args:
        text: The raw review string.

    Returns:
        The remaining words joined by single spaces.
    """
    ignored_words = frozenset((
        "the", "is", "in", "at", "on", "a", "an", "and", "or", "for", "to", "of", "with", "that", "by", "it",
    ))
    lowered = text.lower()
    cleaned = re.sub(r"\d+|[^\w\s]|\s+", " ", lowered).strip()
    kept_words = filter(lambda token: token not in ignored_words, cleaned.split())
    return " ".join(kept_words)
|
| 24 |
+
|
| 25 |
+
# Generate sentiment and grade using Gemini
|
| 26 |
+
def generate_review_grade_with_sentiment(review_text):
    """Ask the Gemini model for the sentiment and a 1-5 grade of one review.

    Args:
        review_text: Review text that is embedded in the prompt.

    Returns:
        A ``(sentiment_label, grade)`` tuple:

        * on success, the upper-cased first occurrence of
          "positive" / "negative" / "neutral" in the reply and the grade as a
          float between 1 and 5;
        * ``("Unknown", None)`` when the reply lacks a sentiment word or a
          usable grade;
        * ``(None, None)`` when the request raised (the error is reported
          through ``st.error``).
    """
    try:
        prompt = f"""
        Analyze the following review: {review_text}.

        Determine its sentiment (positive, neutral, or negative) based on your analysis. You can use these examples as a reference, but the actual sentiment should be based on the review's content:
        - **Positive**: "The product was exactly as described, high quality, and arrived quickly." (Example grade: 4 or 5)
        - **Neutral**: "The product is okay, nothing special, but it works as expected." (Example grade: 3)
        - **Negative**: "The product was poorly made, broke easily, and did not meet expectations." (Example grade: 1 or 2)

        After analyzing the review, assign a grade from 1 to 5:
        - **4 or 5** for positive reviews.
        - **3** for neutral reviews.
        - **1 or 2** for negative reviews.

        Make sure the grade reflects the overall tone and content of the review.
        """
        response = model.generate_content(prompt)
        reply = response.text

        # Extract only sentiment and grade
        sentiment_match = re.search(r"(positive|negative|neutral)", reply, re.IGNORECASE)
        # Accept only a standalone number whose integer part is 1-5. The
        # look-arounds reject digits that belong to a longer number, so "10"
        # is not read as 1 and "0.5" is not read as 5.
        grade_match = re.search(r"(?<![\d.])[1-5](?:\.\d+)?(?!\d)", reply)

        if sentiment_match and grade_match:
            sentiment_label = sentiment_match.group().upper()
            # Clamp values such as "5.5" back onto the requested 1-5 scale.
            grade = min(float(grade_match.group()), 5.0)
            return sentiment_label, grade
        return "Unknown", None
    except Exception as e:
        # Top-level boundary around the remote call: report and let the caller
        # skip this review instead of aborting the whole page.
        st.error(f"Error generating sentiment and grade for review: {e}")
        return None, None
|
| 58 |
+
|
| 59 |
+
# Generate summary using Gemini
|
| 60 |
+
def generate_summary(text):
    """Summarize ``text`` with the Gemini model.

    The module-level schema is serialized to JSON and placed in the prompt as
    the constraints for the summary.

    Args:
        text: The text to summarize.

    Returns:
        The model reply with surrounding whitespace removed, or the fixed
        fallback message when anything raises (the error is also reported
        through ``st.error``).
    """
    try:
        constraints = json.dumps(gemini_flash_schema)
        reply = model.generate_content(
            f"Using the following constraints: {constraints}, summarize the following text: {text}"
        )
        return reply.text.strip()
    except Exception as e:
        st.error(f"Error generating summary: {e}")
        return "Summary could not be generated."
|
| 70 |
+
|
| 71 |
+
# Generate pros and cons using Gemini
|
| 72 |
+
def _section_after(response_text, heading, next_heading):
    """Return the text that follows ``heading``, stopping at a later ``next_heading``."""
    start = response_text.find(heading)
    if start == -1:
        return ""
    start += len(heading)
    end = response_text.find(next_heading, start)
    if end == -1:
        section = response_text[start:]
    else:
        # Asterisks touching the next heading are its opening Markdown
        # emphasis ("**Cons:**"), not part of this section.
        section = response_text[start:end].rstrip("*")
    # Asterisks touching this heading's colon are its closing emphasis
    # ("**Pros:**"). Bullets such as "* item" are preceded by whitespace and
    # therefore survive.
    return section.lstrip("*").strip()


def generate_pros_and_cons(text):
    """Extract pros and cons from ``text`` with the Gemini model.

    The module-level schema is serialized to JSON and placed in the prompt as
    constraints. The reply is split on the literal headings "Pros:" and
    "Cons:"; each section ends where the other heading starts, whichever
    order the two headings appear in.

    Args:
        text: The text (e.g. concatenated reviews) to analyze.

    Returns:
        A ``(pros, cons)`` tuple of strings. A section whose heading is
        missing from the reply is returned as ``""``. When anything raises,
        the error is reported through ``st.error`` and two fixed fallback
        messages are returned.
    """
    try:
        schema_str = json.dumps(gemini_flash_schema)
        prompt = f"Using the following constraints: {schema_str}, extract pros and cons from the following text: {text}"
        response = model.generate_content(prompt)
        response_text = response.text.strip()

        pros = _section_after(response_text, "Pros:", "Cons:")
        cons = _section_after(response_text, "Cons:", "Pros:")

        return pros, cons
    except Exception as e:
        st.error(f"Error generating pros and cons: {e}")
        return "Pros could not be generated.", "Cons could not be generated."
|
| 89 |
+
|
| 90 |
+
# Calculate mean grades
|
| 91 |
+
def calculate_mean_grades():
    """Grade every product in the reviews CSV and display the result.

    The CSV is read with the first encoding that decodes it. For each distinct
    ``product_name`` the matching rows are passed to
    ``process_product_reviews`` together with the mean of the whole
    ``product_rating`` column and a minimum-raters constant of 35.

    Returns:
        A dict mapping product name to the dict returned by
        ``process_product_reviews``; the same dict is rendered with
        ``st.json``. ``None`` if the file could not be decoded.
    """
    csv_path = "/content/English_Reviews_WithNewDateISO&IDColumn-WhichIdon'tAgreeOn.csv"
    # Strict encodings go first. latin1 maps every possible byte to a
    # character, so it never raises UnicodeDecodeError; placed first it would
    # make the fallbacks unreachable and silently garble UTF-8 files.
    encodings = ["utf-8", "cp1252", "latin1"]

    for enc in encodings:
        try:
            df = pd.read_csv(csv_path, encoding=enc)
        except UnicodeDecodeError:
            continue
        break
    else:
        # Only reachable if the list above is edited to drop latin1.
        st.error(f"Error: could not decode {csv_path} with any of {encodings}")
        return None

    global_avg_rating = df["product_rating"].mean()
    min_raters = 35
    result = {}

    for product in df["product_name"].unique():
        filtered_reviews = df[df["product_name"] == product]
        result[product] = process_product_reviews(filtered_reviews, global_avg_rating, min_raters)

    st.write("Product Grades:")
    st.json(result)
    return result
|
| 111 |
+
|
| 112 |
+
def _empty_review_result():
    """Return a fresh result dict for the case where nothing could be graded."""
    return {
        "mean_grade": None,
        "final_rate": None,
        "grades": [],
        "review_output": []
    }


def process_product_reviews(filtered_reviews, global_avg_rating, min_raters):
    """Grade each review of a product and combine grades with its ratings.

    Every row's ``product_review_name`` is preprocessed and graded by
    ``generate_review_grade_with_sentiment``. For each graded row a weighted
    rating is computed as::

        (product_rating * product_number_of_rating + global_avg_rating * min_raters)
            / (product_number_of_rating + min_raters)

    Args:
        filtered_reviews: DataFrame with the columns ``product_review_name``,
            ``product_rating`` and ``product_number_of_rating``.
        global_avg_rating: Rating the weighted rating is pulled towards.
        min_raters: Weight given to ``global_avg_rating``.

    Returns:
        A dict with ``mean_grade`` (mean of the model grades), ``final_rate``
        (average of ``mean_grade`` and the mean weighted rating), ``grades``
        and ``review_output`` (one ``review`` / ``sentiment`` / ``grade`` dict
        per graded row). ``mean_grade`` and ``final_rate`` are ``None`` and the
        lists are empty when the frame is empty or no row could be graded.
    """
    if filtered_reviews.empty:
        return _empty_review_result()

    grades = []
    review_output = []
    total_weighted_rating = 0

    for _, row in filtered_reviews.iterrows():
        raw_review = row["product_review_name"]
        if not isinstance(raw_review, str):
            # A missing CSV cell is loaded by pandas as NaN (a float), which
            # has no text to grade and would crash preprocess_text.
            continue

        sentiment_label, grade = generate_review_grade_with_sentiment(preprocess_text(raw_review))
        if grade is None:
            continue

        grades.append(grade)
        review_output.append({
            "review": raw_review,
            "sentiment": sentiment_label,
            "grade": grade
        })

        rating_count = row["product_number_of_rating"]
        total_weighted_rating += (
            row["product_rating"] * rating_count + global_avg_rating * min_raters
        ) / (rating_count + min_raters)

    if not grades:
        return _empty_review_result()

    mean_grade = sum(grades) / len(grades)
    # Weighted ratings were accumulated only for graded rows, so they are
    # averaged over that same count; dividing by all rows would drag the
    # result down whenever grading failed for some reviews.
    mean_weighted_rating = total_weighted_rating / len(grades)
    final_rate = (mean_grade + mean_weighted_rating) / 2
    return {
        "mean_grade": mean_grade,
        "final_rate": final_rate,
        "grades": grades,
        "review_output": review_output
    }
|
| 159 |
+
|
| 160 |
+
# Streamlit App Layout
st.title("Product Review Analyzer and Grader")

# Input product name for summarization, pros/cons extraction, and grading
product_name = st.text_input("Enter Product Name:")

if product_name:
    default_encoding = "latin1"
    st.subheader(f"Reviews for {product_name}")
    try:
        df = pd.read_csv("/content/English_Reviews_WithNewDateISO&IDColumn-WhichIdon'tAgreeOn.csv", encoding=default_encoding)
    except (OSError, UnicodeDecodeError) as e:
        st.error(f"Error reading file: {e}")
        # Nothing below can run without the data; end this script run here
        # instead of failing later on an undefined df.
        st.stop()

    # Literal, case-insensitive substring match. regex=False keeps input such
    # as "c++" or "(" from being parsed as a pattern; na=False treats rows
    # with a missing product name as non-matches instead of breaking the mask.
    name_matches = df["product_name"].str.contains(product_name, case=False, regex=False, na=False)
    # Copy so the Date conversion below does not assign into a slice of df.
    filtered_reviews = df[name_matches].copy()

    if not filtered_reviews.empty:
        # Sort by Date and get the latest 5 reviews
        filtered_reviews['Date'] = pd.to_datetime(filtered_reviews['Date'])
        latest_reviews = filtered_reviews.sort_values(by='Date', ascending=False).head(5)

        # Missing review cells are NaN floats and cannot be joined as text.
        combined_reviews_text = " ".join(filtered_reviews["product_review_name"].dropna().astype(str))

        # Summarize reviews
        st.subheader("Summarization")
        summary = generate_summary(combined_reviews_text)
        st.write(f"Summary:\n{summary}")

        # Generate pros and cons
        st.subheader("Pros and Cons")
        pros, cons = generate_pros_and_cons(combined_reviews_text)
        st.write(f"**Pros:**\n{pros}")
        st.write(f"**Cons:**\n{cons}")

        global_avg_rating = df["product_rating"].mean()

        # Calculate grades only for the latest 5 reviews
        st.subheader("Grades and Ratings for Latest Reviews")
        latest_result = process_product_reviews(latest_reviews, global_avg_rating, 35)

        # Display the latest reviews with sentiment and grade
        for review_info in latest_result["review_output"]:
            st.write(f"Review: {review_info['review']}")
            st.write(f"Sentiment: {review_info['sentiment']}")
            st.write(f"Grade: {review_info['grade']}")

        # Calculate overall grades for all reviews
        overall_result = process_product_reviews(filtered_reviews, global_avg_rating, 35)

        # Show all grades in JSON format
        all_grades_json = {
            "product_name": product_name,
            "grades": overall_result["grades"],
            "mean_grade": overall_result["mean_grade"],
            "final_rate": overall_result["final_rate"],
        }
        st.json(all_grades_json)

    else:
        st.error(f"No reviews found for product: {product_name}")
|