Wafaa22 committed on
Commit
357d82f
·
verified ·
1 Parent(s): 9f97a14

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -0
app.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import google.generativeai as genai
4
+ import os
5
+ import json
6
+ import re
7
+
8
# Read the Gemini API key from the environment rather than embedding it in the
# source file, so the secret never ends up in version control.
# NOTE(review): a key was previously committed on this line; it should be
# treated as leaked and revoked/rotated.
api_key = os.environ.get("API_KEY")
if not api_key:
    # Without a key every Gemini call would fail later with a less obvious
    # error, so report the problem up front and halt this script run.
    st.error("Missing Gemini API key: set the API_KEY environment variable.")
    st.stop()
genai.configure(api_key=api_key)
model = genai.GenerativeModel("gemini-1.5-flash")

# Load the schema that is embedded (as JSON text) into the summary and
# pros/cons prompts. The encoding is explicit so the result does not depend on
# the platform's default locale.
with open("./scheme.json", "r", encoding="utf-8") as f:
    gemini_flash_schema = json.load(f)
16
+
17
# Words removed by preprocess_text; built once instead of on every call.
_STOPWORDS = frozenset({
    "the", "is", "in", "at", "on", "a", "an", "and", "or", "for", "to", "of", "with", "that", "by", "it",
})

# Matches digit runs, single non-word/non-space characters, and whitespace runs.
_NOISE_RE = re.compile(r"\d+|[^\w\s]|\s+")


def preprocess_text(text):
    """Normalize review text before it is sent to the model.

    The text is lower-cased; digits, punctuation and runs of whitespace are
    replaced by single spaces; the stopwords in ``_STOPWORDS`` are dropped.

    Args:
        text: Raw review text. Non-string values (``None``, or the float NaN
            that pandas uses for missing cells) are treated as empty text
            instead of raising ``AttributeError``.

    Returns:
        The remaining words joined by single spaces ("" if nothing remains).
    """
    if not isinstance(text, str):
        return ""
    cleaned = _NOISE_RE.sub(" ", text.lower())
    # str.split() already discards leading/trailing/repeated whitespace.
    return " ".join(word for word in cleaned.split() if word not in _STOPWORDS)
24
+
25
def generate_review_grade_with_sentiment(review_text):
    """Ask Gemini for the sentiment and a 1-5 grade of a single review.

    Args:
        review_text: Review text that is embedded in the prompt.

    Returns:
        A ``(sentiment, grade)`` tuple:

        * ``("POSITIVE" | "NEUTRAL" | "NEGATIVE", float)`` when both values
          could be extracted from the model's reply;
        * ``("Unknown", None)`` when the reply could not be parsed;
        * ``(None, None)`` when the request raised (the error is reported
          through ``st.error``).
    """
    try:
        prompt = f"""
    Analyze the following review: {review_text}.

    Determine its sentiment (positive, neutral, or negative) based on your analysis. You can use these examples as a reference, but the actual sentiment should be based on the review's content:
    - **Positive**: "The product was exactly as described, high quality, and arrived quickly." (Example grade: 4 or 5)
    - **Neutral**: "The product is okay, nothing special, but it works as expected." (Example grade: 3)
    - **Negative**: "The product was poorly made, broke easily, and did not meet expectations." (Example grade: 1 or 2)

    After analyzing the review, assign a grade from 1 to 5:
    - **4 or 5** for positive reviews.
    - **3** for neutral reviews.
    - **1 or 2** for negative reviews.

    Make sure the grade reflects the overall tone and content of the review.
    """
        response = model.generate_content(prompt)
        reply = response.text

        # Whole-word match so the label is not picked out of a longer word.
        sentiment_match = re.search(
            r"\b(positive|negative|neutral)\b", reply, re.IGNORECASE
        )

        # Prefer a number that directly follows the word "grade"; otherwise
        # fall back to the first stand-alone number. Either way only values
        # starting with 1-5 are accepted, so the leading digit of e.g. "10"
        # or an unrelated 0/6-9 is never mistaken for the grade.
        grade_match = re.search(
            r"grade\W*([1-5](?:\.\d+)?)(?!\d)", reply, re.IGNORECASE
        ) or re.search(r"(?<![\d.])([1-5](?:\.\d+)?)(?!\d)", reply)

        if sentiment_match and grade_match:
            sentiment_label = sentiment_match.group(1).upper()
            grade = float(grade_match.group(1))
            return sentiment_label, grade
        return "Unknown", None
    except Exception as e:
        # Top-level boundary for the remote call: surface the problem in the
        # UI and let the caller skip this review.
        st.error(f"Error generating sentiment and grade for review: {e}")
        return None, None
58
+
59
def generate_summary(text):
    """Return Gemini's summary of ``text``.

    The module-level schema is serialized to JSON and passed to the model as
    constraints inside the prompt. If anything raises, the error is shown via
    ``st.error`` and a fixed fallback sentence is returned instead.
    """
    try:
        constraints = json.dumps(gemini_flash_schema)
        reply = model.generate_content(
            f"Using the following constraints: {constraints}, summarize the following text: {text}"
        )
        return reply.text.strip()
    except Exception as err:
        st.error(f"Error generating summary: {err}")
        return "Summary could not be generated."
70
+
71
def generate_pros_and_cons(text):
    """Return a ``(pros, cons)`` pair of strings extracted by Gemini.

    The model's reply is cut at the literal markers "Pros:" and "Cons:"; a
    section whose marker is absent is returned as an empty string. If anything
    raises, the error is shown via ``st.error`` and two fixed fallback
    sentences are returned.
    """
    try:
        constraints = json.dumps(gemini_flash_schema)
        reply = model.generate_content(
            f"Using the following constraints: {constraints}, extract pros and cons from the following text: {text}"
        ).text.strip()

        pros = ""
        cons = ""
        if "Pros:" in reply:
            # Text after the first "Pros:" marker, up to the next "Cons:".
            after_pros = reply.split("Pros:")[1]
            pros = after_pros.split("Cons:")[0].strip()
        if "Cons:" in reply:
            cons = reply.split("Cons:")[1].strip()
        return pros, cons
    except Exception as err:
        st.error(f"Error generating pros and cons: {err}")
        return "Pros could not be generated.", "Cons could not be generated."
89
+
90
def calculate_mean_grades(
    csv_path="/content/English_Reviews_WithNewDateISO&IDColumn-WhichIdon'tAgreeOn.csv",
    min_raters=35,
):
    """Grade every product in the reviews CSV and display the result.

    Args:
        csv_path: Location of the reviews CSV. It is read with the columns
            ``product_name`` and ``product_rating`` used here.
        min_raters: Weight given to the global average rating when
            ``process_product_reviews`` computes the weighted rating.

    Returns:
        A dict mapping each product name to the dict returned by
        ``process_product_reviews``, or ``None`` if the file could not be
        decoded with any of the attempted encodings.
    """
    encodings = ("latin1", "ISO-8859-1", "cp1252")
    df = None
    last_error = None
    for enc in encodings:
        try:
            df = pd.read_csv(csv_path, encoding=enc)
            break
        except UnicodeDecodeError as e:
            # Remember the failure but only report it if no encoding works.
            last_error = e

    if df is None:
        st.error(f"Error: {last_error}")
        return None

    global_avg_rating = df["product_rating"].mean()
    result = {}
    for product in df["product_name"].unique():
        filtered_reviews = df[df["product_name"] == product]
        result[product] = process_product_reviews(
            filtered_reviews, global_avg_rating, min_raters
        )

    st.write("Product Grades:")
    st.json(result)
    return result
111
+
112
def process_product_reviews(filtered_reviews, global_avg_rating, min_raters):
    """Grade each review of a product and combine the grades with its ratings.

    For every row the review text is preprocessed and sent to
    ``generate_review_grade_with_sentiment``. Rows for which no grade comes
    back are skipped entirely. For each graded row a weighted rating is
    computed as::

        (rating * n_ratings + global_avg_rating * min_raters)
        / (n_ratings + min_raters)

    Args:
        filtered_reviews: DataFrame with the columns ``product_review_name``,
            ``product_rating`` and ``product_number_of_rating``.
        global_avg_rating: Average rating the weighted rating is pulled toward.
        min_raters: Weight of ``global_avg_rating`` in the formula above.

    Returns:
        A dict with the keys ``mean_grade`` (mean of the model grades),
        ``final_rate`` (average of ``mean_grade`` and the mean weighted
        rating), ``grades`` (list of grades) and ``review_output`` (one dict
        per graded review with ``review``, ``sentiment`` and ``grade``).
        When there are no rows or no review could be graded, both numbers are
        ``None`` and both lists are empty.
    """
    grades = []
    review_output = []
    total_weighted_rating = 0

    # An empty frame simply yields no iterations and falls through to the
    # "nothing graded" result below.
    for _, row in filtered_reviews.iterrows():
        raw_review = row["product_review_name"]
        sentiment_label, grade = generate_review_grade_with_sentiment(
            preprocess_text(raw_review)
        )
        if grade is None:
            continue

        grades.append(grade)
        review_output.append({
            "review": raw_review,
            "sentiment": sentiment_label,
            "grade": grade,
        })

        n_ratings = row["product_number_of_rating"]
        total_weighted_rating += (
            row["product_rating"] * n_ratings + global_avg_rating * min_raters
        ) / (n_ratings + min_raters)

    if not grades:
        return {
            "mean_grade": None,
            "final_rate": None,
            "grades": [],
            "review_output": [],
        }

    mean_grade = sum(grades) / len(grades)
    # Weighted ratings were accumulated for graded rows only, so average over
    # that same count; dividing by every row would deflate the result whenever
    # some reviews could not be graded.
    mean_weighted_rating = total_weighted_rating / len(grades)
    final_rate = (mean_grade + mean_weighted_rating) / 2
    return {
        "mean_grade": mean_grade,
        "final_rate": final_rate,
        "grades": grades,
        "review_output": review_output,
    }
159
+
160
# Streamlit App Layout
st.title("Product Review Analyzer and Grader")

# Input product name for summarization, pros/cons extraction, and grading
product_name = st.text_input("Enter Product Name:")

if product_name:
    default_encoding = "latin1"
    st.subheader(f"Reviews for {product_name}")
    try:
        df = pd.read_csv("/content/English_Reviews_WithNewDateISO&IDColumn-WhichIdon'tAgreeOn.csv", encoding=default_encoding)
    except (OSError, UnicodeDecodeError) as e:
        # Without the data nothing below can run; stop this script run here
        # instead of failing on an undefined `df`.
        st.error(f"Error reading file: {e}")
        st.stop()

    # Treat the user's input as plain text (not a regex) and count rows with a
    # missing product name as non-matching. Copy the selection so the Date
    # conversion below does not write into a slice of `df`.
    name_matches = df["product_name"].str.contains(
        product_name, case=False, regex=False, na=False
    )
    filtered_reviews = df[name_matches].copy()

    if not filtered_reviews.empty:
        global_avg_rating = df["product_rating"].mean()

        # Sort by Date and get the latest 5 reviews
        filtered_reviews["Date"] = pd.to_datetime(filtered_reviews["Date"])
        latest_reviews = filtered_reviews.sort_values(by="Date", ascending=False).head(5)

        # Skip missing reviews so the join never sees a non-string value.
        combined_reviews_text = " ".join(
            filtered_reviews["product_review_name"].dropna().astype(str)
        )

        # Summarize reviews
        st.subheader("Summarization")
        summary = generate_summary(combined_reviews_text)
        st.write(f"Summary:\n{summary}")

        # Generate pros and cons
        st.subheader("Pros and Cons")
        pros, cons = generate_pros_and_cons(combined_reviews_text)
        st.write(f"**Pros:**\n{pros}")
        st.write(f"**Cons:**\n{cons}")

        # Calculate grades only for the latest 5 reviews
        st.subheader("Grades and Ratings for Latest Reviews")
        latest_result = process_product_reviews(latest_reviews, global_avg_rating, 35)

        # Display the latest reviews with sentiment and grade
        for review_info in latest_result["review_output"]:
            st.write(f"Review: {review_info['review']}")
            st.write(f"Sentiment: {review_info['sentiment']}")
            st.write(f"Grade: {review_info['grade']}")

        # Calculate overall grades for all reviews
        overall_result = process_product_reviews(filtered_reviews, global_avg_rating, 35)

        # Show all grades in JSON format
        all_grades_json = {
            "product_name": product_name,
            "grades": overall_result["grades"],
            "mean_grade": overall_result["mean_grade"],
            "final_rate": overall_result["final_rate"],
        }
        st.json(all_grades_json)

    else:
        st.error(f"No reviews found for product: {product_name}")