parthnuwal7 committed on
Commit
99cc145
·
1 Parent(s): a8443d7

Updating models

Browse files
Files changed (1) hide show
  1. src/utils/data_processor.py +44 -0
src/utils/data_processor.py CHANGED
@@ -1081,8 +1081,49 @@ class DataProcessor:
1081
  if task_id and self.task_manager:
1082
  self.task_manager.update_task(task_id, stage='completed', progress=100)
1083
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1084
  return {
1085
  'processed_data': df_processed,
 
 
1086
  'absa_details': absa_results,
1087
  'areas_of_improvement': areas_of_improvement,
1088
  'strength_anchors': strength_anchors,
@@ -1092,6 +1133,9 @@ class DataProcessor:
1092
  'micro_summaries': micro_summaries,
1093
  'summary': {
1094
  'total_reviews': len(df_processed),
 
 
 
1095
  'languages_detected': list(set(detected_languages)),
1096
  'intents_distribution': pd.Series([r['intent'] for r in intent_results]).value_counts().to_dict(),
1097
  'sentiment_distribution': pd.Series(overall_sentiment).value_counts().to_dict(),
 
1081
  if task_id and self.task_manager:
1082
  self.task_manager.update_task(task_id, stage='completed', progress=100)
1083
 
1084
+ # ========== NEW: ASPECT-LEVEL DATA TRANSFORMATION ==========
1085
+ aspect_level_data = []
1086
+ mixed_sentiment_reviews = []
1087
+
1088
+ for idx, row in df_processed.iterrows():
1089
+ aspects = row['aspects'] if isinstance(row['aspects'], list) else []
1090
+ aspect_sentiments = row['aspect_sentiments'] if isinstance(row['aspect_sentiments'], list) else []
1091
+
1092
+ # Check for mixed sentiments (conflicting aspect sentiments)
1093
+ unique_sentiments = set(aspect_sentiments)
1094
+ is_mixed = ('Positive' in unique_sentiments and 'Negative' in unique_sentiments)
1095
+
1096
+ if is_mixed:
1097
+ mixed_sentiment_reviews.append({
1098
+ 'review_id': row['id'],
1099
+ 'review': row['review'],
1100
+ 'aspects': aspects,
1101
+ 'aspect_sentiments': aspect_sentiments,
1102
+ 'intent': row['intent'],
1103
+ 'date': row['date']
1104
+ })
1105
+
1106
+ # Create aspect-level records
1107
+ for aspect, sentiment in zip(aspects, aspect_sentiments):
1108
+ aspect_level_data.append({
1109
+ 'review_id': row['id'],
1110
+ 'review': row['review'],
1111
+ 'aspect': aspect,
1112
+ 'aspect_sentiment': sentiment,
1113
+ 'overall_sentiment': row['overall_sentiment'],
1114
+ 'intent': row['intent'],
1115
+ 'intent_severity': row['intent_severity'],
1116
+ 'date': row['date'],
1117
+ 'language': row['detected_language']
1118
+ })
1119
+
1120
+ aspect_level_df = pd.DataFrame(aspect_level_data) if aspect_level_data else pd.DataFrame()
1121
+ mixed_sentiment_df = pd.DataFrame(mixed_sentiment_reviews) if mixed_sentiment_reviews else pd.DataFrame()
1122
+
1123
  return {
1124
  'processed_data': df_processed,
1125
+ 'aspect_level_data': aspect_level_df, # NEW: Aspect-level granular data
1126
+ 'mixed_sentiment_reviews': mixed_sentiment_df, # NEW: Mixed sentiment detection
1127
  'absa_details': absa_results,
1128
  'areas_of_improvement': areas_of_improvement,
1129
  'strength_anchors': strength_anchors,
 
1133
  'micro_summaries': micro_summaries,
1134
  'summary': {
1135
  'total_reviews': len(df_processed),
1136
+ 'total_aspects': len(aspect_level_df),
1137
+ 'mixed_sentiment_count': len(mixed_sentiment_df),
1138
+ 'mixed_sentiment_pct': round(len(mixed_sentiment_df) / len(df_processed) * 100, 1) if len(df_processed) > 0 else 0,
1139
  'languages_detected': list(set(detected_languages)),
1140
  'intents_distribution': pd.Series([r['intent'] for r in intent_results]).value_counts().to_dict(),
1141
  'sentiment_distribution': pd.Series(overall_sentiment).value_counts().to_dict(),