kushvanth committed on
Commit
68aba94
·
verified ·
1 Parent(s): f02f5b9

Update fastapi_example.py

Browse files
Files changed (1) hide show
  1. fastapi_example.py +389 -87
fastapi_example.py CHANGED
@@ -1,6 +1,10 @@
 
 
 
1
  # """
2
  # Enhanced FastAPI Service for Comment Sentiment Analysis
3
  # with improved performance, validation, and configuration management
 
4
  # """
5
 
6
  # from fastapi import FastAPI, HTTPException, Depends
@@ -80,13 +84,13 @@
80
  # """Application settings with environment variable support"""
81
  # # API Settings
82
  # app_name: str = "Comment Analysis API"
83
- # app_version: str = "2.0.0"
84
  # debug_mode: bool = False
85
 
86
  # # Request Limits
87
  # max_comments_per_request: int = 1000
88
  # max_comment_length: int = 5000
89
- # min_comment_words: int = 2
90
 
91
  # # Sentiment Thresholds
92
  # vader_pos_threshold: float = 0.2
@@ -113,11 +117,30 @@
113
  # env_file = ".env"
114
  # env_file_encoding = 'utf-8'
115
  # extra = 'ignore'
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  # @lru_cache()
118
  # def get_settings() -> Settings:
119
  # """Cached settings instance"""
120
- # return Settings()
 
 
 
 
 
 
 
121
 
122
  # # Pydantic Models
123
  # class FacultyInfo(BaseModel):
@@ -208,7 +231,7 @@
208
  # device = None
209
  # summarizer = None
210
 
211
- # # Heuristic phrase/regex rules for explicit negative feedback
212
  # NEGATIVE_PHRASES = [
213
  # 'need more staff',
214
  # 'need more faculty',
@@ -225,6 +248,7 @@
225
  # 'boring class',
226
  # 'boring classes',
227
  # 'waste of time',
 
228
  # 'bad teacher',
229
  # 'bad teaching',
230
  # 'poor teaching',
@@ -238,6 +262,8 @@
238
  # 'lectures are going fast',
239
  # 'going too fast',
240
  # 'too fast',
 
 
241
  # 'lacking',
242
  # 'is lacking',
243
  # 'knowledge is lacking',
@@ -245,26 +271,67 @@
245
  # 'no practical',
246
  # 'lack of practical',
247
  # 'no hands-on',
248
- # 'no real world'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  # ]
250
 
251
  # NEGATIVE_REGEXES = [
252
  # re.compile(r"\bno\s+(proper|sufficient)\s+(classes|notes|support)\b", re.IGNORECASE),
253
  # re.compile(r"\bno\s+staff\b", re.IGNORECASE),
254
  # re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support)\b", re.IGNORECASE),
255
- # re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(are\s+)?(too|very)\s+fast\b", re.IGNORECASE),
256
- # re.compile(r"\blectures?\s+are\s+going\s+fast\b", re.IGNORECASE),
257
  # re.compile(r"\b(require|needs?|needed)\s+(some\s+)?improv(e|ement)s?\s+(in|of)?\s*communication(\s+skills?)?\b", re.IGNORECASE),
258
  # re.compile(r"\b(is\s+)?lacking\b", re.IGNORECASE),
259
- # re.compile(r"\bno\s+(practical|hands-on|real-world)\b", re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  # ]
261
 
262
  # META_COMMENT_PATTERNS = [
263
- # re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)$", re.IGNORECASE),
264
- # re.compile(r"^no\s+remarks?\s+(about|on)", re.IGNORECASE),
265
- # re.compile(r"^nil$", re.IGNORECASE),
266
- # re.compile(r"^none$", re.IGNORECASE),
267
- # re.compile(r"^n/?a$", re.IGNORECASE)
 
 
 
268
  # ]
269
 
270
  # def is_meta_comment(text: str) -> bool:
@@ -275,22 +342,27 @@
275
 
276
  # for pattern in META_COMMENT_PATTERNS:
277
  # if pattern.match(text):
 
278
  # return True
279
 
280
  # return False
281
 
282
  # def is_explicit_negative(text: str) -> bool:
283
- # """Check if text contains explicit negative phrases"""
284
  # if not text:
285
  # return False
286
  # lower = text.lower()
287
 
 
288
  # for phrase in NEGATIVE_PHRASES:
289
  # if phrase in lower:
 
290
  # return True
291
 
 
292
  # for regex in NEGATIVE_REGEXES:
293
  # if regex.search(text):
 
294
  # return True
295
 
296
  # return False
@@ -305,7 +377,7 @@
305
 
306
  # # Initialize VADER (NLTK data already downloaded)
307
  # sia = SentimentIntensityAnalyzer()
308
- # logger.info("VADER initialized")
309
 
310
  # # Initialize RoBERTa with caching
311
  # cache_dir = settings.model_cache_dir
@@ -323,7 +395,7 @@
323
  # device = "cuda" if torch.cuda.is_available() else "cpu"
324
  # model.to(device)
325
  # model.eval()
326
- # logger.info(f"RoBERTa initialized on device: {device}")
327
 
328
  # # Initialize summarizer (optional)
329
  # if settings.use_abstractive_summary:
@@ -333,12 +405,12 @@
333
  # model=settings.summarizer_model,
334
  # device=0 if device == "cuda" else -1
335
  # )
336
- # logger.info("Summarizer initialized")
337
  # except Exception as e:
338
  # logger.warning(f"Summarizer initialization failed: {e}")
339
  # summarizer = None
340
 
341
- # logger.info("All models initialized successfully")
342
 
343
  # except Exception as e:
344
  # logger.error(f"Error initializing models: {e}")
@@ -437,9 +509,11 @@
437
  # roberta_neg = row.get('roberta_neg', 0.0)
438
  # roberta_pos = row.get('roberta_pos', 0.0)
439
 
 
440
  # if row.get('heuristic_negative') is True:
441
  # return 'Negative'
442
 
 
443
  # if (
444
  # vader_compound <= settings.vader_neg_threshold or
445
  # roberta_neg >= settings.roberta_neg_threshold or
@@ -447,6 +521,7 @@
447
  # ):
448
  # return 'Negative'
449
 
 
450
  # if (
451
  # vader_compound >= settings.vader_pos_threshold or
452
  # roberta_pos >= settings.roberta_pos_threshold or
@@ -454,14 +529,17 @@
454
  # ):
455
  # return 'Positive'
456
 
 
457
  # return 'Neutral'
458
 
459
  # def sanitize_text(text: str) -> str:
460
- # """Sanitize input text"""
461
  # if not text:
462
  # return ""
 
 
 
463
  # text = ' '.join(text.split())
464
- # text = ''.join(char for char in text if ord(char) >= 32 or char == '\n')
465
  # return text.strip()
466
 
467
  # def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]:
@@ -470,12 +548,13 @@
470
  # settings = get_settings()
471
  # logger.info(f"Received {len(comments)} comments for analysis")
472
 
 
473
  # sanitized_comments = [sanitize_text(comment) for comment in comments]
474
 
 
475
  # filtered_comments = [
476
  # comment for comment in sanitized_comments
477
- # if (settings.min_comment_words < len(comment.split()) <= settings.max_comment_length
478
- # and not is_meta_comment(comment))
479
  # ]
480
 
481
  # logger.info(f"After filtering: {len(filtered_comments)} valid comments")
@@ -486,19 +565,32 @@
486
  # "message": "No valid comments found for analysis"
487
  # }
488
 
 
489
  # df = pd.DataFrame({'comment': filtered_comments})
 
 
 
490
  # df['heuristic_negative'] = df['comment'].apply(is_explicit_negative)
491
 
 
 
 
 
 
 
492
  # vader_results = []
493
  # for text in df['comment']:
494
  # vader_results.append(vader_sentiment(text))
495
 
 
496
  # roberta_results = roberta_sentiment_batch(df['comment'].tolist())
497
 
 
498
  # vader_df = pd.DataFrame(vader_results)
499
  # roberta_df = pd.DataFrame(roberta_results)
500
  # final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1)
501
 
 
502
  # final_df['combined_pos'] = (
503
  # settings.combined_weight_vader * final_df['vader_pos'] +
504
  # settings.combined_weight_roberta * final_df['roberta_pos']
@@ -512,11 +604,13 @@
512
  # settings.combined_weight_roberta * final_df['roberta_neu']
513
  # )
514
 
 
515
  # final_df['Overall_Sentiment'] = final_df.apply(
516
- # lambda row: overall_sentiment(row, settings),
517
  # axis=1
518
  # )
519
 
 
520
  # total_comments = len(final_df)
521
  # positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive'])
522
  # negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative'])
@@ -527,10 +621,12 @@
527
  # f"{negative_count} negative, {neutral_count} neutral"
528
  # )
529
 
 
530
  # avg_positive = float(final_df['combined_pos'].mean())
531
  # avg_negative = float(final_df['combined_neg'].mean())
532
  # avg_neutral = float(final_df['combined_neu'].mean())
533
 
 
534
  # if avg_positive > max(avg_negative, avg_neutral):
535
  # overall_sentiment_label = "Positive"
536
  # elif avg_negative > max(avg_positive, avg_neutral):
@@ -538,6 +634,7 @@
538
  # else:
539
  # overall_sentiment_label = "Neutral"
540
 
 
541
  # negative_summary = ""
542
  # negative_comments_list = []
543
  # negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative']
@@ -546,6 +643,7 @@
546
  # negative_comments_list = negative_comments['comment'].tolist()
547
 
548
  # try:
 
549
  # top_idx = negative_comments['combined_neg'].nlargest(3).index
550
  # top_comments = negative_comments.loc[top_idx, 'comment'].tolist()
551
 
@@ -562,11 +660,13 @@
562
  # )
563
  # negative_summary = summary_result[0]['summary_text']
564
  # else:
 
565
  # negative_summary = "; ".join(top_comments)
566
  # except Exception as e:
567
  # logger.warning(f"Summary generation failed: {e}")
568
  # negative_summary = "; ".join(negative_comments_list[:3])
569
 
 
570
  # insights = []
571
  # recommendations = []
572
 
@@ -647,11 +747,14 @@
647
  # async def startup_event():
648
  # """Initialize models on startup"""
649
  # try:
650
- # logger.info("===== Application Startup at {} =====".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
 
 
651
  # initialize_models()
652
- # logger.info("Service started successfully")
 
653
  # except Exception as e:
654
- # logger.error(f"Startup failed: {e}")
655
  # raise e
656
 
657
  # @app.on_event("shutdown")
@@ -665,7 +768,13 @@
665
  # return {
666
  # "service": get_settings().app_name,
667
  # "version": get_settings().app_version,
668
- # "status": "running"
 
 
 
 
 
 
669
  # }
670
 
671
  # @app.get("/health")
@@ -740,18 +849,61 @@
740
 
741
  # @app.get("/config")
742
  # async def get_config(settings: Settings = Depends(get_settings)):
743
- # """Get current configuration"""
744
  # if not settings.debug_mode:
745
  # raise HTTPException(status_code=404, detail="Not found")
746
 
747
  # return {
748
  # "max_comments_per_request": settings.max_comments_per_request,
 
 
749
  # "vader_pos_threshold": settings.vader_pos_threshold,
750
  # "vader_neg_threshold": settings.vader_neg_threshold,
751
  # "roberta_pos_threshold": settings.roberta_pos_threshold,
752
  # "roberta_neg_threshold": settings.roberta_neg_threshold,
 
 
753
  # "enable_caching": settings.enable_caching,
754
- # "batch_size": settings.batch_size
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
755
  # }
756
 
757
  # if __name__ == "__main__":
@@ -765,6 +917,13 @@
765
 
766
 
767
 
 
 
 
 
 
 
 
768
  """
769
  Enhanced FastAPI Service for Comment Sentiment Analysis
770
  with improved performance, validation, and configuration management
@@ -997,113 +1156,240 @@ summarizer = None
997
 
998
  # Enhanced heuristic phrase/regex rules for explicit negative feedback
999
  NEGATIVE_PHRASES = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1000
  'need more staff',
1001
  'need more faculty',
1002
  'insufficient staff',
1003
  'lack of staff',
 
 
 
 
 
 
 
 
1004
  'lack of knowledge',
1005
  'better knowledge needed',
1006
  'poor knowledge',
1007
- 'not good',
1008
- 'not satisfied',
1009
- 'not satisfactory',
1010
- 'no classes',
1011
- 'no regular classes',
1012
- 'boring class',
1013
- 'boring classes',
1014
- 'waste of time',
1015
- 'wasting time',
1016
- 'bad teacher',
1017
- 'bad teaching',
1018
- 'poor teaching',
 
 
 
 
 
 
 
1019
  'improve class',
1020
  'improvement needed',
1021
  'needs improvement',
 
 
 
1022
  'not helpful',
1023
  'not clear',
1024
  'communication skills need improvement',
1025
  'improve communication',
 
 
1026
  'lectures are going fast',
1027
  'going too fast',
1028
  'too fast',
1029
  'too slow',
1030
- 'too boring',
1031
- 'lacking',
1032
- 'is lacking',
1033
- 'knowledge is lacking',
1034
- 'practical knowledge lacking',
1035
- 'no practical',
1036
- 'lack of practical',
1037
- 'no hands-on',
1038
- 'no real world',
1039
  'too lag',
1040
  'lag',
1041
  'lagging',
1042
  'lag in teaching',
1043
- 'not interested',
1044
- 'no interest',
1045
  'not managing time',
1046
  'poor time management',
1047
- 'time management',
1048
- 'not at all',
 
 
 
1049
  'going for attendance',
1050
  'just for attendance',
1051
  'only for attendance',
1052
- 'cant understand',
1053
- "can't understand",
1054
- 'not understandable',
1055
  'nothing learnt',
1056
  'learned nothing',
1057
  'no improvement',
1058
  'same teaching',
1059
  'monotonous',
1060
  'sleeping in class',
 
 
 
 
 
1061
  'no use',
1062
  'useless',
1063
- 'waste our time'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1064
  ]
1065
 
1066
  NEGATIVE_REGEXES = [
1067
- re.compile(r"\bno\s+(proper|sufficient)\s+(classes|notes|support)\b", re.IGNORECASE),
1068
- re.compile(r"\bno\s+staff\b", re.IGNORECASE),
1069
- re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support)\b", re.IGNORECASE),
1070
- re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(are\s+)?(too|very)\s+(fast|slow|boring)\b", re.IGNORECASE),
1071
- re.compile(r"\blectures?\s+are\s+going\s+(too\s+)?fast\b", re.IGNORECASE),
1072
- re.compile(r"\b(require|needs?|needed)\s+(some\s+)?improv(e|ement)s?\s+(in|of)?\s*communication(\s+skills?)?\b", re.IGNORECASE),
1073
- re.compile(r"\b(is\s+)?lacking\b", re.IGNORECASE),
1074
- re.compile(r"\bno\s+(practical|hands-on|real-world)\b", re.IGNORECASE),
1075
- re.compile(r"\btoo\s+(lag|lagging?|slow|boring)\b", re.IGNORECASE),
1076
- re.compile(r"\b(not\s+)?managing\s+time\b", re.IGNORECASE),
1077
- re.compile(r"\btime\s+management", re.IGNORECASE),
1078
- re.compile(r"\bnot\s+interested(\s+in|\s+to)?\b", re.IGNORECASE),
1079
- re.compile(r"\bno\s+interest\b", re.IGNORECASE),
1080
- re.compile(r"\b(just\s+|only\s+)?for\s+attendance\b", re.IGNORECASE),
1081
- re.compile(r"\b(just\s+)?going\s+(to|for)\s+(her|his|their)\s+class\b", re.IGNORECASE),
1082
- re.compile(r"\bnot\s+at\s+all\b", re.IGNORECASE),
1083
- re.compile(r"\b(overall|its?)\s+(is\s+)?good\s+but\b", re.IGNORECASE), # "good but" often precedes criticism
1084
- re.compile(r"\bcan'?t\s+understand", re.IGNORECASE),
1085
- re.compile(r"\bwaste\s+(of\s+)?time\b", re.IGNORECASE),
1086
- re.compile(r"\bno\s+use(ful)?\b", re.IGNORECASE),
1087
- re.compile(r"\buseless\b", re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1088
  ]
1089
 
1090
  META_COMMENT_PATTERNS = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1091
  re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE),
1092
  re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE),
1093
- re.compile(r"^nothing\s+to\s+(say|comment|mention)", re.IGNORECASE),
1094
  re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE),
1095
- re.compile(r"^(nil|none|na|n/a)$", re.IGNORECASE),
1096
- re.compile(r"^(no|nothing)\.?$", re.IGNORECASE),
1097
- re.compile(r"^everything\s+(is\s+)?(good|fine|ok|okay)", re.IGNORECASE),
1098
- re.compile(r"^(all\s+)?good$", re.IGNORECASE)
 
 
 
 
 
 
 
1099
  ]
1100
 
1101
  def is_meta_comment(text: str) -> bool:
1102
- """Check if comment is a meta-comment (not actual feedback)"""
 
 
 
1103
  if not text:
1104
- return False
 
1105
  text = text.strip()
1106
 
 
 
 
 
 
 
1107
  for pattern in META_COMMENT_PATTERNS:
1108
  if pattern.match(text):
1109
  logger.debug(f"Meta-comment detected: '{text[:50]}...'")
@@ -1112,14 +1398,28 @@ def is_meta_comment(text: str) -> bool:
1112
  return False
1113
 
1114
  def is_explicit_negative(text: str) -> bool:
1115
- """Check if text contains explicit negative phrases with logging"""
 
 
 
1116
  if not text:
1117
  return False
 
 
 
 
 
1118
  lower = text.lower()
1119
 
1120
  # Check phrases
1121
  for phrase in NEGATIVE_PHRASES:
1122
  if phrase in lower:
 
 
 
 
 
 
1123
  logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'")
1124
  return True
1125
 
@@ -1678,3 +1978,5 @@ if __name__ == "__main__":
1678
  log_level="info"
1679
  )
1680
 
 
 
 
1
+
2
+
3
+
4
  # """
5
  # Enhanced FastAPI Service for Comment Sentiment Analysis
6
  # with improved performance, validation, and configuration management
7
+ # Version 2.1.0 - Updated with bug fixes and improvements
8
  # """
9
 
10
  # from fastapi import FastAPI, HTTPException, Depends
 
84
  # """Application settings with environment variable support"""
85
  # # API Settings
86
  # app_name: str = "Comment Analysis API"
87
+ # app_version: str = "2.1.0"
88
  # debug_mode: bool = False
89
 
90
  # # Request Limits
91
  # max_comments_per_request: int = 1000
92
  # max_comment_length: int = 5000
93
+ # min_comment_words: int = 1
94
 
95
  # # Sentiment Thresholds
96
  # vader_pos_threshold: float = 0.2
 
117
  # env_file = ".env"
118
  # env_file_encoding = 'utf-8'
119
  # extra = 'ignore'
120
+
121
+ # @validator('min_comment_words')
122
+ # def validate_min_words(cls, v):
123
+ # if v < 0:
124
+ # raise ValueError('min_comment_words must be non-negative')
125
+ # return v
126
+
127
+ # @validator('combined_weight_vader', 'combined_weight_roberta')
128
+ # def validate_weights(cls, v):
129
+ # if not 0 <= v <= 1:
130
+ # raise ValueError('Weights must be between 0 and 1')
131
+ # return v
132
 
133
  # @lru_cache()
134
  # def get_settings() -> Settings:
135
  # """Cached settings instance"""
136
+ # settings = Settings()
137
+ # # Normalize weights if needed
138
+ # total = settings.combined_weight_vader + settings.combined_weight_roberta
139
+ # if not (0.99 <= total <= 1.01):
140
+ # logger.warning(f"Weights sum to {total}, normalizing to 1.0")
141
+ # settings.combined_weight_vader /= total
142
+ # settings.combined_weight_roberta /= total
143
+ # return settings
144
 
145
  # # Pydantic Models
146
  # class FacultyInfo(BaseModel):
 
231
  # device = None
232
  # summarizer = None
233
 
234
+ # # Enhanced heuristic phrase/regex rules for explicit negative feedback
235
  # NEGATIVE_PHRASES = [
236
  # 'need more staff',
237
  # 'need more faculty',
 
248
  # 'boring class',
249
  # 'boring classes',
250
  # 'waste of time',
251
+ # 'wasting time',
252
  # 'bad teacher',
253
  # 'bad teaching',
254
  # 'poor teaching',
 
262
  # 'lectures are going fast',
263
  # 'going too fast',
264
  # 'too fast',
265
+ # 'too slow',
266
+ # 'too boring',
267
  # 'lacking',
268
  # 'is lacking',
269
  # 'knowledge is lacking',
 
271
  # 'no practical',
272
  # 'lack of practical',
273
  # 'no hands-on',
274
+ # 'no real world',
275
+ # 'too lag',
276
+ # 'lag',
277
+ # 'lagging',
278
+ # 'lag in teaching',
279
+ # 'not interested',
280
+ # 'no interest',
281
+ # 'not managing time',
282
+ # 'poor time management',
283
+ # 'time management',
284
+ # 'not at all',
285
+ # 'going for attendance',
286
+ # 'just for attendance',
287
+ # 'only for attendance',
288
+ # 'cant understand',
289
+ # "can't understand",
290
+ # 'not understandable',
291
+ # 'nothing learnt',
292
+ # 'learned nothing',
293
+ # 'no improvement',
294
+ # 'same teaching',
295
+ # 'monotonous',
296
+ # 'sleeping in class',
297
+ # 'no use',
298
+ # 'useless',
299
+ # 'waste our time'
300
  # ]
301
 
302
  # NEGATIVE_REGEXES = [
303
  # re.compile(r"\bno\s+(proper|sufficient)\s+(classes|notes|support)\b", re.IGNORECASE),
304
  # re.compile(r"\bno\s+staff\b", re.IGNORECASE),
305
  # re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support)\b", re.IGNORECASE),
306
+ # re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(are\s+)?(too|very)\s+(fast|slow|boring)\b", re.IGNORECASE),
307
+ # re.compile(r"\blectures?\s+are\s+going\s+(too\s+)?fast\b", re.IGNORECASE),
308
  # re.compile(r"\b(require|needs?|needed)\s+(some\s+)?improv(e|ement)s?\s+(in|of)?\s*communication(\s+skills?)?\b", re.IGNORECASE),
309
  # re.compile(r"\b(is\s+)?lacking\b", re.IGNORECASE),
310
+ # re.compile(r"\bno\s+(practical|hands-on|real-world)\b", re.IGNORECASE),
311
+ # re.compile(r"\btoo\s+(lag|lagging?|slow|boring)\b", re.IGNORECASE),
312
+ # re.compile(r"\b(not\s+)?managing\s+time\b", re.IGNORECASE),
313
+ # re.compile(r"\btime\s+management", re.IGNORECASE),
314
+ # re.compile(r"\bnot\s+interested(\s+in|\s+to)?\b", re.IGNORECASE),
315
+ # re.compile(r"\bno\s+interest\b", re.IGNORECASE),
316
+ # re.compile(r"\b(just\s+|only\s+)?for\s+attendance\b", re.IGNORECASE),
317
+ # re.compile(r"\b(just\s+)?going\s+(to|for)\s+(her|his|their)\s+class\b", re.IGNORECASE),
318
+ # re.compile(r"\bnot\s+at\s+all\b", re.IGNORECASE),
319
+ # re.compile(r"\b(overall|its?)\s+(is\s+)?good\s+but\b", re.IGNORECASE), # "good but" often precedes criticism
320
+ # re.compile(r"\bcan'?t\s+understand", re.IGNORECASE),
321
+ # re.compile(r"\bwaste\s+(of\s+)?time\b", re.IGNORECASE),
322
+ # re.compile(r"\bno\s+use(ful)?\b", re.IGNORECASE),
323
+ # re.compile(r"\buseless\b", re.IGNORECASE)
324
  # ]
325
 
326
  # META_COMMENT_PATTERNS = [
327
+ # re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE),
328
+ # re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE),
329
+ # re.compile(r"^nothing\s+to\s+(say|comment|mention)", re.IGNORECASE),
330
+ # re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE),
331
+ # re.compile(r"^(nil|none|na|n/a)$", re.IGNORECASE),
332
+ # re.compile(r"^(no|nothing)\.?$", re.IGNORECASE),
333
+ # re.compile(r"^everything\s+(is\s+)?(good|fine|ok|okay)", re.IGNORECASE),
334
+ # re.compile(r"^(all\s+)?good$", re.IGNORECASE)
335
  # ]
336
 
337
  # def is_meta_comment(text: str) -> bool:
 
342
 
343
  # for pattern in META_COMMENT_PATTERNS:
344
  # if pattern.match(text):
345
+ # logger.debug(f"Meta-comment detected: '{text[:50]}...'")
346
  # return True
347
 
348
  # return False
349
 
350
  # def is_explicit_negative(text: str) -> bool:
351
+ # """Check if text contains explicit negative phrases with logging"""
352
  # if not text:
353
  # return False
354
  # lower = text.lower()
355
 
356
+ # # Check phrases
357
  # for phrase in NEGATIVE_PHRASES:
358
  # if phrase in lower:
359
+ # logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'")
360
  # return True
361
 
362
+ # # Check regexes
363
  # for regex in NEGATIVE_REGEXES:
364
  # if regex.search(text):
365
+ # logger.debug(f"Negative pattern matched: {regex.pattern} in '{text[:50]}...'")
366
  # return True
367
 
368
  # return False
 
377
 
378
  # # Initialize VADER (NLTK data already downloaded)
379
  # sia = SentimentIntensityAnalyzer()
380
+ # logger.info("VADER initialized")
381
 
382
  # # Initialize RoBERTa with caching
383
  # cache_dir = settings.model_cache_dir
 
395
  # device = "cuda" if torch.cuda.is_available() else "cpu"
396
  # model.to(device)
397
  # model.eval()
398
+ # logger.info(f"RoBERTa initialized on device: {device}")
399
 
400
  # # Initialize summarizer (optional)
401
  # if settings.use_abstractive_summary:
 
405
  # model=settings.summarizer_model,
406
  # device=0 if device == "cuda" else -1
407
  # )
408
+ # logger.info("Summarizer initialized")
409
  # except Exception as e:
410
  # logger.warning(f"Summarizer initialization failed: {e}")
411
  # summarizer = None
412
 
413
+ # logger.info("All models initialized successfully")
414
 
415
  # except Exception as e:
416
  # logger.error(f"Error initializing models: {e}")
 
509
  # roberta_neg = row.get('roberta_neg', 0.0)
510
  # roberta_pos = row.get('roberta_pos', 0.0)
511
 
512
+ # # Priority 1: Heuristic negative patterns override everything
513
  # if row.get('heuristic_negative') is True:
514
  # return 'Negative'
515
 
516
+ # # Priority 2: Strong negative signals
517
  # if (
518
  # vader_compound <= settings.vader_neg_threshold or
519
  # roberta_neg >= settings.roberta_neg_threshold or
 
521
  # ):
522
  # return 'Negative'
523
 
524
+ # # Priority 3: Positive signals
525
  # if (
526
  # vader_compound >= settings.vader_pos_threshold or
527
  # roberta_pos >= settings.roberta_pos_threshold or
 
529
  # ):
530
  # return 'Positive'
531
 
532
+ # # Default: Neutral
533
  # return 'Neutral'
534
 
535
  # def sanitize_text(text: str) -> str:
536
+ # """Sanitize input text while preserving emojis"""
537
  # if not text:
538
  # return ""
539
+ # # Remove control characters but keep printable characters and emojis
540
+ # text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text)
541
+ # # Normalize whitespace
542
  # text = ' '.join(text.split())
 
543
  # return text.strip()
544
 
545
  # def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]:
 
548
  # settings = get_settings()
549
  # logger.info(f"Received {len(comments)} comments for analysis")
550
 
551
+ # # Sanitize comments
552
  # sanitized_comments = [sanitize_text(comment) for comment in comments]
553
 
554
+ # # FIXED: Changed < to <= to properly handle min_comment_words
555
  # filtered_comments = [
556
  # comment for comment in sanitized_comments
557
+ # if (settings.min_comment_words <= len(comment.split()) <= settings.max_comment_length)
 
558
  # ]
559
 
560
  # logger.info(f"After filtering: {len(filtered_comments)} valid comments")
 
565
  # "message": "No valid comments found for analysis"
566
  # }
567
 
568
+ # # Create dataframe
569
  # df = pd.DataFrame({'comment': filtered_comments})
570
+
571
+ # # Detect meta-comments and explicit negatives
572
+ # df['is_meta'] = df['comment'].apply(is_meta_comment)
573
  # df['heuristic_negative'] = df['comment'].apply(is_explicit_negative)
574
 
575
+ # # Log detection results
576
+ # meta_count = df['is_meta'].sum()
577
+ # heuristic_neg_count = df['heuristic_negative'].sum()
578
+ # logger.info(f"Detected {meta_count} meta-comments and {heuristic_neg_count} heuristic negatives")
579
+
580
+ # # VADER sentiment analysis
581
  # vader_results = []
582
  # for text in df['comment']:
583
  # vader_results.append(vader_sentiment(text))
584
 
585
+ # # RoBERTa sentiment analysis (batch)
586
  # roberta_results = roberta_sentiment_batch(df['comment'].tolist())
587
 
588
+ # # Combine results
589
  # vader_df = pd.DataFrame(vader_results)
590
  # roberta_df = pd.DataFrame(roberta_results)
591
  # final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1)
592
 
593
+ # # Calculate combined scores
594
  # final_df['combined_pos'] = (
595
  # settings.combined_weight_vader * final_df['vader_pos'] +
596
  # settings.combined_weight_roberta * final_df['roberta_pos']
 
604
  # settings.combined_weight_roberta * final_df['roberta_neu']
605
  # )
606
 
607
+ # # Classify overall sentiment (meta-comments become Neutral)
608
  # final_df['Overall_Sentiment'] = final_df.apply(
609
+ # lambda row: 'Neutral' if row.get('is_meta') else overall_sentiment(row, settings),
610
  # axis=1
611
  # )
612
 
613
+ # # Calculate statistics
614
  # total_comments = len(final_df)
615
  # positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive'])
616
  # negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative'])
 
621
  # f"{negative_count} negative, {neutral_count} neutral"
622
  # )
623
 
624
+ # # Average scores
625
  # avg_positive = float(final_df['combined_pos'].mean())
626
  # avg_negative = float(final_df['combined_neg'].mean())
627
  # avg_neutral = float(final_df['combined_neu'].mean())
628
 
629
+ # # Determine overall sentiment label
630
  # if avg_positive > max(avg_negative, avg_neutral):
631
  # overall_sentiment_label = "Positive"
632
  # elif avg_negative > max(avg_positive, avg_neutral):
 
634
  # else:
635
  # overall_sentiment_label = "Neutral"
636
 
637
+ # # Process negative comments
638
  # negative_summary = ""
639
  # negative_comments_list = []
640
  # negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative']
 
643
  # negative_comments_list = negative_comments['comment'].tolist()
644
 
645
  # try:
646
+ # # Get top negative comments
647
  # top_idx = negative_comments['combined_neg'].nlargest(3).index
648
  # top_comments = negative_comments.loc[top_idx, 'comment'].tolist()
649
 
 
660
  # )
661
  # negative_summary = summary_result[0]['summary_text']
662
  # else:
663
+ # # Extractive summary
664
  # negative_summary = "; ".join(top_comments)
665
  # except Exception as e:
666
  # logger.warning(f"Summary generation failed: {e}")
667
  # negative_summary = "; ".join(negative_comments_list[:3])
668
 
669
+ # # Generate insights and recommendations
670
  # insights = []
671
  # recommendations = []
672
 
 
747
  # async def startup_event():
748
  # """Initialize models on startup"""
749
  # try:
750
+ # logger.info("=" * 80)
751
+ # logger.info(f"Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
752
+ # logger.info("=" * 80)
753
  # initialize_models()
754
+ # logger.info("Service started successfully")
755
+ # logger.info("=" * 80)
756
  # except Exception as e:
757
+ # logger.error(f"Startup failed: {e}")
758
  # raise e
759
 
760
  # @app.on_event("shutdown")
 
768
  # return {
769
  # "service": get_settings().app_name,
770
  # "version": get_settings().app_version,
771
+ # "status": "running",
772
+ # "endpoints": {
773
+ # "health": "/health",
774
+ # "analyze": "/analyze-comments",
775
+ # "config": "/config (debug mode only)",
776
+ # "test": "/test"
777
+ # }
778
  # }
779
 
780
  # @app.get("/health")
 
849
 
850
  # @app.get("/config")
851
  # async def get_config(settings: Settings = Depends(get_settings)):
852
+ # """Get current configuration (debug mode only)"""
853
  # if not settings.debug_mode:
854
  # raise HTTPException(status_code=404, detail="Not found")
855
 
856
  # return {
857
  # "max_comments_per_request": settings.max_comments_per_request,
858
+ # "max_comment_length": settings.max_comment_length,
859
+ # "min_comment_words": settings.min_comment_words,
860
  # "vader_pos_threshold": settings.vader_pos_threshold,
861
  # "vader_neg_threshold": settings.vader_neg_threshold,
862
  # "roberta_pos_threshold": settings.roberta_pos_threshold,
863
  # "roberta_neg_threshold": settings.roberta_neg_threshold,
864
+ # "combined_weight_vader": settings.combined_weight_vader,
865
+ # "combined_weight_roberta": settings.combined_weight_roberta,
866
  # "enable_caching": settings.enable_caching,
867
+ # "batch_size": settings.batch_size,
868
+ # "use_abstractive_summary": settings.use_abstractive_summary
869
+ # }
870
+
871
+ # @app.get("/test")
872
+ # async def test_endpoint():
873
+ # """Test endpoint to verify sentiment classification"""
874
+ # test_cases = [
875
+ # "No more comments 😅",
876
+ # "Overall good but too lag",
877
+ # "Not interested to be in her class just we are going for attendance thats it not at all managing time.",
878
+ # "Nothing to say anything just we are going to her class mean, only for attendance",
879
+ # "Excellent teaching! Very clear explanations.",
880
+ # "Good teacher with strong subject knowledge",
881
+ # "Class is okay, nothing special"
882
+ # ]
883
+
884
+ # results = []
885
+ # for text in test_cases:
886
+ # is_meta = is_meta_comment(text)
887
+ # is_neg = is_explicit_negative(text)
888
+
889
+ # # Predict classification
890
+ # if is_meta:
891
+ # predicted = "Neutral (meta-comment)"
892
+ # elif is_neg:
893
+ # predicted = "Negative (heuristic)"
894
+ # else:
895
+ # predicted = "Needs full analysis"
896
+
897
+ # results.append({
898
+ # "text": text,
899
+ # "is_meta_comment": is_meta,
900
+ # "is_heuristic_negative": is_neg,
901
+ # "predicted_classification": predicted
902
+ # })
903
+
904
+ # return {
905
+ # "test_results": results,
906
+ # "note": "Full analysis requires VADER and RoBERTa scores"
907
  # }
908
 
909
  # if __name__ == "__main__":
 
917
 
918
 
919
 
920
+
921
+
922
+
923
+
924
+
925
+
926
+
927
  """
928
  Enhanced FastAPI Service for Comment Sentiment Analysis
929
  with improved performance, validation, and configuration management
 
1156
 
1157
  # Enhanced heuristic phrase/regex rules for explicit negative feedback
1158
  NEGATIVE_PHRASES = [
1159
+ # Teaching quality issues
1160
+ 'very poor',
1161
+ 'extremely poor',
1162
+ 'poor in teaching',
1163
+ 'poor teaching level',
1164
+ 'poor teaching',
1165
+ 'bad teacher',
1166
+ 'bad teaching',
1167
+ 'not good', # Keep but check it's not "no negative"
1168
+ 'not satisfied',
1169
+ 'not satisfactory',
1170
+
1171
+ # Content/delivery issues
1172
+ 'boring class',
1173
+ 'boring classes',
1174
+ 'boring subject',
1175
+ 'subject is boring',
1176
+ 'low voice',
1177
+ 'voice is low',
1178
+ 'cannot hear',
1179
+ "can't hear",
1180
+ 'speak louder',
1181
+
1182
+ # Resource/support issues
1183
  'need more staff',
1184
  'need more faculty',
1185
  'insufficient staff',
1186
  'lack of staff',
1187
+ 'not sufficient',
1188
+ 'insufficient',
1189
+ 'not enough',
1190
+ 'no classes',
1191
+ 'no regular classes',
1192
+ 'not sufficient classes',
1193
+
1194
+ # Knowledge/understanding issues
1195
  'lack of knowledge',
1196
  'better knowledge needed',
1197
  'poor knowledge',
1198
+ 'knowledge is lacking',
1199
+ 'practical knowledge lacking',
1200
+ 'no practical',
1201
+ 'lack of practical',
1202
+ 'no hands-on',
1203
+ 'no real world',
1204
+ 'did not understand',
1205
+ "didn't understand",
1206
+ 'not able to understand',
1207
+ 'unable to understand',
1208
+ 'difficult to understand',
1209
+ 'hard to understand',
1210
+ 'concepts are difficult',
1211
+ 'concepts difficult',
1212
+ 'cant understand',
1213
+ "can't understand",
1214
+ 'not understandable',
1215
+
1216
+ # Improvement needed
1217
  'improve class',
1218
  'improvement needed',
1219
  'needs improvement',
1220
+ 'need improvement',
1221
+ 'should improve',
1222
+ 'must improve',
1223
  'not helpful',
1224
  'not clear',
1225
  'communication skills need improvement',
1226
  'improve communication',
1227
+
1228
+ # Pace/time issues
1229
  'lectures are going fast',
1230
  'going too fast',
1231
  'too fast',
1232
  'too slow',
 
 
 
 
 
 
 
 
 
1233
  'too lag',
1234
  'lag',
1235
  'lagging',
1236
  'lag in teaching',
 
 
1237
  'not managing time',
1238
  'poor time management',
1239
+ 'time management issue',
1240
+
1241
+ # Engagement issues
1242
+ 'not interested',
1243
+ 'no interest',
1244
  'going for attendance',
1245
  'just for attendance',
1246
  'only for attendance',
1247
+ 'not at all',
 
 
1248
  'nothing learnt',
1249
  'learned nothing',
1250
  'no improvement',
1251
  'same teaching',
1252
  'monotonous',
1253
  'sleeping in class',
1254
+
1255
+ # Value/utility issues
1256
+ 'waste of time',
1257
+ 'wasting time',
1258
+ 'waste our time',
1259
  'no use',
1260
  'useless',
1261
+
1262
+ # Administrative issues
1263
+ 'military rules',
1264
+ 'strict rules',
1265
+ 'too strict',
1266
+ 'very strict',
1267
+ 'attendance issue',
1268
+ 'attendance problem',
1269
+ 'not providing attendance',
1270
+ 'claim od',
1271
+
1272
+ # Workload issues
1273
+ 'too many projects',
1274
+ 'many projects review',
1275
+ 'trouble to make',
1276
+ 'difficult to make',
1277
+ 'hard to make',
1278
+ 'placement activities', # When context is negative
1279
  ]
1280
 
1281
  NEGATIVE_REGEXES = [
1282
+ # Teaching quality patterns
1283
+ re.compile(r"\b(very|extremely|quite|so)\s+(poor|bad|weak)\s+(in\s+)?(teaching|knowledge|communication)", re.IGNORECASE),
1284
+ re.compile(r"\bpoor\s+(teaching|teacher|faculty|knowledge|communication)", re.IGNORECASE),
1285
+ re.compile(r"\b(teaching|knowledge)\s+(is\s+)?(poor|bad|weak|lacking)", re.IGNORECASE),
1286
+
1287
+ # Boring/engagement patterns
1288
+ re.compile(r"\b(boring|dull|monotonous)\s+(class|classes|subject|lecture|lectures)", re.IGNORECASE),
1289
+ re.compile(r"\b(class|classes|subject|lecture|lectures)\s+(is|are)\s+(boring|dull|monotonous)", re.IGNORECASE),
1290
+
1291
+ # Voice/communication patterns
1292
+ re.compile(r"\b(low|soft|quiet)\s+voice\b", re.IGNORECASE),
1293
+ re.compile(r"\bvoice\s+(is\s+)?(low|soft|quiet|not clear)", re.IGNORECASE),
1294
+ re.compile(r"\b(cannot|can't|cant|unable to)\s+hear", re.IGNORECASE),
1295
+
1296
+ # Resource/support patterns
1297
+ re.compile(r"\b(no|not|insufficient|lack of)\s+(proper|sufficient|enough|regular)?\s*(classes|notes|support|staff|faculty)", re.IGNORECASE),
1298
+ re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support|classes)", re.IGNORECASE),
1299
+
1300
+ # Understanding/clarity patterns
1301
+ re.compile(r"\b(cannot|can't|cant|unable to|difficult to|hard to)\s+understand", re.IGNORECASE),
1302
+ re.compile(r"\b(not|difficult|hard)\s+(able\s+to\s+)?understand(\s+the)?(\s+(concepts?|teaching|lectures?))?", re.IGNORECASE),
1303
+ re.compile(r"\bconcepts?\s+(are\s+)?(difficult|hard|tough|complex)\s+to\s+understand", re.IGNORECASE),
1304
+
1305
+ # Improvement patterns
1306
+ re.compile(r"\b(need|needs|needed|require|requires)\s+(some\s+)?(improvement|to improve)", re.IGNORECASE),
1307
+ re.compile(r"\b(should|must|have to)\s+improve", re.IGNORECASE),
1308
+ re.compile(r"\bimprovement\s+(is\s+)?need(ed)?", re.IGNORECASE),
1309
+
1310
+ # Pace patterns
1311
+ re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(is|are|going)\s+(too|very)\s+(fast|slow)", re.IGNORECASE),
1312
+ re.compile(r"\b(too|very)\s+(fast|slow|lag|lagging)", re.IGNORECASE),
1313
+
1314
+ # Time management patterns
1315
+ re.compile(r"\b(not|poor|bad)\s+(managing|managing)\s+time", re.IGNORECASE),
1316
+ re.compile(r"\btime\s+management\s+(is\s+)?(poor|bad|lacking)", re.IGNORECASE),
1317
+
1318
+ # Attendance/engagement patterns
1319
+ re.compile(r"\b(just|only)\s+(for|going for)\s+attendance", re.IGNORECASE),
1320
+ re.compile(r"\b(going|attend|attending)\s+(to|for)\s+(her|his|their)\s+class\s+(just|only)\s+for\s+attendance", re.IGNORECASE),
1321
+ re.compile(r"\bnot\s+(at\s+all\s+)?(interested|engaging|helpful)", re.IGNORECASE),
1322
+
1323
+ # Value patterns
1324
+ re.compile(r"\b(waste|wasting)\s+(of\s+)?time", re.IGNORECASE),
1325
+ re.compile(r"\b(no\s+use|useless|not useful)", re.IGNORECASE),
1326
+
1327
+ # Workload patterns
1328
+ re.compile(r"\b(too\s+)?many\s+projects", re.IGNORECASE),
1329
+ re.compile(r"\btrouble\s+to\s+(make|complete|do)", re.IGNORECASE),
1330
+
1331
+ # Administrative patterns
1332
+ re.compile(r"\bmilitary\s+rules", re.IGNORECASE),
1333
+ re.compile(r"\b(too|very)\s+strict", re.IGNORECASE),
1334
+ re.compile(r"\battendance\s+(issue|problem)", re.IGNORECASE),
1335
+ re.compile(r"\bnot\s+providing\s+attendance", re.IGNORECASE),
1336
+ re.compile(r"\bclaim\s+od", re.IGNORECASE),
1337
+
1338
+ # Placement/scheduling patterns
1339
+ re.compile(r"\bplacement\s+activities\s+(and|with)\s+(attendance|issue|problem)", re.IGNORECASE),
1340
+ re.compile(r"\b(class|classes)\s+(intersecting|conflicting)\s+with\s+placement", re.IGNORECASE),
1341
  ]
1342
 
1343
  META_COMMENT_PATTERNS = [
1344
+ re.compile(r"^no\s+negative\s+(comments?|feedback|remarks?)", re.IGNORECASE),
1345
+ re.compile(r"^no\s+negative\s+comments?\s+on\s+the\s+(faculty|teacher|staff|course)", re.IGNORECASE),
1346
+ re.compile(r"^no\s+(issues?|problems?|complaints?)\.?$", re.IGNORECASE),
1347
+ re.compile(r"^no\s+(issues?|problems?|complaints?)\s+(at\s+all|whatsoever)", re.IGNORECASE),
1348
+
1349
+ # "Everything is good" patterns
1350
+ re.compile(r"^(everything|all)\s+(is\s+)?(good|fine|ok|okay|great|perfect|excellent)", re.IGNORECASE),
1351
+ re.compile(r"^no,?\s+(everything|all)\s+(is\s+)?(good|fine|ok|okay)", re.IGNORECASE),
1352
+ re.compile(r"^(all\s+)?good\.?$", re.IGNORECASE),
1353
+ re.compile(r"^everything\s+at\s+the\s+too\s+only", re.IGNORECASE), # From your data
1354
+
1355
+ # "Nothing" patterns
1356
+ re.compile(r"^nothing\.?$", re.IGNORECASE),
1357
+ re.compile(r"^nothing\s+(to\s+)?(say|comment|mention|add)", re.IGNORECASE),
1358
+ re.compile(r"^nothing,?\s+(and\s+)?(all|everything)\s+(is\s+)?(good|fine)", re.IGNORECASE),
1359
+
1360
+ # "No more comments" patterns
1361
  re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE),
1362
  re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE),
 
1363
  re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE),
1364
+
1365
+ # Empty/nil responses
1366
+ re.compile(r"^(nil|none|na|n/a|nill)\.?$", re.IGNORECASE),
1367
+ re.compile(r"^(no|nothing|none)\.?$", re.IGNORECASE),
1368
+
1369
+ # Positive meta-comments (not actual feedback)
1370
+ re.compile(r"^(it's\s+|its\s+)?(all\s+)?good\.?$", re.IGNORECASE),
1371
+ re.compile(r"^fine\.?$", re.IGNORECASE),
1372
+ re.compile(r"^ok(ay)?\.?$", re.IGNORECASE),
1373
+ re.compile(r"^great\.?$", re.IGNORECASE),
1374
+ re.compile(r"^nice\.?$", re.IGNORECASE),
1375
  ]
1376
 
1377
  def is_meta_comment(text: str) -> bool:
1378
+ """
1379
+ Check if comment is a meta-comment (not actual feedback).
1380
+ These are generic statements that don't provide substantive feedback.
1381
+ """
1382
  if not text:
1383
+ return True # Empty text is meta
1384
+
1385
  text = text.strip()
1386
 
1387
+ # Check length - very short comments are likely meta
1388
+ if len(text) < 3:
1389
+ logger.debug(f"Meta-comment (too short): '{text}'")
1390
+ return True
1391
+
1392
+ # Check against patterns
1393
  for pattern in META_COMMENT_PATTERNS:
1394
  if pattern.match(text):
1395
  logger.debug(f"Meta-comment detected: '{text[:50]}...'")
 
1398
  return False
1399
 
1400
  def is_explicit_negative(text: str) -> bool:
1401
+ """
1402
+ Check if text contains explicit negative phrases.
1403
+ IMPORTANT: Must check if it's a meta-comment FIRST.
1404
+ """
1405
  if not text:
1406
  return False
1407
+
1408
+ # CRITICAL: Don't classify meta-comments as negative
1409
+ if is_meta_comment(text):
1410
+ return False
1411
+
1412
  lower = text.lower()
1413
 
1414
  # Check phrases
1415
  for phrase in NEGATIVE_PHRASES:
1416
  if phrase in lower:
1417
+ # Double-check it's not a false positive like "no negative comments"
1418
+ if phrase == 'not good' and 'no negative' in lower:
1419
+ continue
1420
+ if phrase == 'no interest' and 'no negative' in lower:
1421
+ continue
1422
+
1423
  logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'")
1424
  return True
1425
 
 
1978
  log_level="info"
1979
  )
1980
 
1981
+
1982
+