Spaces:
Sleeping
Sleeping
Update fastapi_example.py
Browse files- fastapi_example.py +389 -87
fastapi_example.py
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# """
|
| 2 |
# Enhanced FastAPI Service for Comment Sentiment Analysis
|
| 3 |
# with improved performance, validation, and configuration management
|
|
|
|
| 4 |
# """
|
| 5 |
|
| 6 |
# from fastapi import FastAPI, HTTPException, Depends
|
|
@@ -80,13 +84,13 @@
|
|
| 80 |
# """Application settings with environment variable support"""
|
| 81 |
# # API Settings
|
| 82 |
# app_name: str = "Comment Analysis API"
|
| 83 |
-
# app_version: str = "2.
|
| 84 |
# debug_mode: bool = False
|
| 85 |
|
| 86 |
# # Request Limits
|
| 87 |
# max_comments_per_request: int = 1000
|
| 88 |
# max_comment_length: int = 5000
|
| 89 |
-
# min_comment_words: int =
|
| 90 |
|
| 91 |
# # Sentiment Thresholds
|
| 92 |
# vader_pos_threshold: float = 0.2
|
|
@@ -113,11 +117,30 @@
|
|
| 113 |
# env_file = ".env"
|
| 114 |
# env_file_encoding = 'utf-8'
|
| 115 |
# extra = 'ignore'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
# @lru_cache()
|
| 118 |
# def get_settings() -> Settings:
|
| 119 |
# """Cached settings instance"""
|
| 120 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
# # Pydantic Models
|
| 123 |
# class FacultyInfo(BaseModel):
|
|
@@ -208,7 +231,7 @@
|
|
| 208 |
# device = None
|
| 209 |
# summarizer = None
|
| 210 |
|
| 211 |
-
# #
|
| 212 |
# NEGATIVE_PHRASES = [
|
| 213 |
# 'need more staff',
|
| 214 |
# 'need more faculty',
|
|
@@ -225,6 +248,7 @@
|
|
| 225 |
# 'boring class',
|
| 226 |
# 'boring classes',
|
| 227 |
# 'waste of time',
|
|
|
|
| 228 |
# 'bad teacher',
|
| 229 |
# 'bad teaching',
|
| 230 |
# 'poor teaching',
|
|
@@ -238,6 +262,8 @@
|
|
| 238 |
# 'lectures are going fast',
|
| 239 |
# 'going too fast',
|
| 240 |
# 'too fast',
|
|
|
|
|
|
|
| 241 |
# 'lacking',
|
| 242 |
# 'is lacking',
|
| 243 |
# 'knowledge is lacking',
|
|
@@ -245,26 +271,67 @@
|
|
| 245 |
# 'no practical',
|
| 246 |
# 'lack of practical',
|
| 247 |
# 'no hands-on',
|
| 248 |
-
# 'no real world'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
# ]
|
| 250 |
|
| 251 |
# NEGATIVE_REGEXES = [
|
| 252 |
# re.compile(r"\bno\s+(proper|sufficient)\s+(classes|notes|support)\b", re.IGNORECASE),
|
| 253 |
# re.compile(r"\bno\s+staff\b", re.IGNORECASE),
|
| 254 |
# re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support)\b", re.IGNORECASE),
|
| 255 |
-
# re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(are\s+)?(too|very)\s+fast\b", re.IGNORECASE),
|
| 256 |
-
# re.compile(r"\blectures?\s+are\s+going\s+fast\b", re.IGNORECASE),
|
| 257 |
# re.compile(r"\b(require|needs?|needed)\s+(some\s+)?improv(e|ement)s?\s+(in|of)?\s*communication(\s+skills?)?\b", re.IGNORECASE),
|
| 258 |
# re.compile(r"\b(is\s+)?lacking\b", re.IGNORECASE),
|
| 259 |
-
# re.compile(r"\bno\s+(practical|hands-on|real-world)\b", re.IGNORECASE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
# ]
|
| 261 |
|
| 262 |
# META_COMMENT_PATTERNS = [
|
| 263 |
-
# re.compile(r"^no\s+
|
| 264 |
-
# re.compile(r"^no\s+
|
| 265 |
-
# re.compile(r"^
|
| 266 |
-
# re.compile(r"^
|
| 267 |
-
# re.compile(r"^n/
|
|
|
|
|
|
|
|
|
|
| 268 |
# ]
|
| 269 |
|
| 270 |
# def is_meta_comment(text: str) -> bool:
|
|
@@ -275,22 +342,27 @@
|
|
| 275 |
|
| 276 |
# for pattern in META_COMMENT_PATTERNS:
|
| 277 |
# if pattern.match(text):
|
|
|
|
| 278 |
# return True
|
| 279 |
|
| 280 |
# return False
|
| 281 |
|
| 282 |
# def is_explicit_negative(text: str) -> bool:
|
| 283 |
-
# """Check if text contains explicit negative phrases"""
|
| 284 |
# if not text:
|
| 285 |
# return False
|
| 286 |
# lower = text.lower()
|
| 287 |
|
|
|
|
| 288 |
# for phrase in NEGATIVE_PHRASES:
|
| 289 |
# if phrase in lower:
|
|
|
|
| 290 |
# return True
|
| 291 |
|
|
|
|
| 292 |
# for regex in NEGATIVE_REGEXES:
|
| 293 |
# if regex.search(text):
|
|
|
|
| 294 |
# return True
|
| 295 |
|
| 296 |
# return False
|
|
@@ -305,7 +377,7 @@
|
|
| 305 |
|
| 306 |
# # Initialize VADER (NLTK data already downloaded)
|
| 307 |
# sia = SentimentIntensityAnalyzer()
|
| 308 |
-
# logger.info("VADER initialized")
|
| 309 |
|
| 310 |
# # Initialize RoBERTa with caching
|
| 311 |
# cache_dir = settings.model_cache_dir
|
|
@@ -323,7 +395,7 @@
|
|
| 323 |
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 324 |
# model.to(device)
|
| 325 |
# model.eval()
|
| 326 |
-
# logger.info(f"RoBERTa initialized on device: {device}")
|
| 327 |
|
| 328 |
# # Initialize summarizer (optional)
|
| 329 |
# if settings.use_abstractive_summary:
|
|
@@ -333,12 +405,12 @@
|
|
| 333 |
# model=settings.summarizer_model,
|
| 334 |
# device=0 if device == "cuda" else -1
|
| 335 |
# )
|
| 336 |
-
# logger.info("Summarizer initialized")
|
| 337 |
# except Exception as e:
|
| 338 |
# logger.warning(f"Summarizer initialization failed: {e}")
|
| 339 |
# summarizer = None
|
| 340 |
|
| 341 |
-
# logger.info("All models initialized successfully")
|
| 342 |
|
| 343 |
# except Exception as e:
|
| 344 |
# logger.error(f"Error initializing models: {e}")
|
|
@@ -437,9 +509,11 @@
|
|
| 437 |
# roberta_neg = row.get('roberta_neg', 0.0)
|
| 438 |
# roberta_pos = row.get('roberta_pos', 0.0)
|
| 439 |
|
|
|
|
| 440 |
# if row.get('heuristic_negative') is True:
|
| 441 |
# return 'Negative'
|
| 442 |
|
|
|
|
| 443 |
# if (
|
| 444 |
# vader_compound <= settings.vader_neg_threshold or
|
| 445 |
# roberta_neg >= settings.roberta_neg_threshold or
|
|
@@ -447,6 +521,7 @@
|
|
| 447 |
# ):
|
| 448 |
# return 'Negative'
|
| 449 |
|
|
|
|
| 450 |
# if (
|
| 451 |
# vader_compound >= settings.vader_pos_threshold or
|
| 452 |
# roberta_pos >= settings.roberta_pos_threshold or
|
|
@@ -454,14 +529,17 @@
|
|
| 454 |
# ):
|
| 455 |
# return 'Positive'
|
| 456 |
|
|
|
|
| 457 |
# return 'Neutral'
|
| 458 |
|
| 459 |
# def sanitize_text(text: str) -> str:
|
| 460 |
-
# """Sanitize input text"""
|
| 461 |
# if not text:
|
| 462 |
# return ""
|
|
|
|
|
|
|
|
|
|
| 463 |
# text = ' '.join(text.split())
|
| 464 |
-
# text = ''.join(char for char in text if ord(char) >= 32 or char == '\n')
|
| 465 |
# return text.strip()
|
| 466 |
|
| 467 |
# def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]:
|
|
@@ -470,12 +548,13 @@
|
|
| 470 |
# settings = get_settings()
|
| 471 |
# logger.info(f"Received {len(comments)} comments for analysis")
|
| 472 |
|
|
|
|
| 473 |
# sanitized_comments = [sanitize_text(comment) for comment in comments]
|
| 474 |
|
|
|
|
| 475 |
# filtered_comments = [
|
| 476 |
# comment for comment in sanitized_comments
|
| 477 |
-
# if (settings.min_comment_words < len(comment.split()) <= settings.max_comment_length
|
| 478 |
-
# and not is_meta_comment(comment))
|
| 479 |
# ]
|
| 480 |
|
| 481 |
# logger.info(f"After filtering: {len(filtered_comments)} valid comments")
|
|
@@ -486,19 +565,32 @@
|
|
| 486 |
# "message": "No valid comments found for analysis"
|
| 487 |
# }
|
| 488 |
|
|
|
|
| 489 |
# df = pd.DataFrame({'comment': filtered_comments})
|
|
|
|
|
|
|
|
|
|
| 490 |
# df['heuristic_negative'] = df['comment'].apply(is_explicit_negative)
|
| 491 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
# vader_results = []
|
| 493 |
# for text in df['comment']:
|
| 494 |
# vader_results.append(vader_sentiment(text))
|
| 495 |
|
|
|
|
| 496 |
# roberta_results = roberta_sentiment_batch(df['comment'].tolist())
|
| 497 |
|
|
|
|
| 498 |
# vader_df = pd.DataFrame(vader_results)
|
| 499 |
# roberta_df = pd.DataFrame(roberta_results)
|
| 500 |
# final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1)
|
| 501 |
|
|
|
|
| 502 |
# final_df['combined_pos'] = (
|
| 503 |
# settings.combined_weight_vader * final_df['vader_pos'] +
|
| 504 |
# settings.combined_weight_roberta * final_df['roberta_pos']
|
|
@@ -512,11 +604,13 @@
|
|
| 512 |
# settings.combined_weight_roberta * final_df['roberta_neu']
|
| 513 |
# )
|
| 514 |
|
|
|
|
| 515 |
# final_df['Overall_Sentiment'] = final_df.apply(
|
| 516 |
-
# lambda row: overall_sentiment(row, settings),
|
| 517 |
# axis=1
|
| 518 |
# )
|
| 519 |
|
|
|
|
| 520 |
# total_comments = len(final_df)
|
| 521 |
# positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive'])
|
| 522 |
# negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative'])
|
|
@@ -527,10 +621,12 @@
|
|
| 527 |
# f"{negative_count} negative, {neutral_count} neutral"
|
| 528 |
# )
|
| 529 |
|
|
|
|
| 530 |
# avg_positive = float(final_df['combined_pos'].mean())
|
| 531 |
# avg_negative = float(final_df['combined_neg'].mean())
|
| 532 |
# avg_neutral = float(final_df['combined_neu'].mean())
|
| 533 |
|
|
|
|
| 534 |
# if avg_positive > max(avg_negative, avg_neutral):
|
| 535 |
# overall_sentiment_label = "Positive"
|
| 536 |
# elif avg_negative > max(avg_positive, avg_neutral):
|
|
@@ -538,6 +634,7 @@
|
|
| 538 |
# else:
|
| 539 |
# overall_sentiment_label = "Neutral"
|
| 540 |
|
|
|
|
| 541 |
# negative_summary = ""
|
| 542 |
# negative_comments_list = []
|
| 543 |
# negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative']
|
|
@@ -546,6 +643,7 @@
|
|
| 546 |
# negative_comments_list = negative_comments['comment'].tolist()
|
| 547 |
|
| 548 |
# try:
|
|
|
|
| 549 |
# top_idx = negative_comments['combined_neg'].nlargest(3).index
|
| 550 |
# top_comments = negative_comments.loc[top_idx, 'comment'].tolist()
|
| 551 |
|
|
@@ -562,11 +660,13 @@
|
|
| 562 |
# )
|
| 563 |
# negative_summary = summary_result[0]['summary_text']
|
| 564 |
# else:
|
|
|
|
| 565 |
# negative_summary = "; ".join(top_comments)
|
| 566 |
# except Exception as e:
|
| 567 |
# logger.warning(f"Summary generation failed: {e}")
|
| 568 |
# negative_summary = "; ".join(negative_comments_list[:3])
|
| 569 |
|
|
|
|
| 570 |
# insights = []
|
| 571 |
# recommendations = []
|
| 572 |
|
|
@@ -647,11 +747,14 @@
|
|
| 647 |
# async def startup_event():
|
| 648 |
# """Initialize models on startup"""
|
| 649 |
# try:
|
| 650 |
-
# logger.info("=
|
|
|
|
|
|
|
| 651 |
# initialize_models()
|
| 652 |
-
# logger.info("Service started successfully")
|
|
|
|
| 653 |
# except Exception as e:
|
| 654 |
-
# logger.error(f"Startup failed: {e}")
|
| 655 |
# raise e
|
| 656 |
|
| 657 |
# @app.on_event("shutdown")
|
|
@@ -665,7 +768,13 @@
|
|
| 665 |
# return {
|
| 666 |
# "service": get_settings().app_name,
|
| 667 |
# "version": get_settings().app_version,
|
| 668 |
-
# "status": "running"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 669 |
# }
|
| 670 |
|
| 671 |
# @app.get("/health")
|
|
@@ -740,18 +849,61 @@
|
|
| 740 |
|
| 741 |
# @app.get("/config")
|
| 742 |
# async def get_config(settings: Settings = Depends(get_settings)):
|
| 743 |
-
# """Get current configuration"""
|
| 744 |
# if not settings.debug_mode:
|
| 745 |
# raise HTTPException(status_code=404, detail="Not found")
|
| 746 |
|
| 747 |
# return {
|
| 748 |
# "max_comments_per_request": settings.max_comments_per_request,
|
|
|
|
|
|
|
| 749 |
# "vader_pos_threshold": settings.vader_pos_threshold,
|
| 750 |
# "vader_neg_threshold": settings.vader_neg_threshold,
|
| 751 |
# "roberta_pos_threshold": settings.roberta_pos_threshold,
|
| 752 |
# "roberta_neg_threshold": settings.roberta_neg_threshold,
|
|
|
|
|
|
|
| 753 |
# "enable_caching": settings.enable_caching,
|
| 754 |
-
# "batch_size": settings.batch_size
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 755 |
# }
|
| 756 |
|
| 757 |
# if __name__ == "__main__":
|
|
@@ -765,6 +917,13 @@
|
|
| 765 |
|
| 766 |
|
| 767 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 768 |
"""
|
| 769 |
Enhanced FastAPI Service for Comment Sentiment Analysis
|
| 770 |
with improved performance, validation, and configuration management
|
|
@@ -997,113 +1156,240 @@ summarizer = None
|
|
| 997 |
|
| 998 |
# Enhanced heuristic phrase/regex rules for explicit negative feedback
|
| 999 |
NEGATIVE_PHRASES = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1000 |
'need more staff',
|
| 1001 |
'need more faculty',
|
| 1002 |
'insufficient staff',
|
| 1003 |
'lack of staff',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1004 |
'lack of knowledge',
|
| 1005 |
'better knowledge needed',
|
| 1006 |
'poor knowledge',
|
| 1007 |
-
'
|
| 1008 |
-
'
|
| 1009 |
-
'
|
| 1010 |
-
'
|
| 1011 |
-
'no
|
| 1012 |
-
'
|
| 1013 |
-
'
|
| 1014 |
-
'
|
| 1015 |
-
'
|
| 1016 |
-
'
|
| 1017 |
-
'
|
| 1018 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1019 |
'improve class',
|
| 1020 |
'improvement needed',
|
| 1021 |
'needs improvement',
|
|
|
|
|
|
|
|
|
|
| 1022 |
'not helpful',
|
| 1023 |
'not clear',
|
| 1024 |
'communication skills need improvement',
|
| 1025 |
'improve communication',
|
|
|
|
|
|
|
| 1026 |
'lectures are going fast',
|
| 1027 |
'going too fast',
|
| 1028 |
'too fast',
|
| 1029 |
'too slow',
|
| 1030 |
-
'too boring',
|
| 1031 |
-
'lacking',
|
| 1032 |
-
'is lacking',
|
| 1033 |
-
'knowledge is lacking',
|
| 1034 |
-
'practical knowledge lacking',
|
| 1035 |
-
'no practical',
|
| 1036 |
-
'lack of practical',
|
| 1037 |
-
'no hands-on',
|
| 1038 |
-
'no real world',
|
| 1039 |
'too lag',
|
| 1040 |
'lag',
|
| 1041 |
'lagging',
|
| 1042 |
'lag in teaching',
|
| 1043 |
-
'not interested',
|
| 1044 |
-
'no interest',
|
| 1045 |
'not managing time',
|
| 1046 |
'poor time management',
|
| 1047 |
-
'time management',
|
| 1048 |
-
|
|
|
|
|
|
|
|
|
|
| 1049 |
'going for attendance',
|
| 1050 |
'just for attendance',
|
| 1051 |
'only for attendance',
|
| 1052 |
-
'
|
| 1053 |
-
"can't understand",
|
| 1054 |
-
'not understandable',
|
| 1055 |
'nothing learnt',
|
| 1056 |
'learned nothing',
|
| 1057 |
'no improvement',
|
| 1058 |
'same teaching',
|
| 1059 |
'monotonous',
|
| 1060 |
'sleeping in class',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1061 |
'no use',
|
| 1062 |
'useless',
|
| 1063 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1064 |
]
|
| 1065 |
|
| 1066 |
NEGATIVE_REGEXES = [
|
| 1067 |
-
|
| 1068 |
-
re.compile(r"\
|
| 1069 |
-
re.compile(r"\
|
| 1070 |
-
re.compile(r"\b(
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
re.compile(r"\b(
|
| 1074 |
-
re.compile(r"\
|
| 1075 |
-
|
| 1076 |
-
|
| 1077 |
-
re.compile(r"\
|
| 1078 |
-
re.compile(r"\
|
| 1079 |
-
re.compile(r"\
|
| 1080 |
-
|
| 1081 |
-
|
| 1082 |
-
re.compile(r"\
|
| 1083 |
-
re.compile(r"\
|
| 1084 |
-
|
| 1085 |
-
|
| 1086 |
-
re.compile(r"\
|
| 1087 |
-
re.compile(r"\
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1088 |
]
|
| 1089 |
|
| 1090 |
META_COMMENT_PATTERNS = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1091 |
re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE),
|
| 1092 |
re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE),
|
| 1093 |
-
re.compile(r"^nothing\s+to\s+(say|comment|mention)", re.IGNORECASE),
|
| 1094 |
re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE),
|
| 1095 |
-
|
| 1096 |
-
|
| 1097 |
-
re.compile(r"^
|
| 1098 |
-
re.compile(r"^(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1099 |
]
|
| 1100 |
|
| 1101 |
def is_meta_comment(text: str) -> bool:
|
| 1102 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 1103 |
if not text:
|
| 1104 |
-
return
|
|
|
|
| 1105 |
text = text.strip()
|
| 1106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1107 |
for pattern in META_COMMENT_PATTERNS:
|
| 1108 |
if pattern.match(text):
|
| 1109 |
logger.debug(f"Meta-comment detected: '{text[:50]}...'")
|
|
@@ -1112,14 +1398,28 @@ def is_meta_comment(text: str) -> bool:
|
|
| 1112 |
return False
|
| 1113 |
|
| 1114 |
def is_explicit_negative(text: str) -> bool:
|
| 1115 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 1116 |
if not text:
|
| 1117 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1118 |
lower = text.lower()
|
| 1119 |
|
| 1120 |
# Check phrases
|
| 1121 |
for phrase in NEGATIVE_PHRASES:
|
| 1122 |
if phrase in lower:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1123 |
logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'")
|
| 1124 |
return True
|
| 1125 |
|
|
@@ -1678,3 +1978,5 @@ if __name__ == "__main__":
|
|
| 1678 |
log_level="info"
|
| 1679 |
)
|
| 1680 |
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
|
| 4 |
# """
|
| 5 |
# Enhanced FastAPI Service for Comment Sentiment Analysis
|
| 6 |
# with improved performance, validation, and configuration management
|
| 7 |
+
# Version 2.1.0 - Updated with bug fixes and improvements
|
| 8 |
# """
|
| 9 |
|
| 10 |
# from fastapi import FastAPI, HTTPException, Depends
|
|
|
|
| 84 |
# """Application settings with environment variable support"""
|
| 85 |
# # API Settings
|
| 86 |
# app_name: str = "Comment Analysis API"
|
| 87 |
+
# app_version: str = "2.1.0"
|
| 88 |
# debug_mode: bool = False
|
| 89 |
|
| 90 |
# # Request Limits
|
| 91 |
# max_comments_per_request: int = 1000
|
| 92 |
# max_comment_length: int = 5000
|
| 93 |
+
# min_comment_words: int = 1
|
| 94 |
|
| 95 |
# # Sentiment Thresholds
|
| 96 |
# vader_pos_threshold: float = 0.2
|
|
|
|
| 117 |
# env_file = ".env"
|
| 118 |
# env_file_encoding = 'utf-8'
|
| 119 |
# extra = 'ignore'
|
| 120 |
+
|
| 121 |
+
# @validator('min_comment_words')
|
| 122 |
+
# def validate_min_words(cls, v):
|
| 123 |
+
# if v < 0:
|
| 124 |
+
# raise ValueError('min_comment_words must be non-negative')
|
| 125 |
+
# return v
|
| 126 |
+
|
| 127 |
+
# @validator('combined_weight_vader', 'combined_weight_roberta')
|
| 128 |
+
# def validate_weights(cls, v):
|
| 129 |
+
# if not 0 <= v <= 1:
|
| 130 |
+
# raise ValueError('Weights must be between 0 and 1')
|
| 131 |
+
# return v
|
| 132 |
|
| 133 |
# @lru_cache()
|
| 134 |
# def get_settings() -> Settings:
|
| 135 |
# """Cached settings instance"""
|
| 136 |
+
# settings = Settings()
|
| 137 |
+
# # Normalize weights if needed
|
| 138 |
+
# total = settings.combined_weight_vader + settings.combined_weight_roberta
|
| 139 |
+
# if not (0.99 <= total <= 1.01):
|
| 140 |
+
# logger.warning(f"Weights sum to {total}, normalizing to 1.0")
|
| 141 |
+
# settings.combined_weight_vader /= total
|
| 142 |
+
# settings.combined_weight_roberta /= total
|
| 143 |
+
# return settings
|
| 144 |
|
| 145 |
# # Pydantic Models
|
| 146 |
# class FacultyInfo(BaseModel):
|
|
|
|
| 231 |
# device = None
|
| 232 |
# summarizer = None
|
| 233 |
|
| 234 |
+
# # Enhanced heuristic phrase/regex rules for explicit negative feedback
|
| 235 |
# NEGATIVE_PHRASES = [
|
| 236 |
# 'need more staff',
|
| 237 |
# 'need more faculty',
|
|
|
|
| 248 |
# 'boring class',
|
| 249 |
# 'boring classes',
|
| 250 |
# 'waste of time',
|
| 251 |
+
# 'wasting time',
|
| 252 |
# 'bad teacher',
|
| 253 |
# 'bad teaching',
|
| 254 |
# 'poor teaching',
|
|
|
|
| 262 |
# 'lectures are going fast',
|
| 263 |
# 'going too fast',
|
| 264 |
# 'too fast',
|
| 265 |
+
# 'too slow',
|
| 266 |
+
# 'too boring',
|
| 267 |
# 'lacking',
|
| 268 |
# 'is lacking',
|
| 269 |
# 'knowledge is lacking',
|
|
|
|
| 271 |
# 'no practical',
|
| 272 |
# 'lack of practical',
|
| 273 |
# 'no hands-on',
|
| 274 |
+
# 'no real world',
|
| 275 |
+
# 'too lag',
|
| 276 |
+
# 'lag',
|
| 277 |
+
# 'lagging',
|
| 278 |
+
# 'lag in teaching',
|
| 279 |
+
# 'not interested',
|
| 280 |
+
# 'no interest',
|
| 281 |
+
# 'not managing time',
|
| 282 |
+
# 'poor time management',
|
| 283 |
+
# 'time management',
|
| 284 |
+
# 'not at all',
|
| 285 |
+
# 'going for attendance',
|
| 286 |
+
# 'just for attendance',
|
| 287 |
+
# 'only for attendance',
|
| 288 |
+
# 'cant understand',
|
| 289 |
+
# "can't understand",
|
| 290 |
+
# 'not understandable',
|
| 291 |
+
# 'nothing learnt',
|
| 292 |
+
# 'learned nothing',
|
| 293 |
+
# 'no improvement',
|
| 294 |
+
# 'same teaching',
|
| 295 |
+
# 'monotonous',
|
| 296 |
+
# 'sleeping in class',
|
| 297 |
+
# 'no use',
|
| 298 |
+
# 'useless',
|
| 299 |
+
# 'waste our time'
|
| 300 |
# ]
|
| 301 |
|
| 302 |
# NEGATIVE_REGEXES = [
|
| 303 |
# re.compile(r"\bno\s+(proper|sufficient)\s+(classes|notes|support)\b", re.IGNORECASE),
|
| 304 |
# re.compile(r"\bno\s+staff\b", re.IGNORECASE),
|
| 305 |
# re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support)\b", re.IGNORECASE),
|
| 306 |
+
# re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(are\s+)?(too|very)\s+(fast|slow|boring)\b", re.IGNORECASE),
|
| 307 |
+
# re.compile(r"\blectures?\s+are\s+going\s+(too\s+)?fast\b", re.IGNORECASE),
|
| 308 |
# re.compile(r"\b(require|needs?|needed)\s+(some\s+)?improv(e|ement)s?\s+(in|of)?\s*communication(\s+skills?)?\b", re.IGNORECASE),
|
| 309 |
# re.compile(r"\b(is\s+)?lacking\b", re.IGNORECASE),
|
| 310 |
+
# re.compile(r"\bno\s+(practical|hands-on|real-world)\b", re.IGNORECASE),
|
| 311 |
+
# re.compile(r"\btoo\s+(lag|lagging?|slow|boring)\b", re.IGNORECASE),
|
| 312 |
+
# re.compile(r"\b(not\s+)?managing\s+time\b", re.IGNORECASE),
|
| 313 |
+
# re.compile(r"\btime\s+management", re.IGNORECASE),
|
| 314 |
+
# re.compile(r"\bnot\s+interested(\s+in|\s+to)?\b", re.IGNORECASE),
|
| 315 |
+
# re.compile(r"\bno\s+interest\b", re.IGNORECASE),
|
| 316 |
+
# re.compile(r"\b(just\s+|only\s+)?for\s+attendance\b", re.IGNORECASE),
|
| 317 |
+
# re.compile(r"\b(just\s+)?going\s+(to|for)\s+(her|his|their)\s+class\b", re.IGNORECASE),
|
| 318 |
+
# re.compile(r"\bnot\s+at\s+all\b", re.IGNORECASE),
|
| 319 |
+
# re.compile(r"\b(overall|its?)\s+(is\s+)?good\s+but\b", re.IGNORECASE), # "good but" often precedes criticism
|
| 320 |
+
# re.compile(r"\bcan'?t\s+understand", re.IGNORECASE),
|
| 321 |
+
# re.compile(r"\bwaste\s+(of\s+)?time\b", re.IGNORECASE),
|
| 322 |
+
# re.compile(r"\bno\s+use(ful)?\b", re.IGNORECASE),
|
| 323 |
+
# re.compile(r"\buseless\b", re.IGNORECASE)
|
| 324 |
# ]
|
| 325 |
|
| 326 |
# META_COMMENT_PATTERNS = [
|
| 327 |
+
# re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE),
|
| 328 |
+
# re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE),
|
| 329 |
+
# re.compile(r"^nothing\s+to\s+(say|comment|mention)", re.IGNORECASE),
|
| 330 |
+
# re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE),
|
| 331 |
+
# re.compile(r"^(nil|none|na|n/a)$", re.IGNORECASE),
|
| 332 |
+
# re.compile(r"^(no|nothing)\.?$", re.IGNORECASE),
|
| 333 |
+
# re.compile(r"^everything\s+(is\s+)?(good|fine|ok|okay)", re.IGNORECASE),
|
| 334 |
+
# re.compile(r"^(all\s+)?good$", re.IGNORECASE)
|
| 335 |
# ]
|
| 336 |
|
| 337 |
# def is_meta_comment(text: str) -> bool:
|
|
|
|
| 342 |
|
| 343 |
# for pattern in META_COMMENT_PATTERNS:
|
| 344 |
# if pattern.match(text):
|
| 345 |
+
# logger.debug(f"Meta-comment detected: '{text[:50]}...'")
|
| 346 |
# return True
|
| 347 |
|
| 348 |
# return False
|
| 349 |
|
| 350 |
# def is_explicit_negative(text: str) -> bool:
|
| 351 |
+
# """Check if text contains explicit negative phrases with logging"""
|
| 352 |
# if not text:
|
| 353 |
# return False
|
| 354 |
# lower = text.lower()
|
| 355 |
|
| 356 |
+
# # Check phrases
|
| 357 |
# for phrase in NEGATIVE_PHRASES:
|
| 358 |
# if phrase in lower:
|
| 359 |
+
# logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'")
|
| 360 |
# return True
|
| 361 |
|
| 362 |
+
# # Check regexes
|
| 363 |
# for regex in NEGATIVE_REGEXES:
|
| 364 |
# if regex.search(text):
|
| 365 |
+
# logger.debug(f"Negative pattern matched: {regex.pattern} in '{text[:50]}...'")
|
| 366 |
# return True
|
| 367 |
|
| 368 |
# return False
|
|
|
|
| 377 |
|
| 378 |
# # Initialize VADER (NLTK data already downloaded)
|
| 379 |
# sia = SentimentIntensityAnalyzer()
|
| 380 |
+
# logger.info("✓ VADER initialized")
|
| 381 |
|
| 382 |
# # Initialize RoBERTa with caching
|
| 383 |
# cache_dir = settings.model_cache_dir
|
|
|
|
| 395 |
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 396 |
# model.to(device)
|
| 397 |
# model.eval()
|
| 398 |
+
# logger.info(f"✓ RoBERTa initialized on device: {device}")
|
| 399 |
|
| 400 |
# # Initialize summarizer (optional)
|
| 401 |
# if settings.use_abstractive_summary:
|
|
|
|
| 405 |
# model=settings.summarizer_model,
|
| 406 |
# device=0 if device == "cuda" else -1
|
| 407 |
# )
|
| 408 |
+
# logger.info("✓ Summarizer initialized")
|
| 409 |
# except Exception as e:
|
| 410 |
# logger.warning(f"Summarizer initialization failed: {e}")
|
| 411 |
# summarizer = None
|
| 412 |
|
| 413 |
+
# logger.info("✓ All models initialized successfully")
|
| 414 |
|
| 415 |
# except Exception as e:
|
| 416 |
# logger.error(f"Error initializing models: {e}")
|
|
|
|
| 509 |
# roberta_neg = row.get('roberta_neg', 0.0)
|
| 510 |
# roberta_pos = row.get('roberta_pos', 0.0)
|
| 511 |
|
| 512 |
+
# # Priority 1: Heuristic negative patterns override everything
|
| 513 |
# if row.get('heuristic_negative') is True:
|
| 514 |
# return 'Negative'
|
| 515 |
|
| 516 |
+
# # Priority 2: Strong negative signals
|
| 517 |
# if (
|
| 518 |
# vader_compound <= settings.vader_neg_threshold or
|
| 519 |
# roberta_neg >= settings.roberta_neg_threshold or
|
|
|
|
| 521 |
# ):
|
| 522 |
# return 'Negative'
|
| 523 |
|
| 524 |
+
# # Priority 3: Positive signals
|
| 525 |
# if (
|
| 526 |
# vader_compound >= settings.vader_pos_threshold or
|
| 527 |
# roberta_pos >= settings.roberta_pos_threshold or
|
|
|
|
| 529 |
# ):
|
| 530 |
# return 'Positive'
|
| 531 |
|
| 532 |
+
# # Default: Neutral
|
| 533 |
# return 'Neutral'
|
| 534 |
|
| 535 |
# def sanitize_text(text: str) -> str:
|
| 536 |
+
# """Sanitize input text while preserving emojis"""
|
| 537 |
# if not text:
|
| 538 |
# return ""
|
| 539 |
+
# # Remove control characters but keep printable characters and emojis
|
| 540 |
+
# text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text)
|
| 541 |
+
# # Normalize whitespace
|
| 542 |
# text = ' '.join(text.split())
|
|
|
|
| 543 |
# return text.strip()
|
| 544 |
|
| 545 |
# def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]:
|
|
|
|
| 548 |
# settings = get_settings()
|
| 549 |
# logger.info(f"Received {len(comments)} comments for analysis")
|
| 550 |
|
| 551 |
+
# # Sanitize comments
|
| 552 |
# sanitized_comments = [sanitize_text(comment) for comment in comments]
|
| 553 |
|
| 554 |
+
# # FIXED: Changed < to <= to properly handle min_comment_words
|
| 555 |
# filtered_comments = [
|
| 556 |
# comment for comment in sanitized_comments
|
| 557 |
+
# if (settings.min_comment_words <= len(comment.split()) <= settings.max_comment_length)
|
|
|
|
| 558 |
# ]
|
| 559 |
|
| 560 |
# logger.info(f"After filtering: {len(filtered_comments)} valid comments")
|
|
|
|
| 565 |
# "message": "No valid comments found for analysis"
|
| 566 |
# }
|
| 567 |
|
| 568 |
+
# # Create dataframe
|
| 569 |
# df = pd.DataFrame({'comment': filtered_comments})
|
| 570 |
+
|
| 571 |
+
# # Detect meta-comments and explicit negatives
|
| 572 |
+
# df['is_meta'] = df['comment'].apply(is_meta_comment)
|
| 573 |
# df['heuristic_negative'] = df['comment'].apply(is_explicit_negative)
|
| 574 |
|
| 575 |
+
# # Log detection results
|
| 576 |
+
# meta_count = df['is_meta'].sum()
|
| 577 |
+
# heuristic_neg_count = df['heuristic_negative'].sum()
|
| 578 |
+
# logger.info(f"Detected {meta_count} meta-comments and {heuristic_neg_count} heuristic negatives")
|
| 579 |
+
|
| 580 |
+
# # VADER sentiment analysis
|
| 581 |
# vader_results = []
|
| 582 |
# for text in df['comment']:
|
| 583 |
# vader_results.append(vader_sentiment(text))
|
| 584 |
|
| 585 |
+
# # RoBERTa sentiment analysis (batch)
|
| 586 |
# roberta_results = roberta_sentiment_batch(df['comment'].tolist())
|
| 587 |
|
| 588 |
+
# # Combine results
|
| 589 |
# vader_df = pd.DataFrame(vader_results)
|
| 590 |
# roberta_df = pd.DataFrame(roberta_results)
|
| 591 |
# final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1)
|
| 592 |
|
| 593 |
+
# # Calculate combined scores
|
| 594 |
# final_df['combined_pos'] = (
|
| 595 |
# settings.combined_weight_vader * final_df['vader_pos'] +
|
| 596 |
# settings.combined_weight_roberta * final_df['roberta_pos']
|
|
|
|
| 604 |
# settings.combined_weight_roberta * final_df['roberta_neu']
|
| 605 |
# )
|
| 606 |
|
| 607 |
+
# # Classify overall sentiment (meta-comments become Neutral)
|
| 608 |
# final_df['Overall_Sentiment'] = final_df.apply(
|
| 609 |
+
# lambda row: 'Neutral' if row.get('is_meta') else overall_sentiment(row, settings),
|
| 610 |
# axis=1
|
| 611 |
# )
|
| 612 |
|
| 613 |
+
# # Calculate statistics
|
| 614 |
# total_comments = len(final_df)
|
| 615 |
# positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive'])
|
| 616 |
# negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative'])
|
|
|
|
| 621 |
# f"{negative_count} negative, {neutral_count} neutral"
|
| 622 |
# )
|
| 623 |
|
| 624 |
+
# # Average scores
|
| 625 |
# avg_positive = float(final_df['combined_pos'].mean())
|
| 626 |
# avg_negative = float(final_df['combined_neg'].mean())
|
| 627 |
# avg_neutral = float(final_df['combined_neu'].mean())
|
| 628 |
|
| 629 |
+
# # Determine overall sentiment label
|
| 630 |
# if avg_positive > max(avg_negative, avg_neutral):
|
| 631 |
# overall_sentiment_label = "Positive"
|
| 632 |
# elif avg_negative > max(avg_positive, avg_neutral):
|
|
|
|
| 634 |
# else:
|
| 635 |
# overall_sentiment_label = "Neutral"
|
| 636 |
|
| 637 |
+
# # Process negative comments
|
| 638 |
# negative_summary = ""
|
| 639 |
# negative_comments_list = []
|
| 640 |
# negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative']
|
|
|
|
| 643 |
# negative_comments_list = negative_comments['comment'].tolist()
|
| 644 |
|
| 645 |
# try:
|
| 646 |
+
# # Get top negative comments
|
| 647 |
# top_idx = negative_comments['combined_neg'].nlargest(3).index
|
| 648 |
# top_comments = negative_comments.loc[top_idx, 'comment'].tolist()
|
| 649 |
|
|
|
|
| 660 |
# )
|
| 661 |
# negative_summary = summary_result[0]['summary_text']
|
| 662 |
# else:
|
| 663 |
+
# # Extractive summary
|
| 664 |
# negative_summary = "; ".join(top_comments)
|
| 665 |
# except Exception as e:
|
| 666 |
# logger.warning(f"Summary generation failed: {e}")
|
| 667 |
# negative_summary = "; ".join(negative_comments_list[:3])
|
| 668 |
|
| 669 |
+
# # Generate insights and recommendations
|
| 670 |
# insights = []
|
| 671 |
# recommendations = []
|
| 672 |
|
|
|
|
| 747 |
# async def startup_event():
|
| 748 |
# """Initialize models on startup"""
|
| 749 |
# try:
|
| 750 |
+
# logger.info("=" * 80)
|
| 751 |
+
# logger.info(f"Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
| 752 |
+
# logger.info("=" * 80)
|
| 753 |
# initialize_models()
|
| 754 |
+
# logger.info("✓ Service started successfully")
|
| 755 |
+
# logger.info("=" * 80)
|
| 756 |
# except Exception as e:
|
| 757 |
+
# logger.error(f"✗ Startup failed: {e}")
|
| 758 |
# raise e
|
| 759 |
|
| 760 |
# @app.on_event("shutdown")
|
|
|
|
| 768 |
# return {
|
| 769 |
# "service": get_settings().app_name,
|
| 770 |
# "version": get_settings().app_version,
|
| 771 |
+
# "status": "running",
|
| 772 |
+
# "endpoints": {
|
| 773 |
+
# "health": "/health",
|
| 774 |
+
# "analyze": "/analyze-comments",
|
| 775 |
+
# "config": "/config (debug mode only)",
|
| 776 |
+
# "test": "/test"
|
| 777 |
+
# }
|
| 778 |
# }
|
| 779 |
|
| 780 |
# @app.get("/health")
|
|
|
|
| 849 |
|
| 850 |
# @app.get("/config")
|
| 851 |
# async def get_config(settings: Settings = Depends(get_settings)):
|
| 852 |
+
# """Get current configuration (debug mode only)"""
|
| 853 |
# if not settings.debug_mode:
|
| 854 |
# raise HTTPException(status_code=404, detail="Not found")
|
| 855 |
|
| 856 |
# return {
|
| 857 |
# "max_comments_per_request": settings.max_comments_per_request,
|
| 858 |
+
# "max_comment_length": settings.max_comment_length,
|
| 859 |
+
# "min_comment_words": settings.min_comment_words,
|
| 860 |
# "vader_pos_threshold": settings.vader_pos_threshold,
|
| 861 |
# "vader_neg_threshold": settings.vader_neg_threshold,
|
| 862 |
# "roberta_pos_threshold": settings.roberta_pos_threshold,
|
| 863 |
# "roberta_neg_threshold": settings.roberta_neg_threshold,
|
| 864 |
+
# "combined_weight_vader": settings.combined_weight_vader,
|
| 865 |
+
# "combined_weight_roberta": settings.combined_weight_roberta,
|
| 866 |
# "enable_caching": settings.enable_caching,
|
| 867 |
+
# "batch_size": settings.batch_size,
|
| 868 |
+
# "use_abstractive_summary": settings.use_abstractive_summary
|
| 869 |
+
# }
|
| 870 |
+
|
| 871 |
+
# @app.get("/test")
|
| 872 |
+
# async def test_endpoint():
|
| 873 |
+
# """Test endpoint to verify sentiment classification"""
|
| 874 |
+
# test_cases = [
|
| 875 |
+
# "No more comments 😅",
|
| 876 |
+
# "Overall good but too lag",
|
| 877 |
+
# "Not interested to be in her class just we are going for attendance thats it not at all managing time.",
|
| 878 |
+
# "Nothing to say anything just we are going to her class mean, only for attendance",
|
| 879 |
+
# "Excellent teaching! Very clear explanations.",
|
| 880 |
+
# "Good teacher with strong subject knowledge",
|
| 881 |
+
# "Class is okay, nothing special"
|
| 882 |
+
# ]
|
| 883 |
+
|
| 884 |
+
# results = []
|
| 885 |
+
# for text in test_cases:
|
| 886 |
+
# is_meta = is_meta_comment(text)
|
| 887 |
+
# is_neg = is_explicit_negative(text)
|
| 888 |
+
|
| 889 |
+
# # Predict classification
|
| 890 |
+
# if is_meta:
|
| 891 |
+
# predicted = "Neutral (meta-comment)"
|
| 892 |
+
# elif is_neg:
|
| 893 |
+
# predicted = "Negative (heuristic)"
|
| 894 |
+
# else:
|
| 895 |
+
# predicted = "Needs full analysis"
|
| 896 |
+
|
| 897 |
+
# results.append({
|
| 898 |
+
# "text": text,
|
| 899 |
+
# "is_meta_comment": is_meta,
|
| 900 |
+
# "is_heuristic_negative": is_neg,
|
| 901 |
+
# "predicted_classification": predicted
|
| 902 |
+
# })
|
| 903 |
+
|
| 904 |
+
# return {
|
| 905 |
+
# "test_results": results,
|
| 906 |
+
# "note": "Full analysis requires VADER and RoBERTa scores"
|
| 907 |
# }
|
| 908 |
|
| 909 |
# if __name__ == "__main__":
|
|
|
|
| 917 |
|
| 918 |
|
| 919 |
|
| 920 |
+
|
| 921 |
+
|
| 922 |
+
|
| 923 |
+
|
| 924 |
+
|
| 925 |
+
|
| 926 |
+
|
| 927 |
"""
|
| 928 |
Enhanced FastAPI Service for Comment Sentiment Analysis
|
| 929 |
with improved performance, validation, and configuration management
|
|
|
|
| 1156 |
|
| 1157 |
# Enhanced heuristic phrase/regex rules for explicit negative feedback
|
| 1158 |
NEGATIVE_PHRASES = [
    # Literal phrases treated as explicit negative feedback.
    #
    # is_explicit_negative() lower-cases the comment and tests each entry with
    # a plain substring check (`phrase in lower`), so:
    #   * every entry must be lower-case;
    #   * there is no word-boundary protection -- a short entry such as 'lag'
    #     also matches inside longer words (NOTE(review): confirm that is
    #     acceptable, or move such entries to NEGATIVE_REGEXES with \b);
    #   * order only affects which phrase is reported in the debug log.

    # Teaching quality issues
    'very poor',
    'extremely poor',
    'poor in teaching',
    'poor teaching level',
    'poor teaching',
    'bad teacher',
    'bad teaching',
    # is_explicit_negative() ignores this hit when the comment also contains
    # 'no negative' (e.g. "no negative comments ... not good to complain").
    'not good',
    'not satisfied',
    'not satisfactory',

    # Content/delivery issues
    'boring class',
    'boring classes',
    'boring subject',
    'subject is boring',
    'low voice',
    'voice is low',
    'cannot hear',
    "can't hear",
    'speak louder',

    # Resource/support issues
    'need more staff',
    'need more faculty',
    'insufficient staff',
    'lack of staff',
    'not sufficient',
    'insufficient',
    'not enough',
    'no classes',
    'no regular classes',
    'not sufficient classes',

    # Knowledge/understanding issues
    'lack of knowledge',
    'better knowledge needed',
    'poor knowledge',
    'knowledge is lacking',
    'practical knowledge lacking',
    'no practical',
    'lack of practical',
    'no hands-on',
    'no real world',
    'did not understand',
    "didn't understand",
    'not able to understand',
    'unable to understand',
    'difficult to understand',
    'hard to understand',
    'concepts are difficult',
    'concepts difficult',
    'cant understand',
    "can't understand",
    'not understandable',

    # Improvement needed
    'improve class',
    'improvement needed',
    'needs improvement',
    'need improvement',
    'should improve',
    'must improve',
    'not helpful',
    'not clear',
    'communication skills need improvement',
    'improve communication',

    # Pace/time issues
    'lectures are going fast',
    'going too fast',
    'too fast',
    'too slow',
    'too lag',
    'lag',
    'lagging',
    'lag in teaching',
    'not managing time',
    'poor time management',
    'time management issue',

    # Engagement issues
    'not interested',
    # Like 'not good', this hit is ignored by is_explicit_negative() when the
    # comment also contains 'no negative'.
    'no interest',
    'going for attendance',
    'just for attendance',
    'only for attendance',
    'not at all',
    'nothing learnt',
    'learned nothing',
    'no improvement',
    'same teaching',
    'monotonous',
    'sleeping in class',

    # Value/utility issues
    'waste of time',
    'wasting time',
    'waste our time',
    'no use',
    'useless',

    # Administrative issues
    'military rules',
    'strict rules',
    'too strict',
    'very strict',
    'attendance issue',
    'attendance problem',
    'not providing attendance',
    'claim od',

    # Workload issues
    'too many projects',
    'many projects review',
    'trouble to make',
    'difficult to make',
    'hard to make',
    # NOTE(review): intended only for negative contexts, but the phrase loop
    # in is_explicit_negative() applies no context check for this entry, so
    # any mention is flagged -- confirm or restrict via a regex.
    'placement activities',
]
|
| 1280 |
|
| 1281 |
NEGATIVE_REGEXES = [
    # Pre-compiled, case-insensitive patterns that flag explicit negative
    # feedback which the literal NEGATIVE_PHRASES list cannot express
    # (optional words, alternative word forms, flexible whitespace).

    # Teaching quality patterns
    re.compile(r"\b(very|extremely|quite|so)\s+(poor|bad|weak)\s+(in\s+)?(teaching|knowledge|communication)", re.IGNORECASE),
    re.compile(r"\bpoor\s+(teaching|teacher|faculty|knowledge|communication)", re.IGNORECASE),
    re.compile(r"\b(teaching|knowledge)\s+(is\s+)?(poor|bad|weak|lacking)", re.IGNORECASE),

    # Boring/engagement patterns
    re.compile(r"\b(boring|dull|monotonous)\s+(class|classes|subject|lecture|lectures)", re.IGNORECASE),
    re.compile(r"\b(class|classes|subject|lecture|lectures)\s+(is|are)\s+(boring|dull|monotonous)", re.IGNORECASE),

    # Voice/communication patterns
    re.compile(r"\b(low|soft|quiet)\s+voice\b", re.IGNORECASE),
    re.compile(r"\bvoice\s+(is\s+)?(low|soft|quiet|not clear)", re.IGNORECASE),
    re.compile(r"\b(cannot|can't|cant|unable to)\s+hear", re.IGNORECASE),

    # Resource/support patterns
    re.compile(r"\b(no|not|insufficient|lack of)\s+(proper|sufficient|enough|regular)?\s*(classes|notes|support|staff|faculty)", re.IGNORECASE),
    re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support|classes)", re.IGNORECASE),

    # Understanding/clarity patterns
    re.compile(r"\b(cannot|can't|cant|unable to|difficult to|hard to)\s+understand", re.IGNORECASE),
    re.compile(r"\b(not|difficult|hard)\s+(able\s+to\s+)?understand(\s+the)?(\s+(concepts?|teaching|lectures?))?", re.IGNORECASE),
    re.compile(r"\bconcepts?\s+(are\s+)?(difficult|hard|tough|complex)\s+to\s+understand", re.IGNORECASE),

    # Improvement patterns
    re.compile(r"\b(need|needs|needed|require|requires)\s+(some\s+)?(improvement|to improve)", re.IGNORECASE),
    re.compile(r"\b(should|must|have to)\s+improve", re.IGNORECASE),
    re.compile(r"\bimprovement\s+(is\s+)?need(ed)?", re.IGNORECASE),

    # Pace patterns
    re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(is|are|going)\s+(too|very)\s+(fast|slow)", re.IGNORECASE),
    re.compile(r"\b(too|very)\s+(fast|slow|lag|lagging)", re.IGNORECASE),

    # Time management patterns
    # The second alternative used to repeat "managing"; "manage" is the
    # missing verb form, so "does not manage time" is now caught as well.
    re.compile(r"\b(not|poor|bad)\s+(managing|manage)\s+time", re.IGNORECASE),
    re.compile(r"\btime\s+management\s+(is\s+)?(poor|bad|lacking)", re.IGNORECASE),

    # Attendance/engagement patterns
    re.compile(r"\b(just|only)\s+(for|going for)\s+attendance", re.IGNORECASE),
    re.compile(r"\b(going|attend|attending)\s+(to|for)\s+(her|his|their)\s+class\s+(just|only)\s+for\s+attendance", re.IGNORECASE),
    re.compile(r"\bnot\s+(at\s+all\s+)?(interested|engaging|helpful)", re.IGNORECASE),

    # Value patterns
    re.compile(r"\b(waste|wasting)\s+(of\s+)?time", re.IGNORECASE),
    re.compile(r"\b(no\s+use|useless|not useful)", re.IGNORECASE),

    # Workload patterns
    re.compile(r"\b(too\s+)?many\s+projects", re.IGNORECASE),
    re.compile(r"\btrouble\s+to\s+(make|complete|do)", re.IGNORECASE),

    # Administrative patterns
    re.compile(r"\bmilitary\s+rules", re.IGNORECASE),
    re.compile(r"\b(too|very)\s+strict", re.IGNORECASE),
    re.compile(r"\battendance\s+(issue|problem)", re.IGNORECASE),
    re.compile(r"\bnot\s+providing\s+attendance", re.IGNORECASE),
    re.compile(r"\bclaim\s+od", re.IGNORECASE),

    # Placement/scheduling patterns
    re.compile(r"\bplacement\s+activities\s+(and|with)\s+(attendance|issue|problem)", re.IGNORECASE),
    re.compile(r"\b(class|classes)\s+(intersecting|conflicting)\s+with\s+placement", re.IGNORECASE),
]
|
| 1342 |
|
| 1343 |
# Negative lookahead appended to the open-ended (prefix-only) meta patterns.
# is_meta_comment() applies these with .match(), so a prefix pattern would
# otherwise accept a comment that merely *starts* like a meta-comment and then
# carries real feedback ("Nothing to say anything just ... only for
# attendance", "No negative comments but voice is low"). When a contrast
# marker appears anywhere after the matched prefix the pattern is rejected and
# the comment falls through to the normal sentiment analysis.
# [\s\S]* is used instead of .* so the check also spans line breaks.
_META_CONTRAST_GUARD = r"(?![\s\S]*\b(?:but|however|except|though|although|just|only)\b)"

META_COMMENT_PATTERNS = [
    # Patterns for generic remarks that carry no substantive feedback.
    # Patterns ending in `$` must cover the whole comment; the remaining
    # prefix patterns are protected by _META_CONTRAST_GUARD.

    # "No negative comments" / "no issues" patterns
    re.compile(r"^no\s+negative\s+(comments?|feedback|remarks?)" + _META_CONTRAST_GUARD, re.IGNORECASE),
    re.compile(r"^no\s+negative\s+comments?\s+on\s+the\s+(faculty|teacher|staff|course)" + _META_CONTRAST_GUARD, re.IGNORECASE),
    re.compile(r"^no\s+(issues?|problems?|complaints?)\.?$", re.IGNORECASE),
    re.compile(r"^no\s+(issues?|problems?|complaints?)\s+(at\s+all|whatsoever)" + _META_CONTRAST_GUARD, re.IGNORECASE),

    # "Everything is good" patterns
    re.compile(r"^(everything|all)\s+(is\s+)?(good|fine|ok|okay|great|perfect|excellent)" + _META_CONTRAST_GUARD, re.IGNORECASE),
    re.compile(r"^no,?\s+(everything|all)\s+(is\s+)?(good|fine|ok|okay)" + _META_CONTRAST_GUARD, re.IGNORECASE),
    re.compile(r"^(all\s+)?good\.?$", re.IGNORECASE),
    # Verbatim response observed in the collected feedback data; left
    # unguarded because the phrase itself ends in "only".
    re.compile(r"^everything\s+at\s+the\s+too\s+only", re.IGNORECASE),

    # "Nothing" patterns
    re.compile(r"^nothing\.?$", re.IGNORECASE),
    re.compile(r"^nothing\s+(to\s+)?(say|comment|mention|add)" + _META_CONTRAST_GUARD, re.IGNORECASE),
    re.compile(r"^nothing,?\s+(and\s+)?(all|everything)\s+(is\s+)?(good|fine)" + _META_CONTRAST_GUARD, re.IGNORECASE),

    # "No more comments" patterns
    re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)" + _META_CONTRAST_GUARD, re.IGNORECASE),
    re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)" + _META_CONTRAST_GUARD, re.IGNORECASE),
    re.compile(r"^no\s+remarks?(\s+(about|on))?" + _META_CONTRAST_GUARD, re.IGNORECASE),

    # Empty/nil responses
    re.compile(r"^(nil|none|na|n/a|nill)\.?$", re.IGNORECASE),
    re.compile(r"^(no|nothing|none)\.?$", re.IGNORECASE),

    # Positive meta-comments (not actual feedback)
    re.compile(r"^(it's\s+|its\s+)?(all\s+)?good\.?$", re.IGNORECASE),
    re.compile(r"^fine\.?$", re.IGNORECASE),
    re.compile(r"^ok(ay)?\.?$", re.IGNORECASE),
    re.compile(r"^great\.?$", re.IGNORECASE),
    re.compile(r"^nice\.?$", re.IGNORECASE),
]
|
| 1376 |
|
| 1377 |
def is_meta_comment(text: str) -> bool:
|
| 1378 |
+
"""
|
| 1379 |
+
Check if comment is a meta-comment (not actual feedback).
|
| 1380 |
+
These are generic statements that don't provide substantive feedback.
|
| 1381 |
+
"""
|
| 1382 |
if not text:
|
| 1383 |
+
return True # Empty text is meta
|
| 1384 |
+
|
| 1385 |
text = text.strip()
|
| 1386 |
|
| 1387 |
+
# Check length - very short comments are likely meta
|
| 1388 |
+
if len(text) < 3:
|
| 1389 |
+
logger.debug(f"Meta-comment (too short): '{text}'")
|
| 1390 |
+
return True
|
| 1391 |
+
|
| 1392 |
+
# Check against patterns
|
| 1393 |
for pattern in META_COMMENT_PATTERNS:
|
| 1394 |
if pattern.match(text):
|
| 1395 |
logger.debug(f"Meta-comment detected: '{text[:50]}...'")
|
|
|
|
| 1398 |
return False
|
| 1399 |
|
| 1400 |
def is_explicit_negative(text: str) -> bool:
|
| 1401 |
+
"""
|
| 1402 |
+
Check if text contains explicit negative phrases.
|
| 1403 |
+
IMPORTANT: Must check if it's a meta-comment FIRST.
|
| 1404 |
+
"""
|
| 1405 |
if not text:
|
| 1406 |
return False
|
| 1407 |
+
|
| 1408 |
+
# CRITICAL: Don't classify meta-comments as negative
|
| 1409 |
+
if is_meta_comment(text):
|
| 1410 |
+
return False
|
| 1411 |
+
|
| 1412 |
lower = text.lower()
|
| 1413 |
|
| 1414 |
# Check phrases
|
| 1415 |
for phrase in NEGATIVE_PHRASES:
|
| 1416 |
if phrase in lower:
|
| 1417 |
+
# Double-check it's not a false positive like "no negative comments"
|
| 1418 |
+
if phrase == 'not good' and 'no negative' in lower:
|
| 1419 |
+
continue
|
| 1420 |
+
if phrase == 'no interest' and 'no negative' in lower:
|
| 1421 |
+
continue
|
| 1422 |
+
|
| 1423 |
logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'")
|
| 1424 |
return True
|
| 1425 |
|
|
|
|
| 1978 |
log_level="info"
|
| 1979 |
)
|
| 1980 |
|
| 1981 |
+
|
| 1982 |
+
|