Spaces:

kushvanth
/

iqac_fast_api

Sleeping

App Files Community

kushvanth committed on Nov 9, 2025

Commit

68aba94

verified ·

1 Parent(s): f02f5b9

Update fastapi_example.py

Browse files

Files changed (1) hide show

fastapi_example.py +389 -87

fastapi_example.py CHANGED Viewed

@@ -1,6 +1,10 @@
 # """
 # Enhanced FastAPI Service for Comment Sentiment Analysis
 # with improved performance, validation, and configuration management
 # """
 # from fastapi import FastAPI, HTTPException, Depends
@@ -80,13 +84,13 @@
 #     """Application settings with environment variable support"""
 #     # API Settings
 #     app_name: str = "Comment Analysis API"
-#     app_version: str = "2.0.0"
 #     debug_mode: bool = False
 #     # Request Limits
 #     max_comments_per_request: int = 1000
 #     max_comment_length: int = 5000
-#     min_comment_words: int = 2
 #     # Sentiment Thresholds
 #     vader_pos_threshold: float = 0.2
@@ -113,11 +117,30 @@
 #         env_file = ".env"
 #         env_file_encoding = 'utf-8'
 #         extra = 'ignore'
 # @lru_cache()
 # def get_settings() -> Settings:
 #     """Cached settings instance"""
-#     return Settings()
 # # Pydantic Models
 # class FacultyInfo(BaseModel):
@@ -208,7 +231,7 @@
 # device = None
 # summarizer = None
-# # Heuristic phrase/regex rules for explicit negative feedback
 # NEGATIVE_PHRASES = [
 #     'need more staff',
 #     'need more faculty',
@@ -225,6 +248,7 @@
 #     'boring class',
 #     'boring classes',
 #     'waste of time',
 #     'bad teacher',
 #     'bad teaching',
 #     'poor teaching',
@@ -238,6 +262,8 @@
 #     'lectures are going fast',
 #     'going too fast',
 #     'too fast',
 #     'lacking',
 #     'is lacking',
 #     'knowledge is lacking',
@@ -245,26 +271,67 @@
 #     'no practical',
 #     'lack of practical',
 #     'no hands-on',
-#     'no real world'
 # ]
 # NEGATIVE_REGEXES = [
 #     re.compile(r"\bno\s+(proper|sufficient)\s+(classes|notes|support)\b", re.IGNORECASE),
 #     re.compile(r"\bno\s+staff\b", re.IGNORECASE),
 #     re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support)\b", re.IGNORECASE),
-#     re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(are\s+)?(too|very)\s+fast\b", re.IGNORECASE),
-#     re.compile(r"\blectures?\s+are\s+going\s+fast\b", re.IGNORECASE),
 #     re.compile(r"\b(require|needs?|needed)\s+(some\s+)?improv(e|ement)s?\s+(in|of)?\s*communication(\s+skills?)?\b", re.IGNORECASE),
 #     re.compile(r"\b(is\s+)?lacking\b", re.IGNORECASE),
-#     re.compile(r"\bno\s+(practical|hands-on|real-world)\b", re.IGNORECASE)
 # ]
 # META_COMMENT_PATTERNS = [
-#     re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)$", re.IGNORECASE),
-#     re.compile(r"^no\s+remarks?\s+(about|on)", re.IGNORECASE),
-#     re.compile(r"^nil$", re.IGNORECASE),
-#     re.compile(r"^none$", re.IGNORECASE),
-#     re.compile(r"^n/?a$", re.IGNORECASE)
 # ]
 # def is_meta_comment(text: str) -> bool:
@@ -275,22 +342,27 @@
 #     for pattern in META_COMMENT_PATTERNS:
 #         if pattern.match(text):
 #             return True
 #     return False
 # def is_explicit_negative(text: str) -> bool:
-#     """Check if text contains explicit negative phrases"""
 #     if not text:
 #         return False
 #     lower = text.lower()
 #     for phrase in NEGATIVE_PHRASES:
 #         if phrase in lower:
 #             return True
 #     for regex in NEGATIVE_REGEXES:
 #         if regex.search(text):
 #             return True
 #     return False
@@ -305,7 +377,7 @@
 #         # Initialize VADER (NLTK data already downloaded)
 #         sia = SentimentIntensityAnalyzer()
-#         logger.info("VADER initialized")
 #         # Initialize RoBERTa with caching
 #         cache_dir = settings.model_cache_dir
@@ -323,7 +395,7 @@
 #         device = "cuda" if torch.cuda.is_available() else "cpu"
 #         model.to(device)
 #         model.eval()
-#         logger.info(f"RoBERTa initialized on device: {device}")
 #         # Initialize summarizer (optional)
 #         if settings.use_abstractive_summary:
@@ -333,12 +405,12 @@
 #                     model=settings.summarizer_model,
 #                     device=0 if device == "cuda" else -1
 #                 )
-#                 logger.info("Summarizer initialized")
 #             except Exception as e:
 #                 logger.warning(f"Summarizer initialization failed: {e}")
 #                 summarizer = None
-#         logger.info("All models initialized successfully")
 #     except Exception as e:
 #         logger.error(f"Error initializing models: {e}")
@@ -437,9 +509,11 @@
 #     roberta_neg = row.get('roberta_neg', 0.0)
 #     roberta_pos = row.get('roberta_pos', 0.0)
 #     if row.get('heuristic_negative') is True:
 #         return 'Negative'
 #     if (
 #         vader_compound <= settings.vader_neg_threshold or
 #         roberta_neg >= settings.roberta_neg_threshold or
@@ -447,6 +521,7 @@
 #     ):
 #         return 'Negative'
 #     if (
 #         vader_compound >= settings.vader_pos_threshold or
 #         roberta_pos >= settings.roberta_pos_threshold or
@@ -454,14 +529,17 @@
 #     ):
 #         return 'Positive'
 #     return 'Neutral'
 # def sanitize_text(text: str) -> str:
-#     """Sanitize input text"""
 #     if not text:
 #         return ""
 #     text = ' '.join(text.split())
-#     text = ''.join(char for char in text if ord(char) >= 32 or char == '\n')
 #     return text.strip()
 # def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]:
@@ -470,12 +548,13 @@
 #         settings = get_settings()
 #         logger.info(f"Received {len(comments)} comments for analysis")
 #         sanitized_comments = [sanitize_text(comment) for comment in comments]
 #         filtered_comments = [
 #             comment for comment in sanitized_comments
-#             if (settings.min_comment_words < len(comment.split()) <= settings.max_comment_length
-#                 and not is_meta_comment(comment))
 #         ]
 #         logger.info(f"After filtering: {len(filtered_comments)} valid comments")
@@ -486,19 +565,32 @@
 #                 "message": "No valid comments found for analysis"
 #             }
 #         df = pd.DataFrame({'comment': filtered_comments})
 #         df['heuristic_negative'] = df['comment'].apply(is_explicit_negative)
 #         vader_results = []
 #         for text in df['comment']:
 #             vader_results.append(vader_sentiment(text))
 #         roberta_results = roberta_sentiment_batch(df['comment'].tolist())
 #         vader_df = pd.DataFrame(vader_results)
 #         roberta_df = pd.DataFrame(roberta_results)
 #         final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1)
 #         final_df['combined_pos'] = (
 #             settings.combined_weight_vader * final_df['vader_pos'] +
 #             settings.combined_weight_roberta * final_df['roberta_pos']
@@ -512,11 +604,13 @@
 #             settings.combined_weight_roberta * final_df['roberta_neu']
 #         )
 #         final_df['Overall_Sentiment'] = final_df.apply(
-#             lambda row: overall_sentiment(row, settings),
 #             axis=1
 #         )
 #         total_comments = len(final_df)
 #         positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive'])
 #         negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative'])
@@ -527,10 +621,12 @@
 #             f"{negative_count} negative, {neutral_count} neutral"
 #         )
 #         avg_positive = float(final_df['combined_pos'].mean())
 #         avg_negative = float(final_df['combined_neg'].mean())
 #         avg_neutral = float(final_df['combined_neu'].mean())
 #         if avg_positive > max(avg_negative, avg_neutral):
 #             overall_sentiment_label = "Positive"
 #         elif avg_negative > max(avg_positive, avg_neutral):
@@ -538,6 +634,7 @@
 #         else:
 #             overall_sentiment_label = "Neutral"
 #         negative_summary = ""
 #         negative_comments_list = []
 #         negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative']
@@ -546,6 +643,7 @@
 #             negative_comments_list = negative_comments['comment'].tolist()
 #             try:
 #                 top_idx = negative_comments['combined_neg'].nlargest(3).index
 #                 top_comments = negative_comments.loc[top_idx, 'comment'].tolist()
@@ -562,11 +660,13 @@
 #                     )
 #                     negative_summary = summary_result[0]['summary_text']
 #                 else:
 #                     negative_summary = "; ".join(top_comments)
 #             except Exception as e:
 #                 logger.warning(f"Summary generation failed: {e}")
 #                 negative_summary = "; ".join(negative_comments_list[:3])
 #         insights = []
 #         recommendations = []
@@ -647,11 +747,14 @@
 # async def startup_event():
 #     """Initialize models on startup"""
 #     try:
-#         logger.info("===== Application Startup at {} =====".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
 #         initialize_models()
-#         logger.info("Service started successfully")
 #     except Exception as e:
-#         logger.error(f"Startup failed: {e}")
 #         raise e
 # @app.on_event("shutdown")
@@ -665,7 +768,13 @@
 #     return {
 #         "service": get_settings().app_name,
 #         "version": get_settings().app_version,
-#         "status": "running"
 #     }
 # @app.get("/health")
@@ -740,18 +849,61 @@
 # @app.get("/config")
 # async def get_config(settings: Settings = Depends(get_settings)):
-#     """Get current configuration"""
 #     if not settings.debug_mode:
 #         raise HTTPException(status_code=404, detail="Not found")
 #     return {
 #         "max_comments_per_request": settings.max_comments_per_request,
 #         "vader_pos_threshold": settings.vader_pos_threshold,
 #         "vader_neg_threshold": settings.vader_neg_threshold,
 #         "roberta_pos_threshold": settings.roberta_pos_threshold,
 #         "roberta_neg_threshold": settings.roberta_neg_threshold,
 #         "enable_caching": settings.enable_caching,
-#         "batch_size": settings.batch_size
 #     }
 # if __name__ == "__main__":
@@ -765,6 +917,13 @@
 """
 Enhanced FastAPI Service for Comment Sentiment Analysis
 with improved performance, validation, and configuration management
@@ -997,113 +1156,240 @@ summarizer = None
 # Enhanced heuristic phrase/regex rules for explicit negative feedback
 NEGATIVE_PHRASES = [
     'need more staff',
     'need more faculty',
     'insufficient staff',
     'lack of staff',
     'lack of knowledge',
     'better knowledge needed',
     'poor knowledge',
-    'not good',
-    'not satisfied',
-    'not satisfactory',
-    'no classes',
-    'no regular classes',
-    'boring class',
-    'boring classes',
-    'waste of time',
-    'wasting time',
-    'bad teacher',
-    'bad teaching',
-    'poor teaching',
     'improve class',
     'improvement needed',
     'needs improvement',
     'not helpful',
     'not clear',
     'communication skills need improvement',
     'improve communication',
     'lectures are going fast',
     'going too fast',
     'too fast',
     'too slow',
-    'too boring',
-    'lacking',
-    'is lacking',
-    'knowledge is lacking',
-    'practical knowledge lacking',
-    'no practical',
-    'lack of practical',
-    'no hands-on',
-    'no real world',
     'too lag',
     'lag',
     'lagging',
     'lag in teaching',
-    'not interested',
-    'no interest',
     'not managing time',
     'poor time management',
-    'time management',
-    'not at all',
     'going for attendance',
     'just for attendance',
     'only for attendance',
-    'cant understand',
-    "can't understand",
-    'not understandable',
     'nothing learnt',
     'learned nothing',
     'no improvement',
     'same teaching',
     'monotonous',
     'sleeping in class',
     'no use',
     'useless',
-    'waste our time'
 ]
 NEGATIVE_REGEXES = [
-    re.compile(r"\bno\s+(proper|sufficient)\s+(classes|notes|support)\b", re.IGNORECASE),
-    re.compile(r"\bno\s+staff\b", re.IGNORECASE),
-    re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support)\b", re.IGNORECASE),
-    re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(are\s+)?(too|very)\s+(fast|slow|boring)\b", re.IGNORECASE),
-    re.compile(r"\blectures?\s+are\s+going\s+(too\s+)?fast\b", re.IGNORECASE),
-    re.compile(r"\b(require|needs?|needed)\s+(some\s+)?improv(e|ement)s?\s+(in|of)?\s*communication(\s+skills?)?\b", re.IGNORECASE),
-    re.compile(r"\b(is\s+)?lacking\b", re.IGNORECASE),
-    re.compile(r"\bno\s+(practical|hands-on|real-world)\b", re.IGNORECASE),
-    re.compile(r"\btoo\s+(lag|lagging?|slow|boring)\b", re.IGNORECASE),
-    re.compile(r"\b(not\s+)?managing\s+time\b", re.IGNORECASE),
-    re.compile(r"\btime\s+management", re.IGNORECASE),
-    re.compile(r"\bnot\s+interested(\s+in|\s+to)?\b", re.IGNORECASE),
-    re.compile(r"\bno\s+interest\b", re.IGNORECASE),
-    re.compile(r"\b(just\s+|only\s+)?for\s+attendance\b", re.IGNORECASE),
-    re.compile(r"\b(just\s+)?going\s+(to|for)\s+(her|his|their)\s+class\b", re.IGNORECASE),
-    re.compile(r"\bnot\s+at\s+all\b", re.IGNORECASE),
-    re.compile(r"\b(overall|its?)\s+(is\s+)?good\s+but\b", re.IGNORECASE),  # "good but" often precedes criticism
-    re.compile(r"\bcan'?t\s+understand", re.IGNORECASE),
-    re.compile(r"\bwaste\s+(of\s+)?time\b", re.IGNORECASE),
-    re.compile(r"\bno\s+use(ful)?\b", re.IGNORECASE),
-    re.compile(r"\buseless\b", re.IGNORECASE)
 ]
 META_COMMENT_PATTERNS = [
     re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE),
     re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE),
-    re.compile(r"^nothing\s+to\s+(say|comment|mention)", re.IGNORECASE),
     re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE),
-    re.compile(r"^(nil|none|na|n/a)$", re.IGNORECASE),
-    re.compile(r"^(no|nothing)\.?$", re.IGNORECASE),
-    re.compile(r"^everything\s+(is\s+)?(good|fine|ok|okay)", re.IGNORECASE),
-    re.compile(r"^(all\s+)?good$", re.IGNORECASE)
 ]
 def is_meta_comment(text: str) -> bool:
-    """Check if comment is a meta-comment (not actual feedback)"""
     if not text:
-        return False
     text = text.strip()
     for pattern in META_COMMENT_PATTERNS:
         if pattern.match(text):
             logger.debug(f"Meta-comment detected: '{text[:50]}...'")
@@ -1112,14 +1398,28 @@ def is_meta_comment(text: str) -> bool:
     return False
 def is_explicit_negative(text: str) -> bool:
-    """Check if text contains explicit negative phrases with logging"""
     if not text:
         return False
     lower = text.lower()
     # Check phrases
     for phrase in NEGATIVE_PHRASES:
         if phrase in lower:
             logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'")
             return True
@@ -1678,3 +1978,5 @@ if __name__ == "__main__":
         log_level="info"
     )

 # """
 # Enhanced FastAPI Service for Comment Sentiment Analysis
 # with improved performance, validation, and configuration management
+# Version 2.1.0 - Updated with bug fixes and improvements
 # """
 # from fastapi import FastAPI, HTTPException, Depends
 #     """Application settings with environment variable support"""
 #     # API Settings
 #     app_name: str = "Comment Analysis API"
+#     app_version: str = "2.1.0"
 #     debug_mode: bool = False
 #     # Request Limits
 #     max_comments_per_request: int = 1000
 #     max_comment_length: int = 5000
+#     min_comment_words: int = 1
 #     # Sentiment Thresholds
 #     vader_pos_threshold: float = 0.2
 #         env_file = ".env"
 #         env_file_encoding = 'utf-8'
 #         extra = 'ignore'
+#     @validator('min_comment_words')
+#     def validate_min_words(cls, v):
+#         if v < 0:
+#             raise ValueError('min_comment_words must be non-negative')
+#         return v
+#     @validator('combined_weight_vader', 'combined_weight_roberta')
+#     def validate_weights(cls, v):
+#         if not 0 <= v <= 1:
+#             raise ValueError('Weights must be between 0 and 1')
+#         return v
 # @lru_cache()
 # def get_settings() -> Settings:
 #     """Cached settings instance"""
+#     settings = Settings()
+#     # Normalize weights if needed
+#     total = settings.combined_weight_vader + settings.combined_weight_roberta
+#     if not (0.99 <= total <= 1.01):
+#         logger.warning(f"Weights sum to {total}, normalizing to 1.0")
+#         settings.combined_weight_vader /= total
+#         settings.combined_weight_roberta /= total
+#     return settings
 # # Pydantic Models
 # class FacultyInfo(BaseModel):
 # device = None
 # summarizer = None
+# # Enhanced heuristic phrase/regex rules for explicit negative feedback
 # NEGATIVE_PHRASES = [
 #     'need more staff',
 #     'need more faculty',
 #     'boring class',
 #     'boring classes',
 #     'waste of time',
+#     'wasting time',
 #     'bad teacher',
 #     'bad teaching',
 #     'poor teaching',
 #     'lectures are going fast',
 #     'going too fast',
 #     'too fast',
+#     'too slow',
+#     'too boring',
 #     'lacking',
 #     'is lacking',
 #     'knowledge is lacking',
 #     'no practical',
 #     'lack of practical',
 #     'no hands-on',
+#     'no real world',
+#     'too lag',
+#     'lag',
+#     'lagging',
+#     'lag in teaching',
+#     'not interested',
+#     'no interest',
+#     'not managing time',
+#     'poor time management',
+#     'time management',
+#     'not at all',
+#     'going for attendance',
+#     'just for attendance',
+#     'only for attendance',
+#     'cant understand',
+#     "can't understand",
+#     'not understandable',
+#     'nothing learnt',
+#     'learned nothing',
+#     'no improvement',
+#     'same teaching',
+#     'monotonous',
+#     'sleeping in class',
+#     'no use',
+#     'useless',
+#     'waste our time'
 # ]
 # NEGATIVE_REGEXES = [
 #     re.compile(r"\bno\s+(proper|sufficient)\s+(classes|notes|support)\b", re.IGNORECASE),
 #     re.compile(r"\bno\s+staff\b", re.IGNORECASE),
 #     re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support)\b", re.IGNORECASE),
+#     re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(are\s+)?(too|very)\s+(fast|slow|boring)\b", re.IGNORECASE),
+#     re.compile(r"\blectures?\s+are\s+going\s+(too\s+)?fast\b", re.IGNORECASE),
 #     re.compile(r"\b(require|needs?|needed)\s+(some\s+)?improv(e|ement)s?\s+(in|of)?\s*communication(\s+skills?)?\b", re.IGNORECASE),
 #     re.compile(r"\b(is\s+)?lacking\b", re.IGNORECASE),
+#     re.compile(r"\bno\s+(practical|hands-on|real-world)\b", re.IGNORECASE),
+#     re.compile(r"\btoo\s+(lag|lagging?|slow|boring)\b", re.IGNORECASE),
+#     re.compile(r"\b(not\s+)?managing\s+time\b", re.IGNORECASE),
+#     re.compile(r"\btime\s+management", re.IGNORECASE),
+#     re.compile(r"\bnot\s+interested(\s+in|\s+to)?\b", re.IGNORECASE),
+#     re.compile(r"\bno\s+interest\b", re.IGNORECASE),
+#     re.compile(r"\b(just\s+|only\s+)?for\s+attendance\b", re.IGNORECASE),
+#     re.compile(r"\b(just\s+)?going\s+(to|for)\s+(her|his|their)\s+class\b", re.IGNORECASE),
+#     re.compile(r"\bnot\s+at\s+all\b", re.IGNORECASE),
+#     re.compile(r"\b(overall|its?)\s+(is\s+)?good\s+but\b", re.IGNORECASE),  # "good but" often precedes criticism
+#     re.compile(r"\bcan'?t\s+understand", re.IGNORECASE),
+#     re.compile(r"\bwaste\s+(of\s+)?time\b", re.IGNORECASE),
+#     re.compile(r"\bno\s+use(ful)?\b", re.IGNORECASE),
+#     re.compile(r"\buseless\b", re.IGNORECASE)
 # ]
 # META_COMMENT_PATTERNS = [
+#     re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE),
+#     re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE),
+#     re.compile(r"^nothing\s+to\s+(say|comment|mention)", re.IGNORECASE),
+#     re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE),
+#     re.compile(r"^(nil|none|na|n/a)$", re.IGNORECASE),
+#     re.compile(r"^(no|nothing)\.?$", re.IGNORECASE),
+#     re.compile(r"^everything\s+(is\s+)?(good|fine|ok|okay)", re.IGNORECASE),
+#     re.compile(r"^(all\s+)?good$", re.IGNORECASE)
 # ]
 # def is_meta_comment(text: str) -> bool:
 #     for pattern in META_COMMENT_PATTERNS:
 #         if pattern.match(text):
+#             logger.debug(f"Meta-comment detected: '{text[:50]}...'")
 #             return True
 #     return False
 # def is_explicit_negative(text: str) -> bool:
+#     """Check if text contains explicit negative phrases with logging"""
 #     if not text:
 #         return False
 #     lower = text.lower()
+#     # Check phrases
 #     for phrase in NEGATIVE_PHRASES:
 #         if phrase in lower:
+#             logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'")
 #             return True
+#     # Check regexes
 #     for regex in NEGATIVE_REGEXES:
 #         if regex.search(text):
+#             logger.debug(f"Negative pattern matched: {regex.pattern} in '{text[:50]}...'")
 #             return True
 #     return False
 #         # Initialize VADER (NLTK data already downloaded)
 #         sia = SentimentIntensityAnalyzer()
+#         logger.info("✓ VADER initialized")
 #         # Initialize RoBERTa with caching
 #         cache_dir = settings.model_cache_dir
 #         device = "cuda" if torch.cuda.is_available() else "cpu"
 #         model.to(device)
 #         model.eval()
+#         logger.info(f"✓ RoBERTa initialized on device: {device}")
 #         # Initialize summarizer (optional)
 #         if settings.use_abstractive_summary:
 #                     model=settings.summarizer_model,
 #                     device=0 if device == "cuda" else -1
 #                 )
+#                 logger.info("✓ Summarizer initialized")
 #             except Exception as e:
 #                 logger.warning(f"Summarizer initialization failed: {e}")
 #                 summarizer = None
+#         logger.info("✓ All models initialized successfully")
 #     except Exception as e:
 #         logger.error(f"Error initializing models: {e}")
 #     roberta_neg = row.get('roberta_neg', 0.0)
 #     roberta_pos = row.get('roberta_pos', 0.0)
+#     # Priority 1: Heuristic negative patterns override everything
 #     if row.get('heuristic_negative') is True:
 #         return 'Negative'
+#     # Priority 2: Strong negative signals
 #     if (
 #         vader_compound <= settings.vader_neg_threshold or
 #         roberta_neg >= settings.roberta_neg_threshold or
 #     ):
 #         return 'Negative'
+#     # Priority 3: Positive signals
 #     if (
 #         vader_compound >= settings.vader_pos_threshold or
 #         roberta_pos >= settings.roberta_pos_threshold or
 #     ):
 #         return 'Positive'
+#     # Default: Neutral
 #     return 'Neutral'
 # def sanitize_text(text: str) -> str:
+#     """Sanitize input text while preserving emojis"""
 #     if not text:
 #         return ""
+#     # Remove control characters but keep printable characters and emojis
+#     text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text)
+#     # Normalize whitespace
 #     text = ' '.join(text.split())
 #     return text.strip()
 # def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]:
 #         settings = get_settings()
 #         logger.info(f"Received {len(comments)} comments for analysis")
+#         # Sanitize comments
 #         sanitized_comments = [sanitize_text(comment) for comment in comments]
+#         # FIXED: Changed < to <= to properly handle min_comment_words
 #         filtered_comments = [
 #             comment for comment in sanitized_comments
+#             if (settings.min_comment_words <= len(comment.split()) <= settings.max_comment_length)
 #         ]
 #         logger.info(f"After filtering: {len(filtered_comments)} valid comments")
 #                 "message": "No valid comments found for analysis"
 #             }
+#         # Create dataframe
 #         df = pd.DataFrame({'comment': filtered_comments})
+#         # Detect meta-comments and explicit negatives
+#         df['is_meta'] = df['comment'].apply(is_meta_comment)
 #         df['heuristic_negative'] = df['comment'].apply(is_explicit_negative)
+#         # Log detection results
+#         meta_count = df['is_meta'].sum()
+#         heuristic_neg_count = df['heuristic_negative'].sum()
+#         logger.info(f"Detected {meta_count} meta-comments and {heuristic_neg_count} heuristic negatives")
+#         # VADER sentiment analysis
 #         vader_results = []
 #         for text in df['comment']:
 #             vader_results.append(vader_sentiment(text))
+#         # RoBERTa sentiment analysis (batch)
 #         roberta_results = roberta_sentiment_batch(df['comment'].tolist())
+#         # Combine results
 #         vader_df = pd.DataFrame(vader_results)
 #         roberta_df = pd.DataFrame(roberta_results)
 #         final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1)
+#         # Calculate combined scores
 #         final_df['combined_pos'] = (
 #             settings.combined_weight_vader * final_df['vader_pos'] +
 #             settings.combined_weight_roberta * final_df['roberta_pos']
 #             settings.combined_weight_roberta * final_df['roberta_neu']
 #         )
+#         # Classify overall sentiment (meta-comments become Neutral)
 #         final_df['Overall_Sentiment'] = final_df.apply(
+#             lambda row: 'Neutral' if row.get('is_meta') else overall_sentiment(row, settings),
 #             axis=1
 #         )
+#         # Calculate statistics
 #         total_comments = len(final_df)
 #         positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive'])
 #         negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative'])
 #             f"{negative_count} negative, {neutral_count} neutral"
 #         )
+#         # Average scores
 #         avg_positive = float(final_df['combined_pos'].mean())
 #         avg_negative = float(final_df['combined_neg'].mean())
 #         avg_neutral = float(final_df['combined_neu'].mean())
+#         # Determine overall sentiment label
 #         if avg_positive > max(avg_negative, avg_neutral):
 #             overall_sentiment_label = "Positive"
 #         elif avg_negative > max(avg_positive, avg_neutral):
 #         else:
 #             overall_sentiment_label = "Neutral"
+#         # Process negative comments
 #         negative_summary = ""
 #         negative_comments_list = []
 #         negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative']
 #             negative_comments_list = negative_comments['comment'].tolist()
 #             try:
+#                 # Get top negative comments
 #                 top_idx = negative_comments['combined_neg'].nlargest(3).index
 #                 top_comments = negative_comments.loc[top_idx, 'comment'].tolist()
 #                     )
 #                     negative_summary = summary_result[0]['summary_text']
 #                 else:
+#                     # Extractive summary
 #                     negative_summary = "; ".join(top_comments)
 #             except Exception as e:
 #                 logger.warning(f"Summary generation failed: {e}")
 #                 negative_summary = "; ".join(negative_comments_list[:3])
+#         # Generate insights and recommendations
 #         insights = []
 #         recommendations = []
 # async def startup_event():
 #     """Initialize models on startup"""
 #     try:
+#         logger.info("=" * 80)
+#         logger.info(f"Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+#         logger.info("=" * 80)
 #         initialize_models()
+#         logger.info("✓ Service started successfully")
+#         logger.info("=" * 80)
 #     except Exception as e:
+#         logger.error(f"✗ Startup failed: {e}")
 #         raise e
 # @app.on_event("shutdown")
 #     return {
 #         "service": get_settings().app_name,
 #         "version": get_settings().app_version,
+#         "status": "running",
+#         "endpoints": {
+#             "health": "/health",
+#             "analyze": "/analyze-comments",
+#             "config": "/config (debug mode only)",
+#             "test": "/test"
+#         }
 #     }
 # @app.get("/health")
 # @app.get("/config")
 # async def get_config(settings: Settings = Depends(get_settings)):
+#     """Get current configuration (debug mode only)"""
 #     if not settings.debug_mode:
 #         raise HTTPException(status_code=404, detail="Not found")
 #     return {
 #         "max_comments_per_request": settings.max_comments_per_request,
+#         "max_comment_length": settings.max_comment_length,
+#         "min_comment_words": settings.min_comment_words,
 #         "vader_pos_threshold": settings.vader_pos_threshold,
 #         "vader_neg_threshold": settings.vader_neg_threshold,
 #         "roberta_pos_threshold": settings.roberta_pos_threshold,
 #         "roberta_neg_threshold": settings.roberta_neg_threshold,
+#         "combined_weight_vader": settings.combined_weight_vader,
+#         "combined_weight_roberta": settings.combined_weight_roberta,
 #         "enable_caching": settings.enable_caching,
+#         "batch_size": settings.batch_size,
+#         "use_abstractive_summary": settings.use_abstractive_summary
+#     }
+# @app.get("/test")
+# async def test_endpoint():
+#     """Test endpoint to verify sentiment classification"""
+#     test_cases = [
+#         "No more comments 😅",
+#         "Overall good but too lag",
+#         "Not interested to be in her class just we are going for attendance thats it not at all managing time.",
+#         "Nothing to say anything just we are going to her class mean, only for attendance",
+#         "Excellent teaching! Very clear explanations.",
+#         "Good teacher with strong subject knowledge",
+#         "Class is okay, nothing special"
+#     ]
+#     results = []
+#     for text in test_cases:
+#         is_meta = is_meta_comment(text)
+#         is_neg = is_explicit_negative(text)
+#         # Predict classification
+#         if is_meta:
+#             predicted = "Neutral (meta-comment)"
+#         elif is_neg:
+#             predicted = "Negative (heuristic)"
+#         else:
+#             predicted = "Needs full analysis"
+#         results.append({
+#             "text": text,
+#             "is_meta_comment": is_meta,
+#             "is_heuristic_negative": is_neg,
+#             "predicted_classification": predicted
+#         })
+#     return {
+#         "test_results": results,
+#         "note": "Full analysis requires VADER and RoBERTa scores"
 #     }
 # if __name__ == "__main__":
 """
 Enhanced FastAPI Service for Comment Sentiment Analysis
 with improved performance, validation, and configuration management
 # Enhanced heuristic phrase/regex rules for explicit negative feedback
 # Lower-case phrases that mark a comment as explicitly negative feedback.
 # is_explicit_negative() tests each entry with a plain substring check
 # (`phrase in text.lower()`) and stops at the first hit, so:
 #   * every entry must stay lower-case;
 #   * order only affects which phrase is reported in the debug log;
 #   * short entries also match inside longer words.
 NEGATIVE_PHRASES = [
     # Teaching quality issues
     'very poor',
     'extremely poor',
     'poor in teaching',
     'poor teaching level',
     'poor teaching',
     'bad teacher',
     'bad teaching',
     'not good',  # Keep but check it's not "no negative"
     'not satisfied',
     'not satisfactory',
     # Content/delivery issues
     'boring class',
     'boring classes',
     'boring subject',
     'subject is boring',
     'low voice',
     'voice is low',
     'cannot hear',
     "can't hear",
     'speak louder',
     # Resource/support issues
     'need more staff',
     'need more faculty',
     'insufficient staff',
     'lack of staff',
     'not sufficient',
     'insufficient',
     'not enough',
     'no classes',
     'no regular classes',
     'not sufficient classes',
     # Knowledge/understanding issues
     'lack of knowledge',
     'better knowledge needed',
     'poor knowledge',
     'knowledge is lacking',
     'practical knowledge lacking',
     'no practical',
     'lack of practical',
     'no hands-on',
     'no real world',
     'did not understand',
     "didn't understand",
     'not able to understand',
     'unable to understand',
     'difficult to understand',
     'hard to understand',
     'concepts are difficult',
     'concepts difficult',
     'cant understand',
     "can't understand",
     'not understandable',
     # Improvement needed
     'improve class',
     'improvement needed',
     'needs improvement',
     'need improvement',
     'should improve',
     'must improve',
     'not helpful',
     'not clear',
     'communication skills need improvement',
     'improve communication',
     # Pace/time issues
     'lectures are going fast',
     'going too fast',
     'too fast',
     'too slow',
     'too lag',
     # NOTE(review): bare 'lag' is a substring of unrelated words such as
     # "flag" or "village"; consider a word-boundary regex instead.
     'lag',
     'lagging',
     'lag in teaching',
     'not managing time',
     'poor time management',
     'time management issue',
     # Engagement issues
     'not interested',
     'no interest',
     'going for attendance',
     'just for attendance',
     'only for attendance',
     'not at all',
     'nothing learnt',
     'learned nothing',
     'no improvement',
     'same teaching',
     'monotonous',
     'sleeping in class',
     # Value/utility issues
     'waste of time',
     'wasting time',
     'waste our time',
     'no use',
     'useless',
     # Administrative issues
     'military rules',
     'strict rules',
     'too strict',
     'very strict',
     'attendance issue',
     'attendance problem',
     'not providing attendance',
     'claim od',
     # Workload issues
     'too many projects',
     'many projects review',
     'trouble to make',
     'difficult to make',
     'hard to make',
     # Intended for negative contexts only.
     # NOTE(review): it is matched like every other entry, so any mention of
     # placement activities is flagged regardless of context.
     'placement activities',
 ]
 # Compiled, case-insensitive patterns for negative feedback whose wording
 # varies too much for the fixed NEGATIVE_PHRASES list.
 # NOTE(review): the consumer is not visible in this part of the file;
 # presumably is_explicit_negative() runs `.search()` over these — confirm.
 # Most patterns start with \b but have no trailing \b, so they also match
 # when the last word is a prefix of a longer word.
 NEGATIVE_REGEXES = [
     # Teaching quality patterns
     re.compile(r"\b(very|extremely|quite|so)\s+(poor|bad|weak)\s+(in\s+)?(teaching|knowledge|communication)", re.IGNORECASE),
     re.compile(r"\bpoor\s+(teaching|teacher|faculty|knowledge|communication)", re.IGNORECASE),
     re.compile(r"\b(teaching|knowledge)\s+(is\s+)?(poor|bad|weak|lacking)", re.IGNORECASE),
     # Boring/engagement patterns
     re.compile(r"\b(boring|dull|monotonous)\s+(class|classes|subject|lecture|lectures)", re.IGNORECASE),
     re.compile(r"\b(class|classes|subject|lecture|lectures)\s+(is|are)\s+(boring|dull|monotonous)", re.IGNORECASE),
     # Voice/communication patterns
     re.compile(r"\b(low|soft|quiet)\s+voice\b", re.IGNORECASE),
     re.compile(r"\bvoice\s+(is\s+)?(low|soft|quiet|not clear)", re.IGNORECASE),
     re.compile(r"\b(cannot|can't|cant|unable to)\s+hear", re.IGNORECASE),
     # Resource/support patterns
     re.compile(r"\b(no|not|insufficient|lack of)\s+(proper|sufficient|enough|regular)?\s*(classes|notes|support|staff|faculty)", re.IGNORECASE),
     re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support|classes)", re.IGNORECASE),
     # Understanding/clarity patterns
     re.compile(r"\b(cannot|can't|cant|unable to|difficult to|hard to)\s+understand", re.IGNORECASE),
     re.compile(r"\b(not|difficult|hard)\s+(able\s+to\s+)?understand(\s+the)?(\s+(concepts?|teaching|lectures?))?", re.IGNORECASE),
     re.compile(r"\bconcepts?\s+(are\s+)?(difficult|hard|tough|complex)\s+to\s+understand", re.IGNORECASE),
     # Improvement patterns
     re.compile(r"\b(need|needs|needed|require|requires)\s+(some\s+)?(improvement|to improve)", re.IGNORECASE),
     re.compile(r"\b(should|must|have to)\s+improve", re.IGNORECASE),
     re.compile(r"\bimprovement\s+(is\s+)?need(ed)?", re.IGNORECASE),
     # Pace patterns
     re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(is|are|going)\s+(too|very)\s+(fast|slow)", re.IGNORECASE),
     re.compile(r"\b(too|very)\s+(fast|slow|lag|lagging)", re.IGNORECASE),
     # Time management patterns
     # (the alternation used to list "managing" twice; one branch is enough)
     re.compile(r"\b(not|poor|bad)\s+managing\s+time", re.IGNORECASE),
     re.compile(r"\btime\s+management\s+(is\s+)?(poor|bad|lacking)", re.IGNORECASE),
     # Attendance/engagement patterns
     re.compile(r"\b(just|only)\s+(for|going for)\s+attendance", re.IGNORECASE),
     re.compile(r"\b(going|attend|attending)\s+(to|for)\s+(her|his|their)\s+class\s+(just|only)\s+for\s+attendance", re.IGNORECASE),
     re.compile(r"\bnot\s+(at\s+all\s+)?(interested|engaging|helpful)", re.IGNORECASE),
     # Value patterns
     re.compile(r"\b(waste|wasting)\s+(of\s+)?time", re.IGNORECASE),
     re.compile(r"\b(no\s+use|useless|not useful)", re.IGNORECASE),
     # Workload patterns
     re.compile(r"\b(too\s+)?many\s+projects", re.IGNORECASE),
     re.compile(r"\btrouble\s+to\s+(make|complete|do)", re.IGNORECASE),
     # Administrative patterns
     re.compile(r"\bmilitary\s+rules", re.IGNORECASE),
     re.compile(r"\b(too|very)\s+strict", re.IGNORECASE),
     re.compile(r"\battendance\s+(issue|problem)", re.IGNORECASE),
     re.compile(r"\bnot\s+providing\s+attendance", re.IGNORECASE),
     re.compile(r"\bclaim\s+od", re.IGNORECASE),
     # Placement/scheduling patterns
     re.compile(r"\bplacement\s+activities\s+(and|with)\s+(attendance|issue|problem)", re.IGNORECASE),
     re.compile(r"\b(class|classes)\s+(intersecting|conflicting)\s+with\s+placement", re.IGNORECASE),
 ]
 # Patterns for "meta" comments: filler replies such as "nil", "no comments"
 # or "all good" that carry no substantive feedback.
 # is_meta_comment() applies each one with `.match()` to the stripped text,
 # so every pattern is anchored at the start of the comment.
 # NOTE(review): patterns without a trailing `$` also accept longer comments
 # that merely begin with the phrase (e.g. "Everything is good except ...").
 # Confirm this is intended, since meta-comments are never treated as negative.
 META_COMMENT_PATTERNS = [
     # "No negative comments" / "no issues" patterns
     re.compile(r"^no\s+negative\s+(comments?|feedback|remarks?)", re.IGNORECASE),
     re.compile(r"^no\s+negative\s+comments?\s+on\s+the\s+(faculty|teacher|staff|course)", re.IGNORECASE),
     re.compile(r"^no\s+(issues?|problems?|complaints?)\.?$", re.IGNORECASE),
     re.compile(r"^no\s+(issues?|problems?|complaints?)\s+(at\s+all|whatsoever)", re.IGNORECASE),
     # "Everything is good" patterns
     re.compile(r"^(everything|all)\s+(is\s+)?(good|fine|ok|okay|great|perfect|excellent)", re.IGNORECASE),
     re.compile(r"^no,?\s+(everything|all)\s+(is\s+)?(good|fine|ok|okay)", re.IGNORECASE),
     re.compile(r"^(all\s+)?good\.?$", re.IGNORECASE),
     re.compile(r"^everything\s+at\s+the\s+too\s+only", re.IGNORECASE),  # Verbatim phrase from the feedback data
     # "Nothing" patterns
     re.compile(r"^nothing\.?$", re.IGNORECASE),
     re.compile(r"^nothing\s+(to\s+)?(say|comment|mention|add)", re.IGNORECASE),
     re.compile(r"^nothing,?\s+(and\s+)?(all|everything)\s+(is\s+)?(good|fine)", re.IGNORECASE),
     # "No more comments" patterns
     re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE),
     re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE),
     re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE),
     # Empty/nil responses
     re.compile(r"^(nil|none|na|n/a|nill)\.?$", re.IGNORECASE),
     re.compile(r"^(no|nothing|none)\.?$", re.IGNORECASE),
     # Positive meta-comments (not actual feedback)
     re.compile(r"^(it's\s+|its\s+)?(all\s+)?good\.?$", re.IGNORECASE),
     re.compile(r"^fine\.?$", re.IGNORECASE),
     re.compile(r"^ok(ay)?\.?$", re.IGNORECASE),
     re.compile(r"^great\.?$", re.IGNORECASE),
     re.compile(r"^nice\.?$", re.IGNORECASE),
 ]
 def is_meta_comment(text: str) -> bool:
     """
     Check if comment is a meta-comment (not actual feedback).

     A comment counts as meta when it is empty/None, when it is shorter than
     three characters after stripping surrounding whitespace, or when the
     stripped text matches any entry of META_COMMENT_PATTERNS from its start.

     Args:
         text: Raw comment text; falsy values are accepted.

     Returns:
         True if the comment carries no substantive feedback, else False.
     """
     if not text:
         return True  # Empty text is meta

     text = text.strip()

     # Check length - very short comments are likely meta
     if len(text) < 3:
         logger.debug(f"Meta-comment (too short): '{text}'")
         return True

     # Check against patterns. A hit must return True: previously the match
     # was only logged and the function fell through to `return False`.
     if any(pattern.match(text) for pattern in META_COMMENT_PATTERNS):
         logger.debug(f"Meta-comment detected: '{text[:50]}...'")
         return True

     return False
 def is_explicit_negative(text: str) -> bool:
+    """
+    Check if text contains explicit negative phrases.
+    IMPORTANT: Must check if it's a meta-comment FIRST.
+    """
     if not text:
         return False
+    # CRITICAL: Don't classify meta-comments as negative
+    if is_meta_comment(text):
+        return False
     lower = text.lower()
     # Check phrases
     for phrase in NEGATIVE_PHRASES:
         if phrase in lower:
+            # Double-check it's not a false positive like "no negative comments"
+            if phrase == 'not good' and 'no negative' in lower:
+                continue
+            if phrase == 'no interest' and 'no negative' in lower:
+                continue
             logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'")
             return True
         log_level="info"
     )