# iqac_fast_api / fastapi_example.py
"""
Enhanced FastAPI Service for Comment Sentiment Analysis
Version 3.0.0 - Major accuracy improvements with advanced classification
Features:
- Multi-stage sentiment detection
- Context-aware negative pattern matching
- Improved neutral/meta-comment detection
- Enhanced accuracy through ensemble approach
"""
from fastapi import FastAPI, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field, validator
from pydantic_settings import BaseSettings
from typing import List, Dict, Any, Optional
from functools import lru_cache
import uvicorn
import pandas as pd
import numpy as np
import os
import re
from datetime import datetime
import logging
# Configure logging
# Root-logger config: INFO level with timestamped, module-tagged lines.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used throughout this service.
logger = logging.getLogger(__name__)
# NLTK Setup
import nltk
import ssl
# Work around environments with broken/missing SSL certificate chains so
# nltk.download() (HTTPS) does not fail; Pythons without
# _create_unverified_context are left untouched.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context
# Keep NLTK data in a writable, container-friendly location and make it
# the first path NLTK searches.
nltk_data_dir = '/tmp/nltk_data'
os.makedirs(nltk_data_dir, exist_ok=True)
nltk.data.path.insert(0, nltk_data_dir)
def ensure_nltk_data():
    """Download any missing NLTK resources into the writable data dir.

    Each resource is probed with nltk.data.find() first so startup stays
    fast when everything is already cached. Download failures are logged
    but do not abort startup.
    """
    # Resource name -> path used by nltk.data.find() for the existence check.
    lookup_paths = {
        'vader_lexicon': 'sentiment/vader_lexicon.zip',
        'punkt': 'tokenizers/punkt',
        'stopwords': 'corpora/stopwords',
        'wordnet': 'corpora/wordnet',
        'omw-1.4': 'corpora/omw-1.4',
    }
    for name, find_path in lookup_paths.items():
        try:
            nltk.data.find(find_path)
            logger.info(f"✓ NLTK resource '{name}' already available")
        except LookupError:
            logger.info(f"Downloading NLTK resource '{name}'...")
            try:
                nltk.download(name, download_dir=nltk_data_dir, quiet=False)
                logger.info(f"✓ Successfully downloaded '{name}'")
            except Exception as e:
                logger.error(f"✗ Failed to download '{name}': {e}")
logger.info("Ensuring NLTK data is available...")
ensure_nltk_data()
from nltk.sentiment import SentimentIntensityAnalyzer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from scipy.special import softmax
import torch
# Configuration
class Settings(BaseSettings):
    """Application settings with defaults, overridable via env vars / .env.

    NOTE(review): values below are defaults only; pydantic-settings reads
    matching environment variables at construction — verify deployment env.
    """
    app_name: str = "Comment Analysis API"
    app_version: str = "3.0.0"
    debug_mode: bool = False
    # Request-size guards, enforced by CommentAnalysisRequest validation.
    max_comments_per_request: int = 1000
    max_comment_length: int = 5000  # characters per comment
    min_comment_words: int = 1      # minimum word count per comment
    # Enhanced thresholds for better accuracy
    # VADER compound-score thresholds (compound ranges over -1..1).
    vader_strong_pos_threshold: float = 0.5
    vader_pos_threshold: float = 0.2
    vader_neg_threshold: float = -0.2
    vader_strong_neg_threshold: float = -0.5
    # RoBERTa per-class probability thresholds (0..1).
    roberta_strong_pos_threshold: float = 0.70
    roberta_pos_threshold: float = 0.55
    roberta_neg_threshold: float = 0.40
    roberta_strong_neg_threshold: float = 0.60
    # Adjusted weights for better accuracy; get_settings() normalizes these
    # to sum to 1.0 if they drift.
    combined_weight_vader: float = 0.4
    combined_weight_roberta: float = 0.6
    model_cache_dir: str = "/tmp/model_cache"
    roberta_model_name: str = "cardiffnlp/twitter-roberta-base-sentiment"
    # Optional BART-based abstractive summary of negative comments;
    # when disabled, an extractive "join top-3" summary is used instead.
    use_abstractive_summary: bool = False
    summarizer_model: str = "facebook/bart-large-cnn"
    max_summary_length: int = 100
    min_summary_length: int = 25
    # VADER result caching (see vader_sentiment_cached).
    enable_caching: bool = True
    cache_size: int = 500
    batch_size: int = 32  # RoBERTa inference batch size

    class Config:
        env_file = ".env"
        env_file_encoding = 'utf-8'
        extra = 'ignore'
@lru_cache()
def get_settings() -> Settings:
    """Build and cache the singleton Settings instance.

    Also normalizes the two ensemble weights so they always sum to 1.0,
    logging a warning when the configured values drift outside tolerance.
    """
    settings = Settings()
    weight_sum = settings.combined_weight_vader + settings.combined_weight_roberta
    drifted = weight_sum < 0.99 or weight_sum > 1.01
    if drifted:
        logger.warning(f"Weights sum to {weight_sum}, normalizing to 1.0")
        settings.combined_weight_vader /= weight_sum
        settings.combined_weight_roberta /= weight_sum
    return settings
# Pydantic Models
class FacultyInfo(BaseModel):
    """Identifies the faculty member and course the comments belong to."""
    faculty_name: str = Field(..., min_length=1, max_length=200)
    staff_id: str = Field(..., min_length=1, max_length=50)
    course_code: str = Field(..., min_length=1, max_length=50)
    course_name: str = Field(..., min_length=1, max_length=200)
class CommentAnalysisRequest(BaseModel):
    """Request body for POST /analyze-comments."""
    # NOTE(review): `validator` / `min_items` are pydantic-v1 style APIs;
    # if the project moves to pydantic v2 semantics, switch to
    # field_validator / min_length.
    comments: List[str] = Field(..., min_items=1)
    faculty_info: FacultyInfo

    @validator('comments')
    def validate_comments(cls, v):
        """Enforce batch-size and per-comment length limits from Settings."""
        settings = get_settings()
        if len(v) > settings.max_comments_per_request:
            raise ValueError(f'Maximum {settings.max_comments_per_request} comments per request')
        for idx, comment in enumerate(v):
            if len(comment) > settings.max_comment_length:
                raise ValueError(f'Comment {idx} exceeds maximum length of {settings.max_comment_length} characters')
        return v
class SentimentDistribution(BaseModel):
    """Percentage split of comments per sentiment class (sums to ~100)."""
    positive_percentage: float
    negative_percentage: float
    neutral_percentage: float
class DetailedScores(BaseModel):
    """Average per-model sentiment scores across all analyzed comments."""
    average_positive: float
    average_negative: float
    average_neutral: float
    # VADER-only; RoBERTa has no compound score, hence Optional.
    average_compound: Optional[float] = None
class DetailedAnalysis(BaseModel):
    """Per-model score breakdown included in the analysis payload."""
    vader_scores: DetailedScores
    roberta_scores: DetailedScores
class AnalysisResult(BaseModel):
    """Full analysis payload returned inside CommentAnalysisResponse."""
    total_comments: int
    positive_comments: int
    negative_comments: int
    neutral_comments: int
    # Blended (VADER + RoBERTa) average scores, rounded to 3 decimals.
    positive_sentiment: float
    negative_sentiment: float
    neutral_sentiment: float
    overall_sentiment: str  # "Positive" | "Negative" | "Neutral"
    sentiment_distribution: SentimentDistribution
    negative_comments_summary: str
    negative_comments_list: List[str]
    key_insights: List[str]
    recommendations: List[str]
    detailed_analysis: DetailedAnalysis
    faculty_info: Dict[str, str]
    analysis_timestamp: str  # ISO-8601 timestamp, set server-side
class CommentAnalysisResponse(BaseModel):
    """Envelope for /analyze-comments: analysis is None when success is False."""
    success: bool
    analysis: Optional[AnalysisResult] = None
    message: str
# Initialize FastAPI
# Initialize FastAPI
app = FastAPI(
    title=get_settings().app_name,
    version=get_settings().app_version,
    description="Advanced sentiment analysis service for educational feedback"
)
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# wide open; tighten to known origins before exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Global model variables
sia = None
tokenizer = None
model = None
device = None
summarizer = None
# ============================================================================
# ENHANCED PATTERN DETECTION FOR BETTER ACCURACY
# ============================================================================
# Meta-comments (not actual feedback - should be NEUTRAL)
# Anchored (^...$) whole-comment match: the ENTIRE comment must be one of
# these boilerplate phrases to count as "meta" (no real feedback given).
META_PATTERNS = re.compile(
    r'^(no\s+(negative\s+)?(more\s+)?(comments?|feedback|remarks?|issues?|problems?|complaints?)|'
    r'(everything|all)\s+(is\s+)?(good|fine|ok(ay)?|great|perfect|excellent)|'
    r'nothing(\s+to\s+(say|comment|mention|add))?|'
    r'(nil|none|na|n/a|nill)\.?|'
    # NOTE(review): the inner '^' below is redundant inside an already
    # ^-anchored alternation; harmless, kept as-is.
    r'^(all\s+)?(good|fine|ok(ay)?|great|nice)\.?|'
    r'no\s+remarks?|'
    r'everything\s+at\s+the\s+too\s+only)$',
    re.IGNORECASE
)
# Strong NEGATIVE indicators (should override model scores)
# Strong NEGATIVE indicators (should override model scores).
# Word-boundary-anchored alternation of explicit-criticism phrasings,
# grouped by theme; a single .search() hit marks the comment strongly
# negative regardless of the model scores.
STRONG_NEGATIVE_PATTERN = re.compile(
    r'\b('
    # Direct criticism
    r'(very|extremely|quite|so|too)\s+(poor|bad|weak|terrible|awful|horrible)|'
    r'poor\s+(teaching|teacher|faculty|knowledge|communication|quality|explanation)|'
    r'bad\s+(teaching|teacher|faculty|quality|explanation)|'
    r'terrible|horrible|awful|pathetic|useless|waste\s+of\s+time|'
    # Teaching quality issues
    r'(teaching|knowledge)\s+(is\s+)?(poor|bad|weak|lacking|insufficient|not\s+good)|'
    r'cannot\s+teach|can\'?t\s+teach|doesn\'?t\s+know\s+how\s+to\s+teach|'
    r'not\s+teaching\s+properly|teaching\s+method\s+is\s+(poor|bad)|'
    # Boring/disengagement
    r'(boring|dull|monotonous)\s+(class|classes|subject|lecture|lectures|sessions?)|'
    r'(class|classes|subject|lectures?)\s+(is|are)\s+(boring|dull|monotonous|uninteresting)|'
    r'sleeping\s+in\s+class|fall\s+asleep|makes?\s+us\s+sleep|'
    # Communication issues
    r'(low|soft|quiet|unclear)\s+voice|voice\s+(is\s+)?(low|soft|quiet|not\s+clear)|'
    r'(cannot|can\'?t|cant|unable\s+to)\s+hear|difficult\s+to\s+hear|'
    r'(not|poor|bad)\s+(communication|explaining|explanation)|'
    # Understanding issues
    r'(cannot|can\'?t|cant|unable\s+to|difficult\s+to|hard\s+to)\s+understand|'
    r'(not|never|don\'?t)\s+(able\s+to\s+)?understand|'
    r'(concepts?|topics?|subjects?)\s+(are\s+)?(difficult|hard|tough|impossible)\s+to\s+understand|'
    r'makes?\s+(no|little)\s+sense|doesn\'?t\s+make\s+sense|'
    # Improvement needed
    r'(need|needs|require|requires)\s+(urgent|serious|immediate|much|lot\s+of)?\s*improvement|'
    r'(should|must|have\s+to)\s+improve\s+(a\s+lot|more|urgently)|'
    # Pace issues
    r'(lectures?|class(es)?|teaching)\s+(is|are|going)\s+(too|very)\s+(fast|slow)|'
    r'(too|very|extremely)\s+(fast|slow|rush|rushed)|'
    r'(lag|lagging)\s+in\s+teaching|teaching\s+(is\s+)?lagging|'
    # Time management
    # BUGFIX: was '(managing|managing)' — a duplicated alternative; deduped.
    # Matches the exact same strings as before.
    r'(not|poor|bad|terrible)\s+managing\s+time|'
    r'time\s+management\s+(is\s+)?(poor|bad|terrible|lacking)|'
    r'always\s+(late|wasting\s+time)|waste\s+(our|class)\s+time|'
    # Lack of resources/support
    r'(no|not|insufficient|lack\s+of)\s+(proper|sufficient|enough|regular)?\s*(classes|notes|support|help)|'
    r'need\s+more\s+(staff|faculty|classes|support|help)|'
    r'no\s+(practical|hands[-\s]?on|lab|real[-\s]?world)|lack\s+of\s+practical|'
    # Attendance/engagement issues
    r'(just|only)\s+(for|going\s+for)\s+attendance|'
    r'going\s+(to|for)\s+(her|his|their)\s+class\s+(just|only)\s+for\s+attendance|'
    r'(not|no)\s+(interested|engaging|helpful|useful|at\s+all)|'
    r'no\s+interest\s+in\s+teaching|'
    # Administrative issues
    r'military\s+rules|too\s+strict|very\s+strict|'
    r'attendance\s+(issue|problem)|not\s+providing\s+attendance|'
    # Workload issues
    r'too\s+many\s+projects|many\s+projects\s+review|'
    r'placement\s+activities\s+(and|with)\s+attendance'
    r')\b',
    re.IGNORECASE
)
# Positive indicators (help identify positive comments)
# Positive indicators (help identify positive comments).
# Unlike STRONG_NEGATIVE_PATTERN, a hit here does NOT decide on its own:
# classify_sentiment_enhanced() also requires supporting model scores.
POSITIVE_PATTERN = re.compile(
    r'\b('
    r'(very|extremely|really|so|truly)\s+(good|great|excellent|amazing|wonderful|fantastic|helpful|knowledgeable|clear)|'
    r'excellent|outstanding|amazing|wonderful|fantastic|brilliant|superb|'
    r'(great|good|best|wonderful)\s+(teaching|teacher|faculty|knowledge|explanation|professor|sir|madam)|'
    r'(teaching|explanation|knowledge)\s+(is\s+)?(excellent|outstanding|very\s+good|great|clear)|'
    r'explains?\s+(very\s+)?(well|clearly|nicely|perfectly)|'
    r'(easy|easier)\s+to\s+understand|clear\s+explanation|'
    r'(very\s+)?(helpful|supportive|friendly|approachable|patient)|'
    r'(good|strong|deep|vast)\s+(knowledge|understanding)|'
    r'(love|like|enjoy|appreciate)\s+(the\s+)?(class|classes|teaching|subject|course|lectures?)|'
    r'learned?\s+(a\s+lot|so\s+much|many\s+things)|'
    r'inspired?|inspiring|motivating|motivated|encouraged|'
    r'(best|favourite|favorite)\s+(teacher|faculty|professor)|'
    r'highly\s+recommend|strongly\s+recommend|'
    r'grateful|thankful|blessed|lucky\s+to\s+have|'
    r'satisfied|happy\s+with|pleased\s+with|'
    r'(always|very)\s+(available|accessible|helpful)|'
    r'patient|caring|dedicated|passionate|'
    r'interactive\s+class|engaging\s+class|interesting\s+class'
    r')\b',
    re.IGNORECASE
)
# Weak negative indicators (suggestions/mild criticism - might be NEUTRAL)
# Weak negative indicators (suggestions / mild criticism). A hit here with
# low blended negative scores is classified Neutral rather than Negative.
WEAK_NEGATIVE_PATTERN = re.compile(
    r'\b('
    r'could\s+(be\s+)?better|'
    r'can\s+improve|'
    r'would\s+be\s+good\s+if|'
    r'suggest|suggestion|'
    r'maybe|perhaps|'
    r'slightly|a\s+bit|'
    r'sometimes|occasionally'
    r')\b',
    re.IGNORECASE
)
def is_meta_comment(text: str) -> bool:
    """Return True for comments that are not actual feedback.

    Empty / near-empty strings and boilerplate like "no comments" or "nil"
    (see META_PATTERNS) count as meta; they are later classified Neutral.
    """
    if not text:
        return True
    stripped = text.strip()
    if len(stripped) < 3:
        return True
    return META_PATTERNS.match(stripped) is not None
def detect_strong_negative(text: str) -> bool:
    """True when the comment contains explicit, strong criticism.

    Meta-comments are excluded first so boilerplate like "no negative
    comments" never trips the negative regex.
    """
    if not text:
        return False
    if is_meta_comment(text):
        return False
    return STRONG_NEGATIVE_PATTERN.search(text) is not None
def detect_positive(text: str) -> bool:
    """True when the comment contains explicitly positive phrasing.

    Meta-comments ("everything is good", etc.) are excluded first, since
    they are boilerplate rather than genuine praise.
    """
    if not text:
        return False
    if is_meta_comment(text):
        return False
    return POSITIVE_PATTERN.search(text) is not None
def detect_weak_negative(text: str) -> bool:
    """True when the comment reads as a mild suggestion rather than criticism.

    Used by the classifier to steer borderline comments toward Neutral.
    """
    if not text:
        return False
    if is_meta_comment(text):
        return False
    return WEAK_NEGATIVE_PATTERN.search(text) is not None
# ============================================================================
# MODEL INITIALIZATION
# ============================================================================
def initialize_models():
    """Load all models into the module-level globals (called at startup).

    Loads VADER, the RoBERTa sentiment tokenizer/model (GPU if available),
    and optionally the abstractive summarizer. The summarizer is
    best-effort — its failure is logged and analysis falls back to the
    extractive summary. Any other failure is re-raised so startup aborts.
    """
    global sia, tokenizer, model, device, summarizer
    try:
        settings = get_settings()
        logger.info("Initializing sentiment analysis models...")
        # VADER (lexicon downloaded earlier by ensure_nltk_data()).
        sia = SentimentIntensityAnalyzer()
        logger.info("✓ VADER initialized")
        # RoBERTa, cached on disk so restarts avoid re-downloading.
        cache_dir = settings.model_cache_dir
        os.makedirs(cache_dir, exist_ok=True)
        tokenizer = AutoTokenizer.from_pretrained(
            settings.roberta_model_name,
            cache_dir=cache_dir
        )
        model = AutoModelForSequenceClassification.from_pretrained(
            settings.roberta_model_name,
            cache_dir=cache_dir
        )
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model.to(device)
        model.eval()  # inference mode (disables dropout etc.)
        logger.info(f"✓ RoBERTa initialized on device: {device}")
        # Summarizer (optional, behind the use_abstractive_summary flag).
        if settings.use_abstractive_summary:
            try:
                summarizer = pipeline(
                    "summarization",
                    model=settings.summarizer_model,
                    device=0 if device == "cuda" else -1
                )
                logger.info("✓ Summarizer initialized")
            except Exception as e:
                logger.warning(f"Summarizer initialization failed: {e}")
                summarizer = None
        logger.info("✓ All models initialized successfully")
    except Exception as e:
        logger.error(f"Error initializing models: {e}")
        raise e
# ============================================================================
# SENTIMENT ANALYSIS FUNCTIONS
# ============================================================================
@lru_cache(maxsize=500)
def vader_sentiment_cached(text: str) -> tuple:
    """Memoized VADER polarity scores as a (neg, neu, pos, compound) tuple.

    A tuple (rather than VADER's dict) is returned so the cached value is
    immutable.
    """
    polarity = sia.polarity_scores(text)
    return (polarity['neg'], polarity['neu'], polarity['pos'], polarity['compound'])
def vader_sentiment(text: str) -> Dict[str, float]:
    """Run VADER on one comment, returning 'vader_*'-prefixed scores.

    Uses the memoized path when caching is enabled. Falls back to a fully
    neutral score dict if VADER raises, so one bad comment cannot abort a
    batch.
    """
    try:
        if get_settings().enable_caching:
            neg, neu, pos, compound = vader_sentiment_cached(text)
        else:
            raw = sia.polarity_scores(text)
            neg, neu, pos, compound = raw['neg'], raw['neu'], raw['pos'], raw['compound']
        return {
            'vader_neg': neg,
            'vader_neu': neu,
            'vader_pos': pos,
            'vader_compound': compound,
        }
    except Exception as e:
        logger.warning(f"VADER analysis failed: {e}")
        return {'vader_neg': 0.0, 'vader_neu': 1.0, 'vader_pos': 0.0, 'vader_compound': 0.0}
def roberta_sentiment_batch(texts: List[str]) -> List[Dict[str, float]]:
    """Score all texts with RoBERTa in settings.batch_size chunks.

    Returns one dict of softmax probabilities per input text. The index
    order assumes the cardiffnlp twitter-roberta-base-sentiment label
    layout [negative, neutral, positive] — TODO confirm if the model name
    is ever changed via settings. On any failure the entire batch falls
    back to all-neutral scores so the pipeline keeps running.
    """
    try:
        settings = get_settings()
        results = []
        for i in range(0, len(texts), settings.batch_size):
            batch = texts[i:i + settings.batch_size]
            encoded = tokenizer(
                batch,
                return_tensors='pt',
                truncation=True,  # clip inputs to the model's token limit
                max_length=512,
                padding=True
            )
            # Move input tensors onto the same device as the model.
            encoded = {k: v.to(device) for k, v in encoded.items()}
            with torch.no_grad():
                outputs = model(**encoded)
            for output in outputs.logits:
                scores = softmax(output.cpu().numpy())
                results.append({
                    'roberta_neg': float(scores[0]),
                    'roberta_neu': float(scores[1]),
                    'roberta_pos': float(scores[2])
                })
        return results
    except Exception as e:
        logger.warning(f"RoBERTa batch analysis failed: {e}")
        return [{'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0} for _ in texts]
def classify_sentiment_enhanced(row: pd.Series, settings: Settings) -> str:
    """Classify one scored comment as 'Positive' / 'Negative' / 'Neutral'.

    Multi-stage decision, evaluated in priority order:
      1. meta-comment flag                       -> Neutral
      2. strong-negative regex hit               -> Negative (overrides models)
      3. positive regex hit + strong model score -> Positive
      4. VADER/RoBERTa ensemble thresholds
      5. fallback                                -> Neutral
    """
    # Stage 1: meta-comments carry no sentiment signal.
    if row.get('is_meta', False):
        return 'Neutral'

    compound = row.get('vader_compound', 0.0)
    rob_pos = row.get('roberta_pos', 0.0)
    rob_neg = row.get('roberta_neg', 0.0)
    mix_pos = row.get('combined_pos', 0.0)
    mix_neg = row.get('combined_neg', 0.0)
    mix_neu = row.get('combined_neu', 0.0)

    # Stage 2: explicit criticism always wins.
    if row.get('has_strong_negative', False):
        return 'Negative'

    # Stage 3: positive phrasing confirmed by at least one strong model score.
    strong_model_positive = (
        compound >= settings.vader_strong_pos_threshold
        or rob_pos >= settings.roberta_strong_pos_threshold
        or (compound >= settings.vader_pos_threshold
            and rob_pos >= settings.roberta_pos_threshold)
    )
    if row.get('has_positive', False) and strong_model_positive:
        return 'Positive'

    # Stage 4a: strongly negative model agreement.
    if (compound <= settings.vader_strong_neg_threshold
            or rob_neg >= settings.roberta_strong_neg_threshold
            or (compound <= settings.vader_neg_threshold
                and rob_neg >= settings.roberta_neg_threshold)):
        return 'Negative'

    # Stage 4b: the blended score clearly dominates (0.35 clarity floor).
    if mix_neg > mix_pos and mix_neg > mix_neu and mix_neg > 0.35:
        return 'Negative'
    if mix_pos > mix_neg and mix_pos > mix_neu and mix_pos > 0.35:
        return 'Positive'

    # Suggestions / mild criticism with weak negative scores read as neutral.
    if row.get('has_weak_negative', False) and mix_neg < 0.5:
        return 'Neutral'

    # Stage 5: default when nothing decisive fired.
    return 'Neutral'
def sanitize_text(text: str) -> str:
    """Strip control characters and collapse all whitespace to single spaces.

    Falsy input (None, empty string) yields "".
    """
    if not text:
        return ""
    # Remove ASCII control characters except \t, \n, \r (those are then
    # collapsed by split/join below anyway).
    without_controls = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text)
    # split() drops leading/trailing whitespace, so no extra strip() needed.
    return ' '.join(without_controls.split())
# ============================================================================
# MAIN ANALYSIS FUNCTION
# ============================================================================
def _sentiment_insights(label: str, positive_count: int, negative_count: int,
                        neutral_count: int, total_comments: int):
    """Return (insights, recommendations) lists for the overall label."""
    if label == "Positive":
        insights = [
            f"Strong positive feedback: {positive_count}/{total_comments} comments ({round(positive_count/total_comments*100, 1)}%)",
            "Students are satisfied with the teaching approach",
            "High engagement and learning outcomes reported"
        ]
        recommendations = [
            "Continue current effective teaching methods",
            "Document successful practices for future reference",
            "Share best practices with colleagues"
        ]
    elif label == "Negative":
        insights = [
            f"Concerns identified: {negative_count}/{total_comments} negative comments ({round(negative_count/total_comments*100, 1)}%)",
            "Students facing challenges with current approach",
            "Immediate attention needed to address feedback"
        ]
        recommendations = [
            "Review and analyze specific negative feedback points",
            "Consider adjusting teaching pace or methods",
            "Increase student engagement and support",
            "Schedule student feedback sessions",
            "Focus on communication clarity and accessibility"
        ]
    else:
        insights = [
            f"Mixed feedback: {positive_count} positive, {negative_count} negative, {neutral_count} neutral",
            "Room for improvement while maintaining strengths",
            "Students have varied experiences"
        ]
        recommendations = [
            "Address specific concerns raised in negative feedback",
            "Build on positive aspects appreciated by students",
            "Gather more detailed feedback for neutral areas"
        ]
    return insights, recommendations


def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]:
    """Run the full multi-stage sentiment pipeline over a list of comments.

    Pipeline:
      1. sanitize + length-filter the raw comments
      2. regex pattern flags (meta / strong-negative / positive / weak-negative)
      3. VADER + RoBERTa scoring, blended with the configured weights
      4. per-comment classification (classify_sentiment_enhanced)
      5. aggregate stats, negative-comment summary, insights, recommendations

    Returns a JSON-serializable dict matching AnalysisResult (minus
    faculty_info, which the endpoint injects), or a stub dict with
    total_comments == 0 when nothing survives filtering.

    Raises: re-raises any unexpected internal error after logging it.
    """
    try:
        settings = get_settings()
        logger.info(f"Received {len(comments)} comments for analysis")

        # --- 1. Sanitize and filter -------------------------------------
        sanitized_comments = [sanitize_text(comment) for comment in comments]
        # BUGFIX: the old filter compared the WORD count against
        # settings.max_comment_length (a CHARACTER limit), so the upper
        # bound checked the wrong unit and was effectively a no-op for
        # requests that passed validation. Each limit now uses its own unit.
        filtered_comments = [
            comment for comment in sanitized_comments
            if len(comment.split()) >= settings.min_comment_words
            and len(comment) <= settings.max_comment_length
        ]
        logger.info(f"After filtering: {len(filtered_comments)} valid comments")
        if not filtered_comments:
            return {
                "total_comments": 0,
                "message": "No valid comments found for analysis"
            }

        # --- 2. Regex pattern flags -------------------------------------
        df = pd.DataFrame({'comment': filtered_comments})
        df['is_meta'] = df['comment'].apply(is_meta_comment)
        df['has_strong_negative'] = df['comment'].apply(detect_strong_negative)
        df['has_positive'] = df['comment'].apply(detect_positive)
        df['has_weak_negative'] = df['comment'].apply(detect_weak_negative)
        logger.info(f"Meta: {df['is_meta'].sum()}, "
                    f"Strong Neg: {df['has_strong_negative'].sum()}, "
                    f"Positive: {df['has_positive'].sum()}, "
                    f"Weak Neg: {df['has_weak_negative'].sum()}")

        # --- 3. Model scoring + weighted blend --------------------------
        vader_df = pd.DataFrame([vader_sentiment(text) for text in df['comment']])
        roberta_df = pd.DataFrame(roberta_sentiment_batch(df['comment'].tolist()))
        final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1)
        for label in ('pos', 'neg', 'neu'):
            final_df[f'combined_{label}'] = (
                settings.combined_weight_vader * final_df[f'vader_{label}'] +
                settings.combined_weight_roberta * final_df[f'roberta_{label}']
            )

        # --- 4. Per-comment classification ------------------------------
        final_df['Overall_Sentiment'] = final_df.apply(
            lambda row: classify_sentiment_enhanced(row, settings),
            axis=1
        )

        # --- 5. Aggregate statistics ------------------------------------
        total_comments = len(final_df)
        counts = final_df['Overall_Sentiment'].value_counts()
        positive_count = int(counts.get('Positive', 0))
        negative_count = int(counts.get('Negative', 0))
        neutral_count = int(counts.get('Neutral', 0))
        logger.info(f"Classification Results - Pos: {positive_count}, Neg: {negative_count}, Neu: {neutral_count}")
        avg_positive = float(final_df['combined_pos'].mean())
        avg_negative = float(final_df['combined_neg'].mean())
        avg_neutral = float(final_df['combined_neu'].mean())
        # Overall label: the dominant blended average; ties fall to Neutral.
        if avg_positive > max(avg_negative, avg_neutral):
            overall_sentiment_label = "Positive"
        elif avg_negative > max(avg_positive, avg_neutral):
            overall_sentiment_label = "Negative"
        else:
            overall_sentiment_label = "Neutral"

        # --- Negative-comment summary -----------------------------------
        negative_summary = ""
        negative_comments_list = []
        negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative']
        if len(negative_comments) > 0:
            negative_comments_list = negative_comments['comment'].tolist()
            try:
                # Summarize the (up to) 3 most negative comments by blended score.
                top_idx = negative_comments['combined_neg'].nlargest(min(3, len(negative_comments))).index
                top_comments = negative_comments.loc[top_idx, 'comment'].tolist()
                if settings.use_abstractive_summary and summarizer is not None:
                    # Keep summarizer input bounded.
                    negative_text = " ".join(top_comments)[:1000]
                    summary_result = summarizer(
                        negative_text,
                        max_length=settings.max_summary_length,
                        min_length=settings.min_summary_length,
                        do_sample=False
                    )
                    negative_summary = summary_result[0]['summary_text']
                else:
                    negative_summary = "; ".join(top_comments)
            except Exception as e:
                # Best-effort: fall back to the first few negatives verbatim.
                logger.warning(f"Summary generation failed: {e}")
                negative_summary = "; ".join(negative_comments_list[:3])

        # --- Insights & recommendations ---------------------------------
        insights, recommendations = _sentiment_insights(
            overall_sentiment_label, positive_count, negative_count,
            neutral_count, total_comments
        )
        strong_neg_hits = int(df['has_strong_negative'].sum())
        if strong_neg_hits > 0:
            insights.append(f"{strong_neg_hits} comments contain explicit criticism requiring attention")
        positive_hits = int(df['has_positive'].sum())
        if positive_hits > 0:
            insights.append(f"{positive_hits} comments contain strong positive appreciation")

        return {
            "total_comments": total_comments,
            "positive_comments": positive_count,
            "negative_comments": negative_count,
            "neutral_comments": neutral_count,
            "positive_sentiment": round(avg_positive, 3),
            "negative_sentiment": round(avg_negative, 3),
            "neutral_sentiment": round(avg_neutral, 3),
            "overall_sentiment": overall_sentiment_label,
            "sentiment_distribution": {
                "positive_percentage": round((positive_count / total_comments) * 100, 1),
                "negative_percentage": round((negative_count / total_comments) * 100, 1),
                "neutral_percentage": round((neutral_count / total_comments) * 100, 1)
            },
            "negative_comments_summary": negative_summary,
            "negative_comments_list": negative_comments_list,
            "key_insights": insights,
            "recommendations": recommendations,
            "detailed_analysis": {
                # float() so the payload carries plain JSON numbers, matching
                # the combined averages above (pandas means are numpy scalars).
                "vader_scores": {
                    "average_positive": round(float(final_df['vader_pos'].mean()), 3),
                    "average_negative": round(float(final_df['vader_neg'].mean()), 3),
                    "average_neutral": round(float(final_df['vader_neu'].mean()), 3),
                    "average_compound": round(float(final_df['vader_compound'].mean()), 3)
                },
                "roberta_scores": {
                    "average_positive": round(float(final_df['roberta_pos'].mean()), 3),
                    "average_negative": round(float(final_df['roberta_neg'].mean()), 3),
                    "average_neutral": round(float(final_df['roberta_neu'].mean()), 3)
                }
            },
            # NOTE(review): utcnow() is deprecated in Python 3.12+; kept to
            # preserve the existing naive-UTC timestamp format.
            "analysis_timestamp": datetime.utcnow().isoformat()
        }
    except Exception as e:
        logger.error(f"Sentiment analysis failed: {e}", exc_info=True)
        raise e
# ============================================================================
# API ENDPOINTS
# ============================================================================
@app.on_event("startup")
async def startup_event():
    """Load all models before serving traffic; abort startup on failure.

    NOTE(review): @app.on_event is deprecated in recent FastAPI versions in
    favor of lifespan handlers — consider migrating.
    """
    try:
        logger.info("=" * 80)
        logger.info(f"Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        logger.info("=" * 80)
        initialize_models()
        logger.info("✓ Service started successfully")
        logger.info("=" * 80)
    except Exception as e:
        # Re-raise so the server refuses to start without working models.
        logger.error(f"✗ Startup failed: {e}")
        raise e
@app.on_event("shutdown")
async def shutdown_event():
    """Log shutdown; models need no explicit teardown."""
    logger.info("Service shutting down")
@app.get("/")
async def root():
    """Service metadata plus a map of the available endpoints."""
    settings = get_settings()
    return {
        "service": settings.app_name,
        "version": settings.app_version,
        "status": "running",
        "endpoints": {
            "health": "/health",
            "analyze": "/analyze-comments",
            "test": "/test",
        },
    }
@app.get("/health")
async def health_check():
    """Liveness/readiness probe: reports whether all models are loaded."""
    ready = all(obj is not None for obj in (sia, model, tokenizer))
    return {
        "status": "healthy" if ready else "unhealthy",
        "service": "comment-analysis",
        "version": get_settings().app_version,
        "models_loaded": ready,
        "device": device if device else "not initialized",
        "timestamp": datetime.utcnow().isoformat(),
    }
@app.post("/analyze-comments", response_model=CommentAnalysisResponse)
async def analyze_comments(
    request: CommentAnalysisRequest,
    settings: Settings = Depends(get_settings)
):
    """Run the sentiment pipeline over the request's comments and wrap the
    result (with faculty metadata attached) in a CommentAnalysisResponse."""
    try:
        info = request.faculty_info
        if not request.comments:
            return CommentAnalysisResponse(
                success=False,
                analysis=None,
                message="No comments provided for analysis"
            )
        logger.info(
            f"Analyzing {len(request.comments)} comments for {info.faculty_name} ({info.course_code})"
        )
        result = analyze_comments_sentiment(request.comments)
        if result.get("total_comments", 0) == 0:
            # Everything was filtered out (empty / too short / too long).
            return CommentAnalysisResponse(
                success=False,
                analysis=None,
                message=result.get("message", "No valid comments to analyze")
            )
        # Attach the faculty metadata the pipeline itself doesn't know about.
        result["faculty_info"] = {
            "faculty_name": info.faculty_name,
            "staff_id": info.staff_id,
            "course_code": info.course_code,
            "course_name": info.course_name,
        }
        return CommentAnalysisResponse(
            success=True,
            analysis=result,
            message=f"Successfully analyzed {result['total_comments']} comments"
        )
    except ValueError as ve:
        logger.warning(f"Validation error: {ve}")
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        logger.error(f"Analysis failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Analysis failed. Please try again later.")
@app.get("/test")
async def test_endpoint():
    """Pattern-detector smoke test over a fixed set of sample comments."""
    test_cases = [
        # Meta-comments (should be Neutral)
        "No negative comments",
        "Everything is good",
        "Nothing to say",
        "Nil",
        # Strong Negative (should be Negative)
        "Very poor teaching quality",
        "Boring class, waste of time",
        "Cannot understand anything",
        "Teaching is terrible and voice is too low",
        "Poor knowledge and bad teaching method",
        # Positive (should be Positive)
        "Excellent teacher with great knowledge",
        "Very helpful and explains clearly",
        "Amazing teaching style, learned a lot",
        "Best professor, highly recommend",
        # Weak negative/Neutral
        "Could be better",
        "Sometimes hard to understand",
        "Overall good but too lag",
        # Mixed
        "Good teacher but classes are boring",
        "Knowledgeable but voice is low"
    ]
    results = []
    for text in test_cases:
        flags = {
            "is_meta": is_meta_comment(text),
            "strong_negative": detect_strong_negative(text),
            "positive": detect_positive(text),
            "weak_negative": detect_weak_negative(text),
        }
        # Predict with the same priority order used by the real classifier.
        if flags["is_meta"]:
            predicted = "Neutral (meta-comment)"
        elif flags["strong_negative"]:
            predicted = "Negative (strong pattern)"
        elif flags["positive"]:
            predicted = "Positive (likely)"
        elif flags["weak_negative"]:
            predicted = "Neutral/Negative (weak)"
        else:
            predicted = "Requires full analysis"
        results.append({"text": text, **flags, "predicted": predicted})
    return {
        "test_results": results,
        "note": "Predictions based on pattern matching. Full analysis uses VADER + RoBERTa ensemble."
    }
# Entry point for local development: serve the app with uvicorn,
# listening on all interfaces at port 8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")
# """
# Enhanced FastAPI Service for Comment Sentiment Analysis
# with improved performance, validation, and configuration management
# Version 2.1.0 - Updated with bug fixes and improvements
# """
# from fastapi import FastAPI, HTTPException, Depends
# from fastapi.middleware.cors import CORSMiddleware
# from pydantic import BaseModel, Field, validator
# from pydantic_settings import BaseSettings
# from typing import List, Dict, Any, Optional
# from functools import lru_cache
# import uvicorn
# import pandas as pd
# import numpy as np
# import os
# import re
# from datetime import datetime
# import logging
# # Configure logging FIRST
# logging.basicConfig(
# level=logging.INFO,
# format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
# )
# logger = logging.getLogger(__name__)
# # CRITICAL: Download NLTK data BEFORE importing NLTK components
# import nltk
# import ssl
# try:
# _create_unverified_https_context = ssl._create_unverified_context
# except AttributeError:
# pass
# else:
# ssl._create_default_https_context = _create_unverified_https_context
# # Set NLTK data path
# nltk_data_dir = '/tmp/nltk_data'
# os.makedirs(nltk_data_dir, exist_ok=True)
# nltk.data.path.insert(0, nltk_data_dir)
# # Download required NLTK data
# def ensure_nltk_data():
# """Ensure all required NLTK data is downloaded"""
# resources = ['vader_lexicon', 'punkt', 'stopwords', 'wordnet', 'omw-1.4']
# for resource in resources:
# try:
# # Try to find the resource
# if resource == 'vader_lexicon':
# nltk.data.find('sentiment/vader_lexicon.zip')
# elif resource == 'punkt':
# nltk.data.find('tokenizers/punkt')
# elif resource in ['stopwords', 'wordnet', 'omw-1.4']:
# nltk.data.find(f'corpora/{resource}')
# logger.info(f"✓ NLTK resource '{resource}' already available")
# except LookupError:
# logger.info(f"Downloading NLTK resource '{resource}'...")
# try:
# nltk.download(resource, download_dir=nltk_data_dir, quiet=False)
# logger.info(f"✓ Successfully downloaded '{resource}'")
# except Exception as e:
# logger.error(f"✗ Failed to download '{resource}': {e}")
# # Download NLTK data immediately
# logger.info("Ensuring NLTK data is available...")
# ensure_nltk_data()
# # NOW import NLTK components
# from nltk.sentiment import SentimentIntensityAnalyzer
# # Import transformers after NLTK setup
# from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
# from scipy.special import softmax
# import torch
# # Configuration Management
# class Settings(BaseSettings):
# """Application settings with environment variable support"""
# # API Settings
# app_name: str = "Comment Analysis API"
# app_version: str = "2.1.0"
# debug_mode: bool = False
# # Request Limits
# max_comments_per_request: int = 1000
# max_comment_length: int = 5000
# min_comment_words: int = 1
# # Sentiment Thresholds
# vader_pos_threshold: float = 0.2
# vader_neg_threshold: float = -0.2
# roberta_pos_threshold: float = 0.55
# roberta_neg_threshold: float = 0.45
# combined_weight_vader: float = 0.5
# combined_weight_roberta: float = 0.5
# # Model Settings
# model_cache_dir: str = "/tmp/model_cache"
# roberta_model_name: str = "cardiffnlp/twitter-roberta-base-sentiment"
# use_abstractive_summary: bool = False
# summarizer_model: str = "facebook/bart-large-cnn"
# max_summary_length: int = 100
# min_summary_length: int = 25
# # Performance
# enable_caching: bool = True
# cache_size: int = 500
# batch_size: int = 32
# class Config:
# env_file = ".env"
# env_file_encoding = 'utf-8'
# extra = 'ignore'
# @validator('min_comment_words')
# def validate_min_words(cls, v):
# if v < 0:
# raise ValueError('min_comment_words must be non-negative')
# return v
# @validator('combined_weight_vader', 'combined_weight_roberta')
# def validate_weights(cls, v):
# if not 0 <= v <= 1:
# raise ValueError('Weights must be between 0 and 1')
# return v
# @lru_cache()
# def get_settings() -> Settings:
# """Cached settings instance"""
# settings = Settings()
# # Normalize weights if needed
# total = settings.combined_weight_vader + settings.combined_weight_roberta
# if not (0.99 <= total <= 1.01):
# logger.warning(f"Weights sum to {total}, normalizing to 1.0")
# settings.combined_weight_vader /= total
# settings.combined_weight_roberta /= total
# return settings
# # Pydantic Models
# class FacultyInfo(BaseModel):
# faculty_name: str = Field(..., min_length=1, max_length=200)
# staff_id: str = Field(..., min_length=1, max_length=50)
# course_code: str = Field(..., min_length=1, max_length=50)
# course_name: str = Field(..., min_length=1, max_length=200)
# class CommentAnalysisRequest(BaseModel):
# comments: List[str] = Field(..., min_items=1)
# faculty_info: FacultyInfo
# @validator('comments')
# def validate_comments(cls, v):
# settings = get_settings()
# if len(v) > settings.max_comments_per_request:
# raise ValueError(
# f'Maximum {settings.max_comments_per_request} comments per request'
# )
# for idx, comment in enumerate(v):
# if len(comment) > settings.max_comment_length:
# raise ValueError(
# f'Comment {idx} exceeds maximum length of {settings.max_comment_length} characters'
# )
# return v
# class SentimentDistribution(BaseModel):
# positive_percentage: float
# negative_percentage: float
# neutral_percentage: float
# class DetailedScores(BaseModel):
# average_positive: float
# average_negative: float
# average_neutral: float
# average_compound: Optional[float] = None
# class DetailedAnalysis(BaseModel):
# vader_scores: DetailedScores
# roberta_scores: DetailedScores
# class AnalysisResult(BaseModel):
# total_comments: int
# positive_comments: int
# negative_comments: int
# neutral_comments: int
# positive_sentiment: float
# negative_sentiment: float
# neutral_sentiment: float
# overall_sentiment: str
# sentiment_distribution: SentimentDistribution
# negative_comments_summary: str
# negative_comments_list: List[str]
# key_insights: List[str]
# recommendations: List[str]
# detailed_analysis: DetailedAnalysis
# faculty_info: Dict[str, str]
# analysis_timestamp: str
# class CommentAnalysisResponse(BaseModel):
# success: bool
# analysis: Optional[AnalysisResult] = None
# message: str
# # Initialize FastAPI app
# app = FastAPI(
# title=get_settings().app_name,
# version=get_settings().app_version,
# description="Advanced sentiment analysis service for educational feedback"
# )
# # Add CORS middleware
# app.add_middleware(
# CORSMiddleware,
# allow_origins=["*"],
# allow_credentials=True,
# allow_methods=["*"],
# allow_headers=["*"],
# )
# # Global variables for models
# sia = None
# tokenizer = None
# model = None
# device = None
# summarizer = None
# # Enhanced heuristic phrase/regex rules for explicit negative feedback
# NEGATIVE_PHRASES = [
# # Teaching quality issues
# 'very poor',
# 'extremely poor',
# 'poor in teaching',
# 'poor teaching level',
# 'poor teaching',
# 'bad teacher',
# 'bad teaching',
# 'not good', # Keep but check it's not "no negative"
# 'not satisfied',
# 'not satisfactory',
# # Content/delivery issues
# 'boring class',
# 'boring classes',
# 'boring subject',
# 'subject is boring',
# 'low voice',
# 'voice is low',
# 'cannot hear',
# "can't hear",
# 'speak louder',
# # Resource/support issues
# 'need more staff',
# 'need more faculty',
# 'insufficient staff',
# 'lack of staff',
# 'not sufficient',
# 'insufficient',
# 'not enough',
# 'no classes',
# 'no regular classes',
# 'not sufficient classes',
# # Knowledge/understanding issues
# 'lack of knowledge',
# 'better knowledge needed',
# 'poor knowledge',
# 'knowledge is lacking',
# 'practical knowledge lacking',
# 'no practical',
# 'lack of practical',
# 'no hands-on',
# 'no real world',
# 'did not understand',
# "didn't understand",
# 'not able to understand',
# 'unable to understand',
# 'difficult to understand',
# 'hard to understand',
# 'concepts are difficult',
# 'concepts difficult',
# 'cant understand',
# "can't understand",
# 'not understandable',
# # Improvement needed
# 'improve class',
# 'improvement needed',
# 'needs improvement',
# 'need improvement',
# 'should improve',
# 'must improve',
# 'not helpful',
# 'not clear',
# 'communication skills need improvement',
# 'improve communication',
# # Pace/time issues
# 'lectures are going fast',
# 'going too fast',
# 'too fast',
# 'too slow',
# 'too lag',
# 'lag',
# 'lagging',
# 'lag in teaching',
# 'not managing time',
# 'poor time management',
# 'time management issue',
# # Engagement issues
# 'not interested',
# 'no interest',
# 'going for attendance',
# 'just for attendance',
# 'only for attendance',
# 'not at all',
# 'nothing learnt',
# 'learned nothing',
# 'no improvement',
# 'same teaching',
# 'monotonous',
# 'sleeping in class',
# # Value/utility issues
# 'waste of time',
# 'wasting time',
# 'waste our time',
# 'no use',
# 'useless',
# # Administrative issues
# 'military rules',
# 'strict rules',
# 'too strict',
# 'very strict',
# 'attendance issue',
# 'attendance problem',
# 'not providing attendance',
# 'claim od',
# # Workload issues
# 'too many projects',
# 'many projects review',
# 'trouble to make',
# 'difficult to make',
# 'hard to make',
# 'placement activities', # When context is negative
# ]
# NEGATIVE_REGEXES = [
# # Teaching quality patterns
# re.compile(r"\b(very|extremely|quite|so)\s+(poor|bad|weak)\s+(in\s+)?(teaching|knowledge|communication)", re.IGNORECASE),
# re.compile(r"\bpoor\s+(teaching|teacher|faculty|knowledge|communication)", re.IGNORECASE),
# re.compile(r"\b(teaching|knowledge)\s+(is\s+)?(poor|bad|weak|lacking)", re.IGNORECASE),
# # Boring/engagement patterns
# re.compile(r"\b(boring|dull|monotonous)\s+(class|classes|subject|lecture|lectures)", re.IGNORECASE),
# re.compile(r"\b(class|classes|subject|lecture|lectures)\s+(is|are)\s+(boring|dull|monotonous)", re.IGNORECASE),
# # Voice/communication patterns
# re.compile(r"\b(low|soft|quiet)\s+voice\b", re.IGNORECASE),
# re.compile(r"\bvoice\s+(is\s+)?(low|soft|quiet|not clear)", re.IGNORECASE),
# re.compile(r"\b(cannot|can't|cant|unable to)\s+hear", re.IGNORECASE),
# # Resource/support patterns
# re.compile(r"\b(no|not|insufficient|lack of)\s+(proper|sufficient|enough|regular)?\s*(classes|notes|support|staff|faculty)", re.IGNORECASE),
# re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support|classes)", re.IGNORECASE),
# # Understanding/clarity patterns
# re.compile(r"\b(cannot|can't|cant|unable to|difficult to|hard to)\s+understand", re.IGNORECASE),
# re.compile(r"\b(not|difficult|hard)\s+(able\s+to\s+)?understand(\s+the)?(\s+(concepts?|teaching|lectures?))?", re.IGNORECASE),
# re.compile(r"\bconcepts?\s+(are\s+)?(difficult|hard|tough|complex)\s+to\s+understand", re.IGNORECASE),
# # Improvement patterns
# re.compile(r"\b(need|needs|needed|require|requires)\s+(some\s+)?(improvement|to improve)", re.IGNORECASE),
# re.compile(r"\b(should|must|have to)\s+improve", re.IGNORECASE),
# re.compile(r"\bimprovement\s+(is\s+)?need(ed)?", re.IGNORECASE),
# # Pace patterns
# re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(is|are|going)\s+(too|very)\s+(fast|slow)", re.IGNORECASE),
# re.compile(r"\b(too|very)\s+(fast|slow|lag|lagging)", re.IGNORECASE),
# # Time management patterns
# re.compile(r"\b(not|poor|bad)\s+(managing|managing)\s+time", re.IGNORECASE),
# re.compile(r"\btime\s+management\s+(is\s+)?(poor|bad|lacking)", re.IGNORECASE),
# # Attendance/engagement patterns
# re.compile(r"\b(just|only)\s+(for|going for)\s+attendance", re.IGNORECASE),
# re.compile(r"\b(going|attend|attending)\s+(to|for)\s+(her|his|their)\s+class\s+(just|only)\s+for\s+attendance", re.IGNORECASE),
# re.compile(r"\bnot\s+(at\s+all\s+)?(interested|engaging|helpful)", re.IGNORECASE),
# # Value patterns
# re.compile(r"\b(waste|wasting)\s+(of\s+)?time", re.IGNORECASE),
# re.compile(r"\b(no\s+use|useless|not useful)", re.IGNORECASE),
# # Workload patterns
# re.compile(r"\b(too\s+)?many\s+projects", re.IGNORECASE),
# re.compile(r"\btrouble\s+to\s+(make|complete|do)", re.IGNORECASE),
# # Administrative patterns
# re.compile(r"\bmilitary\s+rules", re.IGNORECASE),
# re.compile(r"\b(too|very)\s+strict", re.IGNORECASE),
# re.compile(r"\battendance\s+(issue|problem)", re.IGNORECASE),
# re.compile(r"\bnot\s+providing\s+attendance", re.IGNORECASE),
# re.compile(r"\bclaim\s+od", re.IGNORECASE),
# # Placement/scheduling patterns
# re.compile(r"\bplacement\s+activities\s+(and|with)\s+(attendance|issue|problem)", re.IGNORECASE),
# re.compile(r"\b(class|classes)\s+(intersecting|conflicting)\s+with\s+placement", re.IGNORECASE),
# ]
# META_COMMENT_PATTERNS = [
# re.compile(r"^no\s+negative\s+(comments?|feedback|remarks?)", re.IGNORECASE),
# re.compile(r"^no\s+negative\s+comments?\s+on\s+the\s+(faculty|teacher|staff|course)", re.IGNORECASE),
# re.compile(r"^no\s+(issues?|problems?|complaints?)\.?$", re.IGNORECASE),
# re.compile(r"^no\s+(issues?|problems?|complaints?)\s+(at\s+all|whatsoever)", re.IGNORECASE),
# # "Everything is good" patterns
# re.compile(r"^(everything|all)\s+(is\s+)?(good|fine|ok|okay|great|perfect|excellent)", re.IGNORECASE),
# re.compile(r"^no,?\s+(everything|all)\s+(is\s+)?(good|fine|ok|okay)", re.IGNORECASE),
# re.compile(r"^(all\s+)?good\.?$", re.IGNORECASE),
# re.compile(r"^everything\s+at\s+the\s+too\s+only", re.IGNORECASE), # From your data
# # "Nothing" patterns
# re.compile(r"^nothing\.?$", re.IGNORECASE),
# re.compile(r"^nothing\s+(to\s+)?(say|comment|mention|add)", re.IGNORECASE),
# re.compile(r"^nothing,?\s+(and\s+)?(all|everything)\s+(is\s+)?(good|fine)", re.IGNORECASE),
# # "No more comments" patterns
# re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE),
# re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE),
# re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE),
# # Empty/nil responses
# re.compile(r"^(nil|none|na|n/a|nill)\.?$", re.IGNORECASE),
# re.compile(r"^(no|nothing|none)\.?$", re.IGNORECASE),
# # Positive meta-comments (not actual feedback)
# re.compile(r"^(it's\s+|its\s+)?(all\s+)?good\.?$", re.IGNORECASE),
# re.compile(r"^fine\.?$", re.IGNORECASE),
# re.compile(r"^ok(ay)?\.?$", re.IGNORECASE),
# re.compile(r"^great\.?$", re.IGNORECASE),
# re.compile(r"^nice\.?$", re.IGNORECASE),
# ]
# def is_meta_comment(text: str) -> bool:
# """
# Check if comment is a meta-comment (not actual feedback).
# These are generic statements that don't provide substantive feedback.
# """
# if not text:
# return True # Empty text is meta
# text = text.strip()
# # Check length - very short comments are likely meta
# if len(text) < 3:
# logger.debug(f"Meta-comment (too short): '{text}'")
# return True
# # Check against patterns
# for pattern in META_COMMENT_PATTERNS:
# if pattern.match(text):
# logger.debug(f"Meta-comment detected: '{text[:50]}...'")
# return True
# return False
# def is_explicit_negative(text: str) -> bool:
# """
# Check if text contains explicit negative phrases.
# IMPORTANT: Must check if it's a meta-comment FIRST.
# """
# if not text:
# return False
# # CRITICAL: Don't classify meta-comments as negative
# if is_meta_comment(text):
# return False
# lower = text.lower()
# # Check phrases
# for phrase in NEGATIVE_PHRASES:
# if phrase in lower:
# # Double-check it's not a false positive like "no negative comments"
# if phrase == 'not good' and 'no negative' in lower:
# continue
# if phrase == 'no interest' and 'no negative' in lower:
# continue
# logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'")
# return True
# # Check regexes
# for regex in NEGATIVE_REGEXES:
# if regex.search(text):
# logger.debug(f"Negative pattern matched: {regex.pattern} in '{text[:50]}...'")
# return True
# return False
# def initialize_models():
# """Initialize sentiment analysis models with caching support"""
# global sia, tokenizer, model, device, summarizer
# try:
# settings = get_settings()
# logger.info("Initializing sentiment analysis models...")
# # Initialize VADER (NLTK data already downloaded)
# sia = SentimentIntensityAnalyzer()
# logger.info("✓ VADER initialized")
# # Initialize RoBERTa with caching
# cache_dir = settings.model_cache_dir
# os.makedirs(cache_dir, exist_ok=True)
# tokenizer = AutoTokenizer.from_pretrained(
# settings.roberta_model_name,
# cache_dir=cache_dir
# )
# model = AutoModelForSequenceClassification.from_pretrained(
# settings.roberta_model_name,
# cache_dir=cache_dir
# )
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model.to(device)
# model.eval()
# logger.info(f"✓ RoBERTa initialized on device: {device}")
# # Initialize summarizer (optional)
# if settings.use_abstractive_summary:
# try:
# summarizer = pipeline(
# "summarization",
# model=settings.summarizer_model,
# device=0 if device == "cuda" else -1
# )
# logger.info("✓ Summarizer initialized")
# except Exception as e:
# logger.warning(f"Summarizer initialization failed: {e}")
# summarizer = None
# logger.info("✓ All models initialized successfully")
# except Exception as e:
# logger.error(f"Error initializing models: {e}")
# raise e
# @lru_cache(maxsize=500)
# def vader_sentiment_cached(text: str) -> tuple:
# """Cached VADER sentiment analysis"""
# scores = sia.polarity_scores(text)
# return (scores['neg'], scores['neu'], scores['pos'], scores['compound'])
# def vader_sentiment(text: str) -> Dict[str, float]:
# """VADER sentiment analysis with caching support"""
# try:
# settings = get_settings()
# if settings.enable_caching:
# neg, neu, pos, compound = vader_sentiment_cached(text)
# return {
# 'vader_neg': neg,
# 'vader_neu': neu,
# 'vader_pos': pos,
# 'vader_compound': compound
# }
# else:
# scores = sia.polarity_scores(text)
# return {
# 'vader_neg': scores['neg'],
# 'vader_neu': scores['neu'],
# 'vader_pos': scores['pos'],
# 'vader_compound': scores['compound']
# }
# except Exception as e:
# logger.warning(f"VADER analysis failed for text: {e}")
# return {'vader_neg': 0.0, 'vader_neu': 1.0, 'vader_pos': 0.0, 'vader_compound': 0.0}
# def roberta_sentiment_batch(texts: List[str]) -> List[Dict[str, float]]:
# """Batch RoBERTa sentiment analysis for better performance"""
# try:
# settings = get_settings()
# results = []
# for i in range(0, len(texts), settings.batch_size):
# batch = texts[i:i + settings.batch_size]
# encoded = tokenizer(
# batch,
# return_tensors='pt',
# truncation=True,
# max_length=512,
# padding=True
# )
# encoded = {k: v.to(device) for k, v in encoded.items()}
# with torch.no_grad():
# outputs = model(**encoded)
# for output in outputs.logits:
# scores = softmax(output.cpu().numpy())
# results.append({
# 'roberta_neg': float(scores[0]),
# 'roberta_neu': float(scores[1]),
# 'roberta_pos': float(scores[2])
# })
# return results
# except Exception as e:
# logger.warning(f"RoBERTa batch analysis failed: {e}")
# return [{'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0} for _ in texts]
# def roberta_sentiment(text: str) -> Dict[str, float]:
# """Single text RoBERTa sentiment analysis"""
# try:
# encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
# encoded_text = {k: v.to(device) for k, v in encoded_text.items()}
# with torch.no_grad():
# output = model(**encoded_text)
# scores = softmax(output[0][0].cpu().numpy())
# return {
# 'roberta_neg': float(scores[0]),
# 'roberta_neu': float(scores[1]),
# 'roberta_pos': float(scores[2])
# }
# except Exception as e:
# logger.warning(f"RoBERTa analysis failed for text: {e}")
# return {'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0}
# def overall_sentiment(row: pd.Series, settings: Settings) -> str:
# """Determine overall sentiment using combined scores with configurable thresholds"""
# combined_pos = row.get('combined_pos', 0.0)
# combined_neg = row.get('combined_neg', 0.0)
# combined_neu = row.get('combined_neu', 0.0)
# vader_compound = row.get('vader_compound', 0.0)
# roberta_neg = row.get('roberta_neg', 0.0)
# roberta_pos = row.get('roberta_pos', 0.0)
# # Priority 1: Heuristic negative patterns override everything
# if row.get('heuristic_negative') is True:
# return 'Negative'
# # Priority 2: Strong negative signals
# if (
# vader_compound <= settings.vader_neg_threshold or
# roberta_neg >= settings.roberta_neg_threshold or
# combined_neg >= max(combined_pos, combined_neu)
# ):
# return 'Negative'
# # Priority 3: Positive signals
# if (
# vader_compound >= settings.vader_pos_threshold or
# roberta_pos >= settings.roberta_pos_threshold or
# combined_pos >= max(combined_neg, combined_neu)
# ):
# return 'Positive'
# # Default: Neutral
# return 'Neutral'
# def sanitize_text(text: str) -> str:
# """Sanitize input text while preserving emojis"""
# if not text:
# return ""
# # Remove control characters but keep printable characters and emojis
# text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text)
# # Normalize whitespace
# text = ' '.join(text.split())
# return text.strip()
# def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]:
# """Main sentiment analysis function with enhanced performance"""
# try:
# settings = get_settings()
# logger.info(f"Received {len(comments)} comments for analysis")
# # Sanitize comments
# sanitized_comments = [sanitize_text(comment) for comment in comments]
# # FIXED: Changed < to <= to properly handle min_comment_words
# filtered_comments = [
# comment for comment in sanitized_comments
# if (settings.min_comment_words <= len(comment.split()) <= settings.max_comment_length)
# ]
# logger.info(f"After filtering: {len(filtered_comments)} valid comments")
# if not filtered_comments:
# return {
# "total_comments": 0,
# "message": "No valid comments found for analysis"
# }
# # Create dataframe
# df = pd.DataFrame({'comment': filtered_comments})
# # Detect meta-comments and explicit negatives
# df['is_meta'] = df['comment'].apply(is_meta_comment)
# df['heuristic_negative'] = df['comment'].apply(is_explicit_negative)
# # Log detection results
# meta_count = df['is_meta'].sum()
# heuristic_neg_count = df['heuristic_negative'].sum()
# logger.info(f"Detected {meta_count} meta-comments and {heuristic_neg_count} heuristic negatives")
# # VADER sentiment analysis
# vader_results = []
# for text in df['comment']:
# vader_results.append(vader_sentiment(text))
# # RoBERTa sentiment analysis (batch)
# roberta_results = roberta_sentiment_batch(df['comment'].tolist())
# # Combine results
# vader_df = pd.DataFrame(vader_results)
# roberta_df = pd.DataFrame(roberta_results)
# final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1)
# # Calculate combined scores
# final_df['combined_pos'] = (
# settings.combined_weight_vader * final_df['vader_pos'] +
# settings.combined_weight_roberta * final_df['roberta_pos']
# )
# final_df['combined_neg'] = (
# settings.combined_weight_vader * final_df['vader_neg'] +
# settings.combined_weight_roberta * final_df['roberta_neg']
# )
# final_df['combined_neu'] = (
# settings.combined_weight_vader * final_df['vader_neu'] +
# settings.combined_weight_roberta * final_df['roberta_neu']
# )
# # Classify overall sentiment (meta-comments become Neutral)
# final_df['Overall_Sentiment'] = final_df.apply(
# lambda row: 'Neutral' if row.get('is_meta') else overall_sentiment(row, settings),
# axis=1
# )
# # Calculate statistics
# total_comments = len(final_df)
# positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive'])
# negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative'])
# neutral_count = len(final_df[final_df['Overall_Sentiment'] == 'Neutral'])
# logger.info(
# f"Results: {positive_count} positive, "
# f"{negative_count} negative, {neutral_count} neutral"
# )
# # Average scores
# avg_positive = float(final_df['combined_pos'].mean())
# avg_negative = float(final_df['combined_neg'].mean())
# avg_neutral = float(final_df['combined_neu'].mean())
# # Determine overall sentiment label
# if avg_positive > max(avg_negative, avg_neutral):
# overall_sentiment_label = "Positive"
# elif avg_negative > max(avg_positive, avg_neutral):
# overall_sentiment_label = "Negative"
# else:
# overall_sentiment_label = "Neutral"
# # Process negative comments
# negative_summary = ""
# negative_comments_list = []
# negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative']
# if len(negative_comments) > 0:
# negative_comments_list = negative_comments['comment'].tolist()
# try:
# # Get top negative comments
# top_idx = negative_comments['combined_neg'].nlargest(3).index
# top_comments = negative_comments.loc[top_idx, 'comment'].tolist()
# if settings.use_abstractive_summary and summarizer is not None:
# negative_text = " ".join(top_comments)
# if len(negative_text) > 1000:
# negative_text = negative_text[:1000]
# summary_result = summarizer(
# negative_text,
# max_length=settings.max_summary_length,
# min_length=settings.min_summary_length,
# do_sample=False
# )
# negative_summary = summary_result[0]['summary_text']
# else:
# # Extractive summary
# negative_summary = "; ".join(top_comments)
# except Exception as e:
# logger.warning(f"Summary generation failed: {e}")
# negative_summary = "; ".join(negative_comments_list[:3])
# # Generate insights and recommendations
# insights = []
# recommendations = []
# if overall_sentiment_label == "Positive":
# insights.extend([
# "Students have positive feedback overall",
# "Teaching methods are well-received",
# f"{positive_count}/{total_comments} comments are positive"
# ])
# recommendations.extend([
# "Continue current teaching approach",
# "Maintain student engagement strategies",
# "Share successful practices with colleagues"
# ])
# elif overall_sentiment_label == "Negative":
# insights.extend([
# "Students have concerns that need attention",
# "Some aspects of teaching may need improvement",
# f"{negative_count}/{total_comments} comments indicate issues"
# ])
# recommendations.extend([
# "Review teaching methods and materials",
# "Consider additional student support",
# "Schedule meetings to address student concerns",
# "Focus on areas mentioned in negative feedback"
# ])
# else:
# insights.extend([
# "Mixed feedback from students",
# "Some areas performing well, others need attention",
# "Balance of positive and negative responses"
# ])
# recommendations.extend([
# "Focus on areas with negative feedback",
# "Maintain strengths while addressing weaknesses",
# "Gather more specific feedback on improvement areas"
# ])
# return {
# "total_comments": total_comments,
# "positive_comments": positive_count,
# "negative_comments": negative_count,
# "neutral_comments": neutral_count,
# "positive_sentiment": round(avg_positive, 3),
# "negative_sentiment": round(avg_negative, 3),
# "neutral_sentiment": round(avg_neutral, 3),
# "overall_sentiment": overall_sentiment_label,
# "sentiment_distribution": {
# "positive_percentage": round((positive_count / total_comments) * 100, 1),
# "negative_percentage": round((negative_count / total_comments) * 100, 1),
# "neutral_percentage": round((neutral_count / total_comments) * 100, 1)
# },
# "negative_comments_summary": negative_summary,
# "negative_comments_list": negative_comments_list,
# "key_insights": insights,
# "recommendations": recommendations,
# "detailed_analysis": {
# "vader_scores": {
# "average_positive": round(final_df['vader_pos'].mean(), 3),
# "average_negative": round(final_df['vader_neg'].mean(), 3),
# "average_neutral": round(final_df['vader_neu'].mean(), 3),
# "average_compound": round(final_df['vader_compound'].mean(), 3)
# },
# "roberta_scores": {
# "average_positive": round(final_df['roberta_pos'].mean(), 3),
# "average_negative": round(final_df['roberta_neg'].mean(), 3),
# "average_neutral": round(final_df['roberta_neu'].mean(), 3)
# }
# },
# "analysis_timestamp": datetime.utcnow().isoformat()
# }
# except Exception as e:
# logger.error(f"Sentiment analysis failed: {e}", exc_info=True)
# raise e
# @app.on_event("startup")
# async def startup_event():
# """Initialize models on startup"""
# try:
# logger.info("=" * 80)
# logger.info(f"Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
# logger.info("=" * 80)
# initialize_models()
# logger.info("✓ Service started successfully")
# logger.info("=" * 80)
# except Exception as e:
# logger.error(f"✗ Startup failed: {e}")
# raise e
# @app.on_event("shutdown")
# async def shutdown_event():
# """Cleanup on shutdown"""
# logger.info("Service shutting down")
# @app.get("/")
# async def root():
# """Root endpoint"""
# return {
# "service": get_settings().app_name,
# "version": get_settings().app_version,
# "status": "running",
# "endpoints": {
# "health": "/health",
# "analyze": "/analyze-comments",
# "config": "/config (debug mode only)",
# "test": "/test"
# }
# }
# @app.get("/health")
# async def health_check():
# """Health check endpoint"""
# models_loaded = sia is not None and model is not None and tokenizer is not None
# return {
# "status": "healthy" if models_loaded else "unhealthy",
# "service": "comment-analysis",
# "version": get_settings().app_version,
# "models_loaded": models_loaded,
# "device": device if device else "not initialized",
# "timestamp": datetime.utcnow().isoformat()
# }
# @app.post("/analyze-comments", response_model=CommentAnalysisResponse)
# async def analyze_comments(
# request: CommentAnalysisRequest,
# settings: Settings = Depends(get_settings)
# ):
# """
# Analyze comments for sentiment analysis using VADER and RoBERTa models
# """
# try:
# comments = request.comments
# faculty_info = request.faculty_info
# if not comments:
# return CommentAnalysisResponse(
# success=False,
# analysis=None,
# message="No comments provided for analysis"
# )
# logger.info(
# f"Analyzing {len(comments)} comments for "
# f"{faculty_info.faculty_name} ({faculty_info.course_code})"
# )
# analysis_result = analyze_comments_sentiment(comments)
# if analysis_result.get("total_comments", 0) == 0:
# return CommentAnalysisResponse(
# success=False,
# analysis=None,
# message=analysis_result.get("message", "No valid comments to analyze")
# )
# analysis_result["faculty_info"] = {
# "faculty_name": faculty_info.faculty_name,
# "staff_id": faculty_info.staff_id,
# "course_code": faculty_info.course_code,
# "course_name": faculty_info.course_name
# }
# return CommentAnalysisResponse(
# success=True,
# analysis=analysis_result,
# message=f"Successfully analyzed {analysis_result['total_comments']} comments"
# )
# except ValueError as ve:
# logger.warning(f"Validation error: {ve}")
# raise HTTPException(status_code=400, detail=str(ve))
# except Exception as e:
# logger.error(f"Analysis failed: {e}", exc_info=True)
# raise HTTPException(
# status_code=500,
# detail="Analysis failed. Please try again later."
# )
# @app.get("/config")
# async def get_config(settings: Settings = Depends(get_settings)):
# """Get current configuration (debug mode only)"""
# if not settings.debug_mode:
# raise HTTPException(status_code=404, detail="Not found")
# return {
# "max_comments_per_request": settings.max_comments_per_request,
# "max_comment_length": settings.max_comment_length,
# "min_comment_words": settings.min_comment_words,
# "vader_pos_threshold": settings.vader_pos_threshold,
# "vader_neg_threshold": settings.vader_neg_threshold,
# "roberta_pos_threshold": settings.roberta_pos_threshold,
# "roberta_neg_threshold": settings.roberta_neg_threshold,
# "combined_weight_vader": settings.combined_weight_vader,
# "combined_weight_roberta": settings.combined_weight_roberta,
# "enable_caching": settings.enable_caching,
# "batch_size": settings.batch_size,
# "use_abstractive_summary": settings.use_abstractive_summary
# }
# @app.get("/test")
# async def test_endpoint():
# """Test endpoint to verify sentiment classification"""
# test_cases = [
# "No more comments 😅",
# "Overall good but too lag",
# "Not interested to be in her class just we are going for attendance thats it not at all managing time.",
# "Nothing to say anything just we are going to her class mean, only for attendance",
# "Excellent teaching! Very clear explanations.",
# "Good teacher with strong subject knowledge",
# "Class is okay, nothing special"
# ]
# results = []
# for text in test_cases:
# is_meta = is_meta_comment(text)
# is_neg = is_explicit_negative(text)
# # Predict classification
# if is_meta:
# predicted = "Neutral (meta-comment)"
# elif is_neg:
# predicted = "Negative (heuristic)"
# else:
# predicted = "Needs full analysis"
# results.append({
# "text": text,
# "is_meta_comment": is_meta,
# "is_heuristic_negative": is_neg,
# "predicted_classification": predicted
# })
# return {
# "test_results": results,
# "note": "Full analysis requires VADER and RoBERTa scores"
# }
# if __name__ == "__main__":
# uvicorn.run(
# app,
# host="0.0.0.0",
# port=8000,
# log_level="info"
# )