Spaces:
Sleeping
Sleeping
| """ | |
| Enhanced FastAPI Service for Comment Sentiment Analysis | |
| Version 3.0.0 - Major accuracy improvements with advanced classification | |
| Features: | |
| - Multi-stage sentiment detection | |
| - Context-aware negative pattern matching | |
| - Improved neutral/meta-comment detection | |
| - Enhanced accuracy through ensemble approach | |
| """ | |
| from fastapi import FastAPI, HTTPException, Depends | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel, Field, validator | |
| from pydantic_settings import BaseSettings | |
| from typing import List, Dict, Any, Optional | |
| from functools import lru_cache | |
| import uvicorn | |
| import pandas as pd | |
| import numpy as np | |
| import os | |
| import re | |
| from datetime import datetime | |
| import logging | |
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# NLTK Setup
import nltk
import ssl

# Work around environments whose certificate store breaks NLTK's HTTPS
# downloads by installing an unverified SSL context; very old Pythons lack
# the private attribute, in which case nothing changes.
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

# Keep NLTK data in a writable location (e.g. containerized deployments)
# and search it before the default paths.
nltk_data_dir = '/tmp/nltk_data'
os.makedirs(nltk_data_dir, exist_ok=True)
nltk.data.path.insert(0, nltk_data_dir)
def ensure_nltk_data():
    """Ensure all required NLTK data is downloaded.

    Each required resource is looked up at its on-disk location; anything
    missing is fetched into ``nltk_data_dir``. Download failures are logged
    but never raised, so startup continues on a best-effort basis.
    """
    # Resource name -> path nltk.data.find() expects for that resource.
    lookup_paths = {
        'vader_lexicon': 'sentiment/vader_lexicon.zip',
        'punkt': 'tokenizers/punkt',
        'stopwords': 'corpora/stopwords',
        'wordnet': 'corpora/wordnet',
        'omw-1.4': 'corpora/omw-1.4',
    }
    for resource, path in lookup_paths.items():
        try:
            nltk.data.find(path)
        except LookupError:
            logger.info(f"Downloading NLTK resource '{resource}'...")
            try:
                nltk.download(resource, download_dir=nltk_data_dir, quiet=False)
                logger.info(f"✓ Successfully downloaded '{resource}'")
            except Exception as download_error:
                logger.error(f"✗ Failed to download '{resource}': {download_error}")
        else:
            logger.info(f"✓ NLTK resource '{resource}' already available")


logger.info("Ensuring NLTK data is available...")
ensure_nltk_data()
| from nltk.sentiment import SentimentIntensityAnalyzer | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline | |
| from scipy.special import softmax | |
| import torch | |
# Configuration
class Settings(BaseSettings):
    """Application settings, overridable via environment variables / .env."""
    app_name: str = "Comment Analysis API"
    app_version: str = "3.0.0"
    debug_mode: bool = False
    # Request-validation limits
    max_comments_per_request: int = 1000
    max_comment_length: int = 5000
    min_comment_words: int = 1
    # Enhanced thresholds for better accuracy
    # VADER compound score cut-offs (range -1..1)
    vader_strong_pos_threshold: float = 0.5
    vader_pos_threshold: float = 0.2
    vader_neg_threshold: float = -0.2
    vader_strong_neg_threshold: float = -0.5
    # RoBERTa class-probability cut-offs (range 0..1)
    roberta_strong_pos_threshold: float = 0.70
    roberta_pos_threshold: float = 0.55
    roberta_neg_threshold: float = 0.40
    roberta_strong_neg_threshold: float = 0.60
    # Adjusted weights for better accuracy; get_settings() normalizes them
    # to sum to 1.0 if they drift.
    combined_weight_vader: float = 0.4
    combined_weight_roberta: float = 0.6
    # Model selection / cache location
    model_cache_dir: str = "/tmp/model_cache"
    roberta_model_name: str = "cardiffnlp/twitter-roberta-base-sentiment"
    # Optional abstractive summarization of negative comments
    use_abstractive_summary: bool = False
    summarizer_model: str = "facebook/bart-large-cnn"
    max_summary_length: int = 100
    min_summary_length: int = 25
    # Performance knobs
    enable_caching: bool = True
    cache_size: int = 500
    batch_size: int = 32

    class Config:
        # pydantic v1-style settings source configuration
        env_file = ".env"
        env_file_encoding = 'utf-8'
        extra = 'ignore'
@lru_cache()
def get_settings() -> Settings:
    """Cached settings instance.

    The first call constructs Settings (reading environment / .env) and
    normalizes the ensemble weights so they sum to 1.0; ``lru_cache`` then
    returns that same instance on every later call, including when used as
    a FastAPI dependency.

    Note: previously the docstring promised caching but no cache existed,
    so every call re-parsed the environment.
    """
    settings = Settings()
    total = settings.combined_weight_vader + settings.combined_weight_roberta
    if not (0.99 <= total <= 1.01):
        logger.warning(f"Weights sum to {total}, normalizing to 1.0")
        settings.combined_weight_vader /= total
        settings.combined_weight_roberta /= total
    return settings
# Pydantic Models
class FacultyInfo(BaseModel):
    """Identifying details of the faculty member and course being evaluated."""
    faculty_name: str = Field(..., min_length=1, max_length=200)
    staff_id: str = Field(..., min_length=1, max_length=50)
    course_code: str = Field(..., min_length=1, max_length=50)
    course_name: str = Field(..., min_length=1, max_length=200)
class CommentAnalysisRequest(BaseModel):
    """Request payload: the comments to analyze plus faculty identification."""
    comments: List[str] = Field(..., min_items=1)
    faculty_info: FacultyInfo

    # BUG FIX: this method previously had no @validator decorator, so pydantic
    # never invoked it and the configured size limits were not enforced.
    @validator('comments')
    def validate_comments(cls, v):
        """Enforce the configured comment-count and per-comment length limits.

        Raises:
            ValueError: if the batch or any single comment exceeds its limit
                (surfaced by FastAPI as a 422 validation error).
        """
        settings = get_settings()
        if len(v) > settings.max_comments_per_request:
            raise ValueError(f'Maximum {settings.max_comments_per_request} comments per request')
        for idx, comment in enumerate(v):
            if len(comment) > settings.max_comment_length:
                raise ValueError(f'Comment {idx} exceeds maximum length of {settings.max_comment_length} characters')
        return v
class SentimentDistribution(BaseModel):
    """Percentage share of each sentiment class across all comments."""
    positive_percentage: float
    negative_percentage: float
    neutral_percentage: float


class DetailedScores(BaseModel):
    """Average raw scores produced by a single analyzer."""
    average_positive: float
    average_negative: float
    average_neutral: float
    # Compound score is only produced by VADER; RoBERTa leaves it None.
    average_compound: Optional[float] = None


class DetailedAnalysis(BaseModel):
    """Per-analyzer score breakdown (VADER and RoBERTa)."""
    vader_scores: DetailedScores
    roberta_scores: DetailedScores


class AnalysisResult(BaseModel):
    """Full analysis payload returned for one batch of comments."""
    total_comments: int
    positive_comments: int
    negative_comments: int
    neutral_comments: int
    positive_sentiment: float
    negative_sentiment: float
    neutral_sentiment: float
    overall_sentiment: str
    sentiment_distribution: SentimentDistribution
    negative_comments_summary: str
    negative_comments_list: List[str]
    key_insights: List[str]
    recommendations: List[str]
    detailed_analysis: DetailedAnalysis
    faculty_info: Dict[str, str]
    analysis_timestamp: str


class CommentAnalysisResponse(BaseModel):
    """Top-level API response envelope."""
    success: bool
    # None when success is False (no analysis could be produced).
    analysis: Optional[AnalysisResult] = None
    message: str
# Initialize FastAPI
app = FastAPI(
    title=get_settings().app_name,
    version=get_settings().app_version,
    description="Advanced sentiment analysis service for educational feedback"
)

# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers per the CORS spec — confirm whether credentials are
# actually needed, or restrict the origin list for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global model handles, populated by initialize_models() at startup.
sia = None          # VADER SentimentIntensityAnalyzer
tokenizer = None    # RoBERTa tokenizer
model = None        # RoBERTa sequence-classification model
device = None       # "cuda" or "cpu"
summarizer = None   # optional abstractive summarization pipeline
# ============================================================================
# ENHANCED PATTERN DETECTION FOR BETTER ACCURACY
# ============================================================================

# Meta-comments (not actual feedback - should be NEUTRAL).
# Matched with .match() and anchored ^...$ so the WHOLE comment must be a
# meta phrase ("no comments", "nil", "everything is good", ...).
META_PATTERNS = re.compile(
    r'^(no\s+(negative\s+)?(more\s+)?(comments?|feedback|remarks?|issues?|problems?|complaints?)|'
    r'(everything|all)\s+(is\s+)?(good|fine|ok(ay)?|great|perfect|excellent)|'
    r'nothing(\s+to\s+(say|comment|mention|add))?|'
    r'(nil|none|na|n/a|nill)\.?|'
    r'^(all\s+)?(good|fine|ok(ay)?|great|nice)\.?|'
    r'no\s+remarks?|'
    r'everything\s+at\s+the\s+too\s+only)$',
    re.IGNORECASE
)

# Strong NEGATIVE indicators (should override model scores).
# Searched anywhere in the comment; one hit forces a 'Negative' label.
STRONG_NEGATIVE_PATTERN = re.compile(
    r'\b('
    # Direct criticism
    r'(very|extremely|quite|so|too)\s+(poor|bad|weak|terrible|awful|horrible)|'
    r'poor\s+(teaching|teacher|faculty|knowledge|communication|quality|explanation)|'
    r'bad\s+(teaching|teacher|faculty|quality|explanation)|'
    r'terrible|horrible|awful|pathetic|useless|waste\s+of\s+time|'
    # Teaching quality issues
    r'(teaching|knowledge)\s+(is\s+)?(poor|bad|weak|lacking|insufficient|not\s+good)|'
    r'cannot\s+teach|can\'?t\s+teach|doesn\'?t\s+know\s+how\s+to\s+teach|'
    r'not\s+teaching\s+properly|teaching\s+method\s+is\s+(poor|bad)|'
    # Boring/disengagement
    r'(boring|dull|monotonous)\s+(class|classes|subject|lecture|lectures|sessions?)|'
    r'(class|classes|subject|lectures?)\s+(is|are)\s+(boring|dull|monotonous|uninteresting)|'
    r'sleeping\s+in\s+class|fall\s+asleep|makes?\s+us\s+sleep|'
    # Communication issues
    r'(low|soft|quiet|unclear)\s+voice|voice\s+(is\s+)?(low|soft|quiet|not\s+clear)|'
    r'(cannot|can\'?t|cant|unable\s+to)\s+hear|difficult\s+to\s+hear|'
    r'(not|poor|bad)\s+(communication|explaining|explanation)|'
    # Understanding issues
    r'(cannot|can\'?t|cant|unable\s+to|difficult\s+to|hard\s+to)\s+understand|'
    r'(not|never|don\'?t)\s+(able\s+to\s+)?understand|'
    r'(concepts?|topics?|subjects?)\s+(are\s+)?(difficult|hard|tough|impossible)\s+to\s+understand|'
    r'makes?\s+(no|little)\s+sense|doesn\'?t\s+make\s+sense|'
    # Improvement needed
    r'(need|needs|require|requires)\s+(urgent|serious|immediate|much|lot\s+of)?\s*improvement|'
    r'(should|must|have\s+to)\s+improve\s+(a\s+lot|more|urgently)|'
    # Pace issues
    r'(lectures?|class(es)?|teaching)\s+(is|are|going)\s+(too|very)\s+(fast|slow)|'
    r'(too|very|extremely)\s+(fast|slow|rush|rushed)|'
    r'(lag|lagging)\s+in\s+teaching|teaching\s+(is\s+)?lagging|'
    # Time management
    # (was '(managing|managing)' — redundant duplicate alternative removed)
    r'(not|poor|bad|terrible)\s+managing\s+time|'
    r'time\s+management\s+(is\s+)?(poor|bad|terrible|lacking)|'
    r'always\s+(late|wasting\s+time)|waste\s+(our|class)\s+time|'
    # Lack of resources/support
    r'(no|not|insufficient|lack\s+of)\s+(proper|sufficient|enough|regular)?\s*(classes|notes|support|help)|'
    r'need\s+more\s+(staff|faculty|classes|support|help)|'
    r'no\s+(practical|hands[-\s]?on|lab|real[-\s]?world)|lack\s+of\s+practical|'
    # Attendance/engagement issues
    r'(just|only)\s+(for|going\s+for)\s+attendance|'
    r'going\s+(to|for)\s+(her|his|their)\s+class\s+(just|only)\s+for\s+attendance|'
    r'(not|no)\s+(interested|engaging|helpful|useful|at\s+all)|'
    r'no\s+interest\s+in\s+teaching|'
    # Administrative issues
    r'military\s+rules|too\s+strict|very\s+strict|'
    r'attendance\s+(issue|problem)|not\s+providing\s+attendance|'
    # Workload issues
    r'too\s+many\s+projects|many\s+projects\s+review|'
    r'placement\s+activities\s+(and|with)\s+attendance'
    r')\b',
    re.IGNORECASE
)

# Positive indicators (help identify positive comments).
POSITIVE_PATTERN = re.compile(
    r'\b('
    r'(very|extremely|really|so|truly)\s+(good|great|excellent|amazing|wonderful|fantastic|helpful|knowledgeable|clear)|'
    r'excellent|outstanding|amazing|wonderful|fantastic|brilliant|superb|'
    r'(great|good|best|wonderful)\s+(teaching|teacher|faculty|knowledge|explanation|professor|sir|madam)|'
    r'(teaching|explanation|knowledge)\s+(is\s+)?(excellent|outstanding|very\s+good|great|clear)|'
    r'explains?\s+(very\s+)?(well|clearly|nicely|perfectly)|'
    r'(easy|easier)\s+to\s+understand|clear\s+explanation|'
    r'(very\s+)?(helpful|supportive|friendly|approachable|patient)|'
    r'(good|strong|deep|vast)\s+(knowledge|understanding)|'
    r'(love|like|enjoy|appreciate)\s+(the\s+)?(class|classes|teaching|subject|course|lectures?)|'
    r'learned?\s+(a\s+lot|so\s+much|many\s+things)|'
    r'inspired?|inspiring|motivating|motivated|encouraged|'
    r'(best|favourite|favorite)\s+(teacher|faculty|professor)|'
    r'highly\s+recommend|strongly\s+recommend|'
    r'grateful|thankful|blessed|lucky\s+to\s+have|'
    r'satisfied|happy\s+with|pleased\s+with|'
    r'(always|very)\s+(available|accessible|helpful)|'
    r'patient|caring|dedicated|passionate|'
    r'interactive\s+class|engaging\s+class|interesting\s+class'
    r')\b',
    re.IGNORECASE
)

# Weak negative indicators (suggestions/mild criticism - might be NEUTRAL).
WEAK_NEGATIVE_PATTERN = re.compile(
    r'\b('
    r'could\s+(be\s+)?better|'
    r'can\s+improve|'
    r'would\s+be\s+good\s+if|'
    r'suggest|suggestion|'
    r'maybe|perhaps|'
    r'slightly|a\s+bit|'
    r'sometimes|occasionally'
    r')\b',
    re.IGNORECASE
)


def is_meta_comment(text: str) -> bool:
    """Return True if the comment is meta (empty/too short, or a pure
    'no comments'-style phrase) rather than actual feedback."""
    if not text or len(text.strip()) < 3:
        return True
    text = text.strip()
    return bool(META_PATTERNS.match(text))


def detect_strong_negative(text: str) -> bool:
    """Return True if the comment contains a strong, explicit criticism.

    Meta-comments are excluded first so e.g. 'no problems' is not misread.
    """
    if not text or is_meta_comment(text):
        return False
    return bool(STRONG_NEGATIVE_PATTERN.search(text))


def detect_positive(text: str) -> bool:
    """Return True if the comment contains explicit positive wording
    (meta-comments excluded)."""
    if not text or is_meta_comment(text):
        return False
    return bool(POSITIVE_PATTERN.search(text))


def detect_weak_negative(text: str) -> bool:
    """Return True if the comment contains mild-criticism / suggestion
    wording (meta-comments excluded)."""
    if not text or is_meta_comment(text):
        return False
    return bool(WEAK_NEGATIVE_PATTERN.search(text))
# ============================================================================
# MODEL INITIALIZATION
# ============================================================================
def initialize_models():
    """Initialize sentiment analysis models.

    Populates the module globals: ``sia`` (VADER), ``tokenizer``/``model``/
    ``device`` (RoBERTa) and optionally ``summarizer``. Any failure is
    logged and re-raised so the caller can abort startup.
    """
    global sia, tokenizer, model, device, summarizer
    try:
        settings = get_settings()
        logger.info("Initializing sentiment analysis models...")
        # VADER
        sia = SentimentIntensityAnalyzer()
        logger.info("✓ VADER initialized")
        # RoBERTa — weights are cached on disk so restarts avoid re-download.
        cache_dir = settings.model_cache_dir
        os.makedirs(cache_dir, exist_ok=True)
        tokenizer = AutoTokenizer.from_pretrained(
            settings.roberta_model_name,
            cache_dir=cache_dir
        )
        model = AutoModelForSequenceClassification.from_pretrained(
            settings.roberta_model_name,
            cache_dir=cache_dir
        )
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model.to(device)
        model.eval()  # inference-only; gradients also disabled per-call
        logger.info(f"✓ RoBERTa initialized on device: {device}")
        # Summarizer (optional — the service works without it; the extractive
        # fallback in analyze_comments_sentiment is used instead)
        if settings.use_abstractive_summary:
            try:
                summarizer = pipeline(
                    "summarization",
                    model=settings.summarizer_model,
                    device=0 if device == "cuda" else -1
                )
                logger.info("✓ Summarizer initialized")
            except Exception as e:
                logger.warning(f"Summarizer initialization failed: {e}")
                summarizer = None
        logger.info("✓ All models initialized successfully")
    except Exception as e:
        logger.error(f"Error initializing models: {e}")
        raise e
# ============================================================================
# SENTIMENT ANALYSIS FUNCTIONS
# ============================================================================

# BUG FIX: this helper was named/used as a cache (the enable_caching branch
# below routes through it) but had no memoization — every call recomputed the
# VADER scores. lru_cache was already imported at the top of the file.
@lru_cache(maxsize=512)  # bounded; keep roughly in line with Settings.cache_size
def vader_sentiment_cached(text: str) -> tuple:
    """Memoized VADER polarity lookup.

    Returns:
        (neg, neu, pos, compound) — a hashable tuple so lru_cache can store it.
    """
    scores = sia.polarity_scores(text)
    return (scores['neg'], scores['neu'], scores['pos'], scores['compound'])


def vader_sentiment(text: str) -> Dict[str, float]:
    """VADER sentiment analysis for a single comment.

    Uses the memoized helper when caching is enabled (repeated comments are
    common in survey data). On any failure, returns a fully-neutral score
    dict so downstream aggregation keeps working.
    """
    try:
        settings = get_settings()
        if settings.enable_caching:
            neg, neu, pos, compound = vader_sentiment_cached(text)
            return {
                'vader_neg': neg,
                'vader_neu': neu,
                'vader_pos': pos,
                'vader_compound': compound
            }
        else:
            scores = sia.polarity_scores(text)
            return {
                'vader_neg': scores['neg'],
                'vader_neu': scores['neu'],
                'vader_pos': scores['pos'],
                'vader_compound': scores['compound']
            }
    except Exception as e:
        logger.warning(f"VADER analysis failed: {e}")
        return {'vader_neg': 0.0, 'vader_neu': 1.0, 'vader_pos': 0.0, 'vader_compound': 0.0}
def roberta_sentiment_batch(texts: List[str]) -> List[Dict[str, float]]:
    """Score a list of texts with the RoBERTa model, in batches.

    Returns one dict per input text with softmaxed class probabilities.
    On any failure the whole batch falls back to fully-neutral scores so
    the pipeline never crashes mid-analysis.
    """
    try:
        settings = get_settings()
        results = []
        # Fixed-size batches bound peak memory on CPU/GPU.
        for i in range(0, len(texts), settings.batch_size):
            batch = texts[i:i + settings.batch_size]
            encoded = tokenizer(
                batch,
                return_tensors='pt',
                truncation=True,   # clip to the model's 512-token limit
                max_length=512,
                padding=True
            )
            encoded = {k: v.to(device) for k, v in encoded.items()}
            with torch.no_grad():
                outputs = model(**encoded)
            for output in outputs.logits:
                scores = softmax(output.cpu().numpy())
                # Index order 0=neg, 1=neu, 2=pos — assumed from the
                # cardiffnlp twitter-roberta-base-sentiment label layout;
                # TODO confirm against model.config.id2label.
                results.append({
                    'roberta_neg': float(scores[0]),
                    'roberta_neu': float(scores[1]),
                    'roberta_pos': float(scores[2])
                })
        return results
    except Exception as e:
        logger.warning(f"RoBERTa batch analysis failed: {e}")
        return [{'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0} for _ in texts]
def classify_sentiment_enhanced(row: pd.Series, settings: "Settings") -> str:
    """
    Enhanced multi-stage sentiment classification for better accuracy.

    Stage 1: Meta-comments → Neutral
    Stage 2: Strong negative patterns → Negative (override models)
    Stage 3: Strong positive patterns + high scores → Positive
    Stage 4: Model ensemble decision
    Stage 5: Default to neutral if uncertain

    Args:
        row: A mapping (pandas row or dict) with the per-comment score and
            pattern-flag columns; every lookup defaults to 0.0/False.
        settings: Object carrying the vader_*/roberta_* threshold attributes.

    Returns:
        'Positive', 'Negative' or 'Neutral'.
    """
    # Stage 1: Meta-comments ("no comments", "nil", ...) carry no signal.
    if row.get('is_meta', False):
        return 'Neutral'
    # Gather only the signals actually used below (the original also read
    # vader_pos/vader_neg/roberta_neu, which were never referenced).
    vader_compound = row.get('vader_compound', 0.0)
    roberta_pos = row.get('roberta_pos', 0.0)
    roberta_neg = row.get('roberta_neg', 0.0)
    combined_pos = row.get('combined_pos', 0.0)
    combined_neg = row.get('combined_neg', 0.0)
    combined_neu = row.get('combined_neu', 0.0)
    has_strong_negative = row.get('has_strong_negative', False)
    has_positive = row.get('has_positive', False)
    has_weak_negative = row.get('has_weak_negative', False)
    # Stage 2: Explicit criticism patterns override model scores.
    if has_strong_negative:
        return 'Negative'
    # Stage 3: Positive wording confirmed by at least one strong model signal.
    if has_positive and (
        vader_compound >= settings.vader_strong_pos_threshold or
        roberta_pos >= settings.roberta_strong_pos_threshold or
        (vader_compound >= settings.vader_pos_threshold and roberta_pos >= settings.roberta_pos_threshold)
    ):
        return 'Positive'
    # Stage 4a: Strongly negative model evidence.
    if (
        vader_compound <= settings.vader_strong_neg_threshold or
        roberta_neg >= settings.roberta_strong_neg_threshold or
        (vader_compound <= settings.vader_neg_threshold and roberta_neg >= settings.roberta_neg_threshold)
    ):
        return 'Negative'
    # Stage 4b: Moderate negative via the weighted ensemble scores.
    if (
        combined_neg > combined_pos and
        combined_neg > combined_neu and
        combined_neg > 0.35  # Threshold for clarity
    ):
        return 'Negative'
    # Stage 4c: Clear positive via the weighted ensemble scores.
    if (
        combined_pos > combined_neg and
        combined_pos > combined_neu and
        combined_pos > 0.35  # Threshold for clarity
    ):
        return 'Positive'
    # Suggestions / mild criticism without strongly negative scores read as
    # neutral. (Rows with strong negatives already returned at Stage 2, so
    # the original's extra `not has_strong_negative` guard was redundant.)
    if has_weak_negative and combined_neg < 0.5:
        return 'Neutral'
    # Stage 5: Default to neutral if uncertain.
    return 'Neutral'
def sanitize_text(text: str) -> str:
    """Normalize one raw comment string.

    Strips ASCII control characters (keeping tab/newline/carriage-return,
    which the whitespace collapse below handles anyway) and collapses all
    runs of whitespace into single spaces. Falsy input yields "".
    """
    if not text:
        return ""
    cleaned = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text)
    # split()/join collapses and trims every kind of whitespace in one pass.
    return " ".join(cleaned.split())
# ============================================================================
# MAIN ANALYSIS FUNCTION
# ============================================================================
def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]:
    """Main sentiment analysis with enhanced accuracy.

    Pipeline: sanitize → filter → regex pattern detection → VADER + RoBERTa
    scoring → weighted score combination → per-comment classification →
    aggregate statistics, negative-comment summary, insights/recommendations.

    Args:
        comments: Raw comment strings.

    Returns:
        A dict with counts, averages, distribution, summaries, insights,
        recommendations and a timestamp; or a stub dict with
        total_comments == 0 when nothing survives filtering.

    Raises:
        Re-raises any unexpected exception after logging it.
    """
    try:
        settings = get_settings()
        logger.info(f"Received {len(comments)} comments for analysis")
        # Sanitize: strip control chars, collapse whitespace.
        sanitized_comments = [sanitize_text(comment) for comment in comments]
        # Filter valid comments.
        # NOTE(review): the upper bound compares a *word count* against
        # max_comment_length (a character limit, 5000) — likely intended
        # len(comment) <= settings.max_comment_length. Harmless in practice
        # but confirm before tightening.
        filtered_comments = [
            comment for comment in sanitized_comments
            if settings.min_comment_words <= len(comment.split()) <= settings.max_comment_length
        ]
        logger.info(f"After filtering: {len(filtered_comments)} valid comments")
        if not filtered_comments:
            return {
                "total_comments": 0,
                "message": "No valid comments found for analysis"
            }
        # Create DataFrame
        df = pd.DataFrame({'comment': filtered_comments})
        # Pattern detection (regex heuristics defined earlier in this module)
        df['is_meta'] = df['comment'].apply(is_meta_comment)
        df['has_strong_negative'] = df['comment'].apply(detect_strong_negative)
        df['has_positive'] = df['comment'].apply(detect_positive)
        df['has_weak_negative'] = df['comment'].apply(detect_weak_negative)
        # Log detection stats
        logger.info(f"Meta: {df['is_meta'].sum()}, "
                    f"Strong Neg: {df['has_strong_negative'].sum()}, "
                    f"Positive: {df['has_positive'].sum()}, "
                    f"Weak Neg: {df['has_weak_negative'].sum()}")
        # VADER analysis (per comment)
        vader_results = [vader_sentiment(text) for text in df['comment']]
        vader_df = pd.DataFrame(vader_results)
        # RoBERTa analysis (batched)
        roberta_results = roberta_sentiment_batch(df['comment'].tolist())
        roberta_df = pd.DataFrame(roberta_results)
        # Combine — reset_index keeps the three frames row-aligned.
        final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1)
        # Weighted ensemble scores (weights are normalized in get_settings()).
        final_df['combined_pos'] = (
            settings.combined_weight_vader * final_df['vader_pos'] +
            settings.combined_weight_roberta * final_df['roberta_pos']
        )
        final_df['combined_neg'] = (
            settings.combined_weight_vader * final_df['vader_neg'] +
            settings.combined_weight_roberta * final_df['roberta_neg']
        )
        final_df['combined_neu'] = (
            settings.combined_weight_vader * final_df['vader_neu'] +
            settings.combined_weight_roberta * final_df['roberta_neu']
        )
        # Enhanced multi-stage classification (see classify_sentiment_enhanced)
        final_df['Overall_Sentiment'] = final_df.apply(
            lambda row: classify_sentiment_enhanced(row, settings),
            axis=1
        )
        # Statistics
        total_comments = len(final_df)
        positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive'])
        negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative'])
        neutral_count = len(final_df[final_df['Overall_Sentiment'] == 'Neutral'])
        logger.info(f"Classification Results - Pos: {positive_count}, Neg: {negative_count}, Neu: {neutral_count}")
        # Average scores
        avg_positive = float(final_df['combined_pos'].mean())
        avg_negative = float(final_df['combined_neg'].mean())
        avg_neutral = float(final_df['combined_neu'].mean())
        # Overall label from the dominant average; ties fall through to Neutral.
        if avg_positive > max(avg_negative, avg_neutral):
            overall_sentiment_label = "Positive"
        elif avg_negative > max(avg_positive, avg_neutral):
            overall_sentiment_label = "Negative"
        else:
            overall_sentiment_label = "Neutral"
        # Process negative comments: keep the full list, summarize the worst 3.
        negative_summary = ""
        negative_comments_list = []
        negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative']
        if len(negative_comments) > 0:
            negative_comments_list = negative_comments['comment'].tolist()
            try:
                top_idx = negative_comments['combined_neg'].nlargest(min(3, len(negative_comments))).index
                top_comments = negative_comments.loc[top_idx, 'comment'].tolist()
                if settings.use_abstractive_summary and summarizer is not None:
                    negative_text = " ".join(top_comments)
                    if len(negative_text) > 1000:
                        # Keep the summarizer input within a safe size budget.
                        negative_text = negative_text[:1000]
                    summary_result = summarizer(
                        negative_text,
                        max_length=settings.max_summary_length,
                        min_length=settings.min_summary_length,
                        do_sample=False
                    )
                    negative_summary = summary_result[0]['summary_text']
                else:
                    # Extractive fallback: just join the worst comments.
                    negative_summary = "; ".join(top_comments)
            except Exception as e:
                logger.warning(f"Summary generation failed: {e}")
                negative_summary = "; ".join(negative_comments_list[:3])
        # Insights and recommendations, templated by the overall label.
        insights = []
        recommendations = []
        if overall_sentiment_label == "Positive":
            insights.extend([
                f"Strong positive feedback: {positive_count}/{total_comments} comments ({round(positive_count/total_comments*100, 1)}%)",
                "Students are satisfied with the teaching approach",
                "High engagement and learning outcomes reported"
            ])
            recommendations.extend([
                "Continue current effective teaching methods",
                "Document successful practices for future reference",
                "Share best practices with colleagues"
            ])
        elif overall_sentiment_label == "Negative":
            insights.extend([
                f"Concerns identified: {negative_count}/{total_comments} negative comments ({round(negative_count/total_comments*100, 1)}%)",
                "Students facing challenges with current approach",
                "Immediate attention needed to address feedback"
            ])
            recommendations.extend([
                "Review and analyze specific negative feedback points",
                "Consider adjusting teaching pace or methods",
                "Increase student engagement and support",
                "Schedule student feedback sessions",
                "Focus on communication clarity and accessibility"
            ])
        else:
            insights.extend([
                f"Mixed feedback: {positive_count} positive, {negative_count} negative, {neutral_count} neutral",
                "Room for improvement while maintaining strengths",
                "Students have varied experiences"
            ])
            recommendations.extend([
                "Address specific concerns raised in negative feedback",
                "Build on positive aspects appreciated by students",
                "Gather more detailed feedback for neutral areas"
            ])
        # Add pattern-based insights
        if df['has_strong_negative'].sum() > 0:
            insights.append(f"{df['has_strong_negative'].sum()} comments contain explicit criticism requiring attention")
        if df['has_positive'].sum() > 0:
            insights.append(f"{df['has_positive'].sum()} comments contain strong positive appreciation")
        return {
            "total_comments": total_comments,
            "positive_comments": positive_count,
            "negative_comments": negative_count,
            "neutral_comments": neutral_count,
            "positive_sentiment": round(avg_positive, 3),
            "negative_sentiment": round(avg_negative, 3),
            "neutral_sentiment": round(avg_neutral, 3),
            "overall_sentiment": overall_sentiment_label,
            "sentiment_distribution": {
                "positive_percentage": round((positive_count / total_comments) * 100, 1),
                "negative_percentage": round((negative_count / total_comments) * 100, 1),
                "neutral_percentage": round((neutral_count / total_comments) * 100, 1)
            },
            "negative_comments_summary": negative_summary,
            "negative_comments_list": negative_comments_list,
            "key_insights": insights,
            "recommendations": recommendations,
            "detailed_analysis": {
                "vader_scores": {
                    "average_positive": round(final_df['vader_pos'].mean(), 3),
                    "average_negative": round(final_df['vader_neg'].mean(), 3),
                    "average_neutral": round(final_df['vader_neu'].mean(), 3),
                    "average_compound": round(final_df['vader_compound'].mean(), 3)
                },
                "roberta_scores": {
                    "average_positive": round(final_df['roberta_pos'].mean(), 3),
                    "average_negative": round(final_df['roberta_neg'].mean(), 3),
                    "average_neutral": round(final_df['roberta_neu'].mean(), 3)
                }
            },
            "analysis_timestamp": datetime.utcnow().isoformat()
        }
    except Exception as e:
        logger.error(f"Sentiment analysis failed: {e}", exc_info=True)
        raise e
# ============================================================================
# API ENDPOINTS
# ============================================================================

# FIX: without the on_event registration this coroutine was never called,
# so the model globals stayed None and every request would fail.
@app.on_event("startup")
async def startup_event():
    """Load all sentiment models once when the service starts.

    Raises:
        Any exception from initialize_models(), aborting startup so the
        service never serves requests with unloaded models.
    """
    try:
        logger.info("=" * 80)
        logger.info(f"Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        logger.info("=" * 80)
        initialize_models()
        logger.info("✓ Service started successfully")
        logger.info("=" * 80)
    except Exception as e:
        logger.error(f"✗ Startup failed: {e}")
        raise e
# FIX: register the shutdown hook so the message is actually logged.
@app.on_event("shutdown")
async def shutdown_event():
    """Log service shutdown (no resources currently need explicit cleanup)."""
    logger.info("Service shutting down")
# FIX: route decorator was missing, so "/" returned 404.
@app.get("/")
async def root():
    """Root endpoint: service identity plus a directory of the other routes."""
    return {
        "service": get_settings().app_name,
        "version": get_settings().app_version,
        "status": "running",
        "endpoints": {
            "health": "/health",
            "analyze": "/analyze-comments",
            "test": "/test"
        }
    }
# FIX: route decorator was missing, so /health (advertised by the root
# endpoint) returned 404 and orchestrator health probes could not work.
@app.get("/health")
async def health_check():
    """Liveness/readiness probe reporting whether all models are loaded."""
    models_loaded = sia is not None and model is not None and tokenizer is not None
    return {
        "status": "healthy" if models_loaded else "unhealthy",
        "service": "comment-analysis",
        "version": get_settings().app_version,
        "models_loaded": models_loaded,
        "device": device if device else "not initialized",
        "timestamp": datetime.utcnow().isoformat()
    }
# FIX: route decorator was missing, so the main analysis endpoint
# (advertised as /analyze-comments by the root route) was unreachable.
# POST because the endpoint consumes a JSON request body.
@app.post("/analyze-comments", response_model=CommentAnalysisResponse)
async def analyze_comments(
    request: CommentAnalysisRequest,
    settings: Settings = Depends(get_settings)
):
    """Analyze comments for sentiment using enhanced multi-stage classification.

    Returns a CommentAnalysisResponse whose ``analysis`` is populated on
    success; validation problems yield HTTP 400, unexpected failures 500.
    """
    try:
        comments = request.comments
        faculty_info = request.faculty_info
        if not comments:
            return CommentAnalysisResponse(
                success=False,
                analysis=None,
                message="No comments provided for analysis"
            )
        logger.info(f"Analyzing {len(comments)} comments for {faculty_info.faculty_name} ({faculty_info.course_code})")
        analysis_result = analyze_comments_sentiment(comments)
        # Everything filtered out (empty / too short comments).
        if analysis_result.get("total_comments", 0) == 0:
            return CommentAnalysisResponse(
                success=False,
                analysis=None,
                message=analysis_result.get("message", "No valid comments to analyze")
            )
        # Echo the faculty identification back in the result payload.
        analysis_result["faculty_info"] = {
            "faculty_name": faculty_info.faculty_name,
            "staff_id": faculty_info.staff_id,
            "course_code": faculty_info.course_code,
            "course_name": faculty_info.course_name
        }
        return CommentAnalysisResponse(
            success=True,
            analysis=analysis_result,
            message=f"Successfully analyzed {analysis_result['total_comments']} comments"
        )
    except ValueError as ve:
        logger.warning(f"Validation error: {ve}")
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        logger.error(f"Analysis failed: {e}", exc_info=True)
        # Generic message: don't leak internals to clients.
        raise HTTPException(status_code=500, detail="Analysis failed. Please try again later.")
# FIX: route decorator was missing, so /test (advertised by the root
# endpoint) returned 404.
@app.get("/test")
async def test_endpoint():
    """Exercise the regex pattern detectors against canned comments.

    Only the pattern layer is used here (no VADER/RoBERTa), so this works
    even before the models finish loading — handy for smoke tests.
    """
    test_cases = [
        # Meta-comments (should be Neutral)
        "No negative comments",
        "Everything is good",
        "Nothing to say",
        "Nil",
        # Strong Negative (should be Negative)
        "Very poor teaching quality",
        "Boring class, waste of time",
        "Cannot understand anything",
        "Teaching is terrible and voice is too low",
        "Poor knowledge and bad teaching method",
        # Positive (should be Positive)
        "Excellent teacher with great knowledge",
        "Very helpful and explains clearly",
        "Amazing teaching style, learned a lot",
        "Best professor, highly recommend",
        # Weak negative/Neutral
        "Could be better",
        "Sometimes hard to understand",
        "Overall good but too lag",
        # Mixed
        "Good teacher but classes are boring",
        "Knowledgeable but voice is low"
    ]
    results = []
    for text in test_cases:
        is_meta = is_meta_comment(text)
        has_strong_neg = detect_strong_negative(text)
        has_pos = detect_positive(text)
        has_weak_neg = detect_weak_negative(text)
        # Predict from pattern flags alone, mirroring the first stages of
        # classify_sentiment_enhanced.
        if is_meta:
            predicted = "Neutral (meta-comment)"
        elif has_strong_neg:
            predicted = "Negative (strong pattern)"
        elif has_pos and not has_strong_neg:
            predicted = "Positive (likely)"
        elif has_weak_neg and not has_strong_neg:
            predicted = "Neutral/Negative (weak)"
        else:
            predicted = "Requires full analysis"
        results.append({
            "text": text,
            "is_meta": is_meta,
            "strong_negative": has_strong_neg,
            "positive": has_pos,
            "weak_negative": has_weak_neg,
            "predicted": predicted
        })
    return {
        "test_results": results,
        "note": "Predictions based on pattern matching. Full analysis uses VADER + RoBERTa ensemble."
    }
# Entry point for running the service directly (e.g. `python app.py`);
# binds to all interfaces on port 8000 with INFO-level logging.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")
| # """ | |
| # Enhanced FastAPI Service for Comment Sentiment Analysis | |
| # with improved performance, validation, and configuration management | |
| # Version 2.1.0 - Updated with bug fixes and improvements | |
| # """ | |
| # from fastapi import FastAPI, HTTPException, Depends | |
| # from fastapi.middleware.cors import CORSMiddleware | |
| # from pydantic import BaseModel, Field, validator | |
| # from pydantic_settings import BaseSettings | |
| # from typing import List, Dict, Any, Optional | |
| # from functools import lru_cache | |
| # import uvicorn | |
| # import pandas as pd | |
| # import numpy as np | |
| # import os | |
| # import re | |
| # from datetime import datetime | |
| # import logging | |
| # # Configure logging FIRST | |
| # logging.basicConfig( | |
| # level=logging.INFO, | |
| # format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' | |
| # ) | |
| # logger = logging.getLogger(__name__) | |
| # # CRITICAL: Download NLTK data BEFORE importing NLTK components | |
| # import nltk | |
| # import ssl | |
| # try: | |
| # _create_unverified_https_context = ssl._create_unverified_context | |
| # except AttributeError: | |
| # pass | |
| # else: | |
| # ssl._create_default_https_context = _create_unverified_https_context | |
| # # Set NLTK data path | |
| # nltk_data_dir = '/tmp/nltk_data' | |
| # os.makedirs(nltk_data_dir, exist_ok=True) | |
| # nltk.data.path.insert(0, nltk_data_dir) | |
| # # Download required NLTK data | |
| # def ensure_nltk_data(): | |
| # """Ensure all required NLTK data is downloaded""" | |
| # resources = ['vader_lexicon', 'punkt', 'stopwords', 'wordnet', 'omw-1.4'] | |
| # for resource in resources: | |
| # try: | |
| # # Try to find the resource | |
| # if resource == 'vader_lexicon': | |
| # nltk.data.find('sentiment/vader_lexicon.zip') | |
| # elif resource == 'punkt': | |
| # nltk.data.find('tokenizers/punkt') | |
| # elif resource in ['stopwords', 'wordnet', 'omw-1.4']: | |
| # nltk.data.find(f'corpora/{resource}') | |
| # logger.info(f"✓ NLTK resource '{resource}' already available") | |
| # except LookupError: | |
| # logger.info(f"Downloading NLTK resource '{resource}'...") | |
| # try: | |
| # nltk.download(resource, download_dir=nltk_data_dir, quiet=False) | |
| # logger.info(f"✓ Successfully downloaded '{resource}'") | |
| # except Exception as e: | |
| # logger.error(f"✗ Failed to download '{resource}': {e}") | |
| # # Download NLTK data immediately | |
| # logger.info("Ensuring NLTK data is available...") | |
| # ensure_nltk_data() | |
| # # NOW import NLTK components | |
| # from nltk.sentiment import SentimentIntensityAnalyzer | |
| # # Import transformers after NLTK setup | |
| # from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline | |
| # from scipy.special import softmax | |
| # import torch | |
| # # Configuration Management | |
| # class Settings(BaseSettings): | |
| # """Application settings with environment variable support""" | |
| # # API Settings | |
| # app_name: str = "Comment Analysis API" | |
| # app_version: str = "2.1.0" | |
| # debug_mode: bool = False | |
| # # Request Limits | |
| # max_comments_per_request: int = 1000 | |
| # max_comment_length: int = 5000 | |
| # min_comment_words: int = 1 | |
| # # Sentiment Thresholds | |
| # vader_pos_threshold: float = 0.2 | |
| # vader_neg_threshold: float = -0.2 | |
| # roberta_pos_threshold: float = 0.55 | |
| # roberta_neg_threshold: float = 0.45 | |
| # combined_weight_vader: float = 0.5 | |
| # combined_weight_roberta: float = 0.5 | |
| # # Model Settings | |
| # model_cache_dir: str = "/tmp/model_cache" | |
| # roberta_model_name: str = "cardiffnlp/twitter-roberta-base-sentiment" | |
| # use_abstractive_summary: bool = False | |
| # summarizer_model: str = "facebook/bart-large-cnn" | |
| # max_summary_length: int = 100 | |
| # min_summary_length: int = 25 | |
| # # Performance | |
| # enable_caching: bool = True | |
| # cache_size: int = 500 | |
| # batch_size: int = 32 | |
| # class Config: | |
| # env_file = ".env" | |
| # env_file_encoding = 'utf-8' | |
| # extra = 'ignore' | |
| # @validator('min_comment_words') | |
| # def validate_min_words(cls, v): | |
| # if v < 0: | |
| # raise ValueError('min_comment_words must be non-negative') | |
| # return v | |
| # @validator('combined_weight_vader', 'combined_weight_roberta') | |
| # def validate_weights(cls, v): | |
| # if not 0 <= v <= 1: | |
| # raise ValueError('Weights must be between 0 and 1') | |
| # return v | |
| # @lru_cache() | |
| # def get_settings() -> Settings: | |
| # """Cached settings instance""" | |
| # settings = Settings() | |
| # # Normalize weights if needed | |
| # total = settings.combined_weight_vader + settings.combined_weight_roberta | |
| # if not (0.99 <= total <= 1.01): | |
| # logger.warning(f"Weights sum to {total}, normalizing to 1.0") | |
| # settings.combined_weight_vader /= total | |
| # settings.combined_weight_roberta /= total | |
| # return settings | |
| # # Pydantic Models | |
| # class FacultyInfo(BaseModel): | |
| # faculty_name: str = Field(..., min_length=1, max_length=200) | |
| # staff_id: str = Field(..., min_length=1, max_length=50) | |
| # course_code: str = Field(..., min_length=1, max_length=50) | |
| # course_name: str = Field(..., min_length=1, max_length=200) | |
| # class CommentAnalysisRequest(BaseModel): | |
| # comments: List[str] = Field(..., min_items=1) | |
| # faculty_info: FacultyInfo | |
| # @validator('comments') | |
| # def validate_comments(cls, v): | |
| # settings = get_settings() | |
| # if len(v) > settings.max_comments_per_request: | |
| # raise ValueError( | |
| # f'Maximum {settings.max_comments_per_request} comments per request' | |
| # ) | |
| # for idx, comment in enumerate(v): | |
| # if len(comment) > settings.max_comment_length: | |
| # raise ValueError( | |
| # f'Comment {idx} exceeds maximum length of {settings.max_comment_length} characters' | |
| # ) | |
| # return v | |
| # class SentimentDistribution(BaseModel): | |
| # positive_percentage: float | |
| # negative_percentage: float | |
| # neutral_percentage: float | |
| # class DetailedScores(BaseModel): | |
| # average_positive: float | |
| # average_negative: float | |
| # average_neutral: float | |
| # average_compound: Optional[float] = None | |
| # class DetailedAnalysis(BaseModel): | |
| # vader_scores: DetailedScores | |
| # roberta_scores: DetailedScores | |
| # class AnalysisResult(BaseModel): | |
| # total_comments: int | |
| # positive_comments: int | |
| # negative_comments: int | |
| # neutral_comments: int | |
| # positive_sentiment: float | |
| # negative_sentiment: float | |
| # neutral_sentiment: float | |
| # overall_sentiment: str | |
| # sentiment_distribution: SentimentDistribution | |
| # negative_comments_summary: str | |
| # negative_comments_list: List[str] | |
| # key_insights: List[str] | |
| # recommendations: List[str] | |
| # detailed_analysis: DetailedAnalysis | |
| # faculty_info: Dict[str, str] | |
| # analysis_timestamp: str | |
| # class CommentAnalysisResponse(BaseModel): | |
| # success: bool | |
| # analysis: Optional[AnalysisResult] = None | |
| # message: str | |
| # # Initialize FastAPI app | |
| # app = FastAPI( | |
| # title=get_settings().app_name, | |
| # version=get_settings().app_version, | |
| # description="Advanced sentiment analysis service for educational feedback" | |
| # ) | |
| # # Add CORS middleware | |
| # app.add_middleware( | |
| # CORSMiddleware, | |
| # allow_origins=["*"], | |
| # allow_credentials=True, | |
| # allow_methods=["*"], | |
| # allow_headers=["*"], | |
| # ) | |
| # # Global variables for models | |
| # sia = None | |
| # tokenizer = None | |
| # model = None | |
| # device = None | |
| # summarizer = None | |
| # # Enhanced heuristic phrase/regex rules for explicit negative feedback | |
| # NEGATIVE_PHRASES = [ | |
| # # Teaching quality issues | |
| # 'very poor', | |
| # 'extremely poor', | |
| # 'poor in teaching', | |
| # 'poor teaching level', | |
| # 'poor teaching', | |
| # 'bad teacher', | |
| # 'bad teaching', | |
| # 'not good', # Keep but check it's not "no negative" | |
| # 'not satisfied', | |
| # 'not satisfactory', | |
| # # Content/delivery issues | |
| # 'boring class', | |
| # 'boring classes', | |
| # 'boring subject', | |
| # 'subject is boring', | |
| # 'low voice', | |
| # 'voice is low', | |
| # 'cannot hear', | |
| # "can't hear", | |
| # 'speak louder', | |
| # # Resource/support issues | |
| # 'need more staff', | |
| # 'need more faculty', | |
| # 'insufficient staff', | |
| # 'lack of staff', | |
| # 'not sufficient', | |
| # 'insufficient', | |
| # 'not enough', | |
| # 'no classes', | |
| # 'no regular classes', | |
| # 'not sufficient classes', | |
| # # Knowledge/understanding issues | |
| # 'lack of knowledge', | |
| # 'better knowledge needed', | |
| # 'poor knowledge', | |
| # 'knowledge is lacking', | |
| # 'practical knowledge lacking', | |
| # 'no practical', | |
| # 'lack of practical', | |
| # 'no hands-on', | |
| # 'no real world', | |
| # 'did not understand', | |
| # "didn't understand", | |
| # 'not able to understand', | |
| # 'unable to understand', | |
| # 'difficult to understand', | |
| # 'hard to understand', | |
| # 'concepts are difficult', | |
| # 'concepts difficult', | |
| # 'cant understand', | |
| # "can't understand", | |
| # 'not understandable', | |
| # # Improvement needed | |
| # 'improve class', | |
| # 'improvement needed', | |
| # 'needs improvement', | |
| # 'need improvement', | |
| # 'should improve', | |
| # 'must improve', | |
| # 'not helpful', | |
| # 'not clear', | |
| # 'communication skills need improvement', | |
| # 'improve communication', | |
| # # Pace/time issues | |
| # 'lectures are going fast', | |
| # 'going too fast', | |
| # 'too fast', | |
| # 'too slow', | |
| # 'too lag', | |
| # 'lag', | |
| # 'lagging', | |
| # 'lag in teaching', | |
| # 'not managing time', | |
| # 'poor time management', | |
| # 'time management issue', | |
| # # Engagement issues | |
| # 'not interested', | |
| # 'no interest', | |
| # 'going for attendance', | |
| # 'just for attendance', | |
| # 'only for attendance', | |
| # 'not at all', | |
| # 'nothing learnt', | |
| # 'learned nothing', | |
| # 'no improvement', | |
| # 'same teaching', | |
| # 'monotonous', | |
| # 'sleeping in class', | |
| # # Value/utility issues | |
| # 'waste of time', | |
| # 'wasting time', | |
| # 'waste our time', | |
| # 'no use', | |
| # 'useless', | |
| # # Administrative issues | |
| # 'military rules', | |
| # 'strict rules', | |
| # 'too strict', | |
| # 'very strict', | |
| # 'attendance issue', | |
| # 'attendance problem', | |
| # 'not providing attendance', | |
| # 'claim od', | |
| # # Workload issues | |
| # 'too many projects', | |
| # 'many projects review', | |
| # 'trouble to make', | |
| # 'difficult to make', | |
| # 'hard to make', | |
| # 'placement activities', # When context is negative | |
| # ] | |
| # NEGATIVE_REGEXES = [ | |
| # # Teaching quality patterns | |
| # re.compile(r"\b(very|extremely|quite|so)\s+(poor|bad|weak)\s+(in\s+)?(teaching|knowledge|communication)", re.IGNORECASE), | |
| # re.compile(r"\bpoor\s+(teaching|teacher|faculty|knowledge|communication)", re.IGNORECASE), | |
| # re.compile(r"\b(teaching|knowledge)\s+(is\s+)?(poor|bad|weak|lacking)", re.IGNORECASE), | |
| # # Boring/engagement patterns | |
| # re.compile(r"\b(boring|dull|monotonous)\s+(class|classes|subject|lecture|lectures)", re.IGNORECASE), | |
| # re.compile(r"\b(class|classes|subject|lecture|lectures)\s+(is|are)\s+(boring|dull|monotonous)", re.IGNORECASE), | |
| # # Voice/communication patterns | |
| # re.compile(r"\b(low|soft|quiet)\s+voice\b", re.IGNORECASE), | |
| # re.compile(r"\bvoice\s+(is\s+)?(low|soft|quiet|not clear)", re.IGNORECASE), | |
| # re.compile(r"\b(cannot|can't|cant|unable to)\s+hear", re.IGNORECASE), | |
| # # Resource/support patterns | |
| # re.compile(r"\b(no|not|insufficient|lack of)\s+(proper|sufficient|enough|regular)?\s*(classes|notes|support|staff|faculty)", re.IGNORECASE), | |
| # re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support|classes)", re.IGNORECASE), | |
| # # Understanding/clarity patterns | |
| # re.compile(r"\b(cannot|can't|cant|unable to|difficult to|hard to)\s+understand", re.IGNORECASE), | |
| # re.compile(r"\b(not|difficult|hard)\s+(able\s+to\s+)?understand(\s+the)?(\s+(concepts?|teaching|lectures?))?", re.IGNORECASE), | |
| # re.compile(r"\bconcepts?\s+(are\s+)?(difficult|hard|tough|complex)\s+to\s+understand", re.IGNORECASE), | |
| # # Improvement patterns | |
| # re.compile(r"\b(need|needs|needed|require|requires)\s+(some\s+)?(improvement|to improve)", re.IGNORECASE), | |
| # re.compile(r"\b(should|must|have to)\s+improve", re.IGNORECASE), | |
| # re.compile(r"\bimprovement\s+(is\s+)?need(ed)?", re.IGNORECASE), | |
| # # Pace patterns | |
| # re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(is|are|going)\s+(too|very)\s+(fast|slow)", re.IGNORECASE), | |
| # re.compile(r"\b(too|very)\s+(fast|slow|lag|lagging)", re.IGNORECASE), | |
| # # Time management patterns | |
| # re.compile(r"\b(not|poor|bad)\s+(managing|managing)\s+time", re.IGNORECASE), | |
| # re.compile(r"\btime\s+management\s+(is\s+)?(poor|bad|lacking)", re.IGNORECASE), | |
| # # Attendance/engagement patterns | |
| # re.compile(r"\b(just|only)\s+(for|going for)\s+attendance", re.IGNORECASE), | |
| # re.compile(r"\b(going|attend|attending)\s+(to|for)\s+(her|his|their)\s+class\s+(just|only)\s+for\s+attendance", re.IGNORECASE), | |
| # re.compile(r"\bnot\s+(at\s+all\s+)?(interested|engaging|helpful)", re.IGNORECASE), | |
| # # Value patterns | |
| # re.compile(r"\b(waste|wasting)\s+(of\s+)?time", re.IGNORECASE), | |
| # re.compile(r"\b(no\s+use|useless|not useful)", re.IGNORECASE), | |
| # # Workload patterns | |
| # re.compile(r"\b(too\s+)?many\s+projects", re.IGNORECASE), | |
| # re.compile(r"\btrouble\s+to\s+(make|complete|do)", re.IGNORECASE), | |
| # # Administrative patterns | |
| # re.compile(r"\bmilitary\s+rules", re.IGNORECASE), | |
| # re.compile(r"\b(too|very)\s+strict", re.IGNORECASE), | |
| # re.compile(r"\battendance\s+(issue|problem)", re.IGNORECASE), | |
| # re.compile(r"\bnot\s+providing\s+attendance", re.IGNORECASE), | |
| # re.compile(r"\bclaim\s+od", re.IGNORECASE), | |
| # # Placement/scheduling patterns | |
| # re.compile(r"\bplacement\s+activities\s+(and|with)\s+(attendance|issue|problem)", re.IGNORECASE), | |
| # re.compile(r"\b(class|classes)\s+(intersecting|conflicting)\s+with\s+placement", re.IGNORECASE), | |
| # ] | |
| # META_COMMENT_PATTERNS = [ | |
| # re.compile(r"^no\s+negative\s+(comments?|feedback|remarks?)", re.IGNORECASE), | |
| # re.compile(r"^no\s+negative\s+comments?\s+on\s+the\s+(faculty|teacher|staff|course)", re.IGNORECASE), | |
| # re.compile(r"^no\s+(issues?|problems?|complaints?)\.?$", re.IGNORECASE), | |
| # re.compile(r"^no\s+(issues?|problems?|complaints?)\s+(at\s+all|whatsoever)", re.IGNORECASE), | |
| # # "Everything is good" patterns | |
| # re.compile(r"^(everything|all)\s+(is\s+)?(good|fine|ok|okay|great|perfect|excellent)", re.IGNORECASE), | |
| # re.compile(r"^no,?\s+(everything|all)\s+(is\s+)?(good|fine|ok|okay)", re.IGNORECASE), | |
| # re.compile(r"^(all\s+)?good\.?$", re.IGNORECASE), | |
| # re.compile(r"^everything\s+at\s+the\s+too\s+only", re.IGNORECASE), # From your data | |
| # # "Nothing" patterns | |
| # re.compile(r"^nothing\.?$", re.IGNORECASE), | |
| # re.compile(r"^nothing\s+(to\s+)?(say|comment|mention|add)", re.IGNORECASE), | |
| # re.compile(r"^nothing,?\s+(and\s+)?(all|everything)\s+(is\s+)?(good|fine)", re.IGNORECASE), | |
| # # "No more comments" patterns | |
| # re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE), | |
| # re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE), | |
| # re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE), | |
| # # Empty/nil responses | |
| # re.compile(r"^(nil|none|na|n/a|nill)\.?$", re.IGNORECASE), | |
| # re.compile(r"^(no|nothing|none)\.?$", re.IGNORECASE), | |
| # # Positive meta-comments (not actual feedback) | |
| # re.compile(r"^(it's\s+|its\s+)?(all\s+)?good\.?$", re.IGNORECASE), | |
| # re.compile(r"^fine\.?$", re.IGNORECASE), | |
| # re.compile(r"^ok(ay)?\.?$", re.IGNORECASE), | |
| # re.compile(r"^great\.?$", re.IGNORECASE), | |
| # re.compile(r"^nice\.?$", re.IGNORECASE), | |
| # ] | |
| # def is_meta_comment(text: str) -> bool: | |
| # """ | |
| # Check if comment is a meta-comment (not actual feedback). | |
| # These are generic statements that don't provide substantive feedback. | |
| # """ | |
| # if not text: | |
| # return True # Empty text is meta | |
| # text = text.strip() | |
| # # Check length - very short comments are likely meta | |
| # if len(text) < 3: | |
| # logger.debug(f"Meta-comment (too short): '{text}'") | |
| # return True | |
| # # Check against patterns | |
| # for pattern in META_COMMENT_PATTERNS: | |
| # if pattern.match(text): | |
| # logger.debug(f"Meta-comment detected: '{text[:50]}...'") | |
| # return True | |
| # return False | |
| # def is_explicit_negative(text: str) -> bool: | |
| # """ | |
| # Check if text contains explicit negative phrases. | |
| # IMPORTANT: Must check if it's a meta-comment FIRST. | |
| # """ | |
| # if not text: | |
| # return False | |
| # # CRITICAL: Don't classify meta-comments as negative | |
| # if is_meta_comment(text): | |
| # return False | |
| # lower = text.lower() | |
| # # Check phrases | |
| # for phrase in NEGATIVE_PHRASES: | |
| # if phrase in lower: | |
| # # Double-check it's not a false positive like "no negative comments" | |
| # if phrase == 'not good' and 'no negative' in lower: | |
| # continue | |
| # if phrase == 'no interest' and 'no negative' in lower: | |
| # continue | |
| # logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'") | |
| # return True | |
| # # Check regexes | |
| # for regex in NEGATIVE_REGEXES: | |
| # if regex.search(text): | |
| # logger.debug(f"Negative pattern matched: {regex.pattern} in '{text[:50]}...'") | |
| # return True | |
| # return False | |
| # def initialize_models(): | |
| # """Initialize sentiment analysis models with caching support""" | |
| # global sia, tokenizer, model, device, summarizer | |
| # try: | |
| # settings = get_settings() | |
| # logger.info("Initializing sentiment analysis models...") | |
| # # Initialize VADER (NLTK data already downloaded) | |
| # sia = SentimentIntensityAnalyzer() | |
| # logger.info("✓ VADER initialized") | |
| # # Initialize RoBERTa with caching | |
| # cache_dir = settings.model_cache_dir | |
| # os.makedirs(cache_dir, exist_ok=True) | |
| # tokenizer = AutoTokenizer.from_pretrained( | |
| # settings.roberta_model_name, | |
| # cache_dir=cache_dir | |
| # ) | |
| # model = AutoModelForSequenceClassification.from_pretrained( | |
| # settings.roberta_model_name, | |
| # cache_dir=cache_dir | |
| # ) | |
| # device = "cuda" if torch.cuda.is_available() else "cpu" | |
| # model.to(device) | |
| # model.eval() | |
| # logger.info(f"✓ RoBERTa initialized on device: {device}") | |
| # # Initialize summarizer (optional) | |
| # if settings.use_abstractive_summary: | |
| # try: | |
| # summarizer = pipeline( | |
| # "summarization", | |
| # model=settings.summarizer_model, | |
| # device=0 if device == "cuda" else -1 | |
| # ) | |
| # logger.info("✓ Summarizer initialized") | |
| # except Exception as e: | |
| # logger.warning(f"Summarizer initialization failed: {e}") | |
| # summarizer = None | |
| # logger.info("✓ All models initialized successfully") | |
| # except Exception as e: | |
| # logger.error(f"Error initializing models: {e}") | |
| # raise e | |
| # @lru_cache(maxsize=500) | |
| # def vader_sentiment_cached(text: str) -> tuple: | |
| # """Cached VADER sentiment analysis""" | |
| # scores = sia.polarity_scores(text) | |
| # return (scores['neg'], scores['neu'], scores['pos'], scores['compound']) | |
| # def vader_sentiment(text: str) -> Dict[str, float]: | |
| # """VADER sentiment analysis with caching support""" | |
| # try: | |
| # settings = get_settings() | |
| # if settings.enable_caching: | |
| # neg, neu, pos, compound = vader_sentiment_cached(text) | |
| # return { | |
| # 'vader_neg': neg, | |
| # 'vader_neu': neu, | |
| # 'vader_pos': pos, | |
| # 'vader_compound': compound | |
| # } | |
| # else: | |
| # scores = sia.polarity_scores(text) | |
| # return { | |
| # 'vader_neg': scores['neg'], | |
| # 'vader_neu': scores['neu'], | |
| # 'vader_pos': scores['pos'], | |
| # 'vader_compound': scores['compound'] | |
| # } | |
| # except Exception as e: | |
| # logger.warning(f"VADER analysis failed for text: {e}") | |
| # return {'vader_neg': 0.0, 'vader_neu': 1.0, 'vader_pos': 0.0, 'vader_compound': 0.0} | |
| # def roberta_sentiment_batch(texts: List[str]) -> List[Dict[str, float]]: | |
| # """Batch RoBERTa sentiment analysis for better performance""" | |
| # try: | |
| # settings = get_settings() | |
| # results = [] | |
| # for i in range(0, len(texts), settings.batch_size): | |
| # batch = texts[i:i + settings.batch_size] | |
| # encoded = tokenizer( | |
| # batch, | |
| # return_tensors='pt', | |
| # truncation=True, | |
| # max_length=512, | |
| # padding=True | |
| # ) | |
| # encoded = {k: v.to(device) for k, v in encoded.items()} | |
| # with torch.no_grad(): | |
| # outputs = model(**encoded) | |
| # for output in outputs.logits: | |
| # scores = softmax(output.cpu().numpy()) | |
| # results.append({ | |
| # 'roberta_neg': float(scores[0]), | |
| # 'roberta_neu': float(scores[1]), | |
| # 'roberta_pos': float(scores[2]) | |
| # }) | |
| # return results | |
| # except Exception as e: | |
| # logger.warning(f"RoBERTa batch analysis failed: {e}") | |
| # return [{'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0} for _ in texts] | |
| # def roberta_sentiment(text: str) -> Dict[str, float]: | |
| # """Single text RoBERTa sentiment analysis""" | |
| # try: | |
| # encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512) | |
| # encoded_text = {k: v.to(device) for k, v in encoded_text.items()} | |
| # with torch.no_grad(): | |
| # output = model(**encoded_text) | |
| # scores = softmax(output[0][0].cpu().numpy()) | |
| # return { | |
| # 'roberta_neg': float(scores[0]), | |
| # 'roberta_neu': float(scores[1]), | |
| # 'roberta_pos': float(scores[2]) | |
| # } | |
| # except Exception as e: | |
| # logger.warning(f"RoBERTa analysis failed for text: {e}") | |
| # return {'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0} | |
| # def overall_sentiment(row: pd.Series, settings: Settings) -> str: | |
| # """Determine overall sentiment using combined scores with configurable thresholds""" | |
| # combined_pos = row.get('combined_pos', 0.0) | |
| # combined_neg = row.get('combined_neg', 0.0) | |
| # combined_neu = row.get('combined_neu', 0.0) | |
| # vader_compound = row.get('vader_compound', 0.0) | |
| # roberta_neg = row.get('roberta_neg', 0.0) | |
| # roberta_pos = row.get('roberta_pos', 0.0) | |
| # # Priority 1: Heuristic negative patterns override everything | |
| # if row.get('heuristic_negative') is True: | |
| # return 'Negative' | |
| # # Priority 2: Strong negative signals | |
| # if ( | |
| # vader_compound <= settings.vader_neg_threshold or | |
| # roberta_neg >= settings.roberta_neg_threshold or | |
| # combined_neg >= max(combined_pos, combined_neu) | |
| # ): | |
| # return 'Negative' | |
| # # Priority 3: Positive signals | |
| # if ( | |
| # vader_compound >= settings.vader_pos_threshold or | |
| # roberta_pos >= settings.roberta_pos_threshold or | |
| # combined_pos >= max(combined_neg, combined_neu) | |
| # ): | |
| # return 'Positive' | |
| # # Default: Neutral | |
| # return 'Neutral' | |
| # def sanitize_text(text: str) -> str: | |
| # """Sanitize input text while preserving emojis""" | |
| # if not text: | |
| # return "" | |
| # # Remove control characters but keep printable characters and emojis | |
| # text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text) | |
| # # Normalize whitespace | |
| # text = ' '.join(text.split()) | |
| # return text.strip() | |
| # def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]: | |
| # """Main sentiment analysis function with enhanced performance""" | |
| # try: | |
| # settings = get_settings() | |
| # logger.info(f"Received {len(comments)} comments for analysis") | |
| # # Sanitize comments | |
| # sanitized_comments = [sanitize_text(comment) for comment in comments] | |
| # # FIXED: Changed < to <= to properly handle min_comment_words | |
| # filtered_comments = [ | |
| # comment for comment in sanitized_comments | |
| # if (settings.min_comment_words <= len(comment.split()) <= settings.max_comment_length) | |
| # ] | |
| # logger.info(f"After filtering: {len(filtered_comments)} valid comments") | |
| # if not filtered_comments: | |
| # return { | |
| # "total_comments": 0, | |
| # "message": "No valid comments found for analysis" | |
| # } | |
| # # Create dataframe | |
| # df = pd.DataFrame({'comment': filtered_comments}) | |
| # # Detect meta-comments and explicit negatives | |
| # df['is_meta'] = df['comment'].apply(is_meta_comment) | |
| # df['heuristic_negative'] = df['comment'].apply(is_explicit_negative) | |
| # # Log detection results | |
| # meta_count = df['is_meta'].sum() | |
| # heuristic_neg_count = df['heuristic_negative'].sum() | |
| # logger.info(f"Detected {meta_count} meta-comments and {heuristic_neg_count} heuristic negatives") | |
| # # VADER sentiment analysis | |
| # vader_results = [] | |
| # for text in df['comment']: | |
| # vader_results.append(vader_sentiment(text)) | |
| # # RoBERTa sentiment analysis (batch) | |
| # roberta_results = roberta_sentiment_batch(df['comment'].tolist()) | |
| # # Combine results | |
| # vader_df = pd.DataFrame(vader_results) | |
| # roberta_df = pd.DataFrame(roberta_results) | |
| # final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1) | |
| # # Calculate combined scores | |
| # final_df['combined_pos'] = ( | |
| # settings.combined_weight_vader * final_df['vader_pos'] + | |
| # settings.combined_weight_roberta * final_df['roberta_pos'] | |
| # ) | |
| # final_df['combined_neg'] = ( | |
| # settings.combined_weight_vader * final_df['vader_neg'] + | |
| # settings.combined_weight_roberta * final_df['roberta_neg'] | |
| # ) | |
| # final_df['combined_neu'] = ( | |
| # settings.combined_weight_vader * final_df['vader_neu'] + | |
| # settings.combined_weight_roberta * final_df['roberta_neu'] | |
| # ) | |
| # # Classify overall sentiment (meta-comments become Neutral) | |
| # final_df['Overall_Sentiment'] = final_df.apply( | |
| # lambda row: 'Neutral' if row.get('is_meta') else overall_sentiment(row, settings), | |
| # axis=1 | |
| # ) | |
| # # Calculate statistics | |
| # total_comments = len(final_df) | |
| # positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive']) | |
| # negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative']) | |
| # neutral_count = len(final_df[final_df['Overall_Sentiment'] == 'Neutral']) | |
| # logger.info( | |
| # f"Results: {positive_count} positive, " | |
| # f"{negative_count} negative, {neutral_count} neutral" | |
| # ) | |
| # # Average scores | |
| # avg_positive = float(final_df['combined_pos'].mean()) | |
| # avg_negative = float(final_df['combined_neg'].mean()) | |
| # avg_neutral = float(final_df['combined_neu'].mean()) | |
| # # Determine overall sentiment label | |
| # if avg_positive > max(avg_negative, avg_neutral): | |
| # overall_sentiment_label = "Positive" | |
| # elif avg_negative > max(avg_positive, avg_neutral): | |
| # overall_sentiment_label = "Negative" | |
| # else: | |
| # overall_sentiment_label = "Neutral" | |
| # # Process negative comments | |
| # negative_summary = "" | |
| # negative_comments_list = [] | |
| # negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative'] | |
| # if len(negative_comments) > 0: | |
| # negative_comments_list = negative_comments['comment'].tolist() | |
| # try: | |
| # # Get top negative comments | |
| # top_idx = negative_comments['combined_neg'].nlargest(3).index | |
| # top_comments = negative_comments.loc[top_idx, 'comment'].tolist() | |
| # if settings.use_abstractive_summary and summarizer is not None: | |
| # negative_text = " ".join(top_comments) | |
| # if len(negative_text) > 1000: | |
| # negative_text = negative_text[:1000] | |
| # summary_result = summarizer( | |
| # negative_text, | |
| # max_length=settings.max_summary_length, | |
| # min_length=settings.min_summary_length, | |
| # do_sample=False | |
| # ) | |
| # negative_summary = summary_result[0]['summary_text'] | |
| # else: | |
| # # Extractive summary | |
| # negative_summary = "; ".join(top_comments) | |
| # except Exception as e: | |
| # logger.warning(f"Summary generation failed: {e}") | |
| # negative_summary = "; ".join(negative_comments_list[:3]) | |
| # # Generate insights and recommendations | |
| # insights = [] | |
| # recommendations = [] | |
| # if overall_sentiment_label == "Positive": | |
| # insights.extend([ | |
| # "Students have positive feedback overall", | |
| # "Teaching methods are well-received", | |
| # f"{positive_count}/{total_comments} comments are positive" | |
| # ]) | |
| # recommendations.extend([ | |
| # "Continue current teaching approach", | |
| # "Maintain student engagement strategies", | |
| # "Share successful practices with colleagues" | |
| # ]) | |
| # elif overall_sentiment_label == "Negative": | |
| # insights.extend([ | |
| # "Students have concerns that need attention", | |
| # "Some aspects of teaching may need improvement", | |
| # f"{negative_count}/{total_comments} comments indicate issues" | |
| # ]) | |
| # recommendations.extend([ | |
| # "Review teaching methods and materials", | |
| # "Consider additional student support", | |
| # "Schedule meetings to address student concerns", | |
| # "Focus on areas mentioned in negative feedback" | |
| # ]) | |
| # else: | |
| # insights.extend([ | |
| # "Mixed feedback from students", | |
| # "Some areas performing well, others need attention", | |
| # "Balance of positive and negative responses" | |
| # ]) | |
| # recommendations.extend([ | |
| # "Focus on areas with negative feedback", | |
| # "Maintain strengths while addressing weaknesses", | |
| # "Gather more specific feedback on improvement areas" | |
| # ]) | |
| # return { | |
| # "total_comments": total_comments, | |
| # "positive_comments": positive_count, | |
| # "negative_comments": negative_count, | |
| # "neutral_comments": neutral_count, | |
| # "positive_sentiment": round(avg_positive, 3), | |
| # "negative_sentiment": round(avg_negative, 3), | |
| # "neutral_sentiment": round(avg_neutral, 3), | |
| # "overall_sentiment": overall_sentiment_label, | |
| # "sentiment_distribution": { | |
| # "positive_percentage": round((positive_count / total_comments) * 100, 1), | |
| # "negative_percentage": round((negative_count / total_comments) * 100, 1), | |
| # "neutral_percentage": round((neutral_count / total_comments) * 100, 1) | |
| # }, | |
| # "negative_comments_summary": negative_summary, | |
| # "negative_comments_list": negative_comments_list, | |
| # "key_insights": insights, | |
| # "recommendations": recommendations, | |
| # "detailed_analysis": { | |
| # "vader_scores": { | |
| # "average_positive": round(final_df['vader_pos'].mean(), 3), | |
| # "average_negative": round(final_df['vader_neg'].mean(), 3), | |
| # "average_neutral": round(final_df['vader_neu'].mean(), 3), | |
| # "average_compound": round(final_df['vader_compound'].mean(), 3) | |
| # }, | |
| # "roberta_scores": { | |
| # "average_positive": round(final_df['roberta_pos'].mean(), 3), | |
| # "average_negative": round(final_df['roberta_neg'].mean(), 3), | |
| # "average_neutral": round(final_df['roberta_neu'].mean(), 3) | |
| # } | |
| # }, | |
| # "analysis_timestamp": datetime.utcnow().isoformat() | |
| # } | |
| # except Exception as e: | |
| # logger.error(f"Sentiment analysis failed: {e}", exc_info=True) | |
| # NOTE(review): prefer a bare `raise` over `raise e` inside an except block — `raise e` resets the traceback origin to this frame | |
| # raise e | |
| # @app.on_event("startup") | |
| # async def startup_event(): | |
| # """Initialize models on startup""" | |
| # try: | |
| # logger.info("=" * 80) | |
| # logger.info(f"Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") | |
| # logger.info("=" * 80) | |
| # initialize_models() | |
| # logger.info("✓ Service started successfully") | |
| # logger.info("=" * 80) | |
| # except Exception as e: | |
| # logger.error(f"✗ Startup failed: {e}") | |
| # raise e | |
| # @app.on_event("shutdown") | |
| # async def shutdown_event(): | |
| # """Cleanup on shutdown""" | |
| # logger.info("Service shutting down") | |
| # @app.get("/") | |
| # async def root(): | |
| # """Root endpoint""" | |
| # return { | |
| # "service": get_settings().app_name, | |
| # "version": get_settings().app_version, | |
| # "status": "running", | |
| # "endpoints": { | |
| # "health": "/health", | |
| # "analyze": "/analyze-comments", | |
| # "config": "/config (debug mode only)", | |
| # "test": "/test" | |
| # } | |
| # } | |
| # @app.get("/health") | |
| # async def health_check(): | |
| # """Health check endpoint""" | |
| # models_loaded = sia is not None and model is not None and tokenizer is not None | |
| # return { | |
| # "status": "healthy" if models_loaded else "unhealthy", | |
| # "service": "comment-analysis", | |
| # "version": get_settings().app_version, | |
| # "models_loaded": models_loaded, | |
| # "device": device if device else "not initialized", | |
| # "timestamp": datetime.utcnow().isoformat() | |
| # } | |
| # @app.post("/analyze-comments", response_model=CommentAnalysisResponse) | |
| # async def analyze_comments( | |
| # request: CommentAnalysisRequest, | |
| # settings: Settings = Depends(get_settings) | |
| # ): | |
| # """ | |
| # Analyze comments for sentiment analysis using VADER and RoBERTa models | |
| # """ | |
| # try: | |
| # comments = request.comments | |
| # faculty_info = request.faculty_info | |
| # if not comments: | |
| # return CommentAnalysisResponse( | |
| # success=False, | |
| # analysis=None, | |
| # message="No comments provided for analysis" | |
| # ) | |
| # logger.info( | |
| # f"Analyzing {len(comments)} comments for " | |
| # f"{faculty_info.faculty_name} ({faculty_info.course_code})" | |
| # ) | |
| # analysis_result = analyze_comments_sentiment(comments) | |
| # if analysis_result.get("total_comments", 0) == 0: | |
| # return CommentAnalysisResponse( | |
| # success=False, | |
| # analysis=None, | |
| # message=analysis_result.get("message", "No valid comments to analyze") | |
| # ) | |
| # analysis_result["faculty_info"] = { | |
| # "faculty_name": faculty_info.faculty_name, | |
| # "staff_id": faculty_info.staff_id, | |
| # "course_code": faculty_info.course_code, | |
| # "course_name": faculty_info.course_name | |
| # } | |
| # return CommentAnalysisResponse( | |
| # success=True, | |
| # analysis=analysis_result, | |
| # message=f"Successfully analyzed {analysis_result['total_comments']} comments" | |
| # ) | |
| # except ValueError as ve: | |
| # logger.warning(f"Validation error: {ve}") | |
| # raise HTTPException(status_code=400, detail=str(ve)) | |
| # except Exception as e: | |
| # logger.error(f"Analysis failed: {e}", exc_info=True) | |
| # raise HTTPException( | |
| # status_code=500, | |
| # detail="Analysis failed. Please try again later." | |
| # ) | |
| # @app.get("/config") | |
| # async def get_config(settings: Settings = Depends(get_settings)): | |
| # """Get current configuration (debug mode only)""" | |
| # if not settings.debug_mode: | |
| # raise HTTPException(status_code=404, detail="Not found") | |
| # return { | |
| # "max_comments_per_request": settings.max_comments_per_request, | |
| # "max_comment_length": settings.max_comment_length, | |
| # "min_comment_words": settings.min_comment_words, | |
| # "vader_pos_threshold": settings.vader_pos_threshold, | |
| # "vader_neg_threshold": settings.vader_neg_threshold, | |
| # "roberta_pos_threshold": settings.roberta_pos_threshold, | |
| # "roberta_neg_threshold": settings.roberta_neg_threshold, | |
| # "combined_weight_vader": settings.combined_weight_vader, | |
| # "combined_weight_roberta": settings.combined_weight_roberta, | |
| # "enable_caching": settings.enable_caching, | |
| # "batch_size": settings.batch_size, | |
| # "use_abstractive_summary": settings.use_abstractive_summary | |
| # } | |
| # @app.get("/test") | |
| # async def test_endpoint(): | |
| # """Test endpoint to verify sentiment classification""" | |
| # test_cases = [ | |
| # "No more comments 😅", | |
| # "Overall good but too lag", | |
| # "Not interested to be in her class just we are going for attendance thats it not at all managing time.", | |
| # "Nothing to say anything just we are going to her class mean, only for attendance", | |
| # "Excellent teaching! Very clear explanations.", | |
| # "Good teacher with strong subject knowledge", | |
| # "Class is okay, nothing special" | |
| # ] | |
| # results = [] | |
| # for text in test_cases: | |
| # is_meta = is_meta_comment(text) | |
| # is_neg = is_explicit_negative(text) | |
| # # Predict classification | |
| # if is_meta: | |
| # predicted = "Neutral (meta-comment)" | |
| # elif is_neg: | |
| # predicted = "Negative (heuristic)" | |
| # else: | |
| # predicted = "Needs full analysis" | |
| # results.append({ | |
| # "text": text, | |
| # "is_meta_comment": is_meta, | |
| # "is_heuristic_negative": is_neg, | |
| # "predicted_classification": predicted | |
| # }) | |
| # return { | |
| # "test_results": results, | |
| # "note": "Full analysis requires VADER and RoBERTa scores" | |
| # } | |
| # if __name__ == "__main__": | |
| # uvicorn.run( | |
| # app, | |
| # host="0.0.0.0", | |
| # port=8000, | |
| # log_level="info" | |
| # ) | |