"""
Enhanced FastAPI Service for Comment Sentiment Analysis
Version 3.0.0 - Major accuracy improvements with advanced classification
Features:
- Multi-stage sentiment detection
- Context-aware negative pattern matching
- Improved neutral/meta-comment detection
- Enhanced accuracy through ensemble approach
"""
from fastapi import FastAPI, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field, validator
from pydantic_settings import BaseSettings
from typing import List, Dict, Any, Optional
from functools import lru_cache
import uvicorn
import pandas as pd
import numpy as np
import os
import re
from datetime import datetime
import logging

# Configure logging first so every later step (NLTK download, model init)
# can report progress through `logger`.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# NLTK setup.  The unverified SSL context works around environments where
# certificate verification blocks nltk.download(); harmless where certs work.
import nltk
import ssl

try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    # Older Python builds without the private helper: keep default behavior.
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

# Writable location for NLTK corpora (e.g. read-only container images);
# inserted at the front so it wins over any system-wide data paths.
nltk_data_dir = '/tmp/nltk_data'
os.makedirs(nltk_data_dir, exist_ok=True)
nltk.data.path.insert(0, nltk_data_dir)


def ensure_nltk_data():
    """Ensure all required NLTK data is downloaded.

    Checks each resource via ``nltk.data.find`` and downloads it into
    ``nltk_data_dir`` only when missing.  Download failures are logged,
    not raised, so startup continues on a best-effort basis.
    """
    resources = ['vader_lexicon', 'punkt', 'stopwords', 'wordnet', 'omw-1.4']

    for resource in resources:
        try:
            # nltk.data.find needs the category-qualified path, which differs
            # per resource, hence the explicit dispatch below.
            if resource == 'vader_lexicon':
                nltk.data.find('sentiment/vader_lexicon.zip')
            elif resource == 'punkt':
                nltk.data.find('tokenizers/punkt')
            elif resource in ['stopwords', 'wordnet', 'omw-1.4']:
                nltk.data.find(f'corpora/{resource}')
            logger.info(f"✓ NLTK resource '{resource}' already available")
        except LookupError:
            logger.info(f"Downloading NLTK resource '{resource}'...")
            try:
                nltk.download(resource, download_dir=nltk_data_dir, quiet=False)
                logger.info(f"✓ Successfully downloaded '{resource}'")
            except Exception as e:
                logger.error(f"✗ Failed to download '{resource}': {e}")


# NLTK data must exist before SentimentIntensityAnalyzer is imported/used.
logger.info("Ensuring NLTK data is available...")
ensure_nltk_data()

from nltk.sentiment import SentimentIntensityAnalyzer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from scipy.special import softmax
import torch


# Configuration
class Settings(BaseSettings):
    """Application settings, overridable via environment variables / .env."""
    app_name: str = "Comment Analysis API"
    app_version: str = "3.0.0"
    debug_mode: bool = False

    # Request limits
    max_comments_per_request: int = 1000
    max_comment_length: int = 5000
    min_comment_words: int = 1

    # Enhanced thresholds for better accuracy.
    # VADER compound score is in [-1, 1].
    vader_strong_pos_threshold: float = 0.5
    vader_pos_threshold: float = 0.2
    vader_neg_threshold: float = -0.2
    vader_strong_neg_threshold: float = -0.5

    # RoBERTa class probabilities are in [0, 1].
    roberta_strong_pos_threshold: float = 0.70
    roberta_pos_threshold: float = 0.55
    roberta_neg_threshold: float = 0.40
    roberta_strong_neg_threshold: float = 0.60

    # Ensemble weights (RoBERTa weighted higher for accuracy); get_settings()
    # normalizes them if they do not sum to ~1.0.
    combined_weight_vader: float = 0.4
    combined_weight_roberta: float = 0.6

    # Model settings
    model_cache_dir: str = "/tmp/model_cache"
    roberta_model_name: str = "cardiffnlp/twitter-roberta-base-sentiment"
    use_abstractive_summary: bool = False
    summarizer_model: str = "facebook/bart-large-cnn"
    max_summary_length: int = 100
    min_summary_length: int = 25

    # Performance
    enable_caching: bool = True
    cache_size: int = 500
    batch_size: int = 32

    class Config:
        env_file = ".env"
        env_file_encoding = 'utf-8'
        extra = 'ignore'
@lru_cache()
def get_settings() -> Settings:
    """Return the cached Settings instance.

    Normalizes the ensemble weights to sum to 1.0 when environment
    overrides push their sum outside [0.99, 1.01].
    """
    settings = Settings()
    total = settings.combined_weight_vader + settings.combined_weight_roberta
    if not (0.99 <= total <= 1.01):
        logger.warning(f"Weights sum to {total}, normalizing to 1.0")
        settings.combined_weight_vader /= total
        settings.combined_weight_roberta /= total
    return settings


# Pydantic Models
class FacultyInfo(BaseModel):
    """Identifies the faculty member / course the comments belong to."""
    faculty_name: str = Field(..., min_length=1, max_length=200)
    staff_id: str = Field(..., min_length=1, max_length=50)
    course_code: str = Field(..., min_length=1, max_length=50)
    course_name: str = Field(..., min_length=1, max_length=200)


class CommentAnalysisRequest(BaseModel):
    """Request payload: the raw comments plus faculty metadata."""
    comments: List[str] = Field(..., min_items=1)
    faculty_info: FacultyInfo

    @validator('comments')
    def validate_comments(cls, v):
        # Enforce request-size limits from Settings so they stay configurable.
        settings = get_settings()
        if len(v) > settings.max_comments_per_request:
            raise ValueError(f'Maximum {settings.max_comments_per_request} comments per request')
        for idx, comment in enumerate(v):
            if len(comment) > settings.max_comment_length:
                raise ValueError(f'Comment {idx} exceeds maximum length of {settings.max_comment_length} characters')
        return v


class SentimentDistribution(BaseModel):
    """Percentage split of comments per sentiment class."""
    positive_percentage: float
    negative_percentage: float
    neutral_percentage: float


class DetailedScores(BaseModel):
    """Average raw model scores; compound only exists for VADER."""
    average_positive: float
    average_negative: float
    average_neutral: float
    average_compound: Optional[float] = None


class DetailedAnalysis(BaseModel):
    """Per-model score breakdown."""
    vader_scores: DetailedScores
    roberta_scores: DetailedScores


class AnalysisResult(BaseModel):
    """Full analysis payload returned to the caller."""
    total_comments: int
    positive_comments: int
    negative_comments: int
    neutral_comments: int
    positive_sentiment: float
    negative_sentiment: float
    neutral_sentiment: float
    overall_sentiment: str
    sentiment_distribution: SentimentDistribution
    negative_comments_summary: str
    negative_comments_list: List[str]
    key_insights: List[str]
    recommendations: List[str]
    detailed_analysis: DetailedAnalysis
    faculty_info: Dict[str, str]
    analysis_timestamp: str


class CommentAnalysisResponse(BaseModel):
    """Envelope response: success flag, optional result, human message."""
    success: bool
    analysis: Optional[AnalysisResult] = None
    message: str


# Initialize FastAPI
app = FastAPI(
    title=get_settings().app_name,
    version=get_settings().app_version,
    description="Advanced sentiment analysis service for educational feedback"
)

# NOTE(review): wildcard CORS with credentials is permissive — confirm this
# is intended for the deployment environment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Global model variables (populated by initialize_models())
sia = None
tokenizer = None
model = None
device = None
summarizer = None

# ============================================================================
# ENHANCED PATTERN DETECTION FOR BETTER ACCURACY
# ============================================================================

# Meta-comments (not actual feedback - should be NEUTRAL).
# Anchored with ^...$ and used with .match(): the WHOLE comment must be a
# meta phrase for this to fire.
META_PATTERNS = re.compile(
    r'^(no\s+(negative\s+)?(more\s+)?(comments?|feedback|remarks?|issues?|problems?|complaints?)|'
    r'(everything|all)\s+(is\s+)?(good|fine|ok(ay)?|great|perfect|excellent)|'
    r'nothing(\s+to\s+(say|comment|mention|add))?|'
    r'(nil|none|na|n/a|nill)\.?|'
    r'^(all\s+)?(good|fine|ok(ay)?|great|nice)\.?|'
    r'no\s+remarks?|'
    r'everything\s+at\s+the\s+too\s+only)$',
    re.IGNORECASE
)

# Strong NEGATIVE indicators (should override model scores)
STRONG_NEGATIVE_PATTERN = re.compile(
    r'\b('
    # Direct criticism
    r'(very|extremely|quite|so|too)\s+(poor|bad|weak|terrible|awful|horrible)|'
    r'poor\s+(teaching|teacher|faculty|knowledge|communication|quality|explanation)|'
    r'bad\s+(teaching|teacher|faculty|quality|explanation)|'
    r'terrible|horrible|awful|pathetic|useless|waste\s+of\s+time|'
    # Teaching quality issues
    r'(teaching|knowledge)\s+(is\s+)?(poor|bad|weak|lacking|insufficient|not\s+good)|'
    r'cannot\s+teach|can\'?t\s+teach|doesn\'?t\s+know\s+how\s+to\s+teach|'
    r'not\s+teaching\s+properly|teaching\s+method\s+is\s+(poor|bad)|'
    # Boring/disengagement
    r'(boring|dull|monotonous)\s+(class|classes|subject|lecture|lectures|sessions?)|'
    r'(class|classes|subject|lectures?)\s+(is|are)\s+(boring|dull|monotonous|uninteresting)|'
    r'sleeping\s+in\s+class|fall\s+asleep|makes?\s+us\s+sleep|'
    # Communication issues
    r'(low|soft|quiet|unclear)\s+voice|voice\s+(is\s+)?(low|soft|quiet|not\s+clear)|'
    r'(cannot|can\'?t|cant|unable\s+to)\s+hear|difficult\s+to\s+hear|'
    r'(not|poor|bad)\s+(communication|explaining|explanation)|'
    # Understanding issues
    r'(cannot|can\'?t|cant|unable\s+to|difficult\s+to|hard\s+to)\s+understand|'
    r'(not|never|don\'?t)\s+(able\s+to\s+)?understand|'
    r'(concepts?|topics?|subjects?)\s+(are\s+)?(difficult|hard|tough|impossible)\s+to\s+understand|'
    r'makes?\s+(no|little)\s+sense|doesn\'?t\s+make\s+sense|'
    # Improvement needed
    r'(need|needs|require|requires)\s+(urgent|serious|immediate|much|lot\s+of)?\s*improvement|'
    r'(should|must|have\s+to)\s+improve\s+(a\s+lot|more|urgently)|'
    # Pace issues
    r'(lectures?|class(es)?|teaching)\s+(is|are|going)\s+(too|very)\s+(fast|slow)|'
    r'(too|very|extremely)\s+(fast|slow|rush|rushed)|'
    r'(lag|lagging)\s+in\s+teaching|teaching\s+(is\s+)?lagging|'
    # Time management
    # FIX: original had the redundant duplicate alternation (managing|managing)
    r'(not|poor|bad|terrible)\s+managing\s+time|'
    r'time\s+management\s+(is\s+)?(poor|bad|terrible|lacking)|'
    r'always\s+(late|wasting\s+time)|waste\s+(our|class)\s+time|'
    # Lack of resources/support
    r'(no|not|insufficient|lack\s+of)\s+(proper|sufficient|enough|regular)?\s*(classes|notes|support|help)|'
    r'need\s+more\s+(staff|faculty|classes|support|help)|'
    r'no\s+(practical|hands[-\s]?on|lab|real[-\s]?world)|lack\s+of\s+practical|'
    # Attendance/engagement issues
    r'(just|only)\s+(for|going\s+for)\s+attendance|'
    r'going\s+(to|for)\s+(her|his|their)\s+class\s+(just|only)\s+for\s+attendance|'
    r'(not|no)\s+(interested|engaging|helpful|useful|at\s+all)|'
    r'no\s+interest\s+in\s+teaching|'
    # Administrative issues
    r'military\s+rules|too\s+strict|very\s+strict|'
    r'attendance\s+(issue|problem)|not\s+providing\s+attendance|'
    # Workload issues
    r'too\s+many\s+projects|many\s+projects\s+review|'
    r'placement\s+activities\s+(and|with)\s+attendance'
    r')\b',
    re.IGNORECASE
)

# Positive indicators (help identify positive comments)
POSITIVE_PATTERN = re.compile(
    r'\b('
    r'(very|extremely|really|so|truly)\s+(good|great|excellent|amazing|wonderful|fantastic|helpful|knowledgeable|clear)|'
    r'excellent|outstanding|amazing|wonderful|fantastic|brilliant|superb|'
    r'(great|good|best|wonderful)\s+(teaching|teacher|faculty|knowledge|explanation|professor|sir|madam)|'
    r'(teaching|explanation|knowledge)\s+(is\s+)?(excellent|outstanding|very\s+good|great|clear)|'
    r'explains?\s+(very\s+)?(well|clearly|nicely|perfectly)|'
    r'(easy|easier)\s+to\s+understand|clear\s+explanation|'
    r'(very\s+)?(helpful|supportive|friendly|approachable|patient)|'
    r'(good|strong|deep|vast)\s+(knowledge|understanding)|'
    r'(love|like|enjoy|appreciate)\s+(the\s+)?(class|classes|teaching|subject|course|lectures?)|'
    r'learned?\s+(a\s+lot|so\s+much|many\s+things)|'
    r'inspired?|inspiring|motivating|motivated|encouraged|'
    r'(best|favourite|favorite)\s+(teacher|faculty|professor)|'
    r'highly\s+recommend|strongly\s+recommend|'
    r'grateful|thankful|blessed|lucky\s+to\s+have|'
    r'satisfied|happy\s+with|pleased\s+with|'
    r'(always|very)\s+(available|accessible|helpful)|'
    r'patient|caring|dedicated|passionate|'
    r'interactive\s+class|engaging\s+class|interesting\s+class'
    r')\b',
    re.IGNORECASE
)

# Weak negative indicators (suggestions/mild criticism - might be NEUTRAL)
WEAK_NEGATIVE_PATTERN = re.compile(
    r'\b('
    r'could\s+(be\s+)?better|'
    r'can\s+improve|'
    r'would\s+be\s+good\s+if|'
    r'suggest|suggestion|'
    r'maybe|perhaps|'
    r'slightly|a\s+bit|'
    r'sometimes|occasionally'
    r')\b',
    re.IGNORECASE
)


def is_meta_comment(text: str) -> bool:
    """Return True when the comment is meta (empty/too short/"no comments")."""
    # Very short or empty strings carry no feedback signal.
    if not text or len(text.strip()) < 3:
        return True
    text = text.strip()
    return bool(META_PATTERNS.match(text))


def detect_strong_negative(text: str) -> bool:
    """Return True when text matches a strong negative pattern (meta excluded)."""
    if not text or is_meta_comment(text):
        return False
    return bool(STRONG_NEGATIVE_PATTERN.search(text))


def detect_positive(text: str) -> bool:
    """Return True when text matches a positive pattern (meta excluded)."""
    if not text or is_meta_comment(text):
        return False
    return bool(POSITIVE_PATTERN.search(text))


def detect_weak_negative(text: str) -> bool:
    """Return True when text matches a mild-criticism/suggestion pattern."""
    if not text or is_meta_comment(text):
        return False
    return bool(WEAK_NEGATIVE_PATTERN.search(text))


# ============================================================================
# MODEL INITIALIZATION
# ============================================================================
def initialize_models():
    """Initialize sentiment analysis models into the module-level globals.

    Loads VADER, the RoBERTa classifier (moved to GPU when available,
    eval mode), and optionally the abstractive summarizer.  The summarizer
    is best-effort: its failure is logged and summarizer stays None, while
    any other initialization failure is re-raised.
    """
    global sia, tokenizer, model, device, summarizer

    try:
        settings = get_settings()
        logger.info("Initializing sentiment analysis models...")

        # VADER (NLTK data ensured at import time)
        sia = SentimentIntensityAnalyzer()
        logger.info("✓ VADER initialized")

        # RoBERTa, cached on disk so restarts avoid re-downloading
        cache_dir = settings.model_cache_dir
        os.makedirs(cache_dir, exist_ok=True)

        tokenizer = AutoTokenizer.from_pretrained(
            settings.roberta_model_name,
            cache_dir=cache_dir
        )
        model = AutoModelForSequenceClassification.from_pretrained(
            settings.roberta_model_name,
            cache_dir=cache_dir
        )

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model.to(device)
        model.eval()  # inference only — disables dropout etc.
        logger.info(f"✓ RoBERTa initialized on device: {device}")

        # Summarizer (optional feature flag)
        if settings.use_abstractive_summary:
            try:
                summarizer = pipeline(
                    "summarization",
                    model=settings.summarizer_model,
                    device=0 if device == "cuda" else -1
                )
                logger.info("✓ Summarizer initialized")
            except Exception as e:
                logger.warning(f"Summarizer initialization failed: {e}")
                summarizer = None

        logger.info("✓ All models initialized successfully")

    except Exception as e:
        logger.error(f"Error initializing models: {e}")
        raise e


# ============================================================================
# SENTIMENT ANALYSIS FUNCTIONS
# ============================================================================

@lru_cache(maxsize=500)
def vader_sentiment_cached(text: str) -> tuple:
    """Cached VADER scores as a hashable (neg, neu, pos, compound) tuple."""
    scores = sia.polarity_scores(text)
    return (scores['neg'], scores['neu'], scores['pos'], scores['compound'])
def vader_sentiment(text: str) -> Dict[str, float]:
    """Run VADER on one comment, via the LRU cache when caching is enabled.

    Returns neutral-leaning defaults on any analyzer failure so one bad
    comment cannot abort a whole batch.
    """
    try:
        settings = get_settings()
        if settings.enable_caching:
            neg, neu, pos, compound = vader_sentiment_cached(text)
            return {
                'vader_neg': neg,
                'vader_neu': neu,
                'vader_pos': pos,
                'vader_compound': compound
            }
        scores = sia.polarity_scores(text)
        return {
            'vader_neg': scores['neg'],
            'vader_neu': scores['neu'],
            'vader_pos': scores['pos'],
            'vader_compound': scores['compound']
        }
    except Exception as e:
        logger.warning(f"VADER analysis failed: {e}")
        return {'vader_neg': 0.0, 'vader_neu': 1.0, 'vader_pos': 0.0, 'vader_compound': 0.0}


def roberta_sentiment_batch(texts: List[str]) -> List[Dict[str, float]]:
    """Score texts with RoBERTa in mini-batches of Settings.batch_size.

    Returns one {'roberta_neg','roberta_neu','roberta_pos'} dict per input,
    or neutral defaults for every text if the batch fails.
    """
    try:
        settings = get_settings()
        results = []

        for start in range(0, len(texts), settings.batch_size):
            batch = texts[start:start + settings.batch_size]

            # 512 is the model's max sequence length; padding lets us
            # forward the whole batch in one tensor.
            encoded = tokenizer(
                batch,
                return_tensors='pt',
                truncation=True,
                max_length=512,
                padding=True
            )
            encoded = {k: v.to(device) for k, v in encoded.items()}

            with torch.no_grad():
                outputs = model(**encoded)

            for logits in outputs.logits:
                # Label order for this checkpoint: [negative, neutral, positive]
                probs = softmax(logits.cpu().numpy())
                results.append({
                    'roberta_neg': float(probs[0]),
                    'roberta_neu': float(probs[1]),
                    'roberta_pos': float(probs[2])
                })

        return results

    except Exception as e:
        logger.warning(f"RoBERTa batch analysis failed: {e}")
        return [{'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0} for _ in texts]


# Forward-reference annotation for Settings so this definition does not
# depend on declaration order.
def classify_sentiment_enhanced(row: pd.Series, settings: "Settings") -> str:
    """
    Enhanced multi-stage sentiment classification for better accuracy.

    Stage 1: Meta-comments -> Neutral
    Stage 2: Strong negative patterns -> Negative (override models)
    Stage 3: Strong positive patterns + high scores -> Positive
    Stage 4: Model ensemble decision
    Stage 5: Default to neutral if uncertain
    """
    # Stage 1: Meta-comments are always neutral
    if row.get('is_meta', False):
        return 'Neutral'

    # Scores actually consulted below (pattern flags + model outputs)
    vader_compound = row.get('vader_compound', 0.0)
    roberta_pos = row.get('roberta_pos', 0.0)
    roberta_neg = row.get('roberta_neg', 0.0)

    combined_pos = row.get('combined_pos', 0.0)
    combined_neg = row.get('combined_neg', 0.0)
    combined_neu = row.get('combined_neu', 0.0)

    has_strong_negative = row.get('has_strong_negative', False)
    has_positive = row.get('has_positive', False)
    has_weak_negative = row.get('has_weak_negative', False)

    # Stage 2: Strong negative patterns override everything
    if has_strong_negative:
        return 'Negative'

    # Stage 3: Positive pattern plus strong model agreement
    if has_positive and (
        vader_compound >= settings.vader_strong_pos_threshold or
        roberta_pos >= settings.roberta_strong_pos_threshold or
        (vader_compound >= settings.vader_pos_threshold and roberta_pos >= settings.roberta_pos_threshold)
    ):
        return 'Positive'

    # Stage 4: Model-based classification with ensemble

    # Strong negative from either model, or moderate agreement of both
    if (
        vader_compound <= settings.vader_strong_neg_threshold or
        roberta_neg >= settings.roberta_strong_neg_threshold or
        (vader_compound <= settings.vader_neg_threshold and roberta_neg >= settings.roberta_neg_threshold)
    ):
        return 'Negative'

    # Moderate negative: combined negative clearly dominant (> 0.35 floor)
    if combined_neg > combined_pos and combined_neg > combined_neu and combined_neg > 0.35:
        return 'Negative'

    # Clear positive: combined positive clearly dominant (> 0.35 floor)
    if combined_pos > combined_neg and combined_pos > combined_neu and combined_pos > 0.35:
        return 'Positive'

    # Weak negative (suggestion-style criticism) without strong signal
    if has_weak_negative and not has_strong_negative:
        if combined_neg < 0.5:
            return 'Neutral'

    # Stage 5: Default to neutral if uncertain
    return 'Neutral'


def sanitize_text(text: str) -> str:
    """Strip ASCII control characters and collapse whitespace runs."""
    if not text:
        return ""
    # Remove control chars but keep printable characters (incl. emojis)
    text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text)
    text = ' '.join(text.split())
    return text.strip()


# ============================================================================
# MAIN ANALYSIS FUNCTION
# ============================================================================
# NOTE(review): the original diff chunk is truncated here, mid-way through
# `analyze_comments_sentiment(comments)`; its body must be reconstructed
# from the remaining hunks of the patch and is intentionally not guessed at.
< to <= to properly handle min_comment_words -# filtered_comments = [ -# comment for comment in sanitized_comments -# if (settings.min_comment_words <= len(comment.split()) <= settings.max_comment_length) -# ] - -# logger.info(f"After filtering: {len(filtered_comments)} valid comments") - -# if not filtered_comments: -# return { -# "total_comments": 0, -# "message": "No valid comments found for analysis" -# } + # Filter valid comments + filtered_comments = [ + comment for comment in sanitized_comments + if settings.min_comment_words <= len(comment.split()) <= settings.max_comment_length + ] -# # Create dataframe -# df = pd.DataFrame({'comment': filtered_comments}) + logger.info(f"After filtering: {len(filtered_comments)} valid comments") -# # Detect meta-comments and explicit negatives -# df['is_meta'] = df['comment'].apply(is_meta_comment) -# df['heuristic_negative'] = df['comment'].apply(is_explicit_negative) + if not filtered_comments: + return { + "total_comments": 0, + "message": "No valid comments found for analysis" + } -# # Log detection results -# meta_count = df['is_meta'].sum() -# heuristic_neg_count = df['heuristic_negative'].sum() -# logger.info(f"Detected {meta_count} meta-comments and {heuristic_neg_count} heuristic negatives") + # Create DataFrame + df = pd.DataFrame({'comment': filtered_comments}) -# # VADER sentiment analysis -# vader_results = [] -# for text in df['comment']: -# vader_results.append(vader_sentiment(text)) + # Pattern detection + df['is_meta'] = df['comment'].apply(is_meta_comment) + df['has_strong_negative'] = df['comment'].apply(detect_strong_negative) + df['has_positive'] = df['comment'].apply(detect_positive) + df['has_weak_negative'] = df['comment'].apply(detect_weak_negative) + + # Log detection stats + logger.info(f"Meta: {df['is_meta'].sum()}, " + f"Strong Neg: {df['has_strong_negative'].sum()}, " + f"Positive: {df['has_positive'].sum()}, " + f"Weak Neg: {df['has_weak_negative'].sum()}") + + # VADER analysis + 
vader_results = [vader_sentiment(text) for text in df['comment']] + vader_df = pd.DataFrame(vader_results) -# # RoBERTa sentiment analysis (batch) -# roberta_results = roberta_sentiment_batch(df['comment'].tolist()) + # RoBERTa analysis + roberta_results = roberta_sentiment_batch(df['comment'].tolist()) + roberta_df = pd.DataFrame(roberta_results) -# # Combine results -# vader_df = pd.DataFrame(vader_results) -# roberta_df = pd.DataFrame(roberta_results) -# final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1) + # Combine + final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1) -# # Calculate combined scores -# final_df['combined_pos'] = ( -# settings.combined_weight_vader * final_df['vader_pos'] + -# settings.combined_weight_roberta * final_df['roberta_pos'] -# ) -# final_df['combined_neg'] = ( -# settings.combined_weight_vader * final_df['vader_neg'] + -# settings.combined_weight_roberta * final_df['roberta_neg'] -# ) -# final_df['combined_neu'] = ( -# settings.combined_weight_vader * final_df['vader_neu'] + -# settings.combined_weight_roberta * final_df['roberta_neu'] -# ) + # Calculate combined scores + final_df['combined_pos'] = ( + settings.combined_weight_vader * final_df['vader_pos'] + + settings.combined_weight_roberta * final_df['roberta_pos'] + ) + final_df['combined_neg'] = ( + settings.combined_weight_vader * final_df['vader_neg'] + + settings.combined_weight_roberta * final_df['roberta_neg'] + ) + final_df['combined_neu'] = ( + settings.combined_weight_vader * final_df['vader_neu'] + + settings.combined_weight_roberta * final_df['roberta_neu'] + ) -# # Classify overall sentiment (meta-comments become Neutral) -# final_df['Overall_Sentiment'] = final_df.apply( -# lambda row: 'Neutral' if row.get('is_meta') else overall_sentiment(row, settings), -# axis=1 -# ) + # Enhanced classification + final_df['Overall_Sentiment'] = final_df.apply( + lambda row: classify_sentiment_enhanced(row, settings), + axis=1 
+ ) -# # Calculate statistics -# total_comments = len(final_df) -# positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive']) -# negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative']) -# neutral_count = len(final_df[final_df['Overall_Sentiment'] == 'Neutral']) + # Statistics + total_comments = len(final_df) + positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive']) + negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative']) + neutral_count = len(final_df[final_df['Overall_Sentiment'] == 'Neutral']) -# logger.info( -# f"Results: {positive_count} positive, " -# f"{negative_count} negative, {neutral_count} neutral" -# ) + logger.info(f"Classification Results - Pos: {positive_count}, Neg: {negative_count}, Neu: {neutral_count}") -# # Average scores -# avg_positive = float(final_df['combined_pos'].mean()) -# avg_negative = float(final_df['combined_neg'].mean()) -# avg_neutral = float(final_df['combined_neu'].mean()) + # Average scores + avg_positive = float(final_df['combined_pos'].mean()) + avg_negative = float(final_df['combined_neg'].mean()) + avg_neutral = float(final_df['combined_neu'].mean()) -# # Determine overall sentiment label -# if avg_positive > max(avg_negative, avg_neutral): -# overall_sentiment_label = "Positive" -# elif avg_negative > max(avg_positive, avg_neutral): -# overall_sentiment_label = "Negative" -# else: -# overall_sentiment_label = "Neutral" + # Overall sentiment + if avg_positive > max(avg_negative, avg_neutral): + overall_sentiment_label = "Positive" + elif avg_negative > max(avg_positive, avg_neutral): + overall_sentiment_label = "Negative" + else: + overall_sentiment_label = "Neutral" -# # Process negative comments -# negative_summary = "" -# negative_comments_list = [] -# negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative'] + # Process negative comments + negative_summary = "" + negative_comments_list = [] + negative_comments = 
final_df[final_df['Overall_Sentiment'] == 'Negative'] -# if len(negative_comments) > 0: -# negative_comments_list = negative_comments['comment'].tolist() + if len(negative_comments) > 0: + negative_comments_list = negative_comments['comment'].tolist() -# try: -# # Get top negative comments -# top_idx = negative_comments['combined_neg'].nlargest(3).index -# top_comments = negative_comments.loc[top_idx, 'comment'].tolist() + try: + top_idx = negative_comments['combined_neg'].nlargest(min(3, len(negative_comments))).index + top_comments = negative_comments.loc[top_idx, 'comment'].tolist() -# if settings.use_abstractive_summary and summarizer is not None: -# negative_text = " ".join(top_comments) -# if len(negative_text) > 1000: -# negative_text = negative_text[:1000] + if settings.use_abstractive_summary and summarizer is not None: + negative_text = " ".join(top_comments) + if len(negative_text) > 1000: + negative_text = negative_text[:1000] -# summary_result = summarizer( -# negative_text, -# max_length=settings.max_summary_length, -# min_length=settings.min_summary_length, -# do_sample=False -# ) -# negative_summary = summary_result[0]['summary_text'] -# else: -# # Extractive summary -# negative_summary = "; ".join(top_comments) -# except Exception as e: -# logger.warning(f"Summary generation failed: {e}") -# negative_summary = "; ".join(negative_comments_list[:3]) + summary_result = summarizer( + negative_text, + max_length=settings.max_summary_length, + min_length=settings.min_summary_length, + do_sample=False + ) + negative_summary = summary_result[0]['summary_text'] + else: + negative_summary = "; ".join(top_comments) + except Exception as e: + logger.warning(f"Summary generation failed: {e}") + negative_summary = "; ".join(negative_comments_list[:3]) -# # Generate insights and recommendations -# insights = [] -# recommendations = [] + # Insights and recommendations + insights = [] + recommendations = [] -# if overall_sentiment_label == "Positive": -# 
insights.extend([ -# "Students have positive feedback overall", -# "Teaching methods are well-received", -# f"{positive_count}/{total_comments} comments are positive" -# ]) -# recommendations.extend([ -# "Continue current teaching approach", -# "Maintain student engagement strategies", -# "Share successful practices with colleagues" -# ]) -# elif overall_sentiment_label == "Negative": -# insights.extend([ -# "Students have concerns that need attention", -# "Some aspects of teaching may need improvement", -# f"{negative_count}/{total_comments} comments indicate issues" -# ]) -# recommendations.extend([ -# "Review teaching methods and materials", -# "Consider additional student support", -# "Schedule meetings to address student concerns", -# "Focus on areas mentioned in negative feedback" -# ]) -# else: -# insights.extend([ -# "Mixed feedback from students", -# "Some areas performing well, others need attention", -# "Balance of positive and negative responses" -# ]) -# recommendations.extend([ -# "Focus on areas with negative feedback", -# "Maintain strengths while addressing weaknesses", -# "Gather more specific feedback on improvement areas" -# ]) + if overall_sentiment_label == "Positive": + insights.extend([ + f"Strong positive feedback: {positive_count}/{total_comments} comments ({round(positive_count/total_comments*100, 1)}%)", + "Students are satisfied with the teaching approach", + "High engagement and learning outcomes reported" + ]) + recommendations.extend([ + "Continue current effective teaching methods", + "Document successful practices for future reference", + "Share best practices with colleagues" + ]) + elif overall_sentiment_label == "Negative": + insights.extend([ + f"Concerns identified: {negative_count}/{total_comments} negative comments ({round(negative_count/total_comments*100, 1)}%)", + "Students facing challenges with current approach", + "Immediate attention needed to address feedback" + ]) + recommendations.extend([ + "Review and analyze 
specific negative feedback points", + "Consider adjusting teaching pace or methods", + "Increase student engagement and support", + "Schedule student feedback sessions", + "Focus on communication clarity and accessibility" + ]) + else: + insights.extend([ + f"Mixed feedback: {positive_count} positive, {negative_count} negative, {neutral_count} neutral", + "Room for improvement while maintaining strengths", + "Students have varied experiences" + ]) + recommendations.extend([ + "Address specific concerns raised in negative feedback", + "Build on positive aspects appreciated by students", + "Gather more detailed feedback for neutral areas" + ]) -# return { -# "total_comments": total_comments, -# "positive_comments": positive_count, -# "negative_comments": negative_count, -# "neutral_comments": neutral_count, -# "positive_sentiment": round(avg_positive, 3), -# "negative_sentiment": round(avg_negative, 3), -# "neutral_sentiment": round(avg_neutral, 3), -# "overall_sentiment": overall_sentiment_label, -# "sentiment_distribution": { -# "positive_percentage": round((positive_count / total_comments) * 100, 1), -# "negative_percentage": round((negative_count / total_comments) * 100, 1), -# "neutral_percentage": round((neutral_count / total_comments) * 100, 1) -# }, -# "negative_comments_summary": negative_summary, -# "negative_comments_list": negative_comments_list, -# "key_insights": insights, -# "recommendations": recommendations, -# "detailed_analysis": { -# "vader_scores": { -# "average_positive": round(final_df['vader_pos'].mean(), 3), -# "average_negative": round(final_df['vader_neg'].mean(), 3), -# "average_neutral": round(final_df['vader_neu'].mean(), 3), -# "average_compound": round(final_df['vader_compound'].mean(), 3) -# }, -# "roberta_scores": { -# "average_positive": round(final_df['roberta_pos'].mean(), 3), -# "average_negative": round(final_df['roberta_neg'].mean(), 3), -# "average_neutral": round(final_df['roberta_neu'].mean(), 3) -# } -# }, -# 
"analysis_timestamp": datetime.utcnow().isoformat() -# } + # Add pattern-based insights + if df['has_strong_negative'].sum() > 0: + insights.append(f"{df['has_strong_negative'].sum()} comments contain explicit criticism requiring attention") + if df['has_positive'].sum() > 0: + insights.append(f"{df['has_positive'].sum()} comments contain strong positive appreciation") -# except Exception as e: -# logger.error(f"Sentiment analysis failed: {e}", exc_info=True) -# raise e - -# @app.on_event("startup") -# async def startup_event(): -# """Initialize models on startup""" -# try: -# logger.info("=" * 80) -# logger.info(f"Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") -# logger.info("=" * 80) -# initialize_models() -# logger.info("✓ Service started successfully") -# logger.info("=" * 80) -# except Exception as e: -# logger.error(f"✗ Startup failed: {e}") -# raise e + return { + "total_comments": total_comments, + "positive_comments": positive_count, + "negative_comments": negative_count, + "neutral_comments": neutral_count, + "positive_sentiment": round(avg_positive, 3), + "negative_sentiment": round(avg_negative, 3), + "neutral_sentiment": round(avg_neutral, 3), + "overall_sentiment": overall_sentiment_label, + "sentiment_distribution": { + "positive_percentage": round((positive_count / total_comments) * 100, 1), + "negative_percentage": round((negative_count / total_comments) * 100, 1), + "neutral_percentage": round((neutral_count / total_comments) * 100, 1) + }, + "negative_comments_summary": negative_summary, + "negative_comments_list": negative_comments_list, + "key_insights": insights, + "recommendations": recommendations, + "detailed_analysis": { + "vader_scores": { + "average_positive": round(final_df['vader_pos'].mean(), 3), + "average_negative": round(final_df['vader_neg'].mean(), 3), + "average_neutral": round(final_df['vader_neu'].mean(), 3), + "average_compound": round(final_df['vader_compound'].mean(), 3) + }, + "roberta_scores": { + 
"average_positive": round(final_df['roberta_pos'].mean(), 3), + "average_negative": round(final_df['roberta_neg'].mean(), 3), + "average_neutral": round(final_df['roberta_neu'].mean(), 3) + } + }, + "analysis_timestamp": datetime.utcnow().isoformat() + } + + except Exception as e: + logger.error(f"Sentiment analysis failed: {e}", exc_info=True) + raise e -# @app.on_event("shutdown") -# async def shutdown_event(): -# """Cleanup on shutdown""" -# logger.info("Service shutting down") +# ============================================================================ +# API ENDPOINTS +# ============================================================================ -# @app.get("/") -# async def root(): -# """Root endpoint""" -# return { -# "service": get_settings().app_name, -# "version": get_settings().app_version, -# "status": "running", -# "endpoints": { -# "health": "/health", -# "analyze": "/analyze-comments", -# "config": "/config (debug mode only)", -# "test": "/test" -# } -# } +@app.on_event("startup") +async def startup_event(): + """Initialize models on startup""" + try: + logger.info("=" * 80) + logger.info(f"Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + logger.info("=" * 80) + initialize_models() + logger.info("✓ Service started successfully") + logger.info("=" * 80) + except Exception as e: + logger.error(f"✗ Startup failed: {e}") + raise e -# @app.get("/health") -# async def health_check(): -# """Health check endpoint""" -# models_loaded = sia is not None and model is not None and tokenizer is not None +@app.on_event("shutdown") +async def shutdown_event(): + """Cleanup on shutdown""" + logger.info("Service shutting down") + +@app.get("/") +async def root(): + """Root endpoint""" + return { + "service": get_settings().app_name, + "version": get_settings().app_version, + "status": "running", + "endpoints": { + "health": "/health", + "analyze": "/analyze-comments", + "test": "/test" + } + } + +@app.get("/health") +async def 
health_check(): + """Health check endpoint""" + models_loaded = sia is not None and model is not None and tokenizer is not None -# return { -# "status": "healthy" if models_loaded else "unhealthy", -# "service": "comment-analysis", -# "version": get_settings().app_version, -# "models_loaded": models_loaded, -# "device": device if device else "not initialized", -# "timestamp": datetime.utcnow().isoformat() -# } + return { + "status": "healthy" if models_loaded else "unhealthy", + "service": "comment-analysis", + "version": get_settings().app_version, + "models_loaded": models_loaded, + "device": device if device else "not initialized", + "timestamp": datetime.utcnow().isoformat() + } -# @app.post("/analyze-comments", response_model=CommentAnalysisResponse) -# async def analyze_comments( -# request: CommentAnalysisRequest, -# settings: Settings = Depends(get_settings) -# ): -# """ -# Analyze comments for sentiment analysis using VADER and RoBERTa models -# """ -# try: -# comments = request.comments -# faculty_info = request.faculty_info +@app.post("/analyze-comments", response_model=CommentAnalysisResponse) +async def analyze_comments( + request: CommentAnalysisRequest, + settings: Settings = Depends(get_settings) +): + """Analyze comments for sentiment using enhanced multi-stage classification""" + try: + comments = request.comments + faculty_info = request.faculty_info -# if not comments: -# return CommentAnalysisResponse( -# success=False, -# analysis=None, -# message="No comments provided for analysis" -# ) + if not comments: + return CommentAnalysisResponse( + success=False, + analysis=None, + message="No comments provided for analysis" + ) -# logger.info( -# f"Analyzing {len(comments)} comments for " -# f"{faculty_info.faculty_name} ({faculty_info.course_code})" -# ) + logger.info(f"Analyzing {len(comments)} comments for {faculty_info.faculty_name} ({faculty_info.course_code})") -# analysis_result = analyze_comments_sentiment(comments) + analysis_result = 
analyze_comments_sentiment(comments) -# if analysis_result.get("total_comments", 0) == 0: -# return CommentAnalysisResponse( -# success=False, -# analysis=None, -# message=analysis_result.get("message", "No valid comments to analyze") -# ) + if analysis_result.get("total_comments", 0) == 0: + return CommentAnalysisResponse( + success=False, + analysis=None, + message=analysis_result.get("message", "No valid comments to analyze") + ) -# analysis_result["faculty_info"] = { -# "faculty_name": faculty_info.faculty_name, -# "staff_id": faculty_info.staff_id, -# "course_code": faculty_info.course_code, -# "course_name": faculty_info.course_name -# } + analysis_result["faculty_info"] = { + "faculty_name": faculty_info.faculty_name, + "staff_id": faculty_info.staff_id, + "course_code": faculty_info.course_code, + "course_name": faculty_info.course_name + } -# return CommentAnalysisResponse( -# success=True, -# analysis=analysis_result, -# message=f"Successfully analyzed {analysis_result['total_comments']} comments" -# ) + return CommentAnalysisResponse( + success=True, + analysis=analysis_result, + message=f"Successfully analyzed {analysis_result['total_comments']} comments" + ) -# except ValueError as ve: -# logger.warning(f"Validation error: {ve}") -# raise HTTPException(status_code=400, detail=str(ve)) -# except Exception as e: -# logger.error(f"Analysis failed: {e}", exc_info=True) -# raise HTTPException( -# status_code=500, -# detail="Analysis failed. Please try again later." 
-# ) - -# @app.get("/config") -# async def get_config(settings: Settings = Depends(get_settings)): -# """Get current configuration (debug mode only)""" -# if not settings.debug_mode: -# raise HTTPException(status_code=404, detail="Not found") - -# return { -# "max_comments_per_request": settings.max_comments_per_request, -# "max_comment_length": settings.max_comment_length, -# "min_comment_words": settings.min_comment_words, -# "vader_pos_threshold": settings.vader_pos_threshold, -# "vader_neg_threshold": settings.vader_neg_threshold, -# "roberta_pos_threshold": settings.roberta_pos_threshold, -# "roberta_neg_threshold": settings.roberta_neg_threshold, -# "combined_weight_vader": settings.combined_weight_vader, -# "combined_weight_roberta": settings.combined_weight_roberta, -# "enable_caching": settings.enable_caching, -# "batch_size": settings.batch_size, -# "use_abstractive_summary": settings.use_abstractive_summary -# } + except ValueError as ve: + logger.warning(f"Validation error: {ve}") + raise HTTPException(status_code=400, detail=str(ve)) + except Exception as e: + logger.error(f"Analysis failed: {e}", exc_info=True) + raise HTTPException(status_code=500, detail="Analysis failed. Please try again later.") -# @app.get("/test") -# async def test_endpoint(): -# """Test endpoint to verify sentiment classification""" -# test_cases = [ -# "No more comments 😅", -# "Overall good but too lag", -# "Not interested to be in her class just we are going for attendance thats it not at all managing time.", -# "Nothing to say anything just we are going to her class mean, only for attendance", -# "Excellent teaching! 
Very clear explanations.", -# "Good teacher with strong subject knowledge", -# "Class is okay, nothing special" -# ] +@app.get("/test") +async def test_endpoint(): + """Test endpoint with various comment types""" + test_cases = [ + # Meta-comments (should be Neutral) + "No negative comments", + "Everything is good", + "Nothing to say", + "Nil", + + # Strong Negative (should be Negative) + "Very poor teaching quality", + "Boring class, waste of time", + "Cannot understand anything", + "Teaching is terrible and voice is too low", + "Poor knowledge and bad teaching method", + + # Positive (should be Positive) + "Excellent teacher with great knowledge", + "Very helpful and explains clearly", + "Amazing teaching style, learned a lot", + "Best professor, highly recommend", + + # Weak negative/Neutral + "Could be better", + "Sometimes hard to understand", + "Overall good but too lag", + + # Mixed + "Good teacher but classes are boring", + "Knowledgeable but voice is low" + ] -# results = [] -# for text in test_cases: -# is_meta = is_meta_comment(text) -# is_neg = is_explicit_negative(text) + results = [] + for text in test_cases: + is_meta = is_meta_comment(text) + has_strong_neg = detect_strong_negative(text) + has_pos = detect_positive(text) + has_weak_neg = detect_weak_negative(text) -# # Predict classification -# if is_meta: -# predicted = "Neutral (meta-comment)" -# elif is_neg: -# predicted = "Negative (heuristic)" -# else: -# predicted = "Needs full analysis" + # Predict + if is_meta: + predicted = "Neutral (meta-comment)" + elif has_strong_neg: + predicted = "Negative (strong pattern)" + elif has_pos and not has_strong_neg: + predicted = "Positive (likely)" + elif has_weak_neg and not has_strong_neg: + predicted = "Neutral/Negative (weak)" + else: + predicted = "Requires full analysis" -# results.append({ -# "text": text, -# "is_meta_comment": is_meta, -# "is_heuristic_negative": is_neg, -# "predicted_classification": predicted -# }) + results.append({ + "text": 
text, + "is_meta": is_meta, + "strong_negative": has_strong_neg, + "positive": has_pos, + "weak_negative": has_weak_neg, + "predicted": predicted + }) -# return { -# "test_results": results, -# "note": "Full analysis requires VADER and RoBERTa scores" -# } + return { + "test_results": results, + "note": "Predictions based on pattern matching. Full analysis uses VADER + RoBERTa ensemble." + } + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info") + + + + + -# if __name__ == "__main__": -# uvicorn.run( -# app, -# host="0.0.0.0", -# port=8000, -# log_level="info" -# ) +# """ +# Enhanced FastAPI Service for Comment Sentiment Analysis +# with improved performance, validation, and configuration management +# Version 2.1.0 - Updated with bug fixes and improvements +# """ + +# from fastapi import FastAPI, HTTPException, Depends +# from fastapi.middleware.cors import CORSMiddleware +# from pydantic import BaseModel, Field, validator +# from pydantic_settings import BaseSettings +# from typing import List, Dict, Any, Optional +# from functools import lru_cache +# import uvicorn +# import pandas as pd +# import numpy as np +# import os +# import re +# from datetime import datetime +# import logging + +# # Configure logging FIRST +# logging.basicConfig( +# level=logging.INFO, +# format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +# ) +# logger = logging.getLogger(__name__) + +# # CRITICAL: Download NLTK data BEFORE importing NLTK components +# import nltk +# import ssl + +# try: +# _create_unverified_https_context = ssl._create_unverified_context +# except AttributeError: +# pass +# else: +# ssl._create_default_https_context = _create_unverified_https_context + +# # Set NLTK data path +# nltk_data_dir = '/tmp/nltk_data' +# os.makedirs(nltk_data_dir, exist_ok=True) +# nltk.data.path.insert(0, nltk_data_dir) + +# # Download required NLTK data +# def ensure_nltk_data(): +# """Ensure all required NLTK data is downloaded""" +# 
resources = ['vader_lexicon', 'punkt', 'stopwords', 'wordnet', 'omw-1.4'] + +# for resource in resources: +# try: +# # Try to find the resource +# if resource == 'vader_lexicon': +# nltk.data.find('sentiment/vader_lexicon.zip') +# elif resource == 'punkt': +# nltk.data.find('tokenizers/punkt') +# elif resource in ['stopwords', 'wordnet', 'omw-1.4']: +# nltk.data.find(f'corpora/{resource}') +# logger.info(f"✓ NLTK resource '{resource}' already available") +# except LookupError: +# logger.info(f"Downloading NLTK resource '{resource}'...") +# try: +# nltk.download(resource, download_dir=nltk_data_dir, quiet=False) +# logger.info(f"✓ Successfully downloaded '{resource}'") +# except Exception as e: +# logger.error(f"✗ Failed to download '{resource}': {e}") +# # Download NLTK data immediately +# logger.info("Ensuring NLTK data is available...") +# ensure_nltk_data() +# # NOW import NLTK components +# from nltk.sentiment import SentimentIntensityAnalyzer +# # Import transformers after NLTK setup +# from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline +# from scipy.special import softmax +# import torch +# # Configuration Management +# class Settings(BaseSettings): +# """Application settings with environment variable support""" +# # API Settings +# app_name: str = "Comment Analysis API" +# app_version: str = "2.1.0" +# debug_mode: bool = False + +# # Request Limits +# max_comments_per_request: int = 1000 +# max_comment_length: int = 5000 +# min_comment_words: int = 1 + +# # Sentiment Thresholds +# vader_pos_threshold: float = 0.2 +# vader_neg_threshold: float = -0.2 +# roberta_pos_threshold: float = 0.55 +# roberta_neg_threshold: float = 0.45 +# combined_weight_vader: float = 0.5 +# combined_weight_roberta: float = 0.5 + +# # Model Settings +# model_cache_dir: str = "/tmp/model_cache" +# roberta_model_name: str = "cardiffnlp/twitter-roberta-base-sentiment" +# use_abstractive_summary: bool = False +# summarizer_model: str = 
"facebook/bart-large-cnn" +# max_summary_length: int = 100 +# min_summary_length: int = 25 + +# # Performance +# enable_caching: bool = True +# cache_size: int = 500 +# batch_size: int = 32 + +# class Config: +# env_file = ".env" +# env_file_encoding = 'utf-8' +# extra = 'ignore' + +# @validator('min_comment_words') +# def validate_min_words(cls, v): +# if v < 0: +# raise ValueError('min_comment_words must be non-negative') +# return v + +# @validator('combined_weight_vader', 'combined_weight_roberta') +# def validate_weights(cls, v): +# if not 0 <= v <= 1: +# raise ValueError('Weights must be between 0 and 1') +# return v +# @lru_cache() +# def get_settings() -> Settings: +# """Cached settings instance""" +# settings = Settings() +# # Normalize weights if needed +# total = settings.combined_weight_vader + settings.combined_weight_roberta +# if not (0.99 <= total <= 1.01): +# logger.warning(f"Weights sum to {total}, normalizing to 1.0") +# settings.combined_weight_vader /= total +# settings.combined_weight_roberta /= total +# return settings -""" -Enhanced FastAPI Service for Comment Sentiment Analysis -with improved performance, validation, and configuration management -Version 2.1.0 - Updated with bug fixes and improvements -""" +# # Pydantic Models +# class FacultyInfo(BaseModel): +# faculty_name: str = Field(..., min_length=1, max_length=200) +# staff_id: str = Field(..., min_length=1, max_length=50) +# course_code: str = Field(..., min_length=1, max_length=50) +# course_name: str = Field(..., min_length=1, max_length=200) -from fastapi import FastAPI, HTTPException, Depends -from fastapi.middleware.cors import CORSMiddleware -from pydantic import BaseModel, Field, validator -from pydantic_settings import BaseSettings -from typing import List, Dict, Any, Optional -from functools import lru_cache -import uvicorn -import pandas as pd -import numpy as np -import os -import re -from datetime import datetime -import logging +# class 
CommentAnalysisRequest(BaseModel): +# comments: List[str] = Field(..., min_items=1) +# faculty_info: FacultyInfo + +# @validator('comments') +# def validate_comments(cls, v): +# settings = get_settings() + +# if len(v) > settings.max_comments_per_request: +# raise ValueError( +# f'Maximum {settings.max_comments_per_request} comments per request' +# ) + +# for idx, comment in enumerate(v): +# if len(comment) > settings.max_comment_length: +# raise ValueError( +# f'Comment {idx} exceeds maximum length of {settings.max_comment_length} characters' +# ) + +# return v -# Configure logging FIRST -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) +# class SentimentDistribution(BaseModel): +# positive_percentage: float +# negative_percentage: float +# neutral_percentage: float -# CRITICAL: Download NLTK data BEFORE importing NLTK components -import nltk -import ssl +# class DetailedScores(BaseModel): +# average_positive: float +# average_negative: float +# average_neutral: float +# average_compound: Optional[float] = None -try: - _create_unverified_https_context = ssl._create_unverified_context -except AttributeError: - pass -else: - ssl._create_default_https_context = _create_unverified_https_context +# class DetailedAnalysis(BaseModel): +# vader_scores: DetailedScores +# roberta_scores: DetailedScores -# Set NLTK data path -nltk_data_dir = '/tmp/nltk_data' -os.makedirs(nltk_data_dir, exist_ok=True) -nltk.data.path.insert(0, nltk_data_dir) +# class AnalysisResult(BaseModel): +# total_comments: int +# positive_comments: int +# negative_comments: int +# neutral_comments: int +# positive_sentiment: float +# negative_sentiment: float +# neutral_sentiment: float +# overall_sentiment: str +# sentiment_distribution: SentimentDistribution +# negative_comments_summary: str +# negative_comments_list: List[str] +# key_insights: List[str] +# recommendations: List[str] +# 
detailed_analysis: DetailedAnalysis +# faculty_info: Dict[str, str] +# analysis_timestamp: str -# Download required NLTK data -def ensure_nltk_data(): - """Ensure all required NLTK data is downloaded""" - resources = ['vader_lexicon', 'punkt', 'stopwords', 'wordnet', 'omw-1.4'] - - for resource in resources: - try: - # Try to find the resource - if resource == 'vader_lexicon': - nltk.data.find('sentiment/vader_lexicon.zip') - elif resource == 'punkt': - nltk.data.find('tokenizers/punkt') - elif resource in ['stopwords', 'wordnet', 'omw-1.4']: - nltk.data.find(f'corpora/{resource}') - logger.info(f"✓ NLTK resource '{resource}' already available") - except LookupError: - logger.info(f"Downloading NLTK resource '{resource}'...") - try: - nltk.download(resource, download_dir=nltk_data_dir, quiet=False) - logger.info(f"✓ Successfully downloaded '{resource}'") - except Exception as e: - logger.error(f"✗ Failed to download '{resource}': {e}") +# class CommentAnalysisResponse(BaseModel): +# success: bool +# analysis: Optional[AnalysisResult] = None +# message: str -# Download NLTK data immediately -logger.info("Ensuring NLTK data is available...") -ensure_nltk_data() +# # Initialize FastAPI app +# app = FastAPI( +# title=get_settings().app_name, +# version=get_settings().app_version, +# description="Advanced sentiment analysis service for educational feedback" +# ) -# NOW import NLTK components -from nltk.sentiment import SentimentIntensityAnalyzer +# # Add CORS middleware +# app.add_middleware( +# CORSMiddleware, +# allow_origins=["*"], +# allow_credentials=True, +# allow_methods=["*"], +# allow_headers=["*"], +# ) -# Import transformers after NLTK setup -from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline -from scipy.special import softmax -import torch +# # Global variables for models +# sia = None +# tokenizer = None +# model = None +# device = None +# summarizer = None -# Configuration Management -class Settings(BaseSettings): - 
"""Application settings with environment variable support""" - # API Settings - app_name: str = "Comment Analysis API" - app_version: str = "2.1.0" - debug_mode: bool = False - - # Request Limits - max_comments_per_request: int = 1000 - max_comment_length: int = 5000 - min_comment_words: int = 1 +# # Enhanced heuristic phrase/regex rules for explicit negative feedback +# NEGATIVE_PHRASES = [ +# # Teaching quality issues +# 'very poor', +# 'extremely poor', +# 'poor in teaching', +# 'poor teaching level', +# 'poor teaching', +# 'bad teacher', +# 'bad teaching', +# 'not good', # Keep but check it's not "no negative" +# 'not satisfied', +# 'not satisfactory', - # Sentiment Thresholds - vader_pos_threshold: float = 0.2 - vader_neg_threshold: float = -0.2 - roberta_pos_threshold: float = 0.55 - roberta_neg_threshold: float = 0.45 - combined_weight_vader: float = 0.5 - combined_weight_roberta: float = 0.5 +# # Content/delivery issues +# 'boring class', +# 'boring classes', +# 'boring subject', +# 'subject is boring', +# 'low voice', +# 'voice is low', +# 'cannot hear', +# "can't hear", +# 'speak louder', + +# # Resource/support issues +# 'need more staff', +# 'need more faculty', +# 'insufficient staff', +# 'lack of staff', +# 'not sufficient', +# 'insufficient', +# 'not enough', +# 'no classes', +# 'no regular classes', +# 'not sufficient classes', - # Model Settings - model_cache_dir: str = "/tmp/model_cache" - roberta_model_name: str = "cardiffnlp/twitter-roberta-base-sentiment" - use_abstractive_summary: bool = False - summarizer_model: str = "facebook/bart-large-cnn" - max_summary_length: int = 100 - min_summary_length: int = 25 +# # Knowledge/understanding issues +# 'lack of knowledge', +# 'better knowledge needed', +# 'poor knowledge', +# 'knowledge is lacking', +# 'practical knowledge lacking', +# 'no practical', +# 'lack of practical', +# 'no hands-on', +# 'no real world', +# 'did not understand', +# "didn't understand", +# 'not able to understand', +# 'unable 
to understand', +# 'difficult to understand', +# 'hard to understand', +# 'concepts are difficult', +# 'concepts difficult', +# 'cant understand', +# "can't understand", +# 'not understandable', - # Performance - enable_caching: bool = True - cache_size: int = 500 - batch_size: int = 32 +# # Improvement needed +# 'improve class', +# 'improvement needed', +# 'needs improvement', +# 'need improvement', +# 'should improve', +# 'must improve', +# 'not helpful', +# 'not clear', +# 'communication skills need improvement', +# 'improve communication', - class Config: - env_file = ".env" - env_file_encoding = 'utf-8' - extra = 'ignore' +# # Pace/time issues +# 'lectures are going fast', +# 'going too fast', +# 'too fast', +# 'too slow', +# 'too lag', +# 'lag', +# 'lagging', +# 'lag in teaching', +# 'not managing time', +# 'poor time management', +# 'time management issue', - @validator('min_comment_words') - def validate_min_words(cls, v): - if v < 0: - raise ValueError('min_comment_words must be non-negative') - return v +# # Engagement issues +# 'not interested', +# 'no interest', +# 'going for attendance', +# 'just for attendance', +# 'only for attendance', +# 'not at all', +# 'nothing learnt', +# 'learned nothing', +# 'no improvement', +# 'same teaching', +# 'monotonous', +# 'sleeping in class', - @validator('combined_weight_vader', 'combined_weight_roberta') - def validate_weights(cls, v): - if not 0 <= v <= 1: - raise ValueError('Weights must be between 0 and 1') - return v - -@lru_cache() -def get_settings() -> Settings: - """Cached settings instance""" - settings = Settings() - # Normalize weights if needed - total = settings.combined_weight_vader + settings.combined_weight_roberta - if not (0.99 <= total <= 1.01): - logger.warning(f"Weights sum to {total}, normalizing to 1.0") - settings.combined_weight_vader /= total - settings.combined_weight_roberta /= total - return settings - -# Pydantic Models -class FacultyInfo(BaseModel): - faculty_name: str = Field(..., 
min_length=1, max_length=200) - staff_id: str = Field(..., min_length=1, max_length=50) - course_code: str = Field(..., min_length=1, max_length=50) - course_name: str = Field(..., min_length=1, max_length=200) - -class CommentAnalysisRequest(BaseModel): - comments: List[str] = Field(..., min_items=1) - faculty_info: FacultyInfo +# # Value/utility issues +# 'waste of time', +# 'wasting time', +# 'waste our time', +# 'no use', +# 'useless', - @validator('comments') - def validate_comments(cls, v): - settings = get_settings() - - if len(v) > settings.max_comments_per_request: - raise ValueError( - f'Maximum {settings.max_comments_per_request} comments per request' - ) - - for idx, comment in enumerate(v): - if len(comment) > settings.max_comment_length: - raise ValueError( - f'Comment {idx} exceeds maximum length of {settings.max_comment_length} characters' - ) - - return v - -class SentimentDistribution(BaseModel): - positive_percentage: float - negative_percentage: float - neutral_percentage: float - -class DetailedScores(BaseModel): - average_positive: float - average_negative: float - average_neutral: float - average_compound: Optional[float] = None - -class DetailedAnalysis(BaseModel): - vader_scores: DetailedScores - roberta_scores: DetailedScores - -class AnalysisResult(BaseModel): - total_comments: int - positive_comments: int - negative_comments: int - neutral_comments: int - positive_sentiment: float - negative_sentiment: float - neutral_sentiment: float - overall_sentiment: str - sentiment_distribution: SentimentDistribution - negative_comments_summary: str - negative_comments_list: List[str] - key_insights: List[str] - recommendations: List[str] - detailed_analysis: DetailedAnalysis - faculty_info: Dict[str, str] - analysis_timestamp: str - -class CommentAnalysisResponse(BaseModel): - success: bool - analysis: Optional[AnalysisResult] = None - message: str - -# Initialize FastAPI app -app = FastAPI( - title=get_settings().app_name, - 
version=get_settings().app_version, - description="Advanced sentiment analysis service for educational feedback" -) - -# Add CORS middleware -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) +# # Administrative issues +# 'military rules', +# 'strict rules', +# 'too strict', +# 'very strict', +# 'attendance issue', +# 'attendance problem', +# 'not providing attendance', +# 'claim od', + +# # Workload issues +# 'too many projects', +# 'many projects review', +# 'trouble to make', +# 'difficult to make', +# 'hard to make', +# 'placement activities', # When context is negative +# ] -# Global variables for models -sia = None -tokenizer = None -model = None -device = None -summarizer = None +# NEGATIVE_REGEXES = [ +# # Teaching quality patterns +# re.compile(r"\b(very|extremely|quite|so)\s+(poor|bad|weak)\s+(in\s+)?(teaching|knowledge|communication)", re.IGNORECASE), +# re.compile(r"\bpoor\s+(teaching|teacher|faculty|knowledge|communication)", re.IGNORECASE), +# re.compile(r"\b(teaching|knowledge)\s+(is\s+)?(poor|bad|weak|lacking)", re.IGNORECASE), + +# # Boring/engagement patterns +# re.compile(r"\b(boring|dull|monotonous)\s+(class|classes|subject|lecture|lectures)", re.IGNORECASE), +# re.compile(r"\b(class|classes|subject|lecture|lectures)\s+(is|are)\s+(boring|dull|monotonous)", re.IGNORECASE), + +# # Voice/communication patterns +# re.compile(r"\b(low|soft|quiet)\s+voice\b", re.IGNORECASE), +# re.compile(r"\bvoice\s+(is\s+)?(low|soft|quiet|not clear)", re.IGNORECASE), +# re.compile(r"\b(cannot|can't|cant|unable to)\s+hear", re.IGNORECASE), + +# # Resource/support patterns +# re.compile(r"\b(no|not|insufficient|lack of)\s+(proper|sufficient|enough|regular)?\s*(classes|notes|support|staff|faculty)", re.IGNORECASE), +# re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support|classes)", re.IGNORECASE), + +# # Understanding/clarity patterns +# re.compile(r"\b(cannot|can't|cant|unable 
to|difficult to|hard to)\s+understand", re.IGNORECASE), +# re.compile(r"\b(not|difficult|hard)\s+(able\s+to\s+)?understand(\s+the)?(\s+(concepts?|teaching|lectures?))?", re.IGNORECASE), +# re.compile(r"\bconcepts?\s+(are\s+)?(difficult|hard|tough|complex)\s+to\s+understand", re.IGNORECASE), + +# # Improvement patterns +# re.compile(r"\b(need|needs|needed|require|requires)\s+(some\s+)?(improvement|to improve)", re.IGNORECASE), +# re.compile(r"\b(should|must|have to)\s+improve", re.IGNORECASE), +# re.compile(r"\bimprovement\s+(is\s+)?need(ed)?", re.IGNORECASE), + +# # Pace patterns +# re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(is|are|going)\s+(too|very)\s+(fast|slow)", re.IGNORECASE), +# re.compile(r"\b(too|very)\s+(fast|slow|lag|lagging)", re.IGNORECASE), + +# # Time management patterns +# re.compile(r"\b(not|poor|bad)\s+(managing|managing)\s+time", re.IGNORECASE), +# re.compile(r"\btime\s+management\s+(is\s+)?(poor|bad|lacking)", re.IGNORECASE), + +# # Attendance/engagement patterns +# re.compile(r"\b(just|only)\s+(for|going for)\s+attendance", re.IGNORECASE), +# re.compile(r"\b(going|attend|attending)\s+(to|for)\s+(her|his|their)\s+class\s+(just|only)\s+for\s+attendance", re.IGNORECASE), +# re.compile(r"\bnot\s+(at\s+all\s+)?(interested|engaging|helpful)", re.IGNORECASE), + +# # Value patterns +# re.compile(r"\b(waste|wasting)\s+(of\s+)?time", re.IGNORECASE), +# re.compile(r"\b(no\s+use|useless|not useful)", re.IGNORECASE), + +# # Workload patterns +# re.compile(r"\b(too\s+)?many\s+projects", re.IGNORECASE), +# re.compile(r"\btrouble\s+to\s+(make|complete|do)", re.IGNORECASE), + +# # Administrative patterns +# re.compile(r"\bmilitary\s+rules", re.IGNORECASE), +# re.compile(r"\b(too|very)\s+strict", re.IGNORECASE), +# re.compile(r"\battendance\s+(issue|problem)", re.IGNORECASE), +# re.compile(r"\bnot\s+providing\s+attendance", re.IGNORECASE), +# re.compile(r"\bclaim\s+od", re.IGNORECASE), + +# # Placement/scheduling patterns +# 
re.compile(r"\bplacement\s+activities\s+(and|with)\s+(attendance|issue|problem)", re.IGNORECASE), +# re.compile(r"\b(class|classes)\s+(intersecting|conflicting)\s+with\s+placement", re.IGNORECASE), +# ] -# Enhanced heuristic phrase/regex rules for explicit negative feedback -NEGATIVE_PHRASES = [ - # Teaching quality issues - 'very poor', - 'extremely poor', - 'poor in teaching', - 'poor teaching level', - 'poor teaching', - 'bad teacher', - 'bad teaching', - 'not good', # Keep but check it's not "no negative" - 'not satisfied', - 'not satisfactory', - - # Content/delivery issues - 'boring class', - 'boring classes', - 'boring subject', - 'subject is boring', - 'low voice', - 'voice is low', - 'cannot hear', - "can't hear", - 'speak louder', - - # Resource/support issues - 'need more staff', - 'need more faculty', - 'insufficient staff', - 'lack of staff', - 'not sufficient', - 'insufficient', - 'not enough', - 'no classes', - 'no regular classes', - 'not sufficient classes', - - # Knowledge/understanding issues - 'lack of knowledge', - 'better knowledge needed', - 'poor knowledge', - 'knowledge is lacking', - 'practical knowledge lacking', - 'no practical', - 'lack of practical', - 'no hands-on', - 'no real world', - 'did not understand', - "didn't understand", - 'not able to understand', - 'unable to understand', - 'difficult to understand', - 'hard to understand', - 'concepts are difficult', - 'concepts difficult', - 'cant understand', - "can't understand", - 'not understandable', - - # Improvement needed - 'improve class', - 'improvement needed', - 'needs improvement', - 'need improvement', - 'should improve', - 'must improve', - 'not helpful', - 'not clear', - 'communication skills need improvement', - 'improve communication', - - # Pace/time issues - 'lectures are going fast', - 'going too fast', - 'too fast', - 'too slow', - 'too lag', - 'lag', - 'lagging', - 'lag in teaching', - 'not managing time', - 'poor time management', - 'time management issue', - - # 
Engagement issues - 'not interested', - 'no interest', - 'going for attendance', - 'just for attendance', - 'only for attendance', - 'not at all', - 'nothing learnt', - 'learned nothing', - 'no improvement', - 'same teaching', - 'monotonous', - 'sleeping in class', - - # Value/utility issues - 'waste of time', - 'wasting time', - 'waste our time', - 'no use', - 'useless', +# META_COMMENT_PATTERNS = [ +# re.compile(r"^no\s+negative\s+(comments?|feedback|remarks?)", re.IGNORECASE), +# re.compile(r"^no\s+negative\s+comments?\s+on\s+the\s+(faculty|teacher|staff|course)", re.IGNORECASE), +# re.compile(r"^no\s+(issues?|problems?|complaints?)\.?$", re.IGNORECASE), +# re.compile(r"^no\s+(issues?|problems?|complaints?)\s+(at\s+all|whatsoever)", re.IGNORECASE), + +# # "Everything is good" patterns +# re.compile(r"^(everything|all)\s+(is\s+)?(good|fine|ok|okay|great|perfect|excellent)", re.IGNORECASE), +# re.compile(r"^no,?\s+(everything|all)\s+(is\s+)?(good|fine|ok|okay)", re.IGNORECASE), +# re.compile(r"^(all\s+)?good\.?$", re.IGNORECASE), +# re.compile(r"^everything\s+at\s+the\s+too\s+only", re.IGNORECASE), # From your data + +# # "Nothing" patterns +# re.compile(r"^nothing\.?$", re.IGNORECASE), +# re.compile(r"^nothing\s+(to\s+)?(say|comment|mention|add)", re.IGNORECASE), +# re.compile(r"^nothing,?\s+(and\s+)?(all|everything)\s+(is\s+)?(good|fine)", re.IGNORECASE), + +# # "No more comments" patterns +# re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE), +# re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE), +# re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE), - # Administrative issues - 'military rules', - 'strict rules', - 'too strict', - 'very strict', - 'attendance issue', - 'attendance problem', - 'not providing attendance', - 'claim od', +# # Empty/nil responses +# re.compile(r"^(nil|none|na|n/a|nill)\.?$", re.IGNORECASE), +# re.compile(r"^(no|nothing|none)\.?$", re.IGNORECASE), - # Workload issues - 'too 
many projects', - 'many projects review', - 'trouble to make', - 'difficult to make', - 'hard to make', - 'placement activities', # When context is negative -] - -NEGATIVE_REGEXES = [ - # Teaching quality patterns - re.compile(r"\b(very|extremely|quite|so)\s+(poor|bad|weak)\s+(in\s+)?(teaching|knowledge|communication)", re.IGNORECASE), - re.compile(r"\bpoor\s+(teaching|teacher|faculty|knowledge|communication)", re.IGNORECASE), - re.compile(r"\b(teaching|knowledge)\s+(is\s+)?(poor|bad|weak|lacking)", re.IGNORECASE), - - # Boring/engagement patterns - re.compile(r"\b(boring|dull|monotonous)\s+(class|classes|subject|lecture|lectures)", re.IGNORECASE), - re.compile(r"\b(class|classes|subject|lecture|lectures)\s+(is|are)\s+(boring|dull|monotonous)", re.IGNORECASE), - - # Voice/communication patterns - re.compile(r"\b(low|soft|quiet)\s+voice\b", re.IGNORECASE), - re.compile(r"\bvoice\s+(is\s+)?(low|soft|quiet|not clear)", re.IGNORECASE), - re.compile(r"\b(cannot|can't|cant|unable to)\s+hear", re.IGNORECASE), - - # Resource/support patterns - re.compile(r"\b(no|not|insufficient|lack of)\s+(proper|sufficient|enough|regular)?\s*(classes|notes|support|staff|faculty)", re.IGNORECASE), - re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support|classes)", re.IGNORECASE), - - # Understanding/clarity patterns - re.compile(r"\b(cannot|can't|cant|unable to|difficult to|hard to)\s+understand", re.IGNORECASE), - re.compile(r"\b(not|difficult|hard)\s+(able\s+to\s+)?understand(\s+the)?(\s+(concepts?|teaching|lectures?))?", re.IGNORECASE), - re.compile(r"\bconcepts?\s+(are\s+)?(difficult|hard|tough|complex)\s+to\s+understand", re.IGNORECASE), - - # Improvement patterns - re.compile(r"\b(need|needs|needed|require|requires)\s+(some\s+)?(improvement|to improve)", re.IGNORECASE), - re.compile(r"\b(should|must|have to)\s+improve", re.IGNORECASE), - re.compile(r"\bimprovement\s+(is\s+)?need(ed)?", re.IGNORECASE), - - # Pace patterns - 
re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(is|are|going)\s+(too|very)\s+(fast|slow)", re.IGNORECASE), - re.compile(r"\b(too|very)\s+(fast|slow|lag|lagging)", re.IGNORECASE), - - # Time management patterns - re.compile(r"\b(not|poor|bad)\s+(managing|managing)\s+time", re.IGNORECASE), - re.compile(r"\btime\s+management\s+(is\s+)?(poor|bad|lacking)", re.IGNORECASE), - - # Attendance/engagement patterns - re.compile(r"\b(just|only)\s+(for|going for)\s+attendance", re.IGNORECASE), - re.compile(r"\b(going|attend|attending)\s+(to|for)\s+(her|his|their)\s+class\s+(just|only)\s+for\s+attendance", re.IGNORECASE), - re.compile(r"\bnot\s+(at\s+all\s+)?(interested|engaging|helpful)", re.IGNORECASE), - - # Value patterns - re.compile(r"\b(waste|wasting)\s+(of\s+)?time", re.IGNORECASE), - re.compile(r"\b(no\s+use|useless|not useful)", re.IGNORECASE), - - # Workload patterns - re.compile(r"\b(too\s+)?many\s+projects", re.IGNORECASE), - re.compile(r"\btrouble\s+to\s+(make|complete|do)", re.IGNORECASE), - - # Administrative patterns - re.compile(r"\bmilitary\s+rules", re.IGNORECASE), - re.compile(r"\b(too|very)\s+strict", re.IGNORECASE), - re.compile(r"\battendance\s+(issue|problem)", re.IGNORECASE), - re.compile(r"\bnot\s+providing\s+attendance", re.IGNORECASE), - re.compile(r"\bclaim\s+od", re.IGNORECASE), - - # Placement/scheduling patterns - re.compile(r"\bplacement\s+activities\s+(and|with)\s+(attendance|issue|problem)", re.IGNORECASE), - re.compile(r"\b(class|classes)\s+(intersecting|conflicting)\s+with\s+placement", re.IGNORECASE), -] - -META_COMMENT_PATTERNS = [ -re.compile(r"^no\s+negative\s+(comments?|feedback|remarks?)", re.IGNORECASE), - re.compile(r"^no\s+negative\s+comments?\s+on\s+the\s+(faculty|teacher|staff|course)", re.IGNORECASE), - re.compile(r"^no\s+(issues?|problems?|complaints?)\.?$", re.IGNORECASE), - re.compile(r"^no\s+(issues?|problems?|complaints?)\s+(at\s+all|whatsoever)", re.IGNORECASE), - - # "Everything is good" patterns - 
re.compile(r"^(everything|all)\s+(is\s+)?(good|fine|ok|okay|great|perfect|excellent)", re.IGNORECASE), - re.compile(r"^no,?\s+(everything|all)\s+(is\s+)?(good|fine|ok|okay)", re.IGNORECASE), - re.compile(r"^(all\s+)?good\.?$", re.IGNORECASE), - re.compile(r"^everything\s+at\s+the\s+too\s+only", re.IGNORECASE), # From your data - - # "Nothing" patterns - re.compile(r"^nothing\.?$", re.IGNORECASE), - re.compile(r"^nothing\s+(to\s+)?(say|comment|mention|add)", re.IGNORECASE), - re.compile(r"^nothing,?\s+(and\s+)?(all|everything)\s+(is\s+)?(good|fine)", re.IGNORECASE), - - # "No more comments" patterns - re.compile(r"^no\s+more\s+(comments?|remarks?|feedback)", re.IGNORECASE), - re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)", re.IGNORECASE), - re.compile(r"^no\s+remarks?(\s+(about|on))?", re.IGNORECASE), - - # Empty/nil responses - re.compile(r"^(nil|none|na|n/a|nill)\.?$", re.IGNORECASE), - re.compile(r"^(no|nothing|none)\.?$", re.IGNORECASE), - - # Positive meta-comments (not actual feedback) - re.compile(r"^(it's\s+|its\s+)?(all\s+)?good\.?$", re.IGNORECASE), - re.compile(r"^fine\.?$", re.IGNORECASE), - re.compile(r"^ok(ay)?\.?$", re.IGNORECASE), - re.compile(r"^great\.?$", re.IGNORECASE), - re.compile(r"^nice\.?$", re.IGNORECASE), -] +# # Positive meta-comments (not actual feedback) +# re.compile(r"^(it's\s+|its\s+)?(all\s+)?good\.?$", re.IGNORECASE), +# re.compile(r"^fine\.?$", re.IGNORECASE), +# re.compile(r"^ok(ay)?\.?$", re.IGNORECASE), +# re.compile(r"^great\.?$", re.IGNORECASE), +# re.compile(r"^nice\.?$", re.IGNORECASE), +# ] -def is_meta_comment(text: str) -> bool: - """ - Check if comment is a meta-comment (not actual feedback). - These are generic statements that don't provide substantive feedback. - """ - if not text: - return True # Empty text is meta +# def is_meta_comment(text: str) -> bool: +# """ +# Check if comment is a meta-comment (not actual feedback). +# These are generic statements that don't provide substantive feedback. 
+# """ +# if not text: +# return True # Empty text is meta - text = text.strip() +# text = text.strip() - # Check length - very short comments are likely meta - if len(text) < 3: - logger.debug(f"Meta-comment (too short): '{text}'") - return True +# # Check length - very short comments are likely meta +# if len(text) < 3: +# logger.debug(f"Meta-comment (too short): '{text}'") +# return True - # Check against patterns - for pattern in META_COMMENT_PATTERNS: - if pattern.match(text): - logger.debug(f"Meta-comment detected: '{text[:50]}...'") - return True +# # Check against patterns +# for pattern in META_COMMENT_PATTERNS: +# if pattern.match(text): +# logger.debug(f"Meta-comment detected: '{text[:50]}...'") +# return True - return False +# return False -def is_explicit_negative(text: str) -> bool: - """ - Check if text contains explicit negative phrases. - IMPORTANT: Must check if it's a meta-comment FIRST. - """ - if not text: - return False +# def is_explicit_negative(text: str) -> bool: +# """ +# Check if text contains explicit negative phrases. +# IMPORTANT: Must check if it's a meta-comment FIRST. 
+# """ +# if not text: +# return False - # CRITICAL: Don't classify meta-comments as negative - if is_meta_comment(text): - return False +# # CRITICAL: Don't classify meta-comments as negative +# if is_meta_comment(text): +# return False - lower = text.lower() +# lower = text.lower() - # Check phrases - for phrase in NEGATIVE_PHRASES: - if phrase in lower: - # Double-check it's not a false positive like "no negative comments" - if phrase == 'not good' and 'no negative' in lower: - continue - if phrase == 'no interest' and 'no negative' in lower: - continue +# # Check phrases +# for phrase in NEGATIVE_PHRASES: +# if phrase in lower: +# # Double-check it's not a false positive like "no negative comments" +# if phrase == 'not good' and 'no negative' in lower: +# continue +# if phrase == 'no interest' and 'no negative' in lower: +# continue - logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'") - return True +# logger.debug(f"Negative phrase detected: '{phrase}' in '{text[:50]}...'") +# return True - # Check regexes - for regex in NEGATIVE_REGEXES: - if regex.search(text): - logger.debug(f"Negative pattern matched: {regex.pattern} in '{text[:50]}...'") - return True +# # Check regexes +# for regex in NEGATIVE_REGEXES: +# if regex.search(text): +# logger.debug(f"Negative pattern matched: {regex.pattern} in '{text[:50]}...'") +# return True - return False +# return False -def initialize_models(): - """Initialize sentiment analysis models with caching support""" - global sia, tokenizer, model, device, summarizer +# def initialize_models(): +# """Initialize sentiment analysis models with caching support""" +# global sia, tokenizer, model, device, summarizer - try: - settings = get_settings() - logger.info("Initializing sentiment analysis models...") - - # Initialize VADER (NLTK data already downloaded) - sia = SentimentIntensityAnalyzer() - logger.info("✓ VADER initialized") - - # Initialize RoBERTa with caching - cache_dir = settings.model_cache_dir - 
os.makedirs(cache_dir, exist_ok=True) +# try: +# settings = get_settings() +# logger.info("Initializing sentiment analysis models...") - tokenizer = AutoTokenizer.from_pretrained( - settings.roberta_model_name, - cache_dir=cache_dir - ) - model = AutoModelForSequenceClassification.from_pretrained( - settings.roberta_model_name, - cache_dir=cache_dir - ) +# # Initialize VADER (NLTK data already downloaded) +# sia = SentimentIntensityAnalyzer() +# logger.info("✓ VADER initialized") - device = "cuda" if torch.cuda.is_available() else "cpu" - model.to(device) - model.eval() - logger.info(f"✓ RoBERTa initialized on device: {device}") +# # Initialize RoBERTa with caching +# cache_dir = settings.model_cache_dir +# os.makedirs(cache_dir, exist_ok=True) - # Initialize summarizer (optional) - if settings.use_abstractive_summary: - try: - summarizer = pipeline( - "summarization", - model=settings.summarizer_model, - device=0 if device == "cuda" else -1 - ) - logger.info("✓ Summarizer initialized") - except Exception as e: - logger.warning(f"Summarizer initialization failed: {e}") - summarizer = None +# tokenizer = AutoTokenizer.from_pretrained( +# settings.roberta_model_name, +# cache_dir=cache_dir +# ) +# model = AutoModelForSequenceClassification.from_pretrained( +# settings.roberta_model_name, +# cache_dir=cache_dir +# ) - logger.info("✓ All models initialized successfully") +# device = "cuda" if torch.cuda.is_available() else "cpu" +# model.to(device) +# model.eval() +# logger.info(f"✓ RoBERTa initialized on device: {device}") - except Exception as e: - logger.error(f"Error initializing models: {e}") - raise e +# # Initialize summarizer (optional) +# if settings.use_abstractive_summary: +# try: +# summarizer = pipeline( +# "summarization", +# model=settings.summarizer_model, +# device=0 if device == "cuda" else -1 +# ) +# logger.info("✓ Summarizer initialized") +# except Exception as e: +# logger.warning(f"Summarizer initialization failed: {e}") +# summarizer = None + +# 
logger.info("✓ All models initialized successfully") + +# except Exception as e: +# logger.error(f"Error initializing models: {e}") +# raise e -@lru_cache(maxsize=500) -def vader_sentiment_cached(text: str) -> tuple: - """Cached VADER sentiment analysis""" - scores = sia.polarity_scores(text) - return (scores['neg'], scores['neu'], scores['pos'], scores['compound']) +# @lru_cache(maxsize=500) +# def vader_sentiment_cached(text: str) -> tuple: +# """Cached VADER sentiment analysis""" +# scores = sia.polarity_scores(text) +# return (scores['neg'], scores['neu'], scores['pos'], scores['compound']) -def vader_sentiment(text: str) -> Dict[str, float]: - """VADER sentiment analysis with caching support""" - try: - settings = get_settings() - if settings.enable_caching: - neg, neu, pos, compound = vader_sentiment_cached(text) - return { - 'vader_neg': neg, - 'vader_neu': neu, - 'vader_pos': pos, - 'vader_compound': compound - } - else: - scores = sia.polarity_scores(text) - return { - 'vader_neg': scores['neg'], - 'vader_neu': scores['neu'], - 'vader_pos': scores['pos'], - 'vader_compound': scores['compound'] - } - except Exception as e: - logger.warning(f"VADER analysis failed for text: {e}") - return {'vader_neg': 0.0, 'vader_neu': 1.0, 'vader_pos': 0.0, 'vader_compound': 0.0} +# def vader_sentiment(text: str) -> Dict[str, float]: +# """VADER sentiment analysis with caching support""" +# try: +# settings = get_settings() +# if settings.enable_caching: +# neg, neu, pos, compound = vader_sentiment_cached(text) +# return { +# 'vader_neg': neg, +# 'vader_neu': neu, +# 'vader_pos': pos, +# 'vader_compound': compound +# } +# else: +# scores = sia.polarity_scores(text) +# return { +# 'vader_neg': scores['neg'], +# 'vader_neu': scores['neu'], +# 'vader_pos': scores['pos'], +# 'vader_compound': scores['compound'] +# } +# except Exception as e: +# logger.warning(f"VADER analysis failed for text: {e}") +# return {'vader_neg': 0.0, 'vader_neu': 1.0, 'vader_pos': 0.0, 
'vader_compound': 0.0} -def roberta_sentiment_batch(texts: List[str]) -> List[Dict[str, float]]: - """Batch RoBERTa sentiment analysis for better performance""" - try: - settings = get_settings() - results = [] +# def roberta_sentiment_batch(texts: List[str]) -> List[Dict[str, float]]: +# """Batch RoBERTa sentiment analysis for better performance""" +# try: +# settings = get_settings() +# results = [] - for i in range(0, len(texts), settings.batch_size): - batch = texts[i:i + settings.batch_size] +# for i in range(0, len(texts), settings.batch_size): +# batch = texts[i:i + settings.batch_size] - encoded = tokenizer( - batch, - return_tensors='pt', - truncation=True, - max_length=512, - padding=True - ) - encoded = {k: v.to(device) for k, v in encoded.items()} +# encoded = tokenizer( +# batch, +# return_tensors='pt', +# truncation=True, +# max_length=512, +# padding=True +# ) +# encoded = {k: v.to(device) for k, v in encoded.items()} - with torch.no_grad(): - outputs = model(**encoded) +# with torch.no_grad(): +# outputs = model(**encoded) - for output in outputs.logits: - scores = softmax(output.cpu().numpy()) - results.append({ - 'roberta_neg': float(scores[0]), - 'roberta_neu': float(scores[1]), - 'roberta_pos': float(scores[2]) - }) +# for output in outputs.logits: +# scores = softmax(output.cpu().numpy()) +# results.append({ +# 'roberta_neg': float(scores[0]), +# 'roberta_neu': float(scores[1]), +# 'roberta_pos': float(scores[2]) +# }) - return results +# return results - except Exception as e: - logger.warning(f"RoBERTa batch analysis failed: {e}") - return [{'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0} for _ in texts] +# except Exception as e: +# logger.warning(f"RoBERTa batch analysis failed: {e}") +# return [{'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0} for _ in texts] -def roberta_sentiment(text: str) -> Dict[str, float]: - """Single text RoBERTa sentiment analysis""" - try: - encoded_text = tokenizer(text, 
return_tensors='pt', truncation=True, max_length=512) - encoded_text = {k: v.to(device) for k, v in encoded_text.items()} +# def roberta_sentiment(text: str) -> Dict[str, float]: +# """Single text RoBERTa sentiment analysis""" +# try: +# encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512) +# encoded_text = {k: v.to(device) for k, v in encoded_text.items()} - with torch.no_grad(): - output = model(**encoded_text) +# with torch.no_grad(): +# output = model(**encoded_text) - scores = softmax(output[0][0].cpu().numpy()) - return { - 'roberta_neg': float(scores[0]), - 'roberta_neu': float(scores[1]), - 'roberta_pos': float(scores[2]) - } - except Exception as e: - logger.warning(f"RoBERTa analysis failed for text: {e}") - return {'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0} +# scores = softmax(output[0][0].cpu().numpy()) +# return { +# 'roberta_neg': float(scores[0]), +# 'roberta_neu': float(scores[1]), +# 'roberta_pos': float(scores[2]) +# } +# except Exception as e: +# logger.warning(f"RoBERTa analysis failed for text: {e}") +# return {'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0} -def overall_sentiment(row: pd.Series, settings: Settings) -> str: - """Determine overall sentiment using combined scores with configurable thresholds""" - combined_pos = row.get('combined_pos', 0.0) - combined_neg = row.get('combined_neg', 0.0) - combined_neu = row.get('combined_neu', 0.0) - vader_compound = row.get('vader_compound', 0.0) - roberta_neg = row.get('roberta_neg', 0.0) - roberta_pos = row.get('roberta_pos', 0.0) +# def overall_sentiment(row: pd.Series, settings: Settings) -> str: +# """Determine overall sentiment using combined scores with configurable thresholds""" +# combined_pos = row.get('combined_pos', 0.0) +# combined_neg = row.get('combined_neg', 0.0) +# combined_neu = row.get('combined_neu', 0.0) +# vader_compound = row.get('vader_compound', 0.0) +# roberta_neg = row.get('roberta_neg', 0.0) +# roberta_pos = 
row.get('roberta_pos', 0.0) - # Priority 1: Heuristic negative patterns override everything - if row.get('heuristic_negative') is True: - return 'Negative' +# # Priority 1: Heuristic negative patterns override everything +# if row.get('heuristic_negative') is True: +# return 'Negative' - # Priority 2: Strong negative signals - if ( - vader_compound <= settings.vader_neg_threshold or - roberta_neg >= settings.roberta_neg_threshold or - combined_neg >= max(combined_pos, combined_neu) - ): - return 'Negative' +# # Priority 2: Strong negative signals +# if ( +# vader_compound <= settings.vader_neg_threshold or +# roberta_neg >= settings.roberta_neg_threshold or +# combined_neg >= max(combined_pos, combined_neu) +# ): +# return 'Negative' - # Priority 3: Positive signals - if ( - vader_compound >= settings.vader_pos_threshold or - roberta_pos >= settings.roberta_pos_threshold or - combined_pos >= max(combined_neg, combined_neu) - ): - return 'Positive' +# # Priority 3: Positive signals +# if ( +# vader_compound >= settings.vader_pos_threshold or +# roberta_pos >= settings.roberta_pos_threshold or +# combined_pos >= max(combined_neg, combined_neu) +# ): +# return 'Positive' - # Default: Neutral - return 'Neutral' +# # Default: Neutral +# return 'Neutral' -def sanitize_text(text: str) -> str: - """Sanitize input text while preserving emojis""" - if not text: - return "" - # Remove control characters but keep printable characters and emojis - text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text) - # Normalize whitespace - text = ' '.join(text.split()) - return text.strip() +# def sanitize_text(text: str) -> str: +# """Sanitize input text while preserving emojis""" +# if not text: +# return "" +# # Remove control characters but keep printable characters and emojis +# text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]', '', text) +# # Normalize whitespace +# text = ' '.join(text.split()) +# return text.strip() -def analyze_comments_sentiment(comments: List[str]) -> 
Dict[str, Any]: - """Main sentiment analysis function with enhanced performance""" - try: - settings = get_settings() - logger.info(f"Received {len(comments)} comments for analysis") +# def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]: +# """Main sentiment analysis function with enhanced performance""" +# try: +# settings = get_settings() +# logger.info(f"Received {len(comments)} comments for analysis") - # Sanitize comments - sanitized_comments = [sanitize_text(comment) for comment in comments] +# # Sanitize comments +# sanitized_comments = [sanitize_text(comment) for comment in comments] - # FIXED: Changed < to <= to properly handle min_comment_words - filtered_comments = [ - comment for comment in sanitized_comments - if (settings.min_comment_words <= len(comment.split()) <= settings.max_comment_length) - ] +# # FIXED: Changed < to <= to properly handle min_comment_words +# filtered_comments = [ +# comment for comment in sanitized_comments +# if (settings.min_comment_words <= len(comment.split()) <= settings.max_comment_length) +# ] - logger.info(f"After filtering: {len(filtered_comments)} valid comments") +# logger.info(f"After filtering: {len(filtered_comments)} valid comments") - if not filtered_comments: - return { - "total_comments": 0, - "message": "No valid comments found for analysis" - } +# if not filtered_comments: +# return { +# "total_comments": 0, +# "message": "No valid comments found for analysis" +# } - # Create dataframe - df = pd.DataFrame({'comment': filtered_comments}) +# # Create dataframe +# df = pd.DataFrame({'comment': filtered_comments}) - # Detect meta-comments and explicit negatives - df['is_meta'] = df['comment'].apply(is_meta_comment) - df['heuristic_negative'] = df['comment'].apply(is_explicit_negative) +# # Detect meta-comments and explicit negatives +# df['is_meta'] = df['comment'].apply(is_meta_comment) +# df['heuristic_negative'] = df['comment'].apply(is_explicit_negative) - # Log detection results - 
meta_count = df['is_meta'].sum() - heuristic_neg_count = df['heuristic_negative'].sum() - logger.info(f"Detected {meta_count} meta-comments and {heuristic_neg_count} heuristic negatives") +# # Log detection results +# meta_count = df['is_meta'].sum() +# heuristic_neg_count = df['heuristic_negative'].sum() +# logger.info(f"Detected {meta_count} meta-comments and {heuristic_neg_count} heuristic negatives") - # VADER sentiment analysis - vader_results = [] - for text in df['comment']: - vader_results.append(vader_sentiment(text)) +# # VADER sentiment analysis +# vader_results = [] +# for text in df['comment']: +# vader_results.append(vader_sentiment(text)) - # RoBERTa sentiment analysis (batch) - roberta_results = roberta_sentiment_batch(df['comment'].tolist()) +# # RoBERTa sentiment analysis (batch) +# roberta_results = roberta_sentiment_batch(df['comment'].tolist()) - # Combine results - vader_df = pd.DataFrame(vader_results) - roberta_df = pd.DataFrame(roberta_results) - final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1) +# # Combine results +# vader_df = pd.DataFrame(vader_results) +# roberta_df = pd.DataFrame(roberta_results) +# final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1) - # Calculate combined scores - final_df['combined_pos'] = ( - settings.combined_weight_vader * final_df['vader_pos'] + - settings.combined_weight_roberta * final_df['roberta_pos'] - ) - final_df['combined_neg'] = ( - settings.combined_weight_vader * final_df['vader_neg'] + - settings.combined_weight_roberta * final_df['roberta_neg'] - ) - final_df['combined_neu'] = ( - settings.combined_weight_vader * final_df['vader_neu'] + - settings.combined_weight_roberta * final_df['roberta_neu'] - ) +# # Calculate combined scores +# final_df['combined_pos'] = ( +# settings.combined_weight_vader * final_df['vader_pos'] + +# settings.combined_weight_roberta * final_df['roberta_pos'] +# ) +# final_df['combined_neg'] = ( +# 
settings.combined_weight_vader * final_df['vader_neg'] + +# settings.combined_weight_roberta * final_df['roberta_neg'] +# ) +# final_df['combined_neu'] = ( +# settings.combined_weight_vader * final_df['vader_neu'] + +# settings.combined_weight_roberta * final_df['roberta_neu'] +# ) - # Classify overall sentiment (meta-comments become Neutral) - final_df['Overall_Sentiment'] = final_df.apply( - lambda row: 'Neutral' if row.get('is_meta') else overall_sentiment(row, settings), - axis=1 - ) +# # Classify overall sentiment (meta-comments become Neutral) +# final_df['Overall_Sentiment'] = final_df.apply( +# lambda row: 'Neutral' if row.get('is_meta') else overall_sentiment(row, settings), +# axis=1 +# ) - # Calculate statistics - total_comments = len(final_df) - positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive']) - negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative']) - neutral_count = len(final_df[final_df['Overall_Sentiment'] == 'Neutral']) +# # Calculate statistics +# total_comments = len(final_df) +# positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive']) +# negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative']) +# neutral_count = len(final_df[final_df['Overall_Sentiment'] == 'Neutral']) - logger.info( - f"Results: {positive_count} positive, " - f"{negative_count} negative, {neutral_count} neutral" - ) +# logger.info( +# f"Results: {positive_count} positive, " +# f"{negative_count} negative, {neutral_count} neutral" +# ) - # Average scores - avg_positive = float(final_df['combined_pos'].mean()) - avg_negative = float(final_df['combined_neg'].mean()) - avg_neutral = float(final_df['combined_neu'].mean()) +# # Average scores +# avg_positive = float(final_df['combined_pos'].mean()) +# avg_negative = float(final_df['combined_neg'].mean()) +# avg_neutral = float(final_df['combined_neu'].mean()) - # Determine overall sentiment label - if avg_positive > max(avg_negative, avg_neutral): - 
overall_sentiment_label = "Positive" - elif avg_negative > max(avg_positive, avg_neutral): - overall_sentiment_label = "Negative" - else: - overall_sentiment_label = "Neutral" +# # Determine overall sentiment label +# if avg_positive > max(avg_negative, avg_neutral): +# overall_sentiment_label = "Positive" +# elif avg_negative > max(avg_positive, avg_neutral): +# overall_sentiment_label = "Negative" +# else: +# overall_sentiment_label = "Neutral" - # Process negative comments - negative_summary = "" - negative_comments_list = [] - negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative'] +# # Process negative comments +# negative_summary = "" +# negative_comments_list = [] +# negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative'] - if len(negative_comments) > 0: - negative_comments_list = negative_comments['comment'].tolist() +# if len(negative_comments) > 0: +# negative_comments_list = negative_comments['comment'].tolist() - try: - # Get top negative comments - top_idx = negative_comments['combined_neg'].nlargest(3).index - top_comments = negative_comments.loc[top_idx, 'comment'].tolist() +# try: +# # Get top negative comments +# top_idx = negative_comments['combined_neg'].nlargest(3).index +# top_comments = negative_comments.loc[top_idx, 'comment'].tolist() - if settings.use_abstractive_summary and summarizer is not None: - negative_text = " ".join(top_comments) - if len(negative_text) > 1000: - negative_text = negative_text[:1000] +# if settings.use_abstractive_summary and summarizer is not None: +# negative_text = " ".join(top_comments) +# if len(negative_text) > 1000: +# negative_text = negative_text[:1000] - summary_result = summarizer( - negative_text, - max_length=settings.max_summary_length, - min_length=settings.min_summary_length, - do_sample=False - ) - negative_summary = summary_result[0]['summary_text'] - else: - # Extractive summary - negative_summary = "; ".join(top_comments) - except Exception as e: - 
logger.warning(f"Summary generation failed: {e}") - negative_summary = "; ".join(negative_comments_list[:3]) +# summary_result = summarizer( +# negative_text, +# max_length=settings.max_summary_length, +# min_length=settings.min_summary_length, +# do_sample=False +# ) +# negative_summary = summary_result[0]['summary_text'] +# else: +# # Extractive summary +# negative_summary = "; ".join(top_comments) +# except Exception as e: +# logger.warning(f"Summary generation failed: {e}") +# negative_summary = "; ".join(negative_comments_list[:3]) - # Generate insights and recommendations - insights = [] - recommendations = [] +# # Generate insights and recommendations +# insights = [] +# recommendations = [] - if overall_sentiment_label == "Positive": - insights.extend([ - "Students have positive feedback overall", - "Teaching methods are well-received", - f"{positive_count}/{total_comments} comments are positive" - ]) - recommendations.extend([ - "Continue current teaching approach", - "Maintain student engagement strategies", - "Share successful practices with colleagues" - ]) - elif overall_sentiment_label == "Negative": - insights.extend([ - "Students have concerns that need attention", - "Some aspects of teaching may need improvement", - f"{negative_count}/{total_comments} comments indicate issues" - ]) - recommendations.extend([ - "Review teaching methods and materials", - "Consider additional student support", - "Schedule meetings to address student concerns", - "Focus on areas mentioned in negative feedback" - ]) - else: - insights.extend([ - "Mixed feedback from students", - "Some areas performing well, others need attention", - "Balance of positive and negative responses" - ]) - recommendations.extend([ - "Focus on areas with negative feedback", - "Maintain strengths while addressing weaknesses", - "Gather more specific feedback on improvement areas" - ]) +# if overall_sentiment_label == "Positive": +# insights.extend([ +# "Students have positive feedback overall", 
+# "Teaching methods are well-received", +# f"{positive_count}/{total_comments} comments are positive" +# ]) +# recommendations.extend([ +# "Continue current teaching approach", +# "Maintain student engagement strategies", +# "Share successful practices with colleagues" +# ]) +# elif overall_sentiment_label == "Negative": +# insights.extend([ +# "Students have concerns that need attention", +# "Some aspects of teaching may need improvement", +# f"{negative_count}/{total_comments} comments indicate issues" +# ]) +# recommendations.extend([ +# "Review teaching methods and materials", +# "Consider additional student support", +# "Schedule meetings to address student concerns", +# "Focus on areas mentioned in negative feedback" +# ]) +# else: +# insights.extend([ +# "Mixed feedback from students", +# "Some areas performing well, others need attention", +# "Balance of positive and negative responses" +# ]) +# recommendations.extend([ +# "Focus on areas with negative feedback", +# "Maintain strengths while addressing weaknesses", +# "Gather more specific feedback on improvement areas" +# ]) - return { - "total_comments": total_comments, - "positive_comments": positive_count, - "negative_comments": negative_count, - "neutral_comments": neutral_count, - "positive_sentiment": round(avg_positive, 3), - "negative_sentiment": round(avg_negative, 3), - "neutral_sentiment": round(avg_neutral, 3), - "overall_sentiment": overall_sentiment_label, - "sentiment_distribution": { - "positive_percentage": round((positive_count / total_comments) * 100, 1), - "negative_percentage": round((negative_count / total_comments) * 100, 1), - "neutral_percentage": round((neutral_count / total_comments) * 100, 1) - }, - "negative_comments_summary": negative_summary, - "negative_comments_list": negative_comments_list, - "key_insights": insights, - "recommendations": recommendations, - "detailed_analysis": { - "vader_scores": { - "average_positive": round(final_df['vader_pos'].mean(), 3), - 
"average_negative": round(final_df['vader_neg'].mean(), 3), - "average_neutral": round(final_df['vader_neu'].mean(), 3), - "average_compound": round(final_df['vader_compound'].mean(), 3) - }, - "roberta_scores": { - "average_positive": round(final_df['roberta_pos'].mean(), 3), - "average_negative": round(final_df['roberta_neg'].mean(), 3), - "average_neutral": round(final_df['roberta_neu'].mean(), 3) - } - }, - "analysis_timestamp": datetime.utcnow().isoformat() - } +# return { +# "total_comments": total_comments, +# "positive_comments": positive_count, +# "negative_comments": negative_count, +# "neutral_comments": neutral_count, +# "positive_sentiment": round(avg_positive, 3), +# "negative_sentiment": round(avg_negative, 3), +# "neutral_sentiment": round(avg_neutral, 3), +# "overall_sentiment": overall_sentiment_label, +# "sentiment_distribution": { +# "positive_percentage": round((positive_count / total_comments) * 100, 1), +# "negative_percentage": round((negative_count / total_comments) * 100, 1), +# "neutral_percentage": round((neutral_count / total_comments) * 100, 1) +# }, +# "negative_comments_summary": negative_summary, +# "negative_comments_list": negative_comments_list, +# "key_insights": insights, +# "recommendations": recommendations, +# "detailed_analysis": { +# "vader_scores": { +# "average_positive": round(final_df['vader_pos'].mean(), 3), +# "average_negative": round(final_df['vader_neg'].mean(), 3), +# "average_neutral": round(final_df['vader_neu'].mean(), 3), +# "average_compound": round(final_df['vader_compound'].mean(), 3) +# }, +# "roberta_scores": { +# "average_positive": round(final_df['roberta_pos'].mean(), 3), +# "average_negative": round(final_df['roberta_neg'].mean(), 3), +# "average_neutral": round(final_df['roberta_neu'].mean(), 3) +# } +# }, +# "analysis_timestamp": datetime.utcnow().isoformat() +# } - except Exception as e: - logger.error(f"Sentiment analysis failed: {e}", exc_info=True) - raise e +# except Exception as e: +# 
logger.error(f"Sentiment analysis failed: {e}", exc_info=True) +# raise e -@app.on_event("startup") -async def startup_event(): - """Initialize models on startup""" - try: - logger.info("=" * 80) - logger.info(f"Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") - logger.info("=" * 80) - initialize_models() - logger.info("✓ Service started successfully") - logger.info("=" * 80) - except Exception as e: - logger.error(f"✗ Startup failed: {e}") - raise e +# @app.on_event("startup") +# async def startup_event(): +# """Initialize models on startup""" +# try: +# logger.info("=" * 80) +# logger.info(f"Application Startup at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") +# logger.info("=" * 80) +# initialize_models() +# logger.info("✓ Service started successfully") +# logger.info("=" * 80) +# except Exception as e: +# logger.error(f"✗ Startup failed: {e}") +# raise e -@app.on_event("shutdown") -async def shutdown_event(): - """Cleanup on shutdown""" - logger.info("Service shutting down") +# @app.on_event("shutdown") +# async def shutdown_event(): +# """Cleanup on shutdown""" +# logger.info("Service shutting down") -@app.get("/") -async def root(): - """Root endpoint""" - return { - "service": get_settings().app_name, - "version": get_settings().app_version, - "status": "running", - "endpoints": { - "health": "/health", - "analyze": "/analyze-comments", - "config": "/config (debug mode only)", - "test": "/test" - } - } +# @app.get("/") +# async def root(): +# """Root endpoint""" +# return { +# "service": get_settings().app_name, +# "version": get_settings().app_version, +# "status": "running", +# "endpoints": { +# "health": "/health", +# "analyze": "/analyze-comments", +# "config": "/config (debug mode only)", +# "test": "/test" +# } +# } -@app.get("/health") -async def health_check(): - """Health check endpoint""" - models_loaded = sia is not None and model is not None and tokenizer is not None +# @app.get("/health") +# async def health_check(): +# 
"""Health check endpoint""" +# models_loaded = sia is not None and model is not None and tokenizer is not None - return { - "status": "healthy" if models_loaded else "unhealthy", - "service": "comment-analysis", - "version": get_settings().app_version, - "models_loaded": models_loaded, - "device": device if device else "not initialized", - "timestamp": datetime.utcnow().isoformat() - } +# return { +# "status": "healthy" if models_loaded else "unhealthy", +# "service": "comment-analysis", +# "version": get_settings().app_version, +# "models_loaded": models_loaded, +# "device": device if device else "not initialized", +# "timestamp": datetime.utcnow().isoformat() +# } -@app.post("/analyze-comments", response_model=CommentAnalysisResponse) -async def analyze_comments( - request: CommentAnalysisRequest, - settings: Settings = Depends(get_settings) -): - """ - Analyze comments for sentiment analysis using VADER and RoBERTa models - """ - try: - comments = request.comments - faculty_info = request.faculty_info +# @app.post("/analyze-comments", response_model=CommentAnalysisResponse) +# async def analyze_comments( +# request: CommentAnalysisRequest, +# settings: Settings = Depends(get_settings) +# ): +# """ +# Analyze comments for sentiment analysis using VADER and RoBERTa models +# """ +# try: +# comments = request.comments +# faculty_info = request.faculty_info - if not comments: - return CommentAnalysisResponse( - success=False, - analysis=None, - message="No comments provided for analysis" - ) +# if not comments: +# return CommentAnalysisResponse( +# success=False, +# analysis=None, +# message="No comments provided for analysis" +# ) - logger.info( - f"Analyzing {len(comments)} comments for " - f"{faculty_info.faculty_name} ({faculty_info.course_code})" - ) +# logger.info( +# f"Analyzing {len(comments)} comments for " +# f"{faculty_info.faculty_name} ({faculty_info.course_code})" +# ) - analysis_result = analyze_comments_sentiment(comments) +# analysis_result = 
analyze_comments_sentiment(comments) - if analysis_result.get("total_comments", 0) == 0: - return CommentAnalysisResponse( - success=False, - analysis=None, - message=analysis_result.get("message", "No valid comments to analyze") - ) +# if analysis_result.get("total_comments", 0) == 0: +# return CommentAnalysisResponse( +# success=False, +# analysis=None, +# message=analysis_result.get("message", "No valid comments to analyze") +# ) - analysis_result["faculty_info"] = { - "faculty_name": faculty_info.faculty_name, - "staff_id": faculty_info.staff_id, - "course_code": faculty_info.course_code, - "course_name": faculty_info.course_name - } +# analysis_result["faculty_info"] = { +# "faculty_name": faculty_info.faculty_name, +# "staff_id": faculty_info.staff_id, +# "course_code": faculty_info.course_code, +# "course_name": faculty_info.course_name +# } - return CommentAnalysisResponse( - success=True, - analysis=analysis_result, - message=f"Successfully analyzed {analysis_result['total_comments']} comments" - ) +# return CommentAnalysisResponse( +# success=True, +# analysis=analysis_result, +# message=f"Successfully analyzed {analysis_result['total_comments']} comments" +# ) - except ValueError as ve: - logger.warning(f"Validation error: {ve}") - raise HTTPException(status_code=400, detail=str(ve)) - except Exception as e: - logger.error(f"Analysis failed: {e}", exc_info=True) - raise HTTPException( - status_code=500, - detail="Analysis failed. Please try again later." - ) +# except ValueError as ve: +# logger.warning(f"Validation error: {ve}") +# raise HTTPException(status_code=400, detail=str(ve)) +# except Exception as e: +# logger.error(f"Analysis failed: {e}", exc_info=True) +# raise HTTPException( +# status_code=500, +# detail="Analysis failed. Please try again later." 
+# ) -@app.get("/config") -async def get_config(settings: Settings = Depends(get_settings)): - """Get current configuration (debug mode only)""" - if not settings.debug_mode: - raise HTTPException(status_code=404, detail="Not found") +# @app.get("/config") +# async def get_config(settings: Settings = Depends(get_settings)): +# """Get current configuration (debug mode only)""" +# if not settings.debug_mode: +# raise HTTPException(status_code=404, detail="Not found") - return { - "max_comments_per_request": settings.max_comments_per_request, - "max_comment_length": settings.max_comment_length, - "min_comment_words": settings.min_comment_words, - "vader_pos_threshold": settings.vader_pos_threshold, - "vader_neg_threshold": settings.vader_neg_threshold, - "roberta_pos_threshold": settings.roberta_pos_threshold, - "roberta_neg_threshold": settings.roberta_neg_threshold, - "combined_weight_vader": settings.combined_weight_vader, - "combined_weight_roberta": settings.combined_weight_roberta, - "enable_caching": settings.enable_caching, - "batch_size": settings.batch_size, - "use_abstractive_summary": settings.use_abstractive_summary - } +# return { +# "max_comments_per_request": settings.max_comments_per_request, +# "max_comment_length": settings.max_comment_length, +# "min_comment_words": settings.min_comment_words, +# "vader_pos_threshold": settings.vader_pos_threshold, +# "vader_neg_threshold": settings.vader_neg_threshold, +# "roberta_pos_threshold": settings.roberta_pos_threshold, +# "roberta_neg_threshold": settings.roberta_neg_threshold, +# "combined_weight_vader": settings.combined_weight_vader, +# "combined_weight_roberta": settings.combined_weight_roberta, +# "enable_caching": settings.enable_caching, +# "batch_size": settings.batch_size, +# "use_abstractive_summary": settings.use_abstractive_summary +# } -@app.get("/test") -async def test_endpoint(): - """Test endpoint to verify sentiment classification""" - test_cases = [ - "No more comments 😅", - "Overall 
good but too lag", - "Not interested to be in her class just we are going for attendance thats it not at all managing time.", - "Nothing to say anything just we are going to her class mean, only for attendance", - "Excellent teaching! Very clear explanations.", - "Good teacher with strong subject knowledge", - "Class is okay, nothing special" - ] +# @app.get("/test") +# async def test_endpoint(): +# """Test endpoint to verify sentiment classification""" +# test_cases = [ +# "No more comments 😅", +# "Overall good but too lag", +# "Not interested to be in her class just we are going for attendance thats it not at all managing time.", +# "Nothing to say anything just we are going to her class mean, only for attendance", +# "Excellent teaching! Very clear explanations.", +# "Good teacher with strong subject knowledge", +# "Class is okay, nothing special" +# ] - results = [] - for text in test_cases: - is_meta = is_meta_comment(text) - is_neg = is_explicit_negative(text) +# results = [] +# for text in test_cases: +# is_meta = is_meta_comment(text) +# is_neg = is_explicit_negative(text) - # Predict classification - if is_meta: - predicted = "Neutral (meta-comment)" - elif is_neg: - predicted = "Negative (heuristic)" - else: - predicted = "Needs full analysis" +# # Predict classification +# if is_meta: +# predicted = "Neutral (meta-comment)" +# elif is_neg: +# predicted = "Negative (heuristic)" +# else: +# predicted = "Needs full analysis" - results.append({ - "text": text, - "is_meta_comment": is_meta, - "is_heuristic_negative": is_neg, - "predicted_classification": predicted - }) +# results.append({ +# "text": text, +# "is_meta_comment": is_meta, +# "is_heuristic_negative": is_neg, +# "predicted_classification": predicted +# }) - return { - "test_results": results, - "note": "Full analysis requires VADER and RoBERTa scores" - } +# return { +# "test_results": results, +# "note": "Full analysis requires VADER and RoBERTa scores" +# } -if __name__ == "__main__": - 
uvicorn.run( - app, - host="0.0.0.0", - port=8000, - log_level="info" - ) +# if __name__ == "__main__": +# uvicorn.run( +# app, +# host="0.0.0.0", +# port=8000, +# log_level="info" +# )