Spaces:

kushvanth
/

iqac_fast_api

Sleeping

App Files Files Community

kushvanth committed on Oct 14, 2025

Commit

1195e16

verified ·

1 Parent(s): 083e0cf

Upload folder using huggingface_hub

Browse files

Files changed (17) hide show

Dockerfile +22 -0
fastapi_example.py +763 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/added_tokens.json +0 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/chat_template.jinja +0 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/model.safetensors +0 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/model.safetensors.index.json +0 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/tokenizer.json +0 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/tokenizer_config.json +0 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/refs/main +1 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/refs/refs/pr/19 +1 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/5ecd13590450ab163e43547492b0cfc49f16629b/model.safetensors +3 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/config.json +33 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/merges.txt +0 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/pytorch_model.bin +3 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/special_tokens_map.json +1 -0
model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/vocab.json +0 -0
requirements.txt +10 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+FROM python:3.12-slim
+WORKDIR /app
+# Upgrade pip
+RUN python -m pip install --upgrade pip
+# Copy dependencies and install
+# (requirements.txt is copied on its own, before the application code, so the
+# install step below only depends on the dependency list)
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Download NLTK data to /tmp (writable directory)
+# fastapi_example.py puts /tmp/nltk_data first on nltk.data.path, so the
+# resources fetched here are the ones it looks up at import time.
+RUN python -m nltk.downloader -d /tmp/nltk_data vader_lexicon punkt stopwords wordnet omw-1.4
+# Copy application code
+COPY . .
+# Expose port 7860 (Hugging Face requirement)
+EXPOSE 7860
+# Run FastAPI
+# (serves the `app` object defined in fastapi_example.py on the exposed port)
+CMD ["uvicorn", "fastapi_example:app", "--host", "0.0.0.0", "--port", "7860"]

fastapi_example.py ADDED Viewed

	@@ -0,0 +1,763 @@

+"""
+Enhanced FastAPI Service for Comment Sentiment Analysis
+with improved performance, validation, and configuration management
+"""
+from fastapi import FastAPI, HTTPException, Depends
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field, validator
+from pydantic_settings import BaseSettings
+from typing import List, Dict, Any, Optional
+from functools import lru_cache
+import uvicorn
+import pandas as pd
+import numpy as np
+import os
+import re
+from datetime import datetime
+import logging
+# Configure logging FIRST
+# (so the NLTK setup below, which runs at import time, can already log)
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# CRITICAL: Download NLTK data BEFORE importing NLTK components
+import nltk
+import ssl
+# NOTE(review): the block below swaps the process-wide default HTTPS context
+# factory for ssl._create_unverified_context, i.e. certificate verification is
+# turned off for everything that uses that default. Presumably a workaround for
+# certificate errors during nltk.download() - confirm it is still needed and
+# consider removing it.
+try:
+    _create_unverified_https_context = ssl._create_unverified_context
+except AttributeError:
+    # This Python build has no _create_unverified_context; keep the default
+    pass
+else:
+    ssl._create_default_https_context = _create_unverified_https_context
+# Set NLTK data path
+nltk_data_dir = '/tmp/nltk_data'
+os.makedirs(nltk_data_dir, exist_ok=True)
+# Inserted at position 0 so this directory is searched before NLTK's defaults
+nltk.data.path.insert(0, nltk_data_dir)
+# Download required NLTK data
+def ensure_nltk_data():
+    """Ensure all required NLTK data is downloaded"""
+    resources = ['vader_lexicon', 'punkt', 'stopwords', 'wordnet', 'omw-1.4']
+    for resource in resources:
+        try:
+            # Try to find the resource
+            if resource == 'vader_lexicon':
+                nltk.data.find('sentiment/vader_lexicon.zip')
+            elif resource == 'punkt':
+                nltk.data.find('tokenizers/punkt')
+            elif resource in ['stopwords', 'wordnet', 'omw-1.4']:
+                nltk.data.find(f'corpora/{resource}')
+            logger.info(f"✓ NLTK resource '{resource}' already available")
+        except LookupError:
+            logger.info(f"Downloading NLTK resource '{resource}'...")
+            try:
+                nltk.download(resource, download_dir=nltk_data_dir, quiet=False)
+                logger.info(f"✓ Successfully downloaded '{resource}'")
+            except Exception as e:
+                logger.error(f"✗ Failed to download '{resource}': {e}")
+# Download NLTK data immediately
+logger.info("Ensuring NLTK data is available...")
+ensure_nltk_data()
+# NOW import NLTK components
+from nltk.sentiment import SentimentIntensityAnalyzer
+# Import transformers after NLTK setup
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
+from scipy.special import softmax
+import torch
+# Configuration Management
+class Settings(BaseSettings):
+    """Application settings with environment variable support.
+
+    Every field below is a default value; the inner Config additionally names a
+    ``.env`` file as a source and ignores unknown keys.
+    """
+    # API Settings
+    # app_name / app_version are used for the FastAPI title/version and are
+    # echoed by the "/" and "/health" endpoints.
+    app_name: str = "Comment Analysis API"
+    app_version: str = "2.0.0"
+    # When False, the "/config" endpoint answers 404.
+    debug_mode: bool = False
+    # Request Limits
+    # The first two are enforced by CommentAnalysisRequest.validate_comments;
+    # min_comment_words is read by the filter in analyze_comments_sentiment.
+    max_comments_per_request: int = 1000
+    max_comment_length: int = 5000
+    min_comment_words: int = 2
+    # Sentiment Thresholds
+    # Compared against per-comment scores in overall_sentiment(): the VADER
+    # thresholds against the compound score, the RoBERTa ones against the
+    # negative / positive class probabilities.
+    vader_pos_threshold: float = 0.2
+    vader_neg_threshold: float = -0.2
+    roberta_pos_threshold: float = 0.55
+    roberta_neg_threshold: float = 0.45
+    # Weights used to blend VADER and RoBERTa scores into the combined_* columns.
+    combined_weight_vader: float = 0.5
+    combined_weight_roberta: float = 0.5
+    # Model Settings
+    # model_cache_dir is passed as cache_dir when loading the RoBERTa
+    # tokenizer and model.
+    model_cache_dir: str = "./model_cache"
+    roberta_model_name: str = "cardiffnlp/twitter-roberta-base-sentiment"
+    # The summarization pipeline is only built (and used) when this is True.
+    use_abstractive_summary: bool = False
+    summarizer_model: str = "facebook/bart-large-cnn"
+    max_summary_length: int = 100
+    min_summary_length: int = 25
+    # Performance
+    # enable_caching selects the lru_cache-backed VADER path in vader_sentiment.
+    enable_caching: bool = True
+    # presumably the capacity of the VADER result cache - verify against
+    # vader_sentiment_cached
+    cache_size: int = 500
+    # Number of comments sent through RoBERTa per forward pass.
+    batch_size: int = 32
+    # NOTE(review): an inner "class Config" is the pydantic v1 configuration
+    # style; pydantic v2 documents `model_config` for this - confirm whether
+    # the deprecation warning is acceptable.
+    class Config:
+        env_file = ".env"
+        env_file_encoding = 'utf-8'
+        extra = 'ignore'
+@lru_cache()
+def get_settings() -> Settings:
+    """Return the Settings instance, building it on the first call only.
+
+    lru_cache on a zero-argument function means every later call returns the
+    same object.
+    """
+    return Settings()
+# Pydantic Models
+class FacultyInfo(BaseModel):
+    """Identifies the faculty member and course a batch of comments refers to.
+
+    All four fields are required, non-empty strings with an upper length bound.
+    """
+    faculty_name: str = Field(..., min_length=1, max_length=200)
+    staff_id: str = Field(..., min_length=1, max_length=50)
+    course_code: str = Field(..., min_length=1, max_length=50)
+    course_name: str = Field(..., min_length=1, max_length=200)
+class CommentAnalysisRequest(BaseModel):
+    """Request body of POST /analyze-comments: raw comments plus faculty details."""
+    # At least one comment must be supplied
+    comments: List[str] = Field(..., min_items=1)
+    faculty_info: FacultyInfo
+    # NOTE(review): `validator` and `min_items` are the pydantic v1 spellings;
+    # pydantic v2 documents `field_validator` and `min_length` as their
+    # replacements - confirm whether to migrate.
+    @validator('comments')
+    def validate_comments(cls, v):
+        """Enforce the configured request limits on the comment list.
+
+        Raises:
+            ValueError: if there are more than max_comments_per_request
+                comments, or any comment is longer than max_comment_length
+                characters.
+        """
+        settings = get_settings()
+        if len(v) > settings.max_comments_per_request:
+            raise ValueError(
+                f'Maximum {settings.max_comments_per_request} comments per request'
+            )
+        # Length is checked on the raw comment, in characters
+        for idx, comment in enumerate(v):
+            if len(comment) > settings.max_comment_length:
+                raise ValueError(
+                    f'Comment {idx} exceeds maximum length of {settings.max_comment_length} characters'
+                )
+        return v
+class SentimentDistribution(BaseModel):
+    """Share of comments per sentiment label, expressed as percentages."""
+    positive_percentage: float
+    negative_percentage: float
+    neutral_percentage: float
+class DetailedScores(BaseModel):
+    """Average scores of one sentiment model over all analyzed comments."""
+    average_positive: float
+    average_negative: float
+    average_neutral: float
+    # Only filled in for VADER; the RoBERTa scores carry no compound value
+    average_compound: Optional[float] = None
+class DetailedAnalysis(BaseModel):
+    """Per-model averages, one DetailedScores for each of VADER and RoBERTa."""
+    vader_scores: DetailedScores
+    roberta_scores: DetailedScores
+class AnalysisResult(BaseModel):
+    """Full analysis payload, as built by analyze_comments_sentiment plus faculty_info."""
+    # Counts of analyzed comments, in total and per label
+    total_comments: int
+    positive_comments: int
+    negative_comments: int
+    neutral_comments: int
+    # Mean combined (VADER + RoBERTa) scores across all analyzed comments
+    positive_sentiment: float
+    negative_sentiment: float
+    neutral_sentiment: float
+    # "Positive", "Negative" or "Neutral"
+    overall_sentiment: str
+    sentiment_distribution: SentimentDistribution
+    negative_comments_summary: str
+    negative_comments_list: List[str]
+    key_insights: List[str]
+    recommendations: List[str]
+    detailed_analysis: DetailedAnalysis
+    faculty_info: Dict[str, str]
+    # ISO-8601 string produced with datetime.utcnow().isoformat()
+    analysis_timestamp: str
+class CommentAnalysisResponse(BaseModel):
+    """Envelope returned by /analyze-comments; `analysis` is None on failure."""
+    success: bool
+    analysis: Optional[AnalysisResult] = None
+    message: str
+# Initialize FastAPI app
+app = FastAPI(
+    title=get_settings().app_name,
+    version=get_settings().app_version,
+    description="Advanced sentiment analysis service for educational feedback"
+)
+# Add CORS middleware
+# NOTE(review): every origin, method and header is allowed while credentials
+# are also enabled. A wildcard origin combined with credentials is a risky
+# CORS policy - confirm whether credentials are needed, or restrict origins.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Global variables for models
+# (all None here; initialize_models() assigns them, and /health reports the
+# service as unhealthy while sia, model or tokenizer is still None)
+sia = None
+tokenizer = None
+model = None
+device = None
+summarizer = None
+# Heuristic phrase/regex rules for explicit negative feedback
+NEGATIVE_PHRASES = [
+    'need more staff',
+    'need more faculty',
+    'insufficient staff',
+    'lack of staff',
+    'lack of knowledge',
+    'better knowledge needed',
+    'poor knowledge',
+    'not good',
+    'not satisfied',
+    'not satisfactory',
+    'no classes',
+    'no regular classes',
+    'boring class',
+    'boring classes',
+    'waste of time',
+    'bad teacher',
+    'bad teaching',
+    'poor teaching',
+    'improve class',
+    'improvement needed',
+    'needs improvement',
+    'not helpful',
+    'not clear',
+    'communication skills need improvement',
+    'improve communication',
+    'lectures are going fast',
+    'going too fast',
+    'too fast',
+    'lacking',
+    'is lacking',
+    'knowledge is lacking',
+    'practical knowledge lacking',
+    'no practical',
+    'lack of practical',
+    'no hands-on',
+    'no real world'
+]
+NEGATIVE_REGEXES = [
+    re.compile(r"\bno\s+(proper|sufficient)\s+(classes|notes|support)\b", re.IGNORECASE),
+    re.compile(r"\bno\s+staff\b", re.IGNORECASE),
+    re.compile(r"\bneed(s)?\s+more\s+(staff|faculty|support)\b", re.IGNORECASE),
+    re.compile(r"\b(lecture|lectures|class|classes|teaching)\s+(are\s+)?(too|very)\s+fast\b", re.IGNORECASE),
+    re.compile(r"\blectures?\s+are\s+going\s+fast\b", re.IGNORECASE),
+    re.compile(r"\b(require|needs?|needed)\s+(some\s+)?improv(e|ement)s?\s+(in|of)?\s*communication(\s+skills?)?\b", re.IGNORECASE),
+    re.compile(r"\b(is\s+)?lacking\b", re.IGNORECASE),
+    re.compile(r"\bno\s+(practical|hands-on|real-world)\b", re.IGNORECASE)
+]
+META_COMMENT_PATTERNS = [
+    re.compile(r"^no\s+(other\s+)?(comments?|remarks?|feedback)$", re.IGNORECASE),
+    re.compile(r"^no\s+remarks?\s+(about|on)", re.IGNORECASE),
+    re.compile(r"^nil$", re.IGNORECASE),
+    re.compile(r"^none$", re.IGNORECASE),
+    re.compile(r"^n/?a$", re.IGNORECASE)
+]
+def is_meta_comment(text: str) -> bool:
+    """Check if comment is a meta-comment (not actual feedback)"""
+    if not text:
+        return False
+    text = text.strip()
+    for pattern in META_COMMENT_PATTERNS:
+        if pattern.match(text):
+            return True
+    return False
+def is_explicit_negative(text: str) -> bool:
+    """Check if text contains explicit negative phrases"""
+    if not text:
+        return False
+    lower = text.lower()
+    for phrase in NEGATIVE_PHRASES:
+        if phrase in lower:
+            return True
+    for regex in NEGATIVE_REGEXES:
+        if regex.search(text):
+            return True
+    return False
+def initialize_models():
+    """Initialize sentiment analysis models with caching support.
+
+    Assigns the module-level globals ``sia``, ``tokenizer``, ``model``,
+    ``device`` and (optionally) ``summarizer``.
+
+    Raises:
+        Exception: any failure while creating VADER or loading RoBERTa is
+            logged and re-raised. A summarizer failure is only logged as a
+            warning and leaves ``summarizer`` set to None.
+    """
+    global sia, tokenizer, model, device, summarizer
+    try:
+        settings = get_settings()
+        logger.info("Initializing sentiment analysis models...")
+        # Initialize VADER (NLTK data already downloaded)
+        sia = SentimentIntensityAnalyzer()
+        logger.info("VADER initialized")
+        # Initialize RoBERTa with caching
+        # (cache_dir is where from_pretrained looks for / stores model files)
+        cache_dir = settings.model_cache_dir
+        os.makedirs(cache_dir, exist_ok=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            settings.roberta_model_name,
+            cache_dir=cache_dir
+        )
+        model = AutoModelForSequenceClassification.from_pretrained(
+            settings.roberta_model_name,
+            cache_dir=cache_dir
+        )
+        # Prefer the GPU when torch reports one
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        model.to(device)
+        # Inference only: switch the model to evaluation mode
+        model.eval()
+        logger.info(f"RoBERTa initialized on device: {device}")
+        # Initialize summarizer (optional)
+        if settings.use_abstractive_summary:
+            try:
+                summarizer = pipeline(
+                    "summarization",
+                    model=settings.summarizer_model,
+                    # pipeline device index: 0 for the first GPU, -1 for CPU
+                    device=0 if device == "cuda" else -1
+                )
+                logger.info("Summarizer initialized")
+            except Exception as e:
+                # Best effort: the service keeps working without a summarizer
+                logger.warning(f"Summarizer initialization failed: {e}")
+                summarizer = None
+        logger.info("All models initialized successfully")
+    except Exception as e:
+        logger.error(f"Error initializing models: {e}")
+        raise e
+@lru_cache(maxsize=500)
+def vader_sentiment_cached(text: str) -> tuple:
+    """Cached VADER sentiment analysis"""
+    scores = sia.polarity_scores(text)
+    return (scores['neg'], scores['neu'], scores['pos'], scores['compound'])
+def vader_sentiment(text: str) -> Dict[str, float]:
+    """VADER sentiment analysis with caching support"""
+    try:
+        settings = get_settings()
+        if settings.enable_caching:
+            neg, neu, pos, compound = vader_sentiment_cached(text)
+            return {
+                'vader_neg': neg,
+                'vader_neu': neu,
+                'vader_pos': pos,
+                'vader_compound': compound
+            }
+        else:
+            scores = sia.polarity_scores(text)
+            return {
+                'vader_neg': scores['neg'],
+                'vader_neu': scores['neu'],
+                'vader_pos': scores['pos'],
+                'vader_compound': scores['compound']
+            }
+    except Exception as e:
+        logger.warning(f"VADER analysis failed for text: {e}")
+        return {'vader_neg': 0.0, 'vader_neu': 1.0, 'vader_pos': 0.0, 'vader_compound': 0.0}
+def roberta_sentiment_batch(texts: List[str]) -> List[Dict[str, float]]:
+    """Batch RoBERTa sentiment analysis for better performance"""
+    try:
+        settings = get_settings()
+        results = []
+        for i in range(0, len(texts), settings.batch_size):
+            batch = texts[i:i + settings.batch_size]
+            encoded = tokenizer(
+                batch,
+                return_tensors='pt',
+                truncation=True,
+                max_length=512,
+                padding=True
+            )
+            encoded = {k: v.to(device) for k, v in encoded.items()}
+            with torch.no_grad():
+                outputs = model(**encoded)
+            for output in outputs.logits:
+                scores = softmax(output.cpu().numpy())
+                results.append({
+                    'roberta_neg': float(scores[0]),
+                    'roberta_neu': float(scores[1]),
+                    'roberta_pos': float(scores[2])
+                })
+        return results
+    except Exception as e:
+        logger.warning(f"RoBERTa batch analysis failed: {e}")
+        return [{'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0} for _ in texts]
+def roberta_sentiment(text: str) -> Dict[str, float]:
+    """Single text RoBERTa sentiment analysis"""
+    try:
+        encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
+        encoded_text = {k: v.to(device) for k, v in encoded_text.items()}
+        with torch.no_grad():
+            output = model(**encoded_text)
+        scores = softmax(output[0][0].cpu().numpy())
+        return {
+            'roberta_neg': float(scores[0]),
+            'roberta_neu': float(scores[1]),
+            'roberta_pos': float(scores[2])
+        }
+    except Exception as e:
+        logger.warning(f"RoBERTa analysis failed for text: {e}")
+        return {'roberta_neg': 0.0, 'roberta_neu': 1.0, 'roberta_pos': 0.0}
+def overall_sentiment(row: pd.Series, settings: Settings) -> str:
+    """Determine overall sentiment using combined scores with configurable thresholds"""
+    combined_pos = row.get('combined_pos', 0.0)
+    combined_neg = row.get('combined_neg', 0.0)
+    combined_neu = row.get('combined_neu', 0.0)
+    vader_compound = row.get('vader_compound', 0.0)
+    roberta_neg = row.get('roberta_neg', 0.0)
+    roberta_pos = row.get('roberta_pos', 0.0)
+    if row.get('heuristic_negative') is True:
+        return 'Negative'
+    if (
+        vader_compound <= settings.vader_neg_threshold or
+        roberta_neg >= settings.roberta_neg_threshold or
+        combined_neg >= max(combined_pos, combined_neu)
+    ):
+        return 'Negative'
+    if (
+        vader_compound >= settings.vader_pos_threshold or
+        roberta_pos >= settings.roberta_pos_threshold or
+        combined_pos >= max(combined_neg, combined_neu)
+    ):
+        return 'Positive'
+    return 'Neutral'
+def sanitize_text(text: str) -> str:
+    """Sanitize input text"""
+    if not text:
+        return ""
+    text = ' '.join(text.split())
+    text = ''.join(char for char in text if ord(char) >= 32 or char == '\n')
+    return text.strip()
+def analyze_comments_sentiment(comments: List[str]) -> Dict[str, Any]:
+    """Main sentiment analysis function with enhanced performance"""
+    try:
+        settings = get_settings()
+        logger.info(f"Received {len(comments)} comments for analysis")
+        sanitized_comments = [sanitize_text(comment) for comment in comments]
+        filtered_comments = [
+            comment for comment in sanitized_comments
+            if (settings.min_comment_words < len(comment.split()) <= settings.max_comment_length
+                and not is_meta_comment(comment))
+        ]
+        logger.info(f"After filtering: {len(filtered_comments)} valid comments")
+        if not filtered_comments:
+            return {
+                "total_comments": 0,
+                "message": "No valid comments found for analysis"
+            }
+        df = pd.DataFrame({'comment': filtered_comments})
+        df['heuristic_negative'] = df['comment'].apply(is_explicit_negative)
+        vader_results = []
+        for text in df['comment']:
+            vader_results.append(vader_sentiment(text))
+        roberta_results = roberta_sentiment_batch(df['comment'].tolist())
+        vader_df = pd.DataFrame(vader_results)
+        roberta_df = pd.DataFrame(roberta_results)
+        final_df = pd.concat([df.reset_index(drop=True), vader_df, roberta_df], axis=1)
+        final_df['combined_pos'] = (
+            settings.combined_weight_vader * final_df['vader_pos'] +
+            settings.combined_weight_roberta * final_df['roberta_pos']
+        )
+        final_df['combined_neg'] = (
+            settings.combined_weight_vader * final_df['vader_neg'] +
+            settings.combined_weight_roberta * final_df['roberta_neg']
+        )
+        final_df['combined_neu'] = (
+            settings.combined_weight_vader * final_df['vader_neu'] +
+            settings.combined_weight_roberta * final_df['roberta_neu']
+        )
+        final_df['Overall_Sentiment'] = final_df.apply(
+            lambda row: overall_sentiment(row, settings),
+            axis=1
+        )
+        total_comments = len(final_df)
+        positive_count = len(final_df[final_df['Overall_Sentiment'] == 'Positive'])
+        negative_count = len(final_df[final_df['Overall_Sentiment'] == 'Negative'])
+        neutral_count = len(final_df[final_df['Overall_Sentiment'] == 'Neutral'])
+        logger.info(
+            f"Results: {positive_count} positive, "
+            f"{negative_count} negative, {neutral_count} neutral"
+        )
+        avg_positive = float(final_df['combined_pos'].mean())
+        avg_negative = float(final_df['combined_neg'].mean())
+        avg_neutral = float(final_df['combined_neu'].mean())
+        if avg_positive > max(avg_negative, avg_neutral):
+            overall_sentiment_label = "Positive"
+        elif avg_negative > max(avg_positive, avg_neutral):
+            overall_sentiment_label = "Negative"
+        else:
+            overall_sentiment_label = "Neutral"
+        negative_summary = ""
+        negative_comments_list = []
+        negative_comments = final_df[final_df['Overall_Sentiment'] == 'Negative']
+        if len(negative_comments) > 0:
+            negative_comments_list = negative_comments['comment'].tolist()
+            try:
+                top_idx = negative_comments['combined_neg'].nlargest(3).index
+                top_comments = negative_comments.loc[top_idx, 'comment'].tolist()
+                if settings.use_abstractive_summary and summarizer is not None:
+                    negative_text = " ".join(top_comments)
+                    if len(negative_text) > 1000:
+                        negative_text = negative_text[:1000]
+                    summary_result = summarizer(
+                        negative_text,
+                        max_length=settings.max_summary_length,
+                        min_length=settings.min_summary_length,
+                        do_sample=False
+                    )
+                    negative_summary = summary_result[0]['summary_text']
+                else:
+                    negative_summary = "; ".join(top_comments)
+            except Exception as e:
+                logger.warning(f"Summary generation failed: {e}")
+                negative_summary = "; ".join(negative_comments_list[:3])
+        insights = []
+        recommendations = []
+        if overall_sentiment_label == "Positive":
+            insights.extend([
+                "Students have positive feedback overall",
+                "Teaching methods are well-received",
+                f"{positive_count}/{total_comments} comments are positive"
+            ])
+            recommendations.extend([
+                "Continue current teaching approach",
+                "Maintain student engagement strategies",
+                "Share successful practices with colleagues"
+            ])
+        elif overall_sentiment_label == "Negative":
+            insights.extend([
+                "Students have concerns that need attention",
+                "Some aspects of teaching may need improvement",
+                f"{negative_count}/{total_comments} comments indicate issues"
+            ])
+            recommendations.extend([
+                "Review teaching methods and materials",
+                "Consider additional student support",
+                "Schedule meetings to address student concerns",
+                "Focus on areas mentioned in negative feedback"
+            ])
+        else:
+            insights.extend([
+                "Mixed feedback from students",
+                "Some areas performing well, others need attention",
+                "Balance of positive and negative responses"
+            ])
+            recommendations.extend([
+                "Focus on areas with negative feedback",
+                "Maintain strengths while addressing weaknesses",
+                "Gather more specific feedback on improvement areas"
+            ])
+        return {
+            "total_comments": total_comments,
+            "positive_comments": positive_count,
+            "negative_comments": negative_count,
+            "neutral_comments": neutral_count,
+            "positive_sentiment": round(avg_positive, 3),
+            "negative_sentiment": round(avg_negative, 3),
+            "neutral_sentiment": round(avg_neutral, 3),
+            "overall_sentiment": overall_sentiment_label,
+            "sentiment_distribution": {
+                "positive_percentage": round((positive_count / total_comments) * 100, 1),
+                "negative_percentage": round((negative_count / total_comments) * 100, 1),
+                "neutral_percentage": round((neutral_count / total_comments) * 100, 1)
+            },
+            "negative_comments_summary": negative_summary,
+            "negative_comments_list": negative_comments_list,
+            "key_insights": insights,
+            "recommendations": recommendations,
+            "detailed_analysis": {
+                "vader_scores": {
+                    "average_positive": round(final_df['vader_pos'].mean(), 3),
+                    "average_negative": round(final_df['vader_neg'].mean(), 3),
+                    "average_neutral": round(final_df['vader_neu'].mean(), 3),
+                    "average_compound": round(final_df['vader_compound'].mean(), 3)
+                },
+                "roberta_scores": {
+                    "average_positive": round(final_df['roberta_pos'].mean(), 3),
+                    "average_negative": round(final_df['roberta_neg'].mean(), 3),
+                    "average_neutral": round(final_df['roberta_neu'].mean(), 3)
+                }
+            },
+            "analysis_timestamp": datetime.utcnow().isoformat()
+        }
+    except Exception as e:
+        logger.error(f"Sentiment analysis failed: {e}", exc_info=True)
+        raise e
+@app.on_event("startup")
+async def startup_event():
+    """Initialize models on startup"""
+    try:
+        logger.info("===== Application Startup at {} =====".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
+        initialize_models()
+        logger.info("Service started successfully")
+    except Exception as e:
+        logger.error(f"Startup failed: {e}")
+        raise e
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Cleanup on shutdown (currently only logs; no resources are released here)."""
+    logger.info("Service shutting down")
+@app.get("/")
+async def root():
+    """Root endpoint"""
+    return {
+        "service": get_settings().app_name,
+        "version": get_settings().app_version,
+        "status": "running"
+    }
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    models_loaded = sia is not None and model is not None and tokenizer is not None
+    return {
+        "status": "healthy" if models_loaded else "unhealthy",
+        "service": "comment-analysis",
+        "version": get_settings().app_version,
+        "models_loaded": models_loaded,
+        "device": device if device else "not initialized",
+        "timestamp": datetime.utcnow().isoformat()
+    }
+@app.post("/analyze-comments", response_model=CommentAnalysisResponse)
+async def analyze_comments(
+    request: CommentAnalysisRequest,
+    settings: Settings = Depends(get_settings)
+):
+    """
+    Analyze comments for sentiment analysis using VADER and RoBERTa models
+    """
+    try:
+        comments = request.comments
+        faculty_info = request.faculty_info
+        if not comments:
+            return CommentAnalysisResponse(
+                success=False,
+                analysis=None,
+                message="No comments provided for analysis"
+            )
+        logger.info(
+            f"Analyzing {len(comments)} comments for "
+            f"{faculty_info.faculty_name} ({faculty_info.course_code})"
+        )
+        analysis_result = analyze_comments_sentiment(comments)
+        if analysis_result.get("total_comments", 0) == 0:
+            return CommentAnalysisResponse(
+                success=False,
+                analysis=None,
+                message=analysis_result.get("message", "No valid comments to analyze")
+            )
+        analysis_result["faculty_info"] = {
+            "faculty_name": faculty_info.faculty_name,
+            "staff_id": faculty_info.staff_id,
+            "course_code": faculty_info.course_code,
+            "course_name": faculty_info.course_name
+        }
+        return CommentAnalysisResponse(
+            success=True,
+            analysis=analysis_result,
+            message=f"Successfully analyzed {analysis_result['total_comments']} comments"
+        )
+    except ValueError as ve:
+        logger.warning(f"Validation error: {ve}")
+        raise HTTPException(status_code=400, detail=str(ve))
+    except Exception as e:
+        logger.error(f"Analysis failed: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=500,
+            detail="Analysis failed. Please try again later."
+        )
+@app.get("/config")
+async def get_config(settings: Settings = Depends(get_settings)):
+    """Get current configuration"""
+    if not settings.debug_mode:
+        raise HTTPException(status_code=404, detail="Not found")
+    return {
+        "max_comments_per_request": settings.max_comments_per_request,
+        "vader_pos_threshold": settings.vader_pos_threshold,
+        "vader_neg_threshold": settings.vader_neg_threshold,
+        "roberta_pos_threshold": settings.roberta_pos_threshold,
+        "roberta_neg_threshold": settings.roberta_neg_threshold,
+        "enable_caching": settings.enable_caching,
+        "batch_size": settings.batch_size
+    }
+if __name__ == "__main__":
+    # Direct execution (python fastapi_example.py) serves on port 8000; the
+    # Dockerfile CMD starts uvicorn itself on port 7860 and never reaches this.
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=8000,
+        log_level="info"
+    )

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/added_tokens.json ADDED Viewed

File without changes

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/chat_template.jinja ADDED Viewed

File without changes

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/model.safetensors ADDED Viewed

File without changes

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/model.safetensors.index.json ADDED Viewed

File without changes

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/tokenizer.json ADDED Viewed

File without changes

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/.no_exist/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/tokenizer_config.json ADDED Viewed

File without changes

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/refs/main ADDED Viewed

	@@ -0,0 +1 @@


1	+ daefdd1f6ae931839bce4d0f3db0a1a4265cd50f

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/refs/refs/pr/19 ADDED Viewed

	@@ -0,0 +1 @@


1	+ 5ecd13590450ab163e43547492b0cfc49f16629b

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/5ecd13590450ab163e43547492b0cfc49f16629b/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:727b715c623b78b1842f8e257b2f6b4b314a86c0c944d46d0784ce3009982a68
+size 498620100

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_name_or_path": "tweeteval_new/roberta-base-rt-sentiment/",
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "type_vocab_size": 1,
+  "vocab_size": 50265
+}

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c37a3484c55954cd75b336a85f1e0c023ae874f3a73b05d2418dd04828e293b1
+size 498679497

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>"}

model_cache/models--cardiffnlp--twitter-roberta-base-sentiment/snapshots/daefdd1f6ae931839bce4d0f3db0a1a4265cd50f/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+fastapi==0.111.0
+uvicorn==0.30.1
+pydantic==2.8.2
+pydantic-settings==2.10.1
+pandas==2.3.1
+numpy==2.1.3
+nltk==3.9.1
+torch==2.8.0
+transformers==4.56.2
+scipy==1.16.2