File size: 10,774 Bytes
0210351
b8ae42e
 
0210351
 
 
 
 
 
 
 
 
b8ae42e
 
 
 
 
 
 
 
 
9522fcb
b8ae42e
 
 
d3bc543
b8ae42e
 
d3bc543
0210351
b8ae42e
 
 
 
0210351
 
 
 
 
4d616d3
b8ae42e
0210351
 
 
 
 
 
 
 
 
 
 
 
b8ae42e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0210351
b8ae42e
0210351
 
 
b8ae42e
 
 
 
 
 
 
 
0210351
 
b8ae42e
0210351
b8ae42e
 
0210351
 
 
 
 
b8ae42e
 
0210351
4d616d3
0210351
b8ae42e
 
0210351
 
 
 
 
 
 
 
 
 
 
 
b8ae42e
0210351
 
b8ae42e
0210351
b8ae42e
0210351
 
b8ae42e
 
 
0210351
b8ae42e
0210351
 
b8ae42e
0210351
 
 
 
 
 
 
b8ae42e
0210351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d616d3
 
0210351
 
4d616d3
0210351
 
 
 
 
 
4d616d3
0210351
 
 
 
 
 
4d616d3
0210351
 
4d616d3
 
0210351
 
 
 
 
b8ae42e
 
0210351
b8ae42e
 
 
 
 
 
 
 
 
 
 
 
0210351
b8ae42e
0210351
b8ae42e
0210351
 
b8ae42e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0210351
b8ae42e
 
d3bc543
0210351
 
b8ae42e
d3bc543
 
0210351
 
d3bc543
0210351
 
 
b8ae42e
 
0210351
b8ae42e
0210351
 
b8ae42e
 
 
 
 
 
 
 
 
 
 
0210351
b8ae42e
 
 
 
 
 
 
 
 
 
 
 
 
0210351
b8ae42e
 
 
 
0210351
 
 
b8ae42e
0210351
 
 
 
 
 
 
 
 
 
 
 
 
 
4d616d3
0210351
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
"""
Vietnamese Sentiment Analysis - Modular Hugging Face Spaces App
Uses fine-tuned model and modular page structure
"""

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import time
import gc
import psutil
import os
import threading
import subprocess
import sys

# Import modular pages
from py.api_controller import create_api_controller
from py.pages import (
    create_single_analysis_page,
    create_batch_analysis_page,
    create_model_info_page
)

# Global app instances
app_instance = None  # SentimentGradioApp singleton, created in create_interface()
api_controller = None  # API controller object, created lazily by start_api_server()
api_server_thread = None  # handle for the background API server thread

class SentimentGradioApp:
    """Vietnamese sentiment analysis backed by a fine-tuned transformer model.

    Handles the model lifecycle (automatic fine-tuning on first run, then
    loading), CPU/GPU memory hygiene, and single/batch inference. Class
    indices 0/1/2 map to Negative/Neutral/Positive (order assumed to match
    the fine-tuned model's label ids -- TODO confirm against training code).
    """

    def __init__(self):
        # Always use the fine-tuned model
        self.finetuned_model = "./vietnamese_sentiment_finetuned"
        self.base_model = "5CD-AI/Vietnamese-Sentiment-visobert"  # For initial fine-tuning
        self.tokenizer = None
        self.model = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.sentiment_labels = ["Negative", "Neutral", "Positive"]
        self.model_loaded = False
        self.max_memory_mb = 8192  # informational soft budget; not enforced in this class
        self.current_model = None  # set to the loaded model path by load_model()

    def get_memory_usage(self):
        """Return the current process's resident memory (RSS) in MB."""
        process = psutil.Process(os.getpid())
        return process.memory_info().rss / 1024 / 1024

    def cleanup_memory(self):
        """Release cached GPU memory (when CUDA is available) and run GC."""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    def run_fine_tuning_if_needed(self):
        """Run fine-tuning if the fine-tuned model doesn't exist.

        Returns:
            True when the fine-tuned model directory exists (already or after
            a successful fine-tuning run), False on any failure.
        """
        if os.path.exists(self.finetuned_model):
            print(f"✅ Fine-tuned model already exists at {self.finetuned_model}")
            return True

        print(f"🔧 Fine-tuned model not found at {self.finetuned_model}")
        print("🚀 Starting automatic fine-tuning process...")

        try:
            # Resolve the script relative to this file so the app works
            # regardless of the caller's working directory.
            current_dir = os.path.dirname(os.path.abspath(__file__))
            fine_tune_script = os.path.join(current_dir, "py", "fine_tune_sentiment.py")

            if not os.path.exists(fine_tune_script):
                print(f"❌ Fine-tuning script not found at: {fine_tune_script}")
                return False

            print("📋 Running fine_tune_sentiment.py...")
            print(f"📁 Script path: {fine_tune_script}")

            # Run the fine-tuning script as a subprocess (list form, no shell).
            result = subprocess.run([
                sys.executable,
                fine_tune_script
            ], capture_output=True, text=True, cwd=current_dir)

            if result.returncode == 0:
                print("✅ Fine-tuning completed successfully!")
                # Show only the last few lines of output to avoid spam
                output_lines = result.stdout.strip().split('\n')
                if output_lines:
                    print("📊 Final output:")
                    for line in output_lines[-5:]:  # Show last 5 lines
                        print(f"   {line}")
                return True
            else:
                print(f"❌ Fine-tuning failed with error:")
                print(result.stderr)
                return False

        except Exception as e:
            print(f"❌ Error running fine-tuning: {e}")
            return False

    def load_model(self):
        """Load the fine-tuned model, creating it via fine-tuning if needed.

        Returns:
            True on success (idempotent: returns True immediately when the
            model is already loaded), False on failure.
        """
        if self.model_loaded:
            return True

        print("🎯 Loading Vietnamese Sentiment Analysis Model")

        # Step 1: Check if fine-tuned model exists, if not, create it
        if not self.run_fine_tuning_if_needed():
            print("❌ Failed to create fine-tuned model")
            return False

        # Step 2: Load the fine-tuned model
        try:
            self.cleanup_memory()
            print(f"🤖 Loading fine-tuned model from: {self.finetuned_model}")

            self.tokenizer = AutoTokenizer.from_pretrained(self.finetuned_model)
            self.model = AutoModelForSequenceClassification.from_pretrained(self.finetuned_model)

            self.model.to(self.device)
            self.model.eval()  # inference mode (disables dropout etc.)
            self.model_loaded = True

            print(f"✅ Fine-tuned model loaded successfully!")
            self.current_model = self.finetuned_model
            return True

        except Exception as e:
            print(f"❌ Error loading fine-tuned model: {e}")
            print("🔄 This should not happen if fine-tuning completed successfully")
            self.model_loaded = False
            return False

    def _infer(self, text):
        """Run a single forward pass and return structured results.

        Assumes the model is loaded and `text` is non-empty/stripped.

        Returns:
            (sentiment_label, confidence, probs, inference_time) where
            `probs` is a plain list of per-class probabilities and
            `inference_time` is wall-clock seconds for this pass.
        """
        start_time = time.time()

        # Tokenize input
        inputs = self.tokenizer(
            text,
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors="pt"
        ).to(self.device)

        # Get prediction
        with torch.no_grad():
            outputs = self.model(**inputs)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
            predicted_class = torch.argmax(probabilities, dim=-1).item()
            confidence = torch.max(probabilities).item()

        inference_time = time.time() - start_time

        # Move to CPU and clean GPU memory
        probs = probabilities.cpu().numpy()[0].tolist()
        del probabilities, outputs, inputs
        self.cleanup_memory()

        return self.sentiment_labels[predicted_class], confidence, probs, inference_time

    def predict_sentiment(self, text):
        """Predict sentiment for given text.

        Args:
            text: Raw input string; surrounding whitespace is ignored.

        Returns:
            (sentiment, markdown_report) on success, or (None, error_message)
            when the model is not loaded, input is empty, or inference fails.
        """
        if not self.model_loaded:
            return None, "❌ Model not loaded. Please refresh the page."

        if not text.strip():
            return None, "❌ Please enter some text to analyze."

        try:
            self.cleanup_memory()
            sentiment, confidence, probs, inference_time = self._infer(text.strip())

            # Create formatted output
            output_text = f"""
## 🎯 Sentiment Analysis Result

**Sentiment:** {sentiment}
**Confidence:** {confidence:.2%}
**Processing Time:** {inference_time:.3f}s

### 📊 Probability Distribution:
- 😠 **Negative:** {probs[0]:.2%}
- 😐 **Neutral:** {probs[1]:.2%}
- 😊 **Positive:** {probs[2]:.2%}

### 📝 Input Text:
> "{text}"

---
*Analysis completed at {time.strftime('%Y-%m-%d %H:%M:%S')}*
*Memory usage: {self.get_memory_usage():.1f}MB*
            """.strip()

            return sentiment, output_text

        except Exception as e:
            self.cleanup_memory()
            return None, f"❌ Error during prediction: {str(e)}"

    def batch_predict(self, texts):
        """Predict sentiment for multiple texts (maximum 10 per batch).

        Args:
            texts: Iterable of input strings; blank entries are dropped.

        Returns:
            (results, error): `results` is a list of dicts with keys
            text/sentiment/confidence/processing_time; `error` is None on
            success or a user-facing message. Previously confidence and
            processing_time were hard-coded 0.0 placeholders; they now carry
            the real per-item values from inference. Items that fail
            individually are skipped rather than aborting the whole batch.
        """
        if not self.model_loaded:
            return [], "❌ Model not loaded. Please refresh the page."

        if not texts or not any(texts):
            return [], "❌ Please enter some texts to analyze."

        # Filter valid texts
        valid_texts = [text.strip() for text in texts if text.strip()]

        if len(valid_texts) > 10:
            return [], "❌ Too many texts. Maximum 10 texts per batch for memory efficiency."

        if not valid_texts:
            return [], "❌ No valid texts provided."

        try:
            results = []
            for text in valid_texts:
                try:
                    self.cleanup_memory()
                    sentiment, confidence, _, elapsed = self._infer(text)
                except Exception:
                    # Tolerate per-item failures (matches the previous
                    # behavior of skipping items whose prediction failed).
                    continue
                results.append({
                    "text": text,
                    "sentiment": sentiment,
                    "confidence": confidence,
                    "processing_time": elapsed,
                })
            return results, None

        except Exception as e:
            self.cleanup_memory()
            return [], f"❌ Error during batch prediction: {str(e)}"


def start_api_server():
    """Start the REST API server in a daemon thread.

    No-op unless the Gradio app instance exists and the API controller has
    not been created yet. Uses port 7861 to avoid clashing with the Gradio
    UI. Failures are logged, not raised, so the UI can still start.
    """
    # Fix: the original declared only `api_controller` global, so the
    # thread handle was assigned to a local and the module-level
    # `api_server_thread` stayed None.
    global api_controller, api_server_thread
    if app_instance and api_controller is None:
        try:
            api_controller = create_api_controller(app_instance)
            # Run API server on a different port to avoid conflicts
            api_server_thread = threading.Thread(
                target=api_controller.run,
                kwargs={"host": "0.0.0.0", "port": 7861},
                daemon=True
            )
            api_server_thread.start()
            print("🌐 API server started on port 7861")
            print("📚 API Documentation: http://localhost:7861/docs")
        except Exception as e:
            print(f"❌ Failed to start API server: {e}")


def create_interface():
    """Build and return the Gradio Blocks UI for Hugging Face Spaces.

    Creates the app singleton, loads (or first creates) the fine-tuned
    model, starts the companion API server, then assembles the tabbed
    interface. Returns None when the model cannot be loaded.
    """
    global app_instance, api_controller

    app_instance = SentimentGradioApp()

    # Nothing in the UI works without a model, so bail out early.
    if not app_instance.load_model():
        print("❌ Failed to load model. Please try again.")
        return None

    # Companion REST API runs alongside the UI on its own port.
    start_api_server()

    custom_css = """
        .gradio-container {
            max-width: 1200px !important;
            margin: 0 auto !important;
        }
        .main-header {
            text-align: center;
            margin-bottom: 2rem;
        }
        """

    header_html = """
        <div class="main-header">
            <h1>🎭 Vietnamese Sentiment Analysis</h1>
            <p>Analyze sentiment in Vietnamese text using transformer models from Hugging Face</p>
            <p><strong>Current Model:</strong> {model_name} | <strong>Device:</strong> {device}</p>
        </div>
        """.format(
        model_name=getattr(app_instance, 'current_model', app_instance.finetuned_model),
        device=str(app_instance.device).upper()
    )

    page_builders = (
        create_single_analysis_page,
        create_batch_analysis_page,
        create_model_info_page,
    )

    # Assemble the interface: header banner plus one tab per page module.
    with gr.Blocks(
        title="Vietnamese Sentiment Analysis",
        theme=gr.themes.Soft(),
        css=custom_css
    ) as demo:
        gr.HTML(header_html)

        with gr.Tabs():
            for build_page in page_builders:
                build_page(app_instance)

    return demo


# Script entry point: build the interface and serve it.
if __name__ == "__main__":
    print("🚀 Starting Vietnamese Sentiment Analysis for Hugging Face Spaces...")

    ui = create_interface()
    # create_interface() returns None when model loading failed.
    if ui is None:
        print("❌ Failed to create interface. Exiting.")
        exit(1)

    print("✅ Interface created successfully!")
    print("🌐 Launching web interface...")

    # Local-only launch (no public share link), verbose errors enabled.
    ui.launch(share=False, show_error=True, quiet=False)