File size: 10,774 Bytes
0210351
b8ae42e
 
0210351
 
 
 
 
 
 
 
 
b8ae42e
 
 
 
 
 
 
 
 
9522fcb
b8ae42e
 
 
d3bc543
b8ae42e
 
d3bc543
0210351
b8ae42e
 
 
 
0210351
 
 
 
 
4d616d3
b8ae42e
0210351
 
 
 
 
 
 
 
 
 
 
 
b8ae42e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0210351
b8ae42e
0210351
 
 
b8ae42e
 
 
 
 
 
 
 
0210351
 
b8ae42e
0210351
b8ae42e
 
0210351
 
 
 
 
b8ae42e
 
0210351
4d616d3
0210351
b8ae42e
 
0210351
 
 
 
 
 
 
 
 
 
 
 
b8ae42e
0210351
 
b8ae42e
0210351
b8ae42e
0210351
 
b8ae42e
 
 
0210351
b8ae42e
0210351
 
b8ae42e
0210351
 
 
 
 
 
 
b8ae42e
0210351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d616d3
 
0210351
 
4d616d3
0210351
 
 
 
 
 
4d616d3
0210351
 
 
 
 
 
4d616d3
0210351
 
4d616d3
 
0210351
 
 
 
 
b8ae42e
 
0210351
b8ae42e
 
 
 
 
 
 
 
 
 
 
 
0210351
b8ae42e
0210351
b8ae42e
0210351
 
b8ae42e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0210351
b8ae42e
 
d3bc543
0210351
 
b8ae42e
d3bc543
 
0210351
 
d3bc543
0210351
 
 
b8ae42e
 
0210351
b8ae42e
0210351
 
b8ae42e
 
 
 
 
 
 
 
 
 
 
0210351
b8ae42e
 
 
 
 
 
 
 
 
 
 
 
 
0210351
b8ae42e
 
 
 
0210351
 
 
b8ae42e
0210351
 
 
 
 
 
 
 
 
 
 
 
 
 
4d616d3
0210351
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
"""
Vietnamese Sentiment Analysis - Modular Hugging Face Spaces App
Uses fine-tuned model and modular page structure
"""

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import time
import gc
import psutil
import os
import threading
import subprocess
import sys

# Import modular pages
from py.api_controller import create_api_controller
from py.pages import (
    create_single_analysis_page,
    create_batch_analysis_page,
    create_model_info_page
)

# Global app instances
app_instance = None  # SentimentGradioApp singleton, created in create_interface()
api_controller = None  # API controller object, created lazily by start_api_server()
api_server_thread = None  # handle for the background API server thread

class SentimentGradioApp:
    """Vietnamese sentiment analysis backed by a fine-tuned transformer model.

    Handles the model lifecycle (automatic fine-tuning on first run, then
    loading), CPU/GPU memory hygiene, and single/batch inference. Class
    indices 0/1/2 map to Negative/Neutral/Positive (order assumed to match
    the fine-tuned model's label ids -- TODO confirm against training code).
    """

    def __init__(self):
        # Always use the fine-tuned model
        self.finetuned_model = "./vietnamese_sentiment_finetuned"
        self.base_model = "5CD-AI/Vietnamese-Sentiment-visobert"  # For initial fine-tuning
        self.tokenizer = None
        self.model = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.sentiment_labels = ["Negative", "Neutral", "Positive"]
        self.model_loaded = False
        self.max_memory_mb = 8192  # informational soft budget; not enforced in this class
        self.current_model = None  # set to the loaded model path by load_model()

    def get_memory_usage(self):
        """Return the current process's resident memory (RSS) in MB."""
        process = psutil.Process(os.getpid())
        return process.memory_info().rss / 1024 / 1024

    def cleanup_memory(self):
        """Release cached GPU memory (when CUDA is available) and run GC."""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    def run_fine_tuning_if_needed(self):
        """Run fine-tuning if the fine-tuned model doesn't exist.

        Returns:
            True when the fine-tuned model directory exists (already or after
            a successful fine-tuning run), False on any failure.
        """
        if os.path.exists(self.finetuned_model):
            print(f"✅ Fine-tuned model already exists at {self.finetuned_model}")
            return True

        print(f"🔧 Fine-tuned model not found at {self.finetuned_model}")
        print("🚀 Starting automatic fine-tuning process...")

        try:
            # Resolve the script relative to this file so the app works
            # regardless of the caller's working directory.
            current_dir = os.path.dirname(os.path.abspath(__file__))
            fine_tune_script = os.path.join(current_dir, "py", "fine_tune_sentiment.py")

            if not os.path.exists(fine_tune_script):
                print(f"❌ Fine-tuning script not found at: {fine_tune_script}")
                return False

            print("📋 Running fine_tune_sentiment.py...")
            print(f"📁 Script path: {fine_tune_script}")

            # Run the fine-tuning script as a subprocess (list form, no shell).
            result = subprocess.run([
                sys.executable,
                fine_tune_script
            ], capture_output=True, text=True, cwd=current_dir)

            if result.returncode == 0:
                print("✅ Fine-tuning completed successfully!")
                # Show only the last few lines of output to avoid spam
                output_lines = result.stdout.strip().split('\n')
                if output_lines:
                    print("📊 Final output:")
                    for line in output_lines[-5:]:  # Show last 5 lines
                        print(f"   {line}")
                return True
            else:
                print(f"❌ Fine-tuning failed with error:")
                print(result.stderr)
                return False

        except Exception as e:
            print(f"❌ Error running fine-tuning: {e}")
            return False

    def load_model(self):
        """Load the fine-tuned model, creating it via fine-tuning if needed.

        Returns:
            True on success (idempotent: returns True immediately when the
            model is already loaded), False on failure.
        """
        if self.model_loaded:
            return True

        print("🎯 Loading Vietnamese Sentiment Analysis Model")

        # Step 1: Check if fine-tuned model exists, if not, create it
        if not self.run_fine_tuning_if_needed():
            print("❌ Failed to create fine-tuned model")
            return False

        # Step 2: Load the fine-tuned model
        try:
            self.cleanup_memory()
            print(f"🤖 Loading fine-tuned model from: {self.finetuned_model}")

            self.tokenizer = AutoTokenizer.from_pretrained(self.finetuned_model)
            self.model = AutoModelForSequenceClassification.from_pretrained(self.finetuned_model)

            self.model.to(self.device)
            self.model.eval()  # inference mode (disables dropout etc.)
            self.model_loaded = True

            print(f"✅ Fine-tuned model loaded successfully!")
            self.current_model = self.finetuned_model
            return True

        except Exception as e:
            print(f"❌ Error loading fine-tuned model: {e}")
            print("🔄 This should not happen if fine-tuning completed successfully")
            self.model_loaded = False
            return False

    def _infer(self, text):
        """Run a single forward pass and return structured results.

        Assumes the model is loaded and `text` is non-empty/stripped.

        Returns:
            (sentiment_label, confidence, probs, inference_time) where
            `probs` is a plain list of per-class probabilities and
            `inference_time` is wall-clock seconds for this pass.
        """
        start_time = time.time()

        # Tokenize input
        inputs = self.tokenizer(
            text,
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors="pt"
        ).to(self.device)

        # Get prediction
        with torch.no_grad():
            outputs = self.model(**inputs)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
            predicted_class = torch.argmax(probabilities, dim=-1).item()
            confidence = torch.max(probabilities).item()

        inference_time = time.time() - start_time

        # Move to CPU and clean GPU memory
        probs = probabilities.cpu().numpy()[0].tolist()
        del probabilities, outputs, inputs
        self.cleanup_memory()

        return self.sentiment_labels[predicted_class], confidence, probs, inference_time

    def predict_sentiment(self, text):
        """Predict sentiment for given text.

        Args:
            text: Raw input string; surrounding whitespace is ignored.

        Returns:
            (sentiment, markdown_report) on success, or (None, error_message)
            when the model is not loaded, input is empty, or inference fails.
        """
        if not self.model_loaded:
            return None, "❌ Model not loaded. Please refresh the page."

        if not text.strip():
            return None, "❌ Please enter some text to analyze."

        try:
            self.cleanup_memory()
            sentiment, confidence, probs, inference_time = self._infer(text.strip())

            # Create formatted output
            output_text = f"""
## 🎯 Sentiment Analysis Result

**Sentiment:** {sentiment}
**Confidence:** {confidence:.2%}
**Processing Time:** {inference_time:.3f}s

### 📊 Probability Distribution:
- 😠 **Negative:** {probs[0]:.2%}
- 😐 **Neutral:** {probs[1]:.2%}
- 😊 **Positive:** {probs[2]:.2%}

### 📝 Input Text:
> "{text}"

---
*Analysis completed at {time.strftime('%Y-%m-%d %H:%M:%S')}*
*Memory usage: {self.get_memory_usage():.1f}MB*
            """.strip()

            return sentiment, output_text

        except Exception as e:
            self.cleanup_memory()
            return None, f"❌ Error during prediction: {str(e)}"

    def batch_predict(self, texts):
        """Predict sentiment for multiple texts (maximum 10 per batch).

        Args:
            texts: Iterable of input strings; blank entries are dropped.

        Returns:
            (results, error): `results` is a list of dicts with keys
            text/sentiment/confidence/processing_time; `error` is None on
            success or a user-facing message. Previously confidence and
            processing_time were hard-coded 0.0 placeholders; they now carry
            the real per-item values from inference. Items that fail
            individually are skipped rather than aborting the whole batch.
        """
        if not self.model_loaded:
            return [], "❌ Model not loaded. Please refresh the page."

        if not texts or not any(texts):
            return [], "❌ Please enter some texts to analyze."

        # Filter valid texts
        valid_texts = [text.strip() for text in texts if text.strip()]

        if len(valid_texts) > 10:
            return [], "❌ Too many texts. Maximum 10 texts per batch for memory efficiency."

        if not valid_texts:
            return [], "❌ No valid texts provided."

        try:
            results = []
            for text in valid_texts:
                try:
                    self.cleanup_memory()
                    sentiment, confidence, _, elapsed = self._infer(text)
                except Exception:
                    # Tolerate per-item failures (matches the previous
                    # behavior of skipping items whose prediction failed).
                    continue
                results.append({
                    "text": text,
                    "sentiment": sentiment,
                    "confidence": confidence,
                    "processing_time": elapsed,
                })
            return results, None

        except Exception as e:
            self.cleanup_memory()
            return [], f"❌ Error during batch prediction: {str(e)}"


def start_api_server():
    """Start the REST API server in a daemon thread.

    No-op unless the Gradio app instance exists and the API controller has
    not been created yet. Uses port 7861 to avoid clashing with the Gradio
    UI. Failures are logged, not raised, so the UI can still start.
    """
    # Fix: the original declared only `api_controller` global, so the
    # thread handle was assigned to a local and the module-level
    # `api_server_thread` stayed None.
    global api_controller, api_server_thread
    if app_instance and api_controller is None:
        try:
            api_controller = create_api_controller(app_instance)
            # Run API server on a different port to avoid conflicts
            api_server_thread = threading.Thread(
                target=api_controller.run,
                kwargs={"host": "0.0.0.0", "port": 7861},
                daemon=True
            )
            api_server_thread.start()
            print("🌐 API server started on port 7861")
            print("📚 API Documentation: http://localhost:7861/docs")
        except Exception as e:
            print(f"❌ Failed to start API server: {e}")


def create_interface():
    """Build and return the Gradio Blocks UI for Hugging Face Spaces.

    Creates the app singleton, loads (or first creates) the fine-tuned
    model, starts the companion API server, then assembles the tabbed
    interface. Returns None when the model cannot be loaded.
    """
    global app_instance, api_controller

    app_instance = SentimentGradioApp()

    # Nothing in the UI works without a model, so bail out early.
    if not app_instance.load_model():
        print("❌ Failed to load model. Please try again.")
        return None

    # Companion REST API runs alongside the UI on its own port.
    start_api_server()

    custom_css = """
        .gradio-container {
            max-width: 1200px !important;
            margin: 0 auto !important;
        }
        .main-header {
            text-align: center;
            margin-bottom: 2rem;
        }
        """

    header_html = """
        <div class="main-header">
            <h1>🎭 Vietnamese Sentiment Analysis</h1>
            <p>Analyze sentiment in Vietnamese text using transformer models from Hugging Face</p>
            <p><strong>Current Model:</strong> {model_name} | <strong>Device:</strong> {device}</p>
        </div>
        """.format(
        model_name=getattr(app_instance, 'current_model', app_instance.finetuned_model),
        device=str(app_instance.device).upper()
    )

    page_builders = (
        create_single_analysis_page,
        create_batch_analysis_page,
        create_model_info_page,
    )

    # Assemble the interface: header banner plus one tab per page module.
    with gr.Blocks(
        title="Vietnamese Sentiment Analysis",
        theme=gr.themes.Soft(),
        css=custom_css
    ) as demo:
        gr.HTML(header_html)

        with gr.Tabs():
            for build_page in page_builders:
                build_page(app_instance)

    return demo


# Script entry point: build the interface and serve it.
if __name__ == "__main__":
    print("🚀 Starting Vietnamese Sentiment Analysis for Hugging Face Spaces...")

    ui = create_interface()
    # create_interface() returns None when model loading failed.
    if ui is None:
        print("❌ Failed to create interface. Exiting.")
        exit(1)

    print("✅ Interface created successfully!")
    print("🌐 Launching web interface...")

    # Local-only launch (no public share link), verbose errors enabled.
    ui.launch(share=False, show_error=True, quiet=False)