anonymous12321
/

CouncilTopics-PT

@@ -1,359 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
- Intelligent Stacking - Portuguese Document Classifier
-======================================================
-Clean interface for multilabel administrative document classification.
-"""
-import streamlit as st
-import numpy as np
-import joblib
-import json
-import re
-from pathlib import Path
-# ML imports
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.preprocessing import MultiLabelBinarizer
-from scipy.sparse import hstack, csr_matrix
-# Optional PyTorch imports
-try:
-    import torch
-    from transformers import AutoTokenizer, AutoModel
-    TORCH_AVAILABLE = True
-except ImportError:
-    TORCH_AVAILABLE = False
-# Page config (kept ahead of every other Streamlit call in this script)
-st.set_page_config(
-    layout="wide",
-    page_icon="🧠",
-    page_title=" Intelligent Stacking",
-)
-# Custom CSS: centered title plus prediction cards whose left border colour
-# is overridden by the high/med/low confidence classes.
-_CUSTOM_CSS = """
-<style>
-    .main-title {
-        text-align: center;
-        color: #1f77b4;
-        margin-bottom: 2rem;
-    }
-    .prediction-card {
-        padding: 1rem;
-        margin: 0.5rem 0;
-        border-radius: 8px;
-        border-left: 4px solid #1f77b4;
-        background: #f8f9fa;
-    }
-    .high-conf { border-left-color: #28a745; }
-    .med-conf { border-left-color: #ffc107; }
-    .low-conf { border-left-color: #dc3545; }
-</style>
-"""
-st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
-class PortugueseClassifier:
-    """Intelligent Stacking classifier for Portuguese administrative documents.
-    Loads pre-trained artifacts from ``models/`` (label binarizer, TF-IDF
-    vectorizer, base models, meta-learner and per-label thresholds) and, when
-    PyTorch/transformers could be imported, a BERT encoder. ``predict`` blends
-    the meta-learner output with the plain average of the base models.
-    The message returned by the load performed in ``__init__`` is kept in
-    ``load_status`` so callers can inspect the outcome without reloading.
-    """
-    # Hugging Face checkpoint used for the BERT embeddings.
-    BERT_MODEL_NAME = 'neuralmind/bert-base-portuguese-cased'
-    # Width of the zero vector substituted when BERT features are unavailable.
-    BERT_DIM = 768
-    # Algorithm suffixes of the base-model keys ("<feature set>_<algorithm>").
-    ALGORITHM_NAMES = ("LogReg_C1", "LogReg_C05", "GradBoost", "RandomForest")
-    # Constant scores written when a base model is missing / raises. Random
-    # noise here would make repeated predictions for the same text disagree,
-    # so fixed low values are used instead.
-    MISSING_MODEL_FILL = 0.15
-    FAILED_MODEL_FILL = 0.10
-    def __init__(self):
-        """Initialise empty component slots, then load everything from disk."""
-        self.model_path = Path("models")
-        self.labels = None
-        self.models_loaded = False
-        # Model components
-        self.tfidf_vectorizer = None
-        self.meta_learner = None
-        self.mlb = None
-        self.optimal_thresholds = None
-        self.trained_base_models = None
-        # BERT components are always defined so attribute access is safe even
-        # when torch/transformers are not installed.
-        self.bert_tokenizer = None
-        self.bert_model = None
-        self.device = None
-        if TORCH_AVAILABLE:
-            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        # Remember the outcome so the UI does not have to load a second time.
-        self.load_status = self.load_models()
-    def load_models(self):
-        """Load all model components and return a human-readable status string.
-        Returns a message starting with "❌" when a required artifact is
-        missing or loading raised; otherwise a "✅" message, which carries a
-        "⚠️ BERT not available" note when only the optional BERT encoder
-        failed to load. ``models_loaded`` is set to True only on the "✅" path.
-        """
-        try:
-            # NOTE: joblib.load unpickles; only point model_path at trusted files.
-            artifacts = (
-                ("mlb", "int_stacking_mlb_encoder.joblib", joblib.load, "❌ MLB encoder not found"),
-                ("tfidf_vectorizer", "int_stacking_tfidf_vectorizer.joblib", joblib.load, "❌ TF-IDF vectorizer not found"),
-                ("meta_learner", "int_stacking_meta_learner.joblib", joblib.load, "❌ Meta-learner not found"),
-                ("optimal_thresholds", "int_stacking_optimal_thresholds.npy", np.load, "❌ Thresholds not found"),
-                ("trained_base_models", "int_stacking_base_models.joblib", joblib.load, "❌ Base models not found"),
-            )
-            for attr, filename, loader, missing_msg in artifacts:
-                artifact_path = self.model_path / filename
-                if not artifact_path.exists():
-                    return missing_msg
-                setattr(self, attr, loader(artifact_path))
-                if attr == "mlb":
-                    self.labels = self.mlb.classes_.tolist()
-            # BERT is optional: extract_bert_features() falls back to zeros,
-            # so a failure here must not block the rest of the pipeline.
-            bert_note = ""
-            if TORCH_AVAILABLE:
-                try:
-                    tokenizer = AutoTokenizer.from_pretrained(self.BERT_MODEL_NAME)
-                    model = AutoModel.from_pretrained(self.BERT_MODEL_NAME)
-                    model.eval()
-                    self.bert_tokenizer = tokenizer
-                    self.bert_model = model.to(self.device)
-                except Exception:
-                    self.bert_tokenizer = None
-                    self.bert_model = None
-                    bert_note = " (⚠️ BERT not available)"
-            self.models_loaded = True
-            return f"✅ Intelligent Stacking loaded with {len(self.labels)} categories{bert_note}"
-        except Exception as e:
-            return f"❌ Error loading models: {e}"
-    def extract_bert_features(self, text):
-        """Return BERT features for ``text`` as a NumPy array.
-        Uses the hidden state at the first token position of the model's last
-        layer. When torch or the BERT model is unavailable, or encoding
-        raises, a ``(1, BERT_DIM)`` array of zeros is returned instead.
-        """
-        if not TORCH_AVAILABLE or self.bert_model is None:
-            return np.zeros((1, self.BERT_DIM))
-        try:
-            inputs = self.bert_tokenizer(
-                text,
-                return_tensors="pt",
-                truncation=True,
-                padding=True,
-                max_length=512
-            )
-            inputs = {k: v.to(self.device) for k, v in inputs.items()}
-            with torch.no_grad():
-                outputs = self.bert_model(**inputs)
-                bert_features = outputs.last_hidden_state[:, 0, :].cpu().numpy()
-            return bert_features
-        except Exception:
-            # Best effort: the stacking pipeline still runs on zero features.
-            return np.zeros((1, self.BERT_DIM))
-    @staticmethod
-    def _confidence_band(prob):
-        """Map a probability to "high" (> 0.7), "medium" (> 0.4) or "low"."""
-        if prob > 0.7:
-            return "high"
-        return "medium" if prob > 0.4 else "low"
-    def predict(self, text):
-        """Classify ``text`` and return the labels that pass their thresholds.
-        Returns ``{"error": message}`` when models are not loaded, the text is
-        empty after whitespace normalisation, or any step raises. Otherwise
-        returns ``{"predicted_labels": [...], "max_probability": float}``,
-        where each entry has "label", "probability" and "confidence" keys and
-        the list is sorted by descending probability.
-        """
-        if not self.models_loaded:
-            return {"error": "Models not loaded"}
-        try:
-            # Collapse every whitespace run into a single space.
-            text = re.sub(r'\s+', ' ', text.strip())
-            if not text:
-                return {"error": "Empty text"}
-            # Extract features
-            tfidf_features = self.tfidf_vectorizer.transform([text])
-            bert_features = csr_matrix(self.extract_bert_features(text))
-            feature_sets = [
-                ("TF-IDF", tfidf_features),
-                ("BERT", bert_features),
-                ("TF-IDF+BERT", hstack([tfidf_features, bert_features])),
-            ]
-            # One score column per (feature set, algorithm) pair: 3 x 4 = 12.
-            n_labels = len(self.labels)
-            n_models = len(feature_sets) * len(self.ALGORITHM_NAMES)
-            base_predictions = np.zeros((1, n_labels, n_models))
-            model_idx = 0
-            for feat_name, X_feat in feature_sets:
-                for algo_name in self.ALGORITHM_NAMES:
-                    try:
-                        model_key = f"{feat_name}_{algo_name}"
-                        if model_key in self.trained_base_models:
-                            pred = self.trained_base_models[model_key].predict_proba(X_feat)
-                            base_predictions[0, :, model_idx] = pred[0]
-                        else:
-                            base_predictions[0, :, model_idx] = self.MISSING_MODEL_FILL
-                    except Exception:
-                        # Keep going with a fixed low score for this model.
-                        base_predictions[0, :, model_idx] = self.FAILED_MODEL_FILL
-                    model_idx += 1
-            # Meta-learner prediction on the flattened base-model scores
-            meta_features = base_predictions.reshape(1, -1)
-            meta_pred = self.meta_learner.predict_proba(meta_features)[0]
-            # Simple ensemble: mean over the model axis
-            simple_ensemble = np.mean(base_predictions, axis=2)
-            # Intelligent combination (70% meta + 30% ensemble)
-            final_pred = 0.7 * meta_pred + 0.3 * simple_ensemble[0]
-            # Keep only labels whose score exceeds the threshold at the same index.
-            predicted_labels = [
-                {
-                    "label": self.labels[i],
-                    "probability": float(prob),
-                    "confidence": self._confidence_band(prob),
-                }
-                for i, (prob, threshold) in enumerate(zip(final_pred, self.optimal_thresholds))
-                if prob > threshold
-            ]
-            predicted_labels.sort(key=lambda x: x["probability"], reverse=True)
-            return {
-                "predicted_labels": predicted_labels,
-                "max_probability": float(max(final_pred)) if len(final_pred) > 0 else 0.0
-            }
-        except Exception as e:
-            return {"error": f"Prediction error: {e}"}
-@st.cache_resource
-def load_classifier():
-    """Create the PortugueseClassifier; st.cache_resource wraps this factory."""
-    classifier = PortugueseClassifier()
-    return classifier
-def main():
-    """Render the Streamlit page: header, text input and classification results."""
-    # Title
-    st.markdown('<h1 class="main-title"> Intelligent Stacking</h1>', unsafe_allow_html=True)
-    st.markdown('<p style="text-align: center; color: #666;">Portuguese Administrative Document Classifier</p>', unsafe_allow_html=True)
-    # Load model
-    with st.spinner("Loading model..."):
-        classifier = load_classifier()
-    # Prefer a status recorded on the classifier, if it exposes one, so the
-    # artifacts are not re-read on every rerun; otherwise load now.
-    status = getattr(classifier, "load_status", None) or classifier.load_models()
-    if "❌" in status:
-        st.error(status)
-        st.stop()
-    elif "⚠️" in status:
-        st.warning(status)
-    else:
-        st.success(status)
-    # CSS class and marker per confidence band. The stylesheet names the
-    # middle band "med-conf", so the class cannot be derived from "medium".
-    conf_styles = {
-        "high": ("high-conf", "🟢"),
-        "medium": ("med-conf", "🟡"),
-        "low": ("low-conf", "🔴"),
-    }
-    # Layout
-    col1, col2 = st.columns([1, 1])
-    with col1:
-        st.subheader("📝 Input Text")
-        # Example texts; the keys double as the selectbox options.
-        examples = {
-            "Custom Text": "",
-            "Contract Example": """CONTRATO DE PRESTAÇÃO DE SERVIÇOS
-Entre a Administração Pública Municipal e a empresa contratada, fica estabelecido o presente contrato para prestação de serviços de manutenção e conservação de vias públicas, incluindo reparação de pavimento, limpeza e sinalização viária.
-O valor total do contrato é de €150.000,00, sendo pago em prestações mensais.""",
-            "Environmental Report": """RELATÓRIO DE IMPACTO AMBIENTAL
-A avaliação dos níveis de poluição atmosférica na zona industrial revelou concentrações de partículas PM2.5 acima dos valores recomendados pela legislação europeia.
-Recomenda-se a implementação de medidas de mitigação, incluindo instalação de filtros e criação de zonas verdes.""",
-            "Traffic Regulation": """REGULAMENTO MUNICIPAL DE TRÂNSITO
-Artigo 1º - É proibido o estacionamento de veículos em locais que obstruam a circulação de peões.
-Artigo 2º - O limite de velocidade nas vias urbanas é de 50 km/h, exceto em zonas escolares onde o limite é reduzido para 30 km/h."""
-        }
-        # Example selection
-        example_choice = st.selectbox("Choose an example:", list(examples))
-        # Text input
-        if example_choice == "Custom Text":
-            input_text = st.text_area(
-                "Enter Portuguese administrative text:",
-                height=300,
-                placeholder="Cole aqui o texto do documento..."
-            )
-        else:
-            input_text = st.text_area(
-                f"Example: {example_choice}",
-                value=examples[example_choice],
-                height=300
-            )
-        # Classify button
-        classify_button = st.button("🔍 Classify", type="primary")
-    with col2:
-        st.subheader("📊 Results")
-        if classify_button and input_text.strip():
-            with st.spinner("Classifying..."):
-                result = classifier.predict(input_text)
-                if "error" in result:
-                    st.error(f"Error: {result['error']}")
-                else:
-                    predictions = result.get('predicted_labels', [])
-                    if not predictions:
-                        st.warning("No categories predicted above threshold.")
-                    else:
-                        # Show metrics
-                        col_a, col_b = st.columns(2)
-                        with col_a:
-                            st.metric("Categories", len(predictions))
-                        with col_b:
-                            max_prob = result.get('max_probability', 0)
-                            st.metric("Max Confidence", f"{max_prob:.1%}")
-                        st.markdown("---")
-                        # Show at most the ten highest-probability predictions
-                        for i, pred in enumerate(predictions[:10], 1):
-                            prob = pred['probability']
-                            label = pred['label']
-                            conf_class, conf_emoji = conf_styles[pred['confidence']]
-                            st.markdown(f"""
-                            <div class="prediction-card {conf_class}">
-                                <strong>#{i} {label}</strong> {conf_emoji}
-                                <br><small>Probability: {prob:.1%}</small>
-                            </div>
-                            """, unsafe_allow_html=True)
-        else:
-            st.info("👈 Enter text and click Classify to see results.")
-            # Show info
-            st.markdown("### About Intelligent Stacking")
-            st.markdown("""
-            - **12 Base Models**: 3 feature sets × 4 algorithms
-            - **Meta-Learning**: Advanced ensemble combination
-            - **Features**: TF-IDF + BERTimbau embeddings
-            - **Performance**: F1-macro 0.5486
-            """)
-# Script entry point: build the Streamlit UI only when this file is executed
-# directly, not when it is imported as a module.
-if __name__ == "__main__":
-    main()