anonymous12321 committed on
Commit
2472dc0
·
verified ·
1 Parent(s): 576c1ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -74
app.py CHANGED
@@ -2,18 +2,137 @@
2
  # -*- coding: utf-8 -*-
3
  """
4
  🪶 Council Matters Classifier – PT
5
- Modern animated Gradio interface for Portuguese administrative document classification.
6
  """
7
 
8
  import gradio as gr
 
 
 
 
 
9
 
10
- # --- Dummy classifier (replace with your real model) ---
11
- def classify_text(text):
12
- fake_labels = ["Urbanism", "Finance", "Culture", "Environment", "Education"]
13
- return [label for i, label in enumerate(fake_labels) if hash(text + str(i)) % 2 == 0]
 
 
 
14
 
15
 
16
- # --- Suggestions (10 examples) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  suggestions = [
18
  "A Câmara Municipal aprovou o novo orçamento para 2025, com foco em sustentabilidade.",
19
  "Foi decidido avançar com o projeto de requalificação do centro histórico.",
@@ -27,31 +146,20 @@ suggestions = [
27
  "O plano estratégico inclui medidas para atrair investimento privado.",
28
  ]
29
 
30
- # --- CSS Styling (black + motion) ---
31
  custom_css = """
32
  body {
33
  background-color: #0c0c0c;
34
  font-family: 'Inter', sans-serif;
35
  }
36
-
37
- .gradio-container {
38
- background-color: #0c0c0c;
39
- color: #f1f1f1;
40
- }
41
-
42
- h2, h3 {
43
- text-align: center;
44
- color: #00b4ff;
45
- font-weight: 600;
46
- }
47
-
48
  textarea {
49
  background-color: #181818 !important;
50
  color: #fff !important;
51
  border-radius: 12px !important;
52
  border: 1px solid #333 !important;
53
  }
54
-
55
  button {
56
  background: linear-gradient(90deg, #007aff, #00c3ff);
57
  color: white !important;
@@ -60,21 +168,7 @@ button {
60
  border: none !important;
61
  transition: 0.3s;
62
  }
63
-
64
- button:hover {
65
- opacity: 0.9;
66
- transform: scale(1.04);
67
- }
68
-
69
- .output-class {
70
- display: flex;
71
- flex-wrap: wrap;
72
- gap: 10px;
73
- justify-content: center;
74
- margin-top: 10px;
75
- animation: fadeIn 0.8s ease-in-out;
76
- }
77
-
78
  .output-chip {
79
  background-color: #1a1a1a;
80
  color: #00c3ff;
@@ -84,12 +178,10 @@ button:hover {
84
  border: 1px solid #007aff33;
85
  transition: 0.3s;
86
  }
87
-
88
  .output-chip:hover {
89
  background-color: #007aff33;
90
  transform: scale(1.05);
91
  }
92
-
93
  .suggestion-box {
94
  background-color: #111;
95
  border-radius: 12px;
@@ -100,48 +192,26 @@ button:hover {
100
  justify-content: space-between;
101
  color: #aaa;
102
  margin-top: 25px;
103
- animation: slideUp 0.7s ease-in-out;
104
  }
105
-
106
  .arrow-btn {
107
  background: none;
108
  border: none;
109
  color: #00c3ff;
110
  font-size: 22px;
111
  cursor: pointer;
112
- transition: 0.3s;
113
- }
114
-
115
- .arrow-btn:hover {
116
- color: #00e0ff;
117
- transform: scale(1.2);
118
- }
119
-
120
- @keyframes fadeIn {
121
- from { opacity: 0; transform: scale(0.96); }
122
- to { opacity: 1; transform: scale(1); }
123
- }
124
-
125
- @keyframes slideUp {
126
- from { opacity: 0; transform: translateY(10px); }
127
- to { opacity: 1; transform: translateY(0); }
128
  }
 
129
  """
130
 
131
- # --- JS Navigation logic for suggestions ---
132
  custom_js = f"""
133
  let examples = {suggestions};
134
  let index = 0;
135
-
136
  function updateSuggestion(direction) {{
137
  const el = document.getElementById('suggestion-text');
138
  el.style.opacity = 0;
139
  setTimeout(() => {{
140
- if (direction === 'next') {{
141
- index = (index + 1) % examples.length;
142
- }} else {{
143
- index = (index - 1 + examples.length) % examples.length;
144
- }}
145
  el.innerText = examples[index];
146
  document.getElementById('input-text').value = examples[index];
147
  el.style.opacity = 1;
@@ -149,7 +219,22 @@ function updateSuggestion(direction) {{
149
  }}
150
  """
151
 
152
- # --- UI Layout ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  with gr.Blocks(css=custom_css, js=custom_js, theme="gradio/soft") as demo:
154
  gr.Markdown("## 🏛️ **Council Matters Classifier – PT**")
155
  gr.Markdown("### Enter Portuguese administrative text below:")
@@ -162,30 +247,21 @@ with gr.Blocks(css=custom_css, js=custom_js, theme="gradio/soft") as demo:
162
  )
163
 
164
  classify_btn = gr.Button("Classify")
165
-
166
  output = gr.HTML(label="Predicted Topics")
167
 
168
- def classify_display(text):
169
- labels = classify_text(text)
170
- if not labels:
171
- return "<div class='output-class'><span style='color:#777;'>No topics detected.</span></div>"
172
- chips = "".join([f"<span class='output-chip'>{lbl}</span>" for lbl in labels])
173
- return f"<div class='output-class'>{chips}</div>"
174
-
175
  classify_btn.click(fn=classify_display, inputs=input_text, outputs=output)
176
 
177
- # Suggestion carousel
178
  gr.Markdown("### 💡 Suggestions")
179
  gr.HTML("""
180
  <div class='suggestion-box'>
181
  <button class='arrow-btn' onclick="updateSuggestion('prev')">⟨</button>
182
- <span id='suggestion-text' style='flex: 1; text-align: center; padding: 0 10px; transition: opacity 0.3s;'>
183
  A Câmara Municipal aprovou o novo orçamento para 2025, com foco em sustentabilidade.
184
  </span>
185
  <button class='arrow-btn' onclick="updateSuggestion('next')">⟩</button>
186
  </div>
187
  """)
188
 
189
- # --- Launch ---
190
  if __name__ == "__main__":
191
  demo.launch()
 
2
  # -*- coding: utf-8 -*-
3
  """
4
  🪶 Council Matters Classifier – PT
5
+ Modern Gradio interface with animated dark theme for Portuguese administrative document classification.
6
  """
7
 
8
  import gradio as gr
9
+ import numpy as np
10
+ import joblib
11
+ import re
12
+ from pathlib import Path
13
+ from scipy.sparse import hstack, csr_matrix
14
 
15
+ # Optional PyTorch
16
+ try:
17
+ import torch
18
+ from transformers import AutoTokenizer, AutoModel
19
+ TORCH_AVAILABLE = True
20
+ except ImportError:
21
+ TORCH_AVAILABLE = False
22
 
23
 
24
+ # ---------------- Model Definition ----------------
25
class PortugueseClassifier:
    """Multi-label text classifier built from a stack of pre-trained models.

    Artifacts are read from the ``models`` directory: a label encoder, a
    TF-IDF vectorizer, a mapping of base models keyed as
    ``"<feature set>_<algorithm>"``, a meta learner and an array of
    per-label decision thresholds.  When PyTorch/transformers are
    importable, a Portuguese BERT model supplies an extra feature vector;
    otherwise a zero vector is used in its place.

    ``models_loaded`` tells whether loading succeeded; ``predict`` returns
    an ``"Error"`` placeholder while it is ``False``.
    """

    # Algorithm suffixes combined with each feature-set name to form the
    # keys looked up in ``trained_base_models``.
    ALGORITHM_NAMES = ("LogReg_C1", "LogReg_C05", "GradBoost", "RandomForest")
    BERT_MODEL_NAME = "neuralmind/bert-base-portuguese-cased"
    # Width of the zero vector used when BERT features are unavailable.
    BERT_EMBEDDING_DIM = 768
    # Blend between the meta learner output and the plain base-model mean.
    META_WEIGHT = 0.7
    ENSEMBLE_WEIGHT = 0.3

    def __init__(self):
        """Initialise empty state and immediately try to load all artifacts."""
        self.model_path = Path("models")
        self.labels = None
        self.models_loaded = False
        self.tfidf_vectorizer = None
        self.meta_learner = None
        self.mlb = None
        self.optimal_thresholds = None
        self.trained_base_models = None

        # Always define the BERT attributes so later checks never depend on
        # whether the optional torch import succeeded.
        self.bert_tokenizer = None
        self.bert_model = None
        self.device = None
        if TORCH_AVAILABLE:
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.load_models()

    def load_models(self):
        """Load every artifact from ``self.model_path``.

        Sets ``models_loaded`` to ``True`` only when all loads succeed.  Any
        failure is printed and leaves ``models_loaded`` as ``False`` so the
        app can still start and report the problem through ``predict``.
        """
        try:
            mlb_path = self.model_path / "int_stacking_mlb_encoder.joblib"
            tfidf_path = self.model_path / "int_stacking_tfidf_vectorizer.joblib"
            meta_path = self.model_path / "int_stacking_meta_learner.joblib"
            thresh_path = self.model_path / "int_stacking_optimal_thresholds.npy"
            base_path = self.model_path / "int_stacking_base_models.joblib"

            self.mlb = joblib.load(mlb_path)
            self.labels = self.mlb.classes_.tolist()
            self.tfidf_vectorizer = joblib.load(tfidf_path)
            self.meta_learner = joblib.load(meta_path)
            self.optimal_thresholds = np.load(thresh_path)
            self.trained_base_models = joblib.load(base_path)

            if TORCH_AVAILABLE:
                self.bert_tokenizer = AutoTokenizer.from_pretrained(self.BERT_MODEL_NAME)
                self.bert_model = AutoModel.from_pretrained(self.BERT_MODEL_NAME)
                self.bert_model.eval()
                self.bert_model = self.bert_model.to(self.device)

            self.models_loaded = True
        except Exception as e:
            # Deliberate best-effort boundary: the UI must come up even when
            # the artifacts are missing.
            print(f"❌ Error loading models: {str(e)}")

    def extract_bert_features(self, text):
        """Return a ``(1, dim)`` array with the first-token BERT embedding.

        Falls back to a ``(1, BERT_EMBEDDING_DIM)`` zero array when no BERT
        model/tokenizer is loaded or when extraction fails; failures are
        printed instead of being silently discarded.
        """
        fallback = np.zeros((1, self.BERT_EMBEDDING_DIM))
        model = getattr(self, "bert_model", None)
        tokenizer = getattr(self, "bert_tokenizer", None)
        if model is None or tokenizer is None:
            return fallback
        try:
            inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
            inputs = {k: v.to(self.device) for k, v in inputs.items()}
            with torch.no_grad():
                outputs = model(**inputs)
            # Position 0 of the last hidden state is used as the text vector.
            return outputs.last_hidden_state[:, 0, :].cpu().numpy()
        except Exception as e:
            print(f"⚠️ BERT feature extraction failed: {e}")
            return fallback

    @staticmethod
    def _confidence(prob):
        """Map a probability to the ``"high"``/``"medium"``/``"low"`` bucket."""
        if prob > 0.7:
            return "high"
        if prob > 0.4:
            return "medium"
        return "low"

    def _report_base_model_issue(self, model_key, reason):
        """Print a warning about an unusable base model, once per key."""
        reported = self.__dict__.setdefault("_reported_base_models", set())
        if model_key not in reported:
            reported.add(model_key)
            print(f"⚠️ Base model '{model_key}' unavailable ({reason}); using zero predictions.")

    def predict(self, text):
        """Classify *text* and return its predicted labels.

        Returns a list of dicts with keys ``"label"``, ``"probability"`` and
        ``"confidence"``, sorted by descending probability.  Labels whose
        blended probability exceeds their threshold are returned; if none
        does, the single most probable label is returned instead.  A
        one-element placeholder list is returned when the models are not
        loaded (``"Error"``) or the text is empty/``None`` (``"Empty text"``).

        Base models that are missing or raise contribute zeros, so the same
        input always yields the same output.
        """
        if not self.models_loaded:
            return [{"label": "Error", "probability": 0.0, "confidence": "low"}]

        # ``text or ""`` guards against a None value coming from the UI.
        text = re.sub(r'\s+', ' ', (text or "").strip())
        if not text:
            return [{"label": "Empty text", "probability": 0.0, "confidence": "low"}]

        tfidf_features = self.tfidf_vectorizer.transform([text])
        bert_features = self.extract_bert_features(text)
        combined_features = hstack([tfidf_features, csr_matrix(bert_features)])
        feature_sets = [
            ("TF-IDF", tfidf_features),
            ("BERT", csr_matrix(bert_features)),
            ("TF-IDF+BERT", combined_features),
        ]

        # One slot per (feature set, algorithm) pair, derived from the lists
        # themselves so the array can never drift out of sync with them.
        n_labels = len(self.labels)
        n_models = len(feature_sets) * len(self.ALGORITHM_NAMES)
        base_predictions = np.zeros((1, n_labels, n_models))

        model_idx = 0
        for feat_name, X_feat in feature_sets:
            for algo_name in self.ALGORITHM_NAMES:
                model_key = f"{feat_name}_{algo_name}"
                try:
                    if model_key in self.trained_base_models:
                        pred = self.trained_base_models[model_key].predict_proba(X_feat)
                        base_predictions[0, :, model_idx] = pred[0]
                    else:
                        # Slot stays at zero: deterministic, unlike random noise.
                        self._report_base_model_issue(model_key, "not found")
                except Exception as e:
                    base_predictions[0, :, model_idx] = 0.0
                    self._report_base_model_issue(model_key, e)
                model_idx += 1

        meta_features = base_predictions.reshape(1, -1)
        meta_pred = self.meta_learner.predict_proba(meta_features)[0]
        simple_ensemble = np.mean(base_predictions, axis=2)
        final_pred = self.META_WEIGHT * meta_pred + self.ENSEMBLE_WEIGHT * simple_ensemble[0]

        predicted_labels = [
            {"label": self.labels[i], "probability": float(prob), "confidence": self._confidence(prob)}
            for i, (prob, threshold) in enumerate(zip(final_pred, self.optimal_thresholds))
            if prob > threshold
        ]

        if not predicted_labels:
            # Nothing cleared its threshold: fall back to the top-scoring label.
            max_idx = int(np.argmax(final_pred))
            prob = final_pred[max_idx]
            predicted_labels.append(
                {"label": self.labels[max_idx], "probability": float(prob), "confidence": self._confidence(prob)}
            )

        predicted_labels.sort(key=lambda x: x["probability"], reverse=True)
        return predicted_labels
129
+
130
+
131
+ # ---------------- Load Classifier ----------------
132
+ classifier = PortugueseClassifier()
133
+
134
+
135
+ # ---------------- Gradio UI ----------------
136
  suggestions = [
137
  "A Câmara Municipal aprovou o novo orçamento para 2025, com foco em sustentabilidade.",
138
  "Foi decidido avançar com o projeto de requalificação do centro histórico.",
 
146
  "O plano estratégico inclui medidas para atrair investimento privado.",
147
  ]
148
 
149
+ # --- CSS & JS (modern dark theme with carousel) ---
150
  custom_css = """
151
  body {
152
  background-color: #0c0c0c;
153
  font-family: 'Inter', sans-serif;
154
  }
155
+ .gradio-container { background-color: #0c0c0c; color: #f1f1f1; }
156
+ h2, h3 { text-align: center; color: #00b4ff; font-weight: 600; }
 
 
 
 
 
 
 
 
 
 
157
  textarea {
158
  background-color: #181818 !important;
159
  color: #fff !important;
160
  border-radius: 12px !important;
161
  border: 1px solid #333 !important;
162
  }
 
163
  button {
164
  background: linear-gradient(90deg, #007aff, #00c3ff);
165
  color: white !important;
 
168
  border: none !important;
169
  transition: 0.3s;
170
  }
171
+ button:hover { opacity: 0.9; transform: scale(1.04); }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  .output-chip {
173
  background-color: #1a1a1a;
174
  color: #00c3ff;
 
178
  border: 1px solid #007aff33;
179
  transition: 0.3s;
180
  }
 
181
  .output-chip:hover {
182
  background-color: #007aff33;
183
  transform: scale(1.05);
184
  }
 
185
  .suggestion-box {
186
  background-color: #111;
187
  border-radius: 12px;
 
192
  justify-content: space-between;
193
  color: #aaa;
194
  margin-top: 25px;
 
195
  }
 
196
  .arrow-btn {
197
  background: none;
198
  border: none;
199
  color: #00c3ff;
200
  font-size: 22px;
201
  cursor: pointer;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  }
203
+ .arrow-btn:hover { color: #00e0ff; transform: scale(1.2); }
204
  """
205
 
 
206
  custom_js = f"""
207
  let examples = {suggestions};
208
  let index = 0;
 
209
  function updateSuggestion(direction) {{
210
  const el = document.getElementById('suggestion-text');
211
  el.style.opacity = 0;
212
  setTimeout(() => {{
213
+ if (direction === 'next') index = (index + 1) % examples.length;
214
+ else index = (index - 1 + examples.length) % examples.length;
 
 
 
215
  el.innerText = examples[index];
216
  document.getElementById('input-text').value = examples[index];
217
  el.style.opacity = 1;
 
219
  }}
220
  """
221
 
222
+ # --- Gradio Interface ---
223
def classify_display(text):
    """Render the classifier's predictions for *text* as an HTML chip list.

    Calls ``classifier.predict(text)`` and turns at most the first ten
    results into ``<span class='output-chip'>`` elements showing the label
    and its probability as a percentage, coloured by confidence bucket.
    Returns a grey "No topics detected." message when there are no results.
    """
    # Local import keeps this block self-contained; labels are escaped so a
    # class name containing '&', '<' or '>' cannot break the markup.
    from html import escape

    preds = classifier.predict(text)
    if not preds:
        return "<div style='color:#777;text-align:center'>No topics detected.</div>"

    colors = {"high": "#00ff88", "medium": "#ffd966", "low": "#ff6666"}
    chips = []
    for p in preds[:10]:
        # Unknown confidence values fall back to the "low" colour.
        color = colors.get(p["confidence"], colors["low"])
        label = escape(str(p["label"]))
        chips.append(
            f"<span class='output-chip' style='border-color:{color}80;color:{color}'>"
            f"{label} ({p['probability']:.0%})</span>"
        )
    return (
        "<div style='display:flex;flex-wrap:wrap;gap:10px;justify-content:center;margin-top:10px'>"
        f"{''.join(chips)}</div>"
    )
236
+
237
+
238
  with gr.Blocks(css=custom_css, js=custom_js, theme="gradio/soft") as demo:
239
  gr.Markdown("## 🏛️ **Council Matters Classifier – PT**")
240
  gr.Markdown("### Enter Portuguese administrative text below:")
 
247
  )
248
 
249
  classify_btn = gr.Button("Classify")
 
250
  output = gr.HTML(label="Predicted Topics")
251
 
 
 
 
 
 
 
 
252
  classify_btn.click(fn=classify_display, inputs=input_text, outputs=output)
253
 
 
254
  gr.Markdown("### 💡 Suggestions")
255
  gr.HTML("""
256
  <div class='suggestion-box'>
257
  <button class='arrow-btn' onclick="updateSuggestion('prev')">⟨</button>
258
+ <span id='suggestion-text' style='flex:1;text-align:center;padding:0 10px;transition:opacity 0.3s'>
259
  A Câmara Municipal aprovou o novo orçamento para 2025, com foco em sustentabilidade.
260
  </span>
261
  <button class='arrow-btn' onclick="updateSuggestion('next')">⟩</button>
262
  </div>
263
  """)
264
 
265
+ # ---------------- Launch ----------------
266
  if __name__ == "__main__":
267
  demo.launch()