jmisidro committed on
Commit
a46fc5d
·
verified ·
1 Parent(s): a638b4b

Upload 4 files

Browse files
Files changed (3) hide show
  1. README.md +5 -5
  2. requirements.txt +3 -1
  3. src/streamlit_app.py +607 -38
README.md CHANGED
@@ -1,14 +1,14 @@
1
  ---
2
  title: Metadata Identification Demo
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
  sdk: docker
7
  app_port: 8501
8
  tags:
9
  - streamlit
10
  pinned: false
11
- short_description: Streamlit template space
12
  license: cc-by-nc-nd-4.0
13
  ---
14
 
@@ -17,4 +17,4 @@ license: cc-by-nc-nd-4.0
17
  Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
18
 
19
  If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
- forums](https://discuss.streamlit.io).
 
1
  ---
2
  title: Metadata Identification Demo
3
+ emoji: 📚
4
+ colorFrom: purple
5
+ colorTo: purple
6
  sdk: docker
7
  app_port: 8501
8
  tags:
9
  - streamlit
10
  pinned: false
11
+ short_description: Structured Metadata Extraction from Portuguese Text
12
  license: cc-by-nc-nd-4.0
13
  ---
14
 
 
17
  Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
18
 
19
  If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
+ forums](https://discuss.streamlit.io).
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  altair
2
  pandas
3
- streamlit
 
 
 
1
  altair
2
  pandas
3
+ streamlit
4
+ transformers
5
+ torch
src/streamlit_app.py CHANGED
@@ -1,40 +1,609 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import json
3
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
4
+ import torch
5
 
6
+ # Page configuration
7
+ st.set_page_config(
8
+ page_title="MiNER - Stage 2: Metadata Extraction",
9
+ page_icon="🏷️",
10
+ layout="wide"
11
+ )
12
+
13
+ # Custom CSS for improved appearance
14
+ st.markdown("""
15
+ <style>
16
+ .main-header {
17
+ font-size: 2.5rem;
18
+ color: #4A90E2;
19
+ text-align: center;
20
+ margin-bottom: 0.5rem;
21
+ }
22
+ .sub-header {
23
+ text-align: center;
24
+ color: #666;
25
+ margin-bottom: 2rem;
26
+ }
27
+ .stButton>button {
28
+ width: 100%;
29
+ background-color: #FF6B6B;
30
+ color: white;
31
+ font-size: 1.1rem;
32
+ padding: 0.75rem;
33
+ border-radius: 8px;
34
+ border: none;
35
+ }
36
+ .stButton>button:hover {
37
+ background-color: #FF5252;
38
+ }
39
+ .entity-group {
40
+ padding: 0.5rem 0;
41
+ margin-bottom: 0.5rem;
42
+ }
43
+ .entity-group-title {
44
+ font-weight: 600;
45
+ color: #555;
46
+ margin-bottom: 0.4rem;
47
+ font-size: 0.8rem;
48
+ text-transform: uppercase;
49
+ letter-spacing: 0.5px;
50
+ }
51
+ .entity-badge {
52
+ display: inline-block;
53
+ padding: 0.3rem 0.7rem;
54
+ border-radius: 5px;
55
+ margin: 0.2rem 0.3rem 0.2rem 0;
56
+ font-size: 0.9rem;
57
+ font-weight: 500;
58
+ box-shadow: 0 1px 3px rgba(0,0,0,0.12);
59
+ }
60
+ .example-section {
61
+ background-color: #2d3436;
62
+ color: #dfe6e9;
63
+ padding: 1rem;
64
+ border-radius: 8px;
65
+ margin: 1rem 0;
66
+ }
67
+ </style>
68
+ """, unsafe_allow_html=True)
69
+
70
+ # ==================== MODEL LOADING ====================
71
+ MODEL_OPTIONS = {
72
+ "Metadata Baseline": "liaad/Citilink-BERTimbau-large-metadata-pt-baseline",
73
+ "Metadata Augmented": "liaad/Citilink-BERTimbau-large-Metadata-pt",
74
+ }
75
+
76
@st.cache_resource
def _load_model_cached(model_name: str):
    """Load the tokenizer and model for ``model_name`` (cached per model name).

    Any loading error propagates to the caller instead of being turned into a
    return value, so that a failed attempt is not stored by the resource cache
    and the next call tries again.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForTokenClassification.from_pretrained(model_name)
    model.eval()  # inference mode
    return tokenizer, model


def load_model(model_name: str):
    """Return ``(tokenizer, model)`` for ``model_name``, or ``(None, None)`` on failure.

    Successful loads are cached by ``_load_model_cached``. Failures are shown
    to the user with ``st.error`` and reported as ``(None, None)``, but are
    deliberately NOT cached: previously the ``(None, None)`` result itself was
    cached, so one transient download error disabled the model until the cache
    was cleared.
    """
    try:
        return _load_model_cached(model_name)
    except Exception as e:  # UI boundary: report any loading problem to the user
        st.error(f"Error loading model: {e}")
        return None, None
87
+
88
+ # ==================== ENTITY EXTRACTION ====================
89
def extract_entities(text, tokenizer, model):
    """
    Extract labelled entity spans from ``text`` with a token-classification model.

    Args:
        text: Raw document text. Empty or whitespace-only text yields ``{}``.
        tokenizer: Callable returning an encoding that provides
            ``"input_ids"``, ``"attention_mask"``, ``"offset_mapping"`` and a
            ``word_ids(batch_index=...)`` method (presumably a Hugging Face
            fast tokenizer -- confirm against the caller).
        model: Callable returning an object with ``.logits`` and exposing
            ``model.config.id2label`` with BIO-style labels (``B-X`` / ``I-X`` / ``O``).

    Returns:
        dict mapping each entity label (without the ``B-``/``I-`` prefix) to a
        list of ``{"text", "start", "end"}`` dicts, where ``start``/``end`` are
        character offsets into ``text``. For labels starting with ``HORARIO``,
        ``DATA``, ``NUMERO-ATA``, ``LOCAL`` or ``TIPO-REUNIAO`` only the
        earliest occurrence is kept.
    """
    if not text or not text.strip():
        return {}

    # Tokenization + character offsets. Input is truncated to 512 tokens, so
    # anything beyond that is not analysed.
    encoding = tokenizer(
        text,
        return_tensors="pt",
        return_offsets_mapping=True,
        truncation=True,
        max_length=512,
    )

    offsets = encoding["offset_mapping"][0].tolist()
    word_ids = encoding.word_ids(batch_index=0)

    inputs = {
        "input_ids": encoding["input_ids"],
        "attention_mask": encoding["attention_mask"],
    }

    # Prediction
    with torch.no_grad():
        outputs = model(**inputs)

    pred_ids = torch.argmax(outputs.logits, dim=2)[0].tolist()
    pred_labels = [model.config.id2label[i] for i in pred_ids]

    # Entity reconstruction: only the FIRST subtoken of each word decides the
    # word's label; later subtokens merely extend the span.
    entities = []
    current = None
    prev_word_idx = None
    # True when the word being processed was added to ``current``. Without
    # this flag, the trailing subtokens of a word whose label was rejected
    # (stray ``I-`` of another type) would still stretch the open entity over
    # that word.
    word_in_entity = False

    for word_idx, (start, end), label in zip(word_ids, offsets, pred_labels):
        # Ignore special tokens (CLS, SEP, PAD)
        if word_idx is None:
            continue

        # Subsequent subtoken of the same word: ignore its prediction, but
        # extend the entity if this word is part of it.
        if word_idx == prev_word_idx:
            if current and word_in_entity:
                current["end"] = end
            continue

        prev_word_idx = word_idx
        word_in_entity = False

        if label.startswith("B-"):
            # Close previous entity if it exists, then start a new one
            if current:
                entities.append(current)
            current = {"label": label[2:], "start": start, "end": end}
            word_in_entity = True

        elif label.startswith("I-"):
            if current and current["label"] == label[2:]:
                # Continue entity correctly
                current["end"] = end
                word_in_entity = True
            # else: stray I- label -> ignored; it neither creates an entity
            # nor closes the one that is currently open.

        else:  # label == "O"
            if current:
                entities.append(current)
            current = None

    # Close last entity if it exists
    if current:
        entities.append(current)

    # Group entities by label, attaching the covered text
    grouped_entities = {}
    for ent in entities:
        grouped_entities.setdefault(ent["label"], []).append({
            "text": text[ent["start"]:ent["end"]],
            "start": ent["start"],
            "end": ent["end"],
        })

    # ==================== POST-PROCESSING ====================
    # For certain metadata, keep only the first occurrence (smallest start offset)
    unique_prefixes = ("HORARIO", "DATA", "NUMERO-ATA", "LOCAL", "TIPO-REUNIAO")
    for label, spans in grouped_entities.items():
        if label.startswith(unique_prefixes):
            grouped_entities[label] = [min(spans, key=lambda span: span["start"])]

    return grouped_entities
205
+
206
+ # ==================== UTILITY FUNCTIONS ====================
207
def translate_entity_label(label):
    """Translate a Portuguese entity label into an ``(emoji, English name)`` pair.

    Lookup order:
      1. Exact match in the translation table.
      2. Otherwise the label is shortened one hyphen-component at a time and
         compared with the table keys that equal or extend that prefix. If
         all those entries are identical, that translation is returned; if
         they only share the emoji, ``(emoji, label)`` is returned; if they
         disagree, ``("", label)`` is returned.
      3. ``("", label)`` when nothing in the table is related to the label.

    Step 2 replaces the previous fallback, which returned the first entry
    sharing only the first component -- so every unknown ``PARTICIPANTE-*``
    label was displayed as "PRESIDENT - PRESENT".
    """
    translations = {
        "NUMERO-ATA": ("📋", "MINUTES NUMBER"),
        "DATA": ("📅", "DATE"),
        "LOCAL": ("📍", "LOCATION"),
        "TIPO-REUNIAO": ("📌", "MEETING TYPE"),
        "HORARIO-INICIO": ("🕐", "BEGIN TIME"),
        "HORARIO-FIM": ("🕐", "END TIME"),
        "PARTICIPANTE-PRESIDENTE-PRESENTE": ("👔", "PRESIDENT - PRESENT"),
        "PARTICIPANTE-PRESIDENTE-AUSENTE": ("👔", "PRESIDENT - ABSENT"),
        "PARTICIPANTE-PRESIDENTE-SUBSTITUIDO": ("👔", "PRESIDENT - SUBSTITUTED"),
        "PARTICIPANTE-VEREADOR-PRESENTE": ("👥", "COUNCILOR - PRESENT"),
        "PARTICIPANTE-VEREADOR-AUSENTE": ("👥", "COUNCILOR - ABSENT"),
        "PARTICIPANTE-VEREADOR-SUBSTITUIDO": ("👥", "COUNCILOR - SUBSTITUTED"),
        "PARTICIPANTE-FUNCIONARIO-PRESENTE": ("🗂️", "STAFF - PRESENT"),
        "PARTICIPANTE-FUNCIONARIO-AUSENTE": ("🗂️", "STAFF - ABSENT"),
        "PARTICIPANTE-FUNCIONARIO-SUBSTITUIDO": ("🗂️", "STAFF - SUBSTITUTED"),
    }

    # Check if exact translation exists
    if label in translations:
        return translations[label]

    # Fallback: find the longest leading part of the label that the table knows
    parts = label.split("-")
    for depth in range(len(parts), 0, -1):
        prefix = "-".join(parts[:depth])
        related = {
            value
            for key, value in translations.items()
            if key == prefix or key.startswith(prefix + "-")
        }
        if not related:
            continue
        if len(related) == 1:
            return related.pop()
        # Several different translations match: keep only what they agree on
        emojis = {emoji for emoji, _ in related}
        if len(emojis) == 1:
            return (emojis.pop(), label)
        return ("", label)

    return ("", label)
237
+
238
def get_entity_style(label):
    """Return ``(color, border_style, border_width)`` for an entity label.

    Participant labels (president / councilor / staff) get a role colour and
    a border style that encodes attendance: solid = present, dashed = absent,
    dotted = substituted. Every other label gets the colour of the longest
    table key that equals the label or is a hyphen-delimited prefix of it,
    falling back to a neutral grey. The border width is always ``"2px"``.

    The previous lookup used only the first hyphen component of the label,
    so the multi-component keys ``NUMERO-ATA`` and ``TIPO-REUNIAO`` could
    never match and always rendered grey.
    """
    # Base colors
    colors = {
        "NUMERO-ATA": "#E74C3C",                # Red
        "DATA": "#16A085",                      # Teal
        "LOCAL": "#2980B9",                     # Blue
        "TIPO-REUNIAO": "#E67E22",              # Orange
        "HORARIO": "#F39C12",                   # Yellow-orange
        "PARTICIPANTE-PRESIDENTE": "#8E44AD",   # Purple for President
        "PARTICIPANTE-VEREADOR": "#27AE60",     # Green for Councilors
        "PARTICIPANTE-FUNCIONARIO": "#1A6B8A",  # Steel blue for Staff
    }

    # Border styles for different attendance states (checked in this order)
    border_styles = {
        "PRESENTE": "solid",
        "AUSENTE": "dashed",
        "SUBSTITUIDO": "dotted",
    }

    default_color = "#7F8C8D"
    border_style = "solid"
    border_width = "2px"

    # Participants: role decides the colour, attendance decides the border
    participant_roles = (
        "PARTICIPANTE-PRESIDENTE",
        "PARTICIPANTE-VEREADOR",
        "PARTICIPANTE-FUNCIONARIO",
    )
    for role in participant_roles:
        if role in label:
            for status, style in border_styles.items():
                if status in label:
                    border_style = style
                    break
            return colors[role], border_style, border_width

    # Other entities: most specific colour key matching the label
    matching_keys = [
        key for key in colors
        if label == key or label.startswith(key + "-")
    ]
    color = colors[max(matching_keys, key=len)] if matching_keys else default_color

    return color, border_style, border_width
298
+
299
def display_entities_compact(entities_dict):
    """Render extracted entities as coloured badges, one group per label.

    Args:
        entities_dict: Mapping of entity label to a list of dicts that each
            contain at least a ``"text"`` key (the shape returned by
            ``extract_entities``).

    Groups are shown in sorted label order, each with a translated title and
    one badge per entity. Entity text originates from user-supplied input and
    is rendered with ``unsafe_allow_html=True``, so it is HTML-escaped before
    being interpolated into the markup.
    """
    import html  # local import: only needed for escaping in this function

    if not entities_dict:
        st.info("No entities detected.")
        return

    for label, entities in sorted(entities_dict.items()):
        emoji, translated_label = translate_entity_label(label)
        color, border_style, border_width = get_entity_style(label)

        # Group title with emoji (the label may be an untranslated raw label)
        title_html = (
            f'<div class="entity-group-title">{emoji} '
            f'{html.escape(translated_label)}</div>'
        )
        st.markdown(title_html, unsafe_allow_html=True)

        # Show all entities of this type in compact badges
        badge_style = (
            f"background-color: {color}; color: white; "
            f"border: {border_width} {border_style} rgba(0,0,0,0.2);"
        )
        badges = "".join(
            f'<span class="entity-badge" style="{badge_style}">'
            f'{html.escape(ent["text"])}</span>'
            for ent in entities
        )
        st.markdown(f'<div class="entity-group">{badges}</div>', unsafe_allow_html=True)
321
+
322
+ # ==================== SIDEBAR ====================
323
+ with st.sidebar:
324
+ st.markdown("### ⚙️ Configuration")
325
+
326
+ # Model selection
327
+ selected_model_label = st.selectbox(
328
+ "🤖 Select Model",
329
+ options=list(MODEL_OPTIONS.keys())
330
+ )
331
+ selected_model_name = MODEL_OPTIONS[selected_model_label]
332
+
333
+ st.markdown("---")
334
+
335
+ st.markdown("Choose an example or enter your own text:")
336
+
337
+ selected_example = st.selectbox(
338
+ "Select an example",
339
+ ["Custom Text", "Example 1 - Alandroal", "Example 2 - Campo Maior", "Example 3 - Covilhã", "Example 4 - Fundão", "Example 5 - Guimarães", "Example 6 - Porto"]
340
+ )
341
+
342
+ st.markdown("---")
343
+
344
+ # About Section
345
+ st.markdown("### 📋 About")
346
+ st.markdown(f"""
347
+ **MiNER Stage 2** uses Named Entity Recognition models to automatically extract metadata from meeting minutes.
348
+
349
+ - **Model**: {selected_model_label}
350
+ - **Languages**: Portuguese
351
+ - **Method**: Token Classification (NER) with BIO tagging
352
+ """)
353
+
354
+ st.markdown("---")
355
+
356
+ # Resources Section
357
+ st.markdown("### 🔗 Resources")
358
+ st.markdown(f"""
359
+ - [GitHub Repository](https://github.com/LIAAD/MiNER)
360
+ - [Model on HuggingFace](https://huggingface.co/{selected_model_name})
361
+ """)
362
+
363
+ # ==================== MAIN CONTENT ====================
364
+
365
+ # Header
366
+ st.markdown('<div class="main-header">🏷️ MiNER — Stage 2: Metadata Extraction Demo</div>', unsafe_allow_html=True)
367
+ st.markdown('<div class="sub-header">Automatic extraction of structured metadata from municipal meeting minutes</div>', unsafe_allow_html=True)
368
+
369
+ # ==================== HOW IT WORKS (MOVED TO TOP) ====================
370
+ with st.expander("🎯 How It Works", expanded=False):
371
+ st.markdown("""
372
+ The model analyzes the **meeting minutes** to automatically extract **structured metadata** using a *Named Entity Recognition (NER)* approach.
373
+
374
+ **What information is extracted:**
375
+
376
+ Each token in the document is classified, identifying information such as:
377
+ - 📅 **Date**
378
+ - 🕐 **Start / End time**
379
+ - 📍 **Location**
380
+ - 📋 **Minute ID**
381
+ - 📌 **Meeting type**
382
+ - 👔 **President** (present / absent / substituted)
383
+ - 👥 **Councilors** (present / absent / substituted)
384
+
385
+ **Technical approach:**
386
+
387
+ The model uses the **BIO tagging scheme** (*Begin, Inside, Outside*) to mark entity boundaries, and the final spans are reconstructed from token-level predictions.
388
+
389
+ ---
390
+
391
+ ### 📖 Complete Example
392
+
393
+ **Input Document:**
394
+ """)
395
+
396
+ # Input example in code block
397
+ st.code("""CÂMARA MUNICIPAL DE ALANDROAL
398
+ ATA N.º 21
399
+ REUNIÃO ORDINÁRIA 11/09/2024
400
+ Presidiu o Senhor João Maria Aranha Grilo, Presidente da Câmara Municipal de Alandroal
401
+ Vereadores Paulo Jorge da Silva Gonçalves
402
+ Fernanda Manuela Brites Romão
403
+ Elisabete de Jesus dos Passos Galhardas
404
+ Faltou João Carlos Camões Roma Balsante
405
+ Secretariou a Reunião ****************************************
406
+ No Edifício Sede do Município de Alandroal, o Senhor Presidente da Câmara Municipal, João Maria Aranha Grilo, declarou aberta a reunião, eram 15 horas e 30 minutos.""", language=None)
407
+
408
+ st.markdown("""
409
+ **Expected Output (Extracted Entities):**
410
+
411
+ ```
412
+ 📅 DATE
413
+ • 11/09/2024
414
+
415
+ 🕐 TIME
416
+ • 15 horas e 30 minutos
417
+
418
+ 📍 LOCATION
419
+ • No Edifício Sede do Município de Alandroal
420
+
421
+ 📋 MINUTES NUMBER
422
+ • 21
423
+
424
+ 📌 MEETING TYPE
425
+ • ORDINÁRIA
426
+
427
+ 👔 PRESIDENT - PRESENT
428
+ • João Maria Aranha Grilo
429
+
430
+ 👥 COUNCILOR - PRESENT
431
+ • Paulo Jorge da Silva Gonçalves
432
+ • Fernanda Manuela Brites Romão
433
+ • Elisabete de Jesus dos Passos Galhardas
434
+
435
+ 👥 COUNCILOR - ABSENT
436
+ • João Carlos Camões Roma Balsante
437
+ ```
438
+ """)
439
+
440
+ # Load model
441
+ with st.spinner(f"Loading model **{selected_model_label}**..."):
442
+ tokenizer, model = load_model(selected_model_name)
443
+
444
+ if tokenizer is None or model is None:
445
+ st.error("❌ Failed to load model. Please check if the model path is correct.")
446
+ st.stop()
447
+
448
+ # Main layout with two columns
449
+ col1, col2 = st.columns([1, 1])
450
+
451
+ # ==================== LEFT COLUMN - INPUT ====================
452
+ with col1:
453
+ st.markdown("### 📝 Input Document")
454
+
455
+ if selected_example == "Custom Text":
456
+ example_text = ""
457
+ elif selected_example == "Example 1 - Alandroal":
458
+ example_text = """CÂMARA MUNICIPAL DE ALANDROAL
459
+ ATA N.º 21
460
+ REUNIÃO ORDINÁRIA 11/09/2024
461
+ Presidiu o Senhor João Maria Aranha Grilo, Presidente da Câmara Municipal de Alandroal
462
+ Vereadores Paulo Jorge da Silva Gonçalves
463
+ Fernanda Manuela Brites Romão
464
+ Elisabete de Jesus dos Passos Galhardas
465
+ Faltou João Carlos Camões Roma Balsante
466
+ Secretariou a Reunião ****************************************
467
+ No Edifício Sede do Município de Alandroal, o Senhor Presidente da Câmara Municipal, João Maria Aranha Grilo, declarou aberta a reunião, eram 15 horas e 30 minutos."""
468
+ elif selected_example == "Example 2 - Campo Maior":
469
+ example_text = """ATA Nº 1 REUNIÃO ORDINÁRIA DA CÂMARA MUNICIPAL DE CAMPO MAIOR, REALIZADA EM 5 DE JANEIRO DE 2022.
470
+ Aos cinco dias do mês de janeiro do ano de dois mil e vinte e dois, no Edifício dos Paços do Concelho, nesta Vila, realizou-se, pelas nove horas e trinta minutos, a reunião Ordinária da Câmara Municipal, comparecendo os Excelentíssimos Senhores Luís Fernando Martins Rosinha, Paulo Ivo Sabino Martins de Almeida, Paulo Jorge Furtado Pinheiro, Maria da Encarnação Grifo Silveirinha (videoconferência) e Fátima do Rosário Pingo Vitorino Pereira, respetivamente, Presidente e Vereadores efetivos deste Órgão Autárquico.
471
+ -Verificada a presença dos respectivos membros, o Senhor Presidente declarou aberta a reunião:
472
+ -Estava presente o Chefe **************************************, Dr. *********************************** e a Assistente Técnico **************************************.
473
+ -Depois de todos terem ocupado os seus lugares, o Senhor Presidente declarou aberta a reunião eram nove horas e trinta minutos."""
474
+ elif selected_example == "Example 3 - Covilhã":
475
+ example_text = """-- --
476
+
477
+ -- --
478
+
479
+ CÂMARA MUNICIPAL
480
+
481
+ DA
482
+
483
+ COVILHÃ
484
+
485
+ TEXTO DEFINITIVO DA ATA Nº 02/2023
486
+
487
+ Da reunião ordinária privada realizada no dia 03 de fevereiro de 2023, iniciada às 09:05 horas e concluída às 10:15 horas.
488
+
489
+ ------------------------------------ Sumário: 01 ------------------------------- ---- Abertura 02
490
+
491
+ Período Antes da Ordem do Dia 05
492
+
493
+ Período da Ordem do Dia 06
494
+
495
+ Agenda 06
496
+
497
+ Aprovação de Atas 06
498
+
499
+ Balancete 07
500
+
501
+ Despachos 07
502
+
503
+ DAGCJ 10
504
+
505
+ DFMA 17
506
+
507
+ DOP 19
508
+
509
+ DECAD 29
510
+
511
+ DU 38
512
+
513
+ Aprovação em minuta 42 Votação das deliberações 42 Montante Global de Encargos 42 Encerramento 42
514
+
515
+ ------------------------------------
516
+
517
+ ABERTURA
518
+
519
+ ATA Nº 02/2023
520
+
521
+ Aos três dias do mês de fevereiro do ano de dois mil e vinte e três, na Sala de Reuniões dos Paços do Concelho, na Covilhã, realizou-se a reunião ordinária privada da Câmara Municipal da Covilhã sob a presidência do Senhor Presidente da Câmara, Vítor Manuel Pinheiro Pereira, estando presentes o Senhor Vice-Presidente José Armando Serra dos Reis e os Senhores Vereadores Pedro Miguel Santos Farromba, Maria Regina Gomes Gouveia, Jorge Humberto Martins Simões (em substituição de Ricardo Miguel Correia Leitão Ferreira da Silva), José Miguel Ribeiro Oliveira e Marta Maria Tomaz Gomes Morais Alçada Bom Jesus.
522
+
523
+ A reunião foi secretariada pela Senhora Dr.ª ********************************, Diretora *************************************************************.
524
+
525
+ E, pelas 09:05 horas, o Senhor Presidente da Câmara deu início aos trabalhos da presente reunião com a seguinte Ordem de Trabalhos:"""
526
+ elif selected_example == "Example 4 - Fundão":
527
+ example_text = """ 14/02/2022 ATA DA REUNIÃO DE 14/02/2022 CÂMARA MUNICIPAL
528
+ DO
529
+ FUNDÃO
530
+ Texto definitivo da ata n.º 2/2022 da reunião ordinária realizada no dia 14 de fevereiro de 2022, iniciada às 17:00 horas e concluída às 19:00.
531
+ ATA N.º 2/2022 Aos catorze dias do mês fevereiro do ano dois mil e vinte e dois, realizou-se por videoconferência, a reunião ordinária privada da Câmara Municipal do Fundão, sob a presidência do Senhor Presidente da Câmara, Dr. Paulo Alexandre Bernardo Fernandes, com a participação do Senhor Vice-presidente, Dr. Luís Miguel Roque Tarouca Duarte Gavinhos e dos Senhores Vereadores, Dra. Joana Morgadinho Bento, Dra. Maria Alcina Domingues Cerdeira, Dr. Pedro Manuel Figueiredo Neto, Prof. Sérgio Miguel Cardoso Mendes e Dra. Ana Paula Coelho Duarte.
532
+ A reunião foi secretariada pela Dra. ****************************, Diretora *******************************************."""
533
+ elif selected_example == "Example 5 - Guimarães":
534
+ example_text = """Câmara Municipal de Guimarães.
535
+ ATA Nº 1 Fls. __10__ REUNIÃO ORDINÁRIA DE 13 DE JANEIRO DE 2022
536
+ ATA
537
+ Aos treze dias do mês de janeiro do ano de dois mil e vinte e dois, no Edifício dos Paços do Concelho, na Sala de Reuniões, compareceram os Excelentíssimos Senhores: Presidente da Câmara – Domingos Bragança Salgado e Vereadores – Adelina Paula Mendes Pinto, Paulo Rui Lopes Pereira da Silva, Paula Cristina dos Santos Oliveira, Nelson José Guimarães Felgueiras, Alice Sofia de Freitas Soares Ferreira Fernandes, Ana Maria Prego de Faria Berkeley Cotter, Bruno Alberto Vieira Fernandes, Ricardo José Machado Pereira da Silva Araújo, Vânia Carvalho Dias da Silva de Antas de Barros e Hugo Miguel Alves Ribeiro.
538
+ Secretariou a Diretora ***************, **************************************.
539
+ Pelas 10.10 horas foi declarada aberta a reunião."""
540
+ elif selected_example == "Example 6 - Porto":
541
+ example_text = """2.ª REUNIÃO PÚBLICA,
542
+ DA CÂMARA MUNICIPAL DO PORTO
543
+ REALIZADA EM 8 DE NOVEMBRO DE 2021
544
+ ÀS 10 HORAS
545
+ PRESENTES:
546
+ - Rui de Carvalho de Araújo Moreira
547
+ - Filipe Manuel Ventura Camões de Almeida Araújo
548
+ - Ana Catarina da Rocha Araújo
549
+ - Ricardo Miguel Araújo Cardoso Valente
550
+ - Albino Pedro Pereira Baganha
551
+ - Cristina Mafalda Nieto Guimarães Pimentel
552
+ - Tiago Barbosa Ribeiro
553
+ - Maria do Rosário Gambôa Lopes de Carvalho
554
+ - Catarina Maria da Costa Santos Cunha Pereira de Abreu
555
+ - Vladimiro Mota Cardoso Feliz
556
+ - Alberto Amaro Guedes Machado
557
+ - Maria Ilda da Costa Figueiredo
558
+ - Sérgio Augusto Leite Aires
559
+ Secretariou a reunião a Técnica ********, ***************."""
560
+ else:
561
+ example_text = "Add your text here"
562
+
563
+ st.markdown(f"**Example:** {selected_example}")
564
+
565
+ text_input = st.text_area(
566
+ "Type or paste the text here:",
567
+ value=example_text,
568
+ height=400,
569
+ placeholder="Enter the meeting minutes or administrative document text..."
570
+ )
571
+
572
+ # Metadata extraction button
573
+ process = st.button("🔍 Extract Metadata")
574
+
575
+ # ==================== RIGHT COLUMN - RESULTS ====================
576
# Right-hand column: runs the extraction when the button was pressed and
# shows the detected entities plus a JSON view/download of the raw result.
with col2:
    st.markdown("### 📊 Extraction Results")

    if process:
        if text_input.strip():
            with st.spinner("Processing text..."):
                # Extract entities using the model
                entities = extract_entities(text_input, tokenizer, model)

            if entities:
                st.markdown("#### Detected Entities:")
                display_entities_compact(entities)

                # ==================== JSON EXPORT ====================
                st.markdown("---")

                # Expander with JSON visualization
                with st.expander("📄 View complete JSON"):
                    st.json(entities)

                # JSON download button
                json_str = json.dumps(entities, ensure_ascii=False, indent=2)
                st.download_button(
                    label="⬇️ Download JSON",
                    data=json_str,
                    file_name="extracted_entities.json",
                    mime="application/json"
                )
            else:
                st.warning("⚠️ No entities were detected in the text.")
        else:
            st.warning("⚠️ Please enter some text to process.")
    else:
        # The hint must name the actual button label ("Extract Metadata");
        # it previously referred to a non-existent 'Segment Document' button.
        st.info("👈 Enter text in the input box and click 'Extract Metadata' to begin.")