fccoelho aider (anthropic/claude-sonnet-4-20250514) committed on
Commit
8ee16a5
·
1 Parent(s): e295b52

feat: adicionar padrão regex para referências numeradas com título e editora

Browse files

Co-authored-by: aider (anthropic/claude-sonnet-4-20250514) <aider@aider.chat>

Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -146,7 +146,10 @@ def extract_references_with_regex(text):
146
  r'^([A-Z][a-z]+,\s*[A-Z][A-Za-z\s,&.-]*?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
147
 
148
  # Padrão 6: Múltiplos autores com &
149
- r'^([A-Z][A-Za-z\s,&.-]+?&[A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$'
 
 
 
150
  ]
151
  # patterns = [re.compile(pat) for pat in patterns]
152
 
@@ -168,6 +171,12 @@ def extract_references_with_regex(text):
168
  volume = groups[3].strip()
169
  pages = groups[4].strip()
170
  year = groups[5].strip()
 
 
 
 
 
 
171
  else:
172
  # Para outros padrões (4 grupos)
173
  year = groups[1].strip()
@@ -221,16 +230,17 @@ def create_highlighted_text(text, regex_references):
221
 
222
  # Padrões para destacar (mesmos da extração)
223
  patterns = [
224
- r'^\d+\.\s*([A-Z][A-Za-z\s,&.-]+?(?:\s&\s[A-Z][A-Za-z\s,&.-]+?)*)\.\s*([^.]+?)\.\s*([^.]+?)\s+(\d+),?\s*([^(]*?)\s*\((\d{4})\)'
225
  r'^([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
226
  r'^\[\d+\]\s*([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
227
  r'^([A-Z][A-Za-z\s,&.-]+?)\s+\((\d{4}[a-z]?)\)[.,]\s*([^.]+?)[.,]\s*([^.]+?)\.?\s*$',
228
  r'^([A-Z][A-Za-z\s,&.-]*?et\s+al\.?)\s*\((\d{4}[a-z]?)\)[.,]?\s*([^.]+?)[.,]\s*([^.]+?)\.?\s*$',
229
  r'^([A-Z][a-z]+,\s*[A-Z][A-Za-z\s,&.-]*?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
230
- r'^([A-Z][A-Za-z\s,&.-]+?&[A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$'
 
231
  ]
232
 
233
- colors = ['#ff5722', '#ffeb3b', '#4caf50', '#2196f3', '#ff9800', '#9c27b0', '#e91e63']
234
 
235
  # Processar cada linha
236
  for line in lines:
@@ -277,7 +287,8 @@ def create_highlighted_text(text, regex_references):
277
  <span style="background-color: #2196f3; padding: 2px;">■</span> Padrão 3 &nbsp;
278
  <span style="background-color: #ff9800; padding: 2px;">■</span> Padrão 4 &nbsp;
279
  <span style="background-color: #9c27b0; padding: 2px;">■</span> Padrão 5 &nbsp;
280
- <span style="background-color: #e91e63; padding: 2px;">■</span> Padrão 6
 
281
  </div>
282
  {html_content}
283
  </div>
 
146
  r'^([A-Z][a-z]+,\s*[A-Z][A-Za-z\s,&.-]*?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
147
 
148
  # Padrão 6: Múltiplos autores com &
149
+ r'^([A-Z][A-Za-z\s,&.-]+?&[A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
150
+
151
+ # Padrão 7: Referências numeradas [número] Autor: Título, Editora (ano)
152
+ r'^\[\d+\]\s*([A-Z][A-Za-z\s,&.-]+?):\s*([^,]+?),\s*([^(]+?)\s*\((\d{4})\)'
153
  ]
154
  # patterns = [re.compile(pat) for pat in patterns]
155
 
 
171
  volume = groups[3].strip()
172
  pages = groups[4].strip()
173
  year = groups[5].strip()
174
+ # Para o padrão 7 (formato [número] Autor: Título, Editora (ano))
175
+ elif pattern_index == 7:
176
+ title = groups[1].strip()
177
+ journal = groups[2].strip()
178
+ year = groups[3].strip()
179
+ volume = ""
180
  else:
181
  # Para outros padrões (4 grupos)
182
  year = groups[1].strip()
 
230
 
231
  # Padrões para destacar (mesmos da extração)
232
  patterns = [
233
+ r'^\d+\.\s*([A-Z][A-Za-z\s,&.-]+?(?:\s&\s[A-Z][A-Za-z\s,&.-]+?)*)\.\s*([^.]+?)\.\s*([^.]+?)\s+(\d+),?\s*([^(]*?)\s*\((\d{4})\)',
234
  r'^([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
235
  r'^\[\d+\]\s*([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
236
  r'^([A-Z][A-Za-z\s,&.-]+?)\s+\((\d{4}[a-z]?)\)[.,]\s*([^.]+?)[.,]\s*([^.]+?)\.?\s*$',
237
  r'^([A-Z][A-Za-z\s,&.-]*?et\s+al\.?)\s*\((\d{4}[a-z]?)\)[.,]?\s*([^.]+?)[.,]\s*([^.]+?)\.?\s*$',
238
  r'^([A-Z][a-z]+,\s*[A-Z][A-Za-z\s,&.-]*?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
239
+ r'^([A-Z][A-Za-z\s,&.-]+?&[A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
240
+ r'^\[\d+\]\s*([A-Z][A-Za-z\s,&.-]+?):\s*([^,]+?),\s*([^(]+?)\s*\((\d{4})\)'
241
  ]
242
 
243
+ colors = ['#ff5722', '#ffeb3b', '#4caf50', '#2196f3', '#ff9800', '#9c27b0', '#e91e63', '#795548']
244
 
245
  # Processar cada linha
246
  for line in lines:
 
287
  <span style="background-color: #2196f3; padding: 2px;">■</span> Padrão 3 &nbsp;
288
  <span style="background-color: #ff9800; padding: 2px;">■</span> Padrão 4 &nbsp;
289
  <span style="background-color: #9c27b0; padding: 2px;">■</span> Padrão 5 &nbsp;
290
+ <span style="background-color: #e91e63; padding: 2px;">■</span> Padrão 6 &nbsp;
291
+ <span style="background-color: #795548; padding: 2px;">■</span> Padrão 7
292
  </div>
293
  {html_content}
294
  </div>