Spaces:
Sleeping
Sleeping
Commit
·
8ee16a5
1
Parent(s):
e295b52
feat: adicionar padrão regex para referências numeradas com título e editora
Browse files
Co-authored-by: aider (anthropic/claude-sonnet-4-20250514) <aider@aider.chat>
app.py
CHANGED
|
@@ -146,7 +146,10 @@ def extract_references_with_regex(text):
|
|
| 146 |
r'^([A-Z][a-z]+,\s*[A-Z][A-Za-z\s,&.-]*?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 147 |
|
| 148 |
# Padrão 6: Múltiplos autores com &
|
| 149 |
-
r'^([A-Z][A-Za-z\s,&.-]+?&[A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$'
|
|
|
|
|
|
|
|
|
|
| 150 |
]
|
| 151 |
# patterns = [re.compile(pat) for pat in patterns]
|
| 152 |
|
|
@@ -168,6 +171,12 @@ def extract_references_with_regex(text):
|
|
| 168 |
volume = groups[3].strip()
|
| 169 |
pages = groups[4].strip()
|
| 170 |
year = groups[5].strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
else:
|
| 172 |
# Para outros padrões (4 grupos)
|
| 173 |
year = groups[1].strip()
|
|
@@ -221,16 +230,17 @@ def create_highlighted_text(text, regex_references):
|
|
| 221 |
|
| 222 |
# Padrões para destacar (mesmos da extração)
|
| 223 |
patterns = [
|
| 224 |
-
r'^\d+\.\s*([A-Z][A-Za-z\s,&.-]+?(?:\s&\s[A-Z][A-Za-z\s,&.-]+?)*)\.\s*([^.]+?)\.\s*([^.]+?)\s+(\d+),?\s*([^(]*?)\s*\((\d{4})\)'
|
| 225 |
r'^([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 226 |
r'^\[\d+\]\s*([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 227 |
r'^([A-Z][A-Za-z\s,&.-]+?)\s+\((\d{4}[a-z]?)\)[.,]\s*([^.]+?)[.,]\s*([^.]+?)\.?\s*$',
|
| 228 |
r'^([A-Z][A-Za-z\s,&.-]*?et\s+al\.?)\s*\((\d{4}[a-z]?)\)[.,]?\s*([^.]+?)[.,]\s*([^.]+?)\.?\s*$',
|
| 229 |
r'^([A-Z][a-z]+,\s*[A-Z][A-Za-z\s,&.-]*?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 230 |
-
r'^([A-Z][A-Za-z\s,&.-]+?&[A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$'
|
|
|
|
| 231 |
]
|
| 232 |
|
| 233 |
-
colors = ['#ff5722', '#ffeb3b', '#4caf50', '#2196f3', '#ff9800', '#9c27b0', '#e91e63']
|
| 234 |
|
| 235 |
# Processar cada linha
|
| 236 |
for line in lines:
|
|
@@ -277,7 +287,8 @@ def create_highlighted_text(text, regex_references):
|
|
| 277 |
<span style="background-color: #2196f3; padding: 2px;">■</span> Padrão 3
|
| 278 |
<span style="background-color: #ff9800; padding: 2px;">■</span> Padrão 4
|
| 279 |
<span style="background-color: #9c27b0; padding: 2px;">■</span> Padrão 5
|
| 280 |
-
<span style="background-color: #e91e63; padding: 2px;">■</span> Padrão 6
|
|
|
|
| 281 |
</div>
|
| 282 |
{html_content}
|
| 283 |
</div>
|
|
|
|
| 146 |
r'^([A-Z][a-z]+,\s*[A-Z][A-Za-z\s,&.-]*?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 147 |
|
| 148 |
# Padrão 6: Múltiplos autores com &
|
| 149 |
+
r'^([A-Z][A-Za-z\s,&.-]+?&[A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 150 |
+
|
| 151 |
+
# Padrão 7: Referências numeradas [número] Autor: Título, Editora (ano)
|
| 152 |
+
r'^\[\d+\]\s*([A-Z][A-Za-z\s,&.-]+?):\s*([^,]+?),\s*([^(]+?)\s*\((\d{4})\)'
|
| 153 |
]
|
| 154 |
# patterns = [re.compile(pat) for pat in patterns]
|
| 155 |
|
|
|
|
| 171 |
volume = groups[3].strip()
|
| 172 |
pages = groups[4].strip()
|
| 173 |
year = groups[5].strip()
|
| 174 |
+
# Para o padrão 7 (formato [número] Autor: Título, Editora (ano))
|
| 175 |
+
elif pattern_index == 7:
|
| 176 |
+
title = groups[1].strip()
|
| 177 |
+
journal = groups[2].strip()
|
| 178 |
+
year = groups[3].strip()
|
| 179 |
+
volume = ""
|
| 180 |
else:
|
| 181 |
# Para outros padrões (4 grupos)
|
| 182 |
year = groups[1].strip()
|
|
|
|
| 230 |
|
| 231 |
# Padrões para destacar (mesmos da extração)
|
| 232 |
patterns = [
|
| 233 |
+
r'^\d+\.\s*([A-Z][A-Za-z\s,&.-]+?(?:\s&\s[A-Z][A-Za-z\s,&.-]+?)*)\.\s*([^.]+?)\.\s*([^.]+?)\s+(\d+),?\s*([^(]*?)\s*\((\d{4})\)',
|
| 234 |
r'^([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 235 |
r'^\[\d+\]\s*([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 236 |
r'^([A-Z][A-Za-z\s,&.-]+?)\s+\((\d{4}[a-z]?)\)[.,]\s*([^.]+?)[.,]\s*([^.]+?)\.?\s*$',
|
| 237 |
r'^([A-Z][A-Za-z\s,&.-]*?et\s+al\.?)\s*\((\d{4}[a-z]?)\)[.,]?\s*([^.]+?)[.,]\s*([^.]+?)\.?\s*$',
|
| 238 |
r'^([A-Z][a-z]+,\s*[A-Z][A-Za-z\s,&.-]*?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 239 |
+
r'^([A-Z][A-Za-z\s,&.-]+?&[A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 240 |
+
r'^\[\d+\]\s*([A-Z][A-Za-z\s,&.-]+?):\s*([^,]+?),\s*([^(]+?)\s*\((\d{4})\)'
|
| 241 |
]
|
| 242 |
|
| 243 |
+
colors = ['#ff5722', '#ffeb3b', '#4caf50', '#2196f3', '#ff9800', '#9c27b0', '#e91e63', '#795548']
|
| 244 |
|
| 245 |
# Processar cada linha
|
| 246 |
for line in lines:
|
|
|
|
| 287 |
<span style="background-color: #2196f3; padding: 2px;">■</span> Padrão 3
|
| 288 |
<span style="background-color: #ff9800; padding: 2px;">■</span> Padrão 4
|
| 289 |
<span style="background-color: #9c27b0; padding: 2px;">■</span> Padrão 5
|
| 290 |
+
<span style="background-color: #e91e63; padding: 2px;">■</span> Padrão 6
|
| 291 |
+
<span style="background-color: #795548; padding: 2px;">■</span> Padrão 7
|
| 292 |
</div>
|
| 293 |
{html_content}
|
| 294 |
</div>
|