Spaces: Sleeping
Updated regex
Browse files
app.py
CHANGED
|
@@ -148,11 +148,11 @@ def extract_references_with_regex(text):
|
|
| 148 |
# Padrão 6: Múltiplos autores com &
|
| 149 |
r'^([A-Z][A-Za-z\s,&.-]+?&[A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$'
|
| 150 |
]
|
| 151 |
-
patterns = [re.compile(pat) for pat in patterns]
|
| 152 |
|
| 153 |
# Processar cada padrão
|
| 154 |
for pattern_index, pattern in enumerate(patterns):
|
| 155 |
-
reflist =
|
| 156 |
|
| 157 |
if reflist:
|
| 158 |
for ref_match in reflist:
|
|
@@ -221,7 +221,7 @@ def create_highlighted_text(text, regex_references):
|
|
| 221 |
|
| 222 |
# Padrões para destacar (mesmos da extração)
|
| 223 |
patterns = [
|
| 224 |
-
r'^\d+\.\s*([A-Z][A-Za-z\s,&.-]+?(?:\s&\s[A-Z][A-Za-z\s,&.-]+?)*)\.\s*([^.]+?)\.\s*([^.]+?)\s+(\d+),?\s*([^(]*?)\s*\((\d{4})\)'
|
| 225 |
r'^([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 226 |
r'^\[\d+\]\s*([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 227 |
r'^([A-Z][A-Za-z\s,&.-]+?)\s+\((\d{4}[a-z]?)\)[.,]\s*([^.]+?)[.,]\s*([^.]+?)\.?\s*$',
|
|
|
|
| 148 |
# Padrão 6: Múltiplos autores com &
|
| 149 |
r'^([A-Z][A-Za-z\s,&.-]+?&[A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$'
|
| 150 |
]
|
| 151 |
+
# patterns = [re.compile(pat) for pat in patterns]
|
| 152 |
|
| 153 |
# Processar cada padrão
|
| 154 |
for pattern_index, pattern in enumerate(patterns):
|
| 155 |
+
reflist = re.findall(pattern, text, re.MULTILINE | re.UNICODE | re.DOTALL)
|
| 156 |
|
| 157 |
if reflist:
|
| 158 |
for ref_match in reflist:
|
|
|
|
| 221 |
|
| 222 |
# Padrões para destacar (mesmos da extração)
|
| 223 |
patterns = [
|
| 224 |
+
r'^\d+\.\s*([A-Z][A-Za-z\s,&.-]+?(?:\s&\s[A-Z][A-Za-z\s,&.-]+?)*)\.\s*([^.]+?)\.\s*([^.]+?)\s+(\d+),?\s*([^(]*?)\s*\((\d{4})\)'
|
| 225 |
r'^([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 226 |
r'^\[\d+\]\s*([A-Z][A-Za-z\s,&.-]+?)\.\s*\((\d{4}[a-z]?)\)\.\s*([^.]+?)\.\s*([^.]+?)\.?\s*$',
|
| 227 |
r'^([A-Z][A-Za-z\s,&.-]+?)\s+\((\d{4}[a-z]?)\)[.,]\s*([^.]+?)[.,]\s*([^.]+?)\.?\s*$',
|