File size: 955 Bytes
2b8c32b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from app.utils.text_utils import normalize_text


# Lookup table from a document-type key to its canonical name.
# canonical_document_type() consults it with an underscore-joined key;
# canonical names map to themselves so they resolve unchanged.
DOCUMENT_TYPE_ALIASES = {
    **dict.fromkeys(
        (
            "academic_paper",
            "academic_article",
            "academic_publication",
            "article",
            "journal_article",
            "publication",
            "research_paper",
            "paper",
        ),
        "academic_publication",
    ),
    **dict.fromkeys(("report", "whitepaper"), "report"),
}

# Canonical document types for which is_contextual_longform() returns True.
CONTEXTUAL_LONGFORM_TYPES = {"academic_publication", "report"}


def canonical_document_type(document_type: str | None) -> str:
    """Resolve a document type label to its canonical name.

    Underscores and hyphens in the label are replaced with spaces before the
    label is passed through ``normalize_text``; spaces in the normalized
    result are then replaced with underscores to form the lookup key for
    ``DOCUMENT_TYPE_ALIASES``.

    Args:
        document_type: Raw label. ``None`` (or any falsy value) is treated
            as an empty string.

    Returns:
        The alias target when the key is registered in
        ``DOCUMENT_TYPE_ALIASES``; otherwise the key itself, or
        ``"general"`` when the key is empty.
    """
    raw_label = document_type or ""
    # Treat "_" and "-" as word separators so normalize_text sees plain words.
    spaced_label = raw_label.replace("_", " ").replace("-", " ")
    lookup_key = normalize_text(spaced_label).replace(" ", "_")

    if lookup_key in DOCUMENT_TYPE_ALIASES:
        return DOCUMENT_TYPE_ALIASES[lookup_key]
    # Unregistered types pass through as-is; an empty key means "general".
    return lookup_key or "general"


def is_contextual_longform(document_type: str | None) -> bool:
    """Tell whether a document type resolves to a contextual long-form type.

    Args:
        document_type: Raw label, canonicalized with
            ``canonical_document_type`` before the check.

    Returns:
        ``True`` when the canonical name is a member of
        ``CONTEXTUAL_LONGFORM_TYPES``, ``False`` otherwise.
    """
    canonical_name = canonical_document_type(document_type)
    return canonical_name in CONTEXTUAL_LONGFORM_TYPES