Spaces:
Sleeping
Sleeping
| from app.utils.text_utils import normalize_text | |
| DOCUMENT_TYPE_ALIASES = { | |
| "academic_paper": "academic_publication", | |
| "academic_article": "academic_publication", | |
| "academic_publication": "academic_publication", | |
| "article": "academic_publication", | |
| "journal_article": "academic_publication", | |
| "publication": "academic_publication", | |
| "research_paper": "academic_publication", | |
| "paper": "academic_publication", | |
| "report": "report", | |
| "whitepaper": "report", | |
| } | |
| CONTEXTUAL_LONGFORM_TYPES = { | |
| "academic_publication", | |
| "report", | |
| } | |
| def canonical_document_type(document_type: str | None) -> str: | |
| normalized = normalize_text((document_type or "").replace("_", " ").replace("-", " ")).replace(" ", "_") | |
| return DOCUMENT_TYPE_ALIASES.get(normalized, normalized or "general") | |
| def is_contextual_longform(document_type: str | None) -> bool: | |
| return canonical_document_type(document_type) in CONTEXTUAL_LONGFORM_TYPES | |