File size: 1,300 Bytes
767b90c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
"""POST /analyze — extract drugs from OCR text."""

import re

from fastapi import APIRouter

from app.api.schemas import AnalyzeDataSources, AnalyzeRequest, AnalyzeResponse, DrugResult
from app.nlp import ner_model
from app.services import drug_analyzer

# Router instance that this module's route handlers are registered on.
router = APIRouter()

# Matches "<", then one or more characters other than ">", then ">".
# Used to strip tag-like spans from text before it is echoed in a response.
_HTML_TAG = re.compile(r"<[^>]+>")


def _is_predominantly_non_latin(text: str) -> bool:
    """Check if the alphabetic characters are mostly non-Latin."""
    alpha_chars = [c for c in text if c.isalpha()]
    if not alpha_chars:
        return False
    latin_count = sum(1 for c in alpha_chars if c.isascii())
    return (latin_count / len(alpha_chars)) < 0.3


from app.main import limiter
from fastapi import Request

@router.post("/analyze", response_model=AnalyzeResponse)
@limiter.limit("10/minute")
async def analyze(request: Request, body: AnalyzeRequest):
    """Handle POST /analyze: extract drugs from the submitted text.

    When the text is judged predominantly non-Latin, the analyzer is not
    called; the response carries an empty drug list and an explanatory note.
    Otherwise the text is passed to ``drug_analyzer.analyze`` and each
    returned entry is expanded into a ``DrugResult``.

    The response always echoes the input text with ``_HTML_TAG`` matches
    removed, and reports the NER model id under ``data_sources``.

    ``request`` is not read here; presumably the rate-limit decorator
    requires it in the signature (confirm against the limiter's docs).
    """
    text = body.text

    if _is_predominantly_non_latin(text):
        # Skip the analyzer entirely and tell the caller why nothing was found.
        found = []
        note = "Non-Latin text detected; only Latin-script drug names are supported"
    else:
        found = await drug_analyzer.analyze(text)
        note = None

    results = [DrugResult(**entry) for entry in found]
    cleaned_text = _HTML_TAG.sub("", text)
    sources = AnalyzeDataSources(ner_model=ner_model.MODEL_ID)

    return AnalyzeResponse(
        drugs=results,
        raw_text=cleaned_text,
        data_sources=sources,
        note=note,
    )