llm-ready-data / app /api /v1 /system.py
xce009's picture
sure
c28ae12
Raw
History Blame Contribute Delete
3.95 kB
from __future__ import annotations
import platform
import time
from datetime import datetime, timezone
from fastapi import APIRouter, Depends
from app.api.deps import get_extraction_service, require_auth
from app.config import get_settings
from app.core.constants import (
ARCHIVE_EXTENSIONS,
AUDIO_EXTENSIONS,
DOCUMENT_EXTENSIONS,
IMAGE_EXTENSIONS,
OFFICE_EXTENSIONS,
SUPPORTED_EXTENSIONS,
TEXT_EXTENSIONS,
WEB_EXTENSIONS,
)
from app.models.schemas import (
HealthResponse,
InfoResponse,
SpacyLabelsResponse,
SupportedFormatsResponse,
)
from app.services.extraction_service import ExtractionService
# Router that the system endpoints in this module (/health, /info, /formats,
# /spacy-labels) are registered on via the @router.get decorators below.
router = APIRouter()
# Settings object fetched once at import time; the handlers read
# app_name, app_version and max_upload_mb from it.
_settings = get_settings()
# Wall-clock time (seconds since the epoch) captured when this module is
# imported; handlers subtract it from time.time() to report uptime_seconds.
_START_TIME = time.time()
@router.get("/health", response_model=HealthResponse, summary="Health check")
async def health(
    token: str = Depends(require_auth),
):
    """Return a basic liveness report for the service.

    The response carries a fixed ``"ok"`` status, the configured application
    version, the seconds elapsed since this module was imported (rounded to
    two decimals) and the current UTC time as an ISO-8601 string.

    ``token`` is resolved through the ``require_auth`` dependency; the
    handler body itself does not read it.
    """
    app_version = _settings.app_version
    elapsed_seconds = time.time() - _START_TIME
    now_utc = datetime.now(timezone.utc)

    return HealthResponse(
        success=True,
        status="ok",
        version=app_version,
        uptime_seconds=round(elapsed_seconds, 2),
        timestamp=now_utc.isoformat(),
    )
@router.get("/info", response_model=InfoResponse, summary="Server and environment information")
async def info(
    token: str = Depends(require_auth),
):
    """Describe the running application and its host environment.

    Combines values from the module-level settings (application name,
    version, upload limit) with runtime facts: the Python version, the
    operating-system name, the uptime in seconds since this module was
    imported (rounded to two decimals), the number of supported file
    extensions and the current UTC time as an ISO-8601 string.

    ``token`` is resolved through the ``require_auth`` dependency; the
    handler body itself does not read it.
    """
    app_name = _settings.app_name
    app_version = _settings.app_version
    interpreter_version = platform.python_version()
    os_name = platform.system()
    elapsed_seconds = time.time() - _START_TIME
    upload_limit_mb = _settings.max_upload_mb
    extension_count = len(SUPPORTED_EXTENSIONS)
    now_utc = datetime.now(timezone.utc)

    return InfoResponse(
        success=True,
        app=app_name,
        version=app_version,
        python_version=interpreter_version,
        platform=os_name,
        uptime_seconds=round(elapsed_seconds, 2),
        max_upload_mb=upload_limit_mb,
        supported_extensions=extension_count,
        timestamp=now_utc.isoformat(),
    )
@router.get("/formats", response_model=SupportedFormatsResponse, summary="List supported file formats")
async def list_formats(
    token: str = Depends(require_auth),
):
    """List every supported file extension, overall and grouped by category.

    Each category keeps only the entries of ``SUPPORTED_EXTENSIONS`` that
    also appear in that category's extension collection, sorted
    alphabetically. Extensions that belong to no category still appear in
    ``all_extensions`` and are counted in ``total_count``.

    ``token`` is resolved through the ``require_auth`` dependency; the
    handler body itself does not read it.
    """
    # Category name -> collection of extensions that belong to it.
    # Insertion order determines the key order of the response mapping.
    category_members = {
        "documents": DOCUMENT_EXTENSIONS,
        "office": OFFICE_EXTENSIONS,
        "data": {".csv", ".json", ".xml"},
        "web": WEB_EXTENSIONS,
        "text": TEXT_EXTENSIONS,
        "images": IMAGE_EXTENSIONS,
        "audio": AUDIO_EXTENSIONS,
        "archives": ARCHIVE_EXTENSIONS,
    }
    grouped = {
        category: sorted(ext for ext in SUPPORTED_EXTENSIONS if ext in members)
        for category, members in category_members.items()
    }

    return SupportedFormatsResponse(
        success=True,
        total_count=len(SUPPORTED_EXTENSIONS),
        all_extensions=sorted(SUPPORTED_EXTENSIONS),
        by_category=grouped,
    )
@router.get("/spacy-labels", response_model=SpacyLabelsResponse, summary="List available spaCy NER labels")
async def list_spacy_labels(
    token: str = Depends(require_auth),
    extraction_service: ExtractionService = Depends(get_extraction_service),
):
    """Return the spaCy labels plus reference material for building mappings.

    The response contains:

    * ``spacy_labels`` - whatever ``extraction_service.get_spacy_labels()``
      returns.
    * ``source_types`` - a short description of each supported source type
      (``entity``, ``regex``, ``token_attr``).
    * ``example_mappings`` - ready-made sample mappings for common fields.

    ``token`` is resolved through the ``require_auth`` dependency; the
    handler body itself does not read it.
    """
    return SpacyLabelsResponse(
        success=True,
        spacy_labels=extraction_service.get_spacy_labels(),
        source_types={
            "entity": "Extract using spaCy NER labels (ORG, PERSON, DATE, etc.)",
            "regex": "Extract using custom regular expressions",
            "token_attr": "Extract using token attributes (text, pos_, tag_, etc.)",
        },
        example_mappings={
            "company": {"source_type": "entity", "label": "ORG"},
            "person": {"source_type": "entity", "label": "PERSON"},
            "date": {"source_type": "entity", "label": "DATE"},
            "money": {"source_type": "entity", "label": "MONEY"},
            "email": {
                "source_type": "regex",
                # TLD class is [A-Za-z]; the earlier [A-Z|a-z] also matched a
                # literal "|" because "|" is not alternation inside a
                # character class.
                "pattern": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
            },
            "phone": {"source_type": "regex", "pattern": r"\b\d{3}-\d{3}-\d{4}\b"},
        },
    )