File size: 2,569 Bytes
a6b8ecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""
Setup script for Justitia: Selective Vision Token Masking for PHI-Compliant OCR
"""

from setuptools import setup, find_packages
import os

# Load the project README; its text is passed to setup() as the long description.
with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

# Read requirements
def read_requirements(filename):
    """Return the requirement specifiers listed in a pip-style requirements file.

    Blank lines and comment lines are skipped, trailing inline comments are
    removed, and every ``flash-attn`` entry is left out (this script exposes
    flash-attn through its own ``flash-attn`` extra instead).

    Args:
        filename: Path of the requirements file to read.

    Returns:
        A list of requirement strings, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened.
    """
    import re

    requirements = []
    # Read as UTF-8 explicitly, matching how README.md is read in this script,
    # so the result does not depend on the platform's locale encoding.
    with open(filename, "r", encoding="utf-8") as f:
        for raw_line in f:
            # Strip first so indented comments and indented flash-attn entries
            # are filtered the same way as unindented ones.
            line = raw_line.strip()
            if not line or line.startswith(("#", "flash-attn")):
                continue
            # In requirements files, whitespace followed by '#' starts an inline
            # comment. A '#' with no preceding whitespace (e.g. a URL fragment
            # such as '#egg=name') is part of the requirement and is kept.
            requirements.append(re.split(r"\s+#", line, maxsplit=1)[0])
    return requirements

# Tooling offered through the optional "dev" extra.
dev_requires = [
    "jupyter>=1.0.0",
    "black>=23.0.0",
    "flake8>=6.0.0",
    "pytest>=7.4.0",
    "pytest-cov>=4.1.0",
    "ipywidgets>=8.0.0",
]

# Runtime dependencies are taken from requirements.txt.
install_requires = read_requirements("requirements.txt")

# Trove classifiers published with the package metadata.
trove_classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Science/Research",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Medical Science Apps.",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
]

# Optional dependency groups, selected with `pip install <pkg>[<extra>]`.
optional_dependencies = {
    "dev": dev_requires,
    "flash-attn": ["flash-attn>=2.7.3"],
}

# Command-line programs, each written as "<command>=<module>:<function>".
console_scripts = [
    "justitia-train=training.train_lora:main",
    "justitia-infer=inference.process_documents:main",
    "justitia-generate=data_generation.synthea_to_pdf:main",
    "justitia-download=scripts.download_model:main",
]

# Non-Python file patterns to ship; the "" key applies them to every package.
bundled_file_patterns = {
    "": ["*.yaml", "*.json", "*.txt", "*.md"],
}

setup(
    # Identity and descriptive metadata.
    name="justitia-phi-ocr",
    version="0.1.0",
    author="Your Name",
    author_email="your.email@example.com",
    description="Selective Vision Token Masking for PHI-Compliant OCR using DeepSeek-OCR and LoRA",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/yourusername/Justitia-PHI-OCR",
    classifiers=trove_classifiers,
    # Packages are discovered under the src/ directory.
    package_dir={"": "src"},
    packages=find_packages(where="src"),
    include_package_data=True,
    package_data=bundled_file_patterns,
    # Interpreter and dependency constraints.
    python_requires=">=3.12",
    install_requires=install_requires,
    extras_require=optional_dependencies,
    entry_points={"console_scripts": console_scripts},
)

# Notice printed after the setup() call returns: follow-up install and
# model-download instructions, framed by a 60-character rule.
rule = "=" * 60
notice_lines = (
    "\n" + rule,
    "Justitia PHI-OCR Setup Complete!",
    rule,
    "\nIMPORTANT: Flash Attention must be installed separately:",
    "  pip install flash-attn==2.7.3 --no-build-isolation",
    "\nFor development installation:",
    "  pip install -e .[dev]",
    "\nTo download DeepSeek-OCR model:",
    "  python scripts/download_model.py",
    rule,
)
for notice_line in notice_lines:
    print(notice_line)