push
- Dockerfile +1 -1
- app.py +15 -2
- requirements.txt +1 -0
Dockerfile
CHANGED
@@ -41,7 +41,7 @@ RUN mkdir -p /models/huggingface && \
     chmod -R 755 /models/huggingface
 
 # Pre-download the model during build
-RUN python -c "from transformers import pipeline; import torch; pipe = pipeline('text-generation', model='tiiuae/Falcon3-3B-Instruct', dtype=torch.bfloat16, device_map='cpu')" && \
+RUN python -c "from transformers import pipeline; import torch; pipe = pipeline('text-generation', model='tiiuae/Falcon3-3B-Instruct', dtype=torch.bfloat16, device_map='cpu', model_kwargs={'low_cpu_mem_usage': False})" && \
     chown -R 1000:1000 /models/huggingface && \
     chmod -R 755 /models/huggingface || true
 
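The only change here is the extra model_kwargs={'low_cpu_mem_usage': False} in the build-time pre-download, which makes transformers materialize the weights through its normal loading path instead of accelerate's low-memory path. Since this step exists only to populate the Hugging Face cache (presumably under /models/huggingface via an HF_HOME or similar variable set earlier in the Dockerfile, which is an assumption), a lighter way to warm the cache would be to fetch the files without instantiating the model at all. A minimal sketch of that alternative, not what the commit does:

# Hypothetical alternative to the pipeline-based pre-download step.
# Assumes the cache directory already points at /models/huggingface (e.g. via HF_HOME).
from huggingface_hub import snapshot_download

# Fetches the model files into the local cache without allocating any tensors,
# so the build step needs far less RAM than constructing the full pipeline.
snapshot_download(repo_id="tiiuae/Falcon3-3B-Instruct")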
app.py
CHANGED
@@ -54,13 +54,26 @@ async def load_model():
     global pipe, ocr_reader
     try:
         logger.info(f"Loading model: {MODEL_ID} ...")
+        logger.info("Optimizing for CPU-only inference...")
+
+        torch.set_num_threads(os.cpu_count() or 4)
+        torch.set_num_interop_threads(os.cpu_count() or 4)
+
+        logger.info(f"Using {torch.get_num_threads()} CPU threads for inference")
+        logger.info("Loading full model into CPU RAM (no offloading)...")
+
         pipe = pipeline(
             "text-generation",
             model=MODEL_ID,
             dtype=torch.bfloat16,
-            device_map="
+            device_map="cpu",
+            model_kwargs={
+                "torch_dtype": torch.bfloat16,
+                "low_cpu_mem_usage": False,
+                "offload_folder": None
+            }
         )
-        logger.info("✅ Model loaded successfully!")
+        logger.info("✅ Model loaded successfully in CPU RAM!")
 
         logger.info("Loading OCR reader...")
         try:
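Taken together, the new startup path pins PyTorch's intra-op and inter-op thread pools to the container's CPU count and loads the full bfloat16 model straight into CPU RAM (device_map="cpu", low_cpu_mem_usage=False, no offload folder) rather than letting accelerate load lazily or offload to disk. A minimal standalone sketch of the same configuration, assuming the same MODEL_ID as the Dockerfile and a CPU-only container:

import os
import torch
from transformers import pipeline

MODEL_ID = "tiiuae/Falcon3-3B-Instruct"  # same model the Dockerfile pre-downloads

# Use every available core for both intra-op and inter-op parallelism;
# set_num_interop_threads must run before any parallel work starts.
torch.set_num_threads(os.cpu_count() or 4)
torch.set_num_interop_threads(os.cpu_count() or 4)

# Materialize the full bfloat16 model in CPU RAM: low_cpu_mem_usage=False skips
# accelerate's lazy/meta-device loading path, and no offload folder is configured.
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    dtype=torch.bfloat16,
    device_map="cpu",
    model_kwargs={"low_cpu_mem_usage": False},
)

The sketch specifies the dtype only once at the pipeline level; the commit also passes "torch_dtype" inside model_kwargs, which is redundant and, depending on the installed transformers version, may be rejected as conflicting with the top-level dtype argument.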
requirements.txt
CHANGED
@@ -10,4 +10,5 @@ pillow
 pytesseract
 pdf2image
 easyocr
+json5
 
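requirements.txt only gains json5, presumably to parse lenient, almost-JSON output from the model (single quotes, bare keys, trailing commas) that strict json.loads would reject; that purpose is an assumption, since the diff above does not show the call site. A minimal sketch of that kind of use:

import json5

# Hypothetical example: model output that is close to JSON but not strictly valid.
raw = "{'title': 'Invoice', amount: 42.5, }"  # single quotes, bare key, trailing comma
data = json5.loads(raw)  # json5 tolerates this where json.loads would raise
print(data["amount"])    # 42.5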
|