nexusbert committed on
Commit
986c48c
·
1 Parent(s): 57c0318
Files changed (3) hide show
  1. Dockerfile +1 -1
  2. app.py +15 -2
  3. requirements.txt +1 -0
Dockerfile CHANGED
@@ -41,7 +41,7 @@ RUN mkdir -p /models/huggingface && \
41
  chmod -R 755 /models/huggingface
42
 
43
  # Pre-download the model during build
44
- RUN python -c "from transformers import pipeline; import torch; pipe = pipeline('text-generation', model='tiiuae/Falcon3-3B-Instruct', dtype=torch.bfloat16, device_map='cpu')" && \
45
  chown -R 1000:1000 /models/huggingface && \
46
  chmod -R 755 /models/huggingface || true
47
 
 
41
  chmod -R 755 /models/huggingface
42
 
43
  # Pre-download the model during build
44
+ RUN python -c "from transformers import pipeline; import torch; pipe = pipeline('text-generation', model='tiiuae/Falcon3-3B-Instruct', dtype=torch.bfloat16, device_map='cpu', model_kwargs={'low_cpu_mem_usage': False})" && \
45
  chown -R 1000:1000 /models/huggingface && \
46
  chmod -R 755 /models/huggingface || true
47
 
app.py CHANGED
@@ -54,13 +54,26 @@ async def load_model():
54
  global pipe, ocr_reader
55
  try:
56
  logger.info(f"Loading model: {MODEL_ID} ...")
 
 
 
 
 
 
 
 
57
  pipe = pipeline(
58
  "text-generation",
59
  model=MODEL_ID,
60
  dtype=torch.bfloat16,
61
- device_map="auto"
 
 
 
 
 
62
  )
63
- logger.info("✅ Model loaded successfully!")
64
 
65
  logger.info("Loading OCR reader...")
66
  try:
 
54
  global pipe, ocr_reader
55
  try:
56
  logger.info(f"Loading model: {MODEL_ID} ...")
57
+ logger.info("Optimizing for CPU-only inference...")
58
+
59
+ torch.set_num_threads(os.cpu_count() or 4)
60
+ torch.set_num_interop_threads(os.cpu_count() or 4)
61
+
62
+ logger.info(f"Using {torch.get_num_threads()} CPU threads for inference")
63
+ logger.info("Loading full model into CPU RAM (no offloading)...")
64
+
65
  pipe = pipeline(
66
  "text-generation",
67
  model=MODEL_ID,
68
  dtype=torch.bfloat16,
69
+ device_map="cpu",
70
+ model_kwargs={
71
+ "torch_dtype": torch.bfloat16,
72
+ "low_cpu_mem_usage": False,
73
+ "offload_folder": None
74
+ }
75
  )
76
+ logger.info("✅ Model loaded successfully in CPU RAM!")
77
 
78
  logger.info("Loading OCR reader...")
79
  try:
requirements.txt CHANGED
@@ -10,4 +10,5 @@ pillow
10
  pytesseract
11
  pdf2image
12
  easyocr
 
13
 
 
10
  pytesseract
11
  pdf2image
12
  easyocr
13
+ json5
14