push
- Dockerfile +1 -1
- app.py +15 -2
- requirements.txt +1 -0
Dockerfile
CHANGED
@@ -41,7 +41,7 @@ RUN mkdir -p /models/huggingface && \
     chmod -R 755 /models/huggingface
 
 # Pre-download the model during build
-RUN python -c "from transformers import pipeline; import torch; pipe = pipeline('text-generation', model='tiiuae/Falcon3-3B-Instruct', dtype=torch.bfloat16, device_map='cpu')" && \
+RUN python -c "from transformers import pipeline; import torch; pipe = pipeline('text-generation', model='tiiuae/Falcon3-3B-Instruct', dtype=torch.bfloat16, device_map='cpu', model_kwargs={'low_cpu_mem_usage': False})" && \
     chown -R 1000:1000 /models/huggingface && \
     chmod -R 755 /models/huggingface || true
 
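The only change here is the extra model_kwargs={'low_cpu_mem_usage': False} in the build-time pre-download, which makes transformers materialize the weights through its normal loading path instead of accelerate's low-memory path. Since this step exists only to populate the Hugging Face cache (presumably under /models/huggingface via an HF_HOME or similar variable set earlier in the Dockerfile, which is an assumption), a lighter way to warm the cache would be to fetch the files without instantiating the model at all. A minimal sketch of that alternative, not what the commit does:

# Hypothetical alternative to the pipeline-based pre-download step.
# Assumes the cache directory already points at /models/huggingface (e.g. via HF_HOME).
from huggingface_hub import snapshot_download

# Fetches the model files into the local cache without allocating any tensors,
# so the build step needs far less RAM than constructing the full pipeline.
snapshot_download(repo_id="tiiuae/Falcon3-3B-Instruct")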
app.py
CHANGED
@@ -54,13 +54,26 @@ async def load_model():
     global pipe, ocr_reader
     try:
         logger.info(f"Loading model: {MODEL_ID} ...")
+        logger.info("Optimizing for CPU-only inference...")
+
+        torch.set_num_threads(os.cpu_count() or 4)
+        torch.set_num_interop_threads(os.cpu_count() or 4)
+
+        logger.info(f"Using {torch.get_num_threads()} CPU threads for inference")
+        logger.info("Loading full model into CPU RAM (no offloading)...")
+
         pipe = pipeline(
             "text-generation",
             model=MODEL_ID,
             dtype=torch.bfloat16,
-            device_map="
+            device_map="cpu",
+            model_kwargs={
+                "torch_dtype": torch.bfloat16,
+                "low_cpu_mem_usage": False,
+                "offload_folder": None
+            }
         )
-        logger.info("✅ Model loaded successfully!")
+        logger.info("✅ Model loaded successfully in CPU RAM!")
 
         logger.info("Loading OCR reader...")
         try:
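Taken together, the new startup path pins PyTorch's intra-op and inter-op thread pools to the container's CPU count and loads the full bfloat16 model straight into CPU RAM (device_map="cpu", low_cpu_mem_usage=False, no offload folder) rather than letting accelerate load lazily or offload to disk. A minimal standalone sketch of the same configuration, assuming the same MODEL_ID as the Dockerfile and a CPU-only container:

import os
import torch
from transformers import pipeline

MODEL_ID = "tiiuae/Falcon3-3B-Instruct"  # same model the Dockerfile pre-downloads

# Use every available core for both intra-op and inter-op parallelism;
# set_num_interop_threads must run before any parallel work starts.
torch.set_num_threads(os.cpu_count() or 4)
torch.set_num_interop_threads(os.cpu_count() or 4)

# Materialize the full bfloat16 model in CPU RAM: low_cpu_mem_usage=False skips
# accelerate's lazy/meta-device loading path, and no offload folder is configured.
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    dtype=torch.bfloat16,
    device_map="cpu",
    model_kwargs={"low_cpu_mem_usage": False},
)

The sketch specifies the dtype only once at the pipeline level; the commit also passes "torch_dtype" inside model_kwargs, which is redundant and, depending on the installed transformers version, may be rejected as conflicting with the top-level dtype argument.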
requirements.txt
CHANGED
@@ -10,4 +10,5 @@ pillow
 pytesseract
 pdf2image
 easyocr
+json5
 
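requirements.txt only gains json5, presumably to parse lenient, almost-JSON output from the model (single quotes, bare keys, trailing commas) that strict json.loads would reject; that purpose is an assumption, since the diff above does not show the call site. A minimal sketch of that kind of use:

import json5

# Hypothetical example: model output that is close to JSON but not strictly valid.
raw = "{'title': 'Invoice', amount: 42.5, }"  # single quotes, bare key, trailing comma
data = json5.loads(raw)  # json5 tolerates this where json.loads would raise
print(data["amount"])    # 42.5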
|