david167 committed on
Commit
adea437
·
1 Parent(s): 23353f8

Fix PyTorch CVE-2025-32434: upgrade to v2.6+, use safetensors, restore Llama 3.1

Browse files
Files changed (3) hide show
  1. Dockerfile +2 -2
  2. app.py +4 -24
  3. requirements.txt +1 -1
Dockerfile CHANGED
@@ -22,8 +22,8 @@ WORKDIR /app
22
  # Copy requirements
23
  COPY requirements.txt .
24
 
25
- # Install PyTorch with CUDA support
26
- RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
27
 
28
  # Skip llama-cpp-python to avoid compilation - using transformers instead
29
 
 
22
  # Copy requirements
23
  COPY requirements.txt .
24
 
25
+ # Install PyTorch 2.6+ with CUDA support to fix CVE-2025-32434
26
+ RUN pip install "torch>=2.6.0" torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
27
 
28
  # Skip llama-cpp-python to avoid compilation - using transformers instead
29
 
app.py CHANGED
@@ -62,9 +62,8 @@ async def load_model():
62
 
63
  logger.info("Loading model with transformers...")
64
 
65
- # Use a working model while waiting for Llama 3.1 access
66
- # TODO: Change back to "meta-llama/Llama-3.1-8B-Instruct" once you have access
67
- base_model_name = "microsoft/DialoGPT-medium"
68
 
69
  # Get HF token from environment
70
  hf_token = os.getenv("HF_TOKEN")
@@ -82,6 +81,7 @@ async def load_model():
82
  device_map="auto" if device == "cuda" else None,
83
  trust_remote_code=True,
84
  low_cpu_mem_usage=True,
 
85
  token=hf_token
86
  )
87
 
@@ -92,27 +92,7 @@ async def load_model():
92
 
93
  except Exception as e:
94
  logger.error(f"Error loading model with transformers: {str(e)}")
95
- # Fallback to a smaller, more reliable model
96
- logger.info("Falling back to smaller model...")
97
-
98
- try:
99
- base_model_name = "microsoft/DialoGPT-medium"
100
-
101
- tokenizer = AutoTokenizer.from_pretrained(base_model_name)
102
- model = AutoModelForCausalLM.from_pretrained(
103
- base_model_name,
104
- torch_dtype=torch.float16 if device == "cuda" else torch.float32,
105
- device_map="auto" if device == "cuda" else None
106
- )
107
-
108
- if device == "cuda":
109
- model = model.to(device)
110
-
111
- logger.info("Fallback model loaded successfully!")
112
-
113
- except Exception as fallback_error:
114
- logger.error(f"Fallback model also failed: {str(fallback_error)}")
115
- raise
116
 
117
  except Exception as e:
118
  logger.error(f"Error loading model: {str(e)}")
 
62
 
63
  logger.info("Loading model with transformers...")
64
 
65
+ # Use Llama 3.1 8B Instruct (user now has access)
66
+ base_model_name = "meta-llama/Llama-3.1-8B-Instruct"
 
67
 
68
  # Get HF token from environment
69
  hf_token = os.getenv("HF_TOKEN")
 
81
  device_map="auto" if device == "cuda" else None,
82
  trust_remote_code=True,
83
  low_cpu_mem_usage=True,
84
+ use_safetensors=True, # Force safetensors to avoid CVE-2025-32434
85
  token=hf_token
86
  )
87
 
 
92
 
93
  except Exception as e:
94
  logger.error(f"Error loading model with transformers: {str(e)}")
95
+ raise # Re-raise the error to stop startup if primary model fails
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  except Exception as e:
98
  logger.error(f"Error loading model: {str(e)}")
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
  fastapi==0.104.1
2
  uvicorn[standard]==0.24.0
3
  pydantic==2.5.0
4
- torch>=2.0.0
5
  transformers>=4.35.0
6
  accelerate>=0.24.0
7
  bitsandbytes>=0.41.0
 
1
  fastapi==0.104.1
2
  uvicorn[standard]==0.24.0
3
  pydantic==2.5.0
4
+ torch>=2.6.0
5
  transformers>=4.35.0
6
  accelerate>=0.24.0
7
  bitsandbytes>=0.41.0