Miroir committed on
Commit
b9f6a03
·
1 Parent(s): 1169139

Changed the order of operations so user permissions on the /tmp model-cache directory are set up before the model is checked for in the cache

Browse files
Files changed (2) hide show
  1. Dockerfile +8 -2
  2. services/word_service.py +16 -16
Dockerfile CHANGED
@@ -1,6 +1,12 @@
1
- FROM python:3.9-slim
2
 
 
3
  RUN useradd -m -u 1000 user
 
 
 
 
 
4
  USER user
5
  ENV PATH="/home/user/.local/bin:$PATH"
6
  ENV MODEL_URL="https://huggingface.co/Miroir/cc.fr.300.reduced/resolve/main/cc.fr.300.reduced.vec"
@@ -9,7 +15,7 @@ WORKDIR /app
9
 
10
  COPY --chown=user requirements.txt requirements.txt
11
 
12
- # Modified installation command with --no-cache-dir
13
  RUN pip install --no-cache-dir --upgrade pip && \
14
  pip install --no-cache-dir --upgrade -r requirements.txt
15
 
 
1
FROM python:3.11-slim

# Create the non-root user and the model cache directory in a single layer.
# Ownership must be granted here, while still root: after USER switches to
# the unprivileged account, chown on /tmp/fasttext_cache would fail.
RUN useradd -m -u 1000 user && \
    mkdir -p /tmp/fasttext_cache && \
    chown -R user:user /tmp/fasttext_cache

# Switch to the unprivileged user after permissions are set up
USER user
ENV PATH="/home/user/.local/bin:$PATH"
ENV MODEL_URL="https://huggingface.co/Miroir/cc.fr.300.reduced/resolve/main/cc.fr.300.reduced.vec"
 
15
 
16
COPY --chown=user requirements.txt requirements.txt

# Install dependencies
# --no-cache-dir keeps pip's download cache out of the image layer; running
# as the unprivileged user installs packages into /home/user/.local, which
# the PATH set above already covers.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir --upgrade -r requirements.txt
21
 
services/word_service.py CHANGED
@@ -23,26 +23,27 @@ class WordEmbeddingService:
23
  def _initialize_model(self):
24
  """Initialize the model only when needed"""
25
  try:
26
- # Get model URL from environment variable
27
  model_url = os.getenv('MODEL_URL', 'https://huggingface.co/Miroir/cc.fr.300.reduced/resolve/main/cc.fr.300.reduced.vec')
 
 
28
 
29
- logger.info("Loading FastText embeddings from URL...")
30
 
31
- # Create a temporary file to store the model
32
- with tempfile.NamedTemporaryFile(delete=False) as temp_file:
33
- # Download the file
34
  response = requests.get(model_url, stream=True)
35
  response.raise_for_status()
36
 
37
- # Write the content to the temporary file
38
- for chunk in response.iter_content(chunk_size=8192):
39
- if chunk:
40
- temp_file.write(chunk)
41
-
42
- temp_file.flush()
43
-
44
- # Load the model from the temporary file
45
- WordEmbeddingService._model = KeyedVectors.load_word2vec_format(temp_file.name)
46
 
47
  # Build vocabulary vectors
48
  self.vocab_vectors = {
@@ -50,8 +51,7 @@ class WordEmbeddingService:
50
  for word in WordEmbeddingService._model.index_to_key
51
  }
52
 
53
- logger.info(f"FastText model loaded successfully with "
54
- f"{len(self.vocab_vectors)} words in the vocabulary.")
55
 
56
  except Exception as e:
57
  logger.exception(f"Failed to load FastText model: {str(e)}")
 
23
  def _initialize_model(self):
24
  """Initialize the model only when needed"""
25
  try:
 
26
  model_url = os.getenv('MODEL_URL', 'https://huggingface.co/Miroir/cc.fr.300.reduced/resolve/main/cc.fr.300.reduced.vec')
27
+ cache_dir = "/tmp/fasttext_cache" # Hugging Face Spaces preserves this
28
+ os.makedirs(cache_dir, exist_ok=True)
29
 
30
+ cache_path = os.path.join(cache_dir, "model.vec")
31
 
32
+ # Check if cached
33
+ if not os.path.exists(cache_path):
34
+ logger.info("Downloading FastText embeddings...")
35
  response = requests.get(model_url, stream=True)
36
  response.raise_for_status()
37
 
38
+ with open(cache_path, 'wb') as f:
39
+ for chunk in response.iter_content(chunk_size=8192):
40
+ if chunk:
41
+ f.write(chunk)
42
+ else:
43
+ logger.info("Using cached FastText model")
44
+
45
+ # Load the model
46
+ WordEmbeddingService._model = KeyedVectors.load_word2vec_format(cache_path)
47
 
48
  # Build vocabulary vectors
49
  self.vocab_vectors = {
 
51
  for word in WordEmbeddingService._model.index_to_key
52
  }
53
 
54
+ logger.info(f"FastText model loaded with {len(self.vocab_vectors)} words")
 
55
 
56
  except Exception as e:
57
  logger.exception(f"Failed to load FastText model: {str(e)}")