IDS75912 committed on
Commit
6b9607e
·
1 Parent(s): 059d1d9

fix: Attempt to resolve permissions error

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -0
  2. main.py +45 -48
Dockerfile CHANGED
@@ -29,3 +29,5 @@ ENV PATH="/opt/venv/bin:$PATH"
29
  # Expose the port and run the app
30
  EXPOSE 8000
31
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
 
 
 
29
  # Expose the port and run the app
30
  EXPOSE 8000
31
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
32
+
33
+
main.py CHANGED
@@ -43,64 +43,64 @@ ANIMALS = ['Cat', 'Dog', 'Panda'] # Animal names here, these represent the label
43
  # 1) download your SavedModel from the Hub into a writable directory (Spaces often
44
  # run with the repository checkout read-only). Prefer TMPDIR or /tmp.
45
  repo_id = "IDS75912/masterclass-2025"
46
- local_model_dir = os.environ.get('HF_MODEL_DIR') or os.environ.get('TMPDIR') or '/tmp/model'
47
 
48
  # Ensure the directory exists and is writable. If creating fails, raise a clear error.
49
  try:
50
  os.makedirs(local_model_dir, exist_ok=True)
51
- except PermissionError:
52
- raise RuntimeError(f"Cannot create model directory '{local_model_dir}'. Ensure the process has write access or set HF_MODEL_DIR to a writable path.")
53
 
54
  # download files into local_model_dir and load model with resilient error handling
55
  model = None
56
  model_load_error = None
57
- try:
58
  # First try using a cache dir so downloads happen in a shared cache and final move
59
  # into local_model_dir is less likely to require risky renames inside the repo.
60
- cache_dir = '/tmp/.cache/huggingface'
61
- os.makedirs(cache_dir, exist_ok=True)
62
 
63
- hf_hub_download(repo_id, filename="config.json", repo_type="model", local_dir=local_model_dir, cache_dir=cache_dir)
64
- hf_hub_download(repo_id, filename="metadata.json", repo_type="model", local_dir=local_model_dir, cache_dir=cache_dir)
65
- hf_hub_download(repo_id, filename="model.weights.h5", repo_type="model", local_dir=local_model_dir, cache_dir=cache_dir)
66
 
67
  # 2) load it
68
- model = tf.keras.models.load_model(local_model_dir)
69
- logging.info(f"Model loaded successfully from {local_model_dir}")
70
- except Exception as e:
71
- # Primary download attempt failed -> try streamed fallback which writes directly
72
- # into local_model_dir (avoids internal temp/move when those operations are blocked).
73
- primary_err = e
74
- logging.error("Primary hf_hub_download failed, attempting streamed fallback: %s", e)
75
- try:
76
- for filename in ("config.json", "metadata.json", "model.weights.h5"):
77
- url = hf_hub_url(repo_id=repo_id, filename=filename, repo_type='model')
78
- logging.info(f"Streaming {filename} from {url} into {local_model_dir}")
79
- resp = requests.get(url, stream=True, timeout=60)
80
- resp.raise_for_status()
81
- # write to a temp file inside the target dir then move atomically
82
- with tempfile.NamedTemporaryFile(dir=local_model_dir, delete=False) as tmpf:
83
- for chunk in resp.iter_content(chunk_size=8192):
84
- if chunk:
85
- tmpf.write(chunk)
86
- tmp_path = tmpf.name
87
- final_path = os.path.join(local_model_dir, filename)
88
- try:
89
- shutil.move(tmp_path, final_path)
90
- except Exception:
91
- # if atomic move fails, try copy+remove
92
- shutil.copy(tmp_path, final_path)
93
- os.remove(tmp_path)
94
-
95
- # After streamed download, try loading
96
- model = tf.keras.models.load_model(local_model_dir)
97
- logging.info(f"Model loaded successfully from {local_model_dir} after streamed fallback")
98
- except Exception as e2:
99
- model_load_error = f"primary: {primary_err}; fallback: {e2}"
100
- tb = traceback.format_exc()
101
- logging.error("Streamed fallback failed: %s", e2)
102
- logging.error(tb)
103
- model = None
104
 
105
 
106
  @app.post('/upload/image')
@@ -151,9 +151,6 @@ def predict_stub() -> Dict[str, Any]:
151
 
152
 
153
 
154
-
155
-
156
-
157
  if __name__ == "__main__":
158
  # Run with: conda run -n gradio uvicorn main:app --reload
159
  import uvicorn
 
43
  # 1) download your SavedModel from the Hub into a writable directory (Spaces often
44
  # run with the repository checkout read-only). Set HF_MODEL_DIR to override the default ./model_dir.
45
  repo_id = "IDS75912/masterclass-2025"
46
+ local_model_dir = os.environ.get('HF_MODEL_DIR', './model_dir')
47
 
48
  # Ensure the directory exists and is writable. If creating fails, raise a clear error.
49
  try:
50
  os.makedirs(local_model_dir, exist_ok=True)
51
+ except Exception as e:
52
+ raise RuntimeError(f"Cannot create model directory '{local_model_dir}'. Ensure the process has write access or set HF_MODEL_DIR to a writable path., Error: {e}")
53
 
54
  # download files into local_model_dir and load model with resilient error handling
55
  model = None
56
  model_load_error = None
57
+ #try:
58
  # First try using a cache dir so downloads happen in a shared cache and final move
59
  # into local_model_dir is less likely to require risky renames inside the repo.
60
+ # cache_dir = '/tmp/.cache/huggingface'
61
+ # os.makedirs(cache_dir, exist_ok=True)
62
 
63
+ hf_hub_download(repo_id, filename="config.json", repo_type="model", local_dir=local_model_dir )
64
+ hf_hub_download(repo_id, filename="metadata.json", repo_type="model", local_dir=local_model_dir)
65
+ hf_hub_download(repo_id, filename="model.weights.h5", repo_type="model", local_dir=local_model_dir)
66
 
67
  # 2) load it
68
+ model = tf.keras.models.load_model(local_model_dir)
69
+ logging.info(f"Model loaded successfully from {local_model_dir}")
70
+ # except Exception as e:
71
+ # # Primary download attempt failed -> try streamed fallback which writes directly
72
+ # # into local_model_dir (avoids internal temp/move when those operations are blocked).
73
+ # primary_err = e
74
+ # logging.error("Primary hf_hub_download failed, attempting streamed fallback: %s", e)
75
+ # try:
76
+ # for filename in ("config.json", "metadata.json", "model.weights.h5"):
77
+ # url = hf_hub_url(repo_id=repo_id, filename=filename, repo_type='model')
78
+ # logging.info(f"Streaming {filename} from {url} into {local_model_dir}")
79
+ # resp = requests.get(url, stream=True, timeout=60)
80
+ # resp.raise_for_status()
81
+ # # write to a temp file inside the target dir then move atomically
82
+ # with tempfile.NamedTemporaryFile(dir=local_model_dir, delete=False) as tmpf:
83
+ # for chunk in resp.iter_content(chunk_size=8192):
84
+ # if chunk:
85
+ # tmpf.write(chunk)
86
+ # tmp_path = tmpf.name
87
+ # final_path = os.path.join(local_model_dir, filename)
88
+ # try:
89
+ # shutil.move(tmp_path, final_path)
90
+ # except Exception:
91
+ # # if atomic move fails, try copy+remove
92
+ # shutil.copy(tmp_path, final_path)
93
+ # os.remove(tmp_path)
94
+
95
+ # # After streamed download, try loading
96
+ # model = tf.keras.models.load_model(local_model_dir)
97
+ # logging.info(f"Model loaded successfully from {local_model_dir} after streamed fallback")
98
+ # except Exception as e2:
99
+ # model_load_error = f"primary: {primary_err}; fallback: {e2}"
100
+ # tb = traceback.format_exc()
101
+ # logging.error("Streamed fallback failed: %s", e2)
102
+ # logging.error(tb)
103
+ # model = None
104
 
105
 
106
  @app.post('/upload/image')
 
151
 
152
 
153
 
 
 
 
154
  if __name__ == "__main__":
155
  # Run with: conda run -n gradio uvicorn main:app --reload
156
  import uvicorn