Spaces:
Sleeping
Sleeping
add gpu healthcheck
Browse files
app.py
CHANGED
|
@@ -73,6 +73,7 @@ if IS_DEPLOYED and HF_CACHE_DIR:
|
|
| 73 |
except (PermissionError, OSError):
|
| 74 |
# If we can't create it, log but continue (might already exist from Dockerfile)
|
| 75 |
pass
|
|
|
|
| 76 |
else:
|
| 77 |
from dotenv import load_dotenv
|
| 78 |
load_dotenv()
|
|
@@ -82,11 +83,32 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
|
| 82 |
logger = logging.getLogger(__name__)
|
| 83 |
|
| 84 |
# Log environment setup for debugging
|
| 85 |
-
logger.info(f"π Environment: {'DEPLOYED' if IS_DEPLOYED else 'LOCAL'}")
|
| 86 |
logger.info(f"π PROJECT_DIR: {PROJECT_DIR}")
|
| 87 |
-
logger.info(f"
|
| 88 |
logger.info(f"π§ OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# Page config
|
| 92 |
st.set_page_config(
|
|
|
|
| 73 |
except (PermissionError, OSError):
|
| 74 |
# If we can't create it, log but continue (might already exist from Dockerfile)
|
| 75 |
pass
|
| 76 |
+
|
| 77 |
else:
|
| 78 |
from dotenv import load_dotenv
|
| 79 |
load_dotenv()
|
|
|
|
| 83 |
logger = logging.getLogger(__name__)
|
| 84 |
|
| 85 |
# Log environment setup for debugging
|
|
|
|
| 86 |
logger.info(f"π PROJECT_DIR: {PROJECT_DIR}")
|
| 87 |
+
logger.info(f"π Environment: {'DEPLOYED' if IS_DEPLOYED else 'LOCAL'}")
|
| 88 |
logger.info(f"π§ OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
|
| 89 |
+
logger.info(f"π HuggingFace cache: {os.environ.get('HF_HOME', 'DEFAULT (not overridden)')}")
|
| 90 |
+
|
| 91 |
+
|
| 92 |
# --- One-time GPU healthcheck ---------------------------------------------
# Reports CUDA availability at startup. UI output (st.write) is shown only on
# the first run of the session, guarded via st.session_state["gpu_check"];
# console/log output is emitted on every rerun. Best-effort: any failure is
# logged and never blocks app startup.
import sys

import torch

try:
    cuda_available = torch.cuda.is_available()
    if "gpu_check" not in st.session_state:
        # First page load for this session: surface the result in the UI.
        st.write(f"CUDA available: {cuda_available}")
    print("CUDA:", cuda_available)
    logger.info("CUDA: %s", cuda_available)
    if cuda_available:
        device_name = torch.cuda.get_device_name(0)
        if "gpu_check" not in st.session_state:
            st.write(f"Device: {device_name}")
        print("Device:", device_name)
        logger.info("Device: %s", device_name)
except Exception as e:
    # Healthcheck is advisory only — report and continue.
    if "gpu_check" not in st.session_state:
        st.write(f"GPU check skipped: {e}")
    logger.error("GPU check skipped: %s", e)
    print("GPU check skipped:", e, file=sys.stderr)
finally:
    # Mark the check as done so reruns skip the UI messages.
    st.session_state.gpu_check = True
|
| 111 |
+
|
| 112 |
|
| 113 |
# Page config
|
| 114 |
st.set_page_config(
|