akryldigital committed on
Commit
0714a89
·
verified ·
1 Parent(s): 074129d

add gpu healthcheck

Browse files
Files changed (1) hide show
  1. app.py +25 -3
app.py CHANGED
@@ -73,6 +73,7 @@ if IS_DEPLOYED and HF_CACHE_DIR:
73
  except (PermissionError, OSError):
74
  # If we can't create it, log but continue (might already exist from Dockerfile)
75
  pass
 
76
  else:
77
  from dotenv import load_dotenv
78
  load_dotenv()
@@ -82,11 +83,32 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
82
  logger = logging.getLogger(__name__)
83
 
84
  # Log environment setup for debugging
85
- logger.info(f"🌍 Environment: {'DEPLOYED' if IS_DEPLOYED else 'LOCAL'}")
86
  logger.info(f"📁 PROJECT_DIR: {PROJECT_DIR}")
87
- logger.info(f"📁 HuggingFace cache: {os.environ.get('HF_HOME', 'DEFAULT (not overridden)')}")
88
  logger.info(f"🔧 OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
89
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # Page config
92
  st.set_page_config(
 
73
  except (PermissionError, OSError):
74
  # If we can't create it, log but continue (might already exist from Dockerfile)
75
  pass
76
+
77
  else:
78
  from dotenv import load_dotenv
79
  load_dotenv()
 
83
  logger = logging.getLogger(__name__)
84
 
85
  # Log environment setup for debugging
 
86
  logger.info(f"📁 PROJECT_DIR: {PROJECT_DIR}")
87
+ logger.info(f"🌍 Environment: {'DEPLOYED' if IS_DEPLOYED else 'LOCAL'}")
88
  logger.info(f"🔧 OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
89
+ logger.info(f"📁 HuggingFace cache: {os.environ.get('HF_HOME', 'DEFAULT (not overridden)')}")
90
+
91
+
92
# --- GPU health check -------------------------------------------------------
# Reports whether CUDA is usable (and which device is present) to stdout, the
# application log and, once per browser session, the Streamlit page.
import sys

import torch

# Streamlit re-executes this script on every interaction. The "gpu_check" key
# is set in the `finally` clause below, so the on-page report is only written
# during the first run of a session; console/log output happens on every run.
_show_gpu_status = "gpu_check" not in st.session_state

# NOTE(review): the st.write() calls below run before st.set_page_config()
# further down the file. Some Streamlit versions require set_page_config() to
# be the first Streamlit command - confirm against the deployed version, or
# move this check below the page config.
try:
    cuda_ = torch.cuda.is_available()
    if _show_gpu_status:
        st.write(f"CUDA: {cuda_}")
    print("CUDA:", cuda_)
    logger.info("CUDA: %s", cuda_)

    if cuda_:
        # Query the device name once and reuse it for all three sinks.
        device_name = torch.cuda.get_device_name(0)
        if _show_gpu_status:
            st.write(f"Device: {device_name}")
        print("Device:", device_name)
        logger.info("Device: %s", device_name)
except Exception as exc:
    # Best-effort diagnostic: a failing GPU probe must not take the app down.
    if _show_gpu_status:
        st.write(f"GPU check skipped: {exc}")
    logger.error("GPU check skipped: %s", exc)
    print("GPU check skipped:", exc, file=sys.stderr)
finally:
    # Mark the check as done so reruns in this session skip the page output.
    st.session_state.gpu_check = True
111
+
112
 
113
  # Page config
114
  st.set_page_config(