SFM2001 committed on
Commit
0dae8fe
·
1 Parent(s): f7e69d0
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. create_app.py +4 -1
Dockerfile CHANGED
@@ -13,4 +13,4 @@ COPY --chown=user . /app
13
 
14
  RUN python -m spacy download en_core_web_sm
15
 
16
- CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
 
13
 
14
  RUN python -m spacy download en_core_web_sm
15
 
16
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--timeout", "360", "--workers", "1", "--log-level", "debug", "app:app"]
create_app.py CHANGED
@@ -19,6 +19,9 @@ MODEL_SESSION = None
19
  def load_models():
20
  global MODELS_LOADED, LONGFORMER_TOKENIZER, LONGFORMER_MODEL, QWEN_TOKENIZER, QWEN_MODEL
21
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
22
  print("DEIVCE=", device)
23
  print("WHY DONT PRINT")
24
  if not MODELS_LOADED:
@@ -33,7 +36,7 @@ def load_models():
33
  QWEN_TOKENIZER.pad_token_id = QWEN_TOKENIZER.eos_token_id
34
  print("QWEN TOKENIZER LOADED")
35
  try:
36
- QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name).half()
37
  QWEN_MODEL = QWEN_MODEL.to(device)
38
  print("QWEN MODEL LOADED") # Typo fixed from your original code ("LOADED" vs. "LOADED")
39
  except Exception as e:
 
19
  def load_models():
20
  global MODELS_LOADED, LONGFORMER_TOKENIZER, LONGFORMER_MODEL, QWEN_TOKENIZER, QWEN_MODEL
21
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22
+ print(f"CUDA Available: {torch.cuda.is_available()}")
23
+ print(f"CUDA Device Name: {torch.cuda.get_device_name(0)}")
24
+ print(f"Free GPU Memory: {torch.cuda.memory_reserved(0)/1e9:.2f} GB")
25
  print("DEIVCE=", device)
26
  print("WHY DONT PRINT")
27
  if not MODELS_LOADED:
 
36
  QWEN_TOKENIZER.pad_token_id = QWEN_TOKENIZER.eos_token_id
37
  print("QWEN TOKENIZER LOADED")
38
  try:
39
+ QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)
40
  QWEN_MODEL = QWEN_MODEL.to(device)
41
  print("QWEN MODEL LOADED") # Typo fixed from your original code ("LOADED" vs. "LOADED")
42
  except Exception as e: