fix
Browse files
- Dockerfile +1 -1
- create_app.py +4 -1
Dockerfile
CHANGED
|
@@ -13,4 +13,4 @@ COPY --chown=user . /app
|
|
| 13 |
|
| 14 |
RUN python -m spacy download en_core_web_sm
|
| 15 |
|
| 16 |
-
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
|
|
|
|
| 13 |
|
| 14 |
RUN python -m spacy download en_core_web_sm
|
| 15 |
|
| 16 |
+
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--timeout", "360", "--workers", "1", "--log-level", "debug", "app:app"]
|
create_app.py
CHANGED
|
@@ -19,6 +19,9 @@ MODEL_SESSION = None
|
|
| 19 |
def load_models():
|
| 20 |
global MODELS_LOADED, LONGFORMER_TOKENIZER, LONGFORMER_MODEL, QWEN_TOKENIZER, QWEN_MODEL
|
| 21 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
|
|
|
|
| 22 |
print("DEIVCE=", device)
|
| 23 |
print("WHY DONT PRINT")
|
| 24 |
if not MODELS_LOADED:
|
|
@@ -33,7 +36,7 @@ def load_models():
|
|
| 33 |
QWEN_TOKENIZER.pad_token_id = QWEN_TOKENIZER.eos_token_id
|
| 34 |
print("QWEN TOKENIZER LOADED")
|
| 35 |
try:
|
| 36 |
-
QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name
|
| 37 |
QWEN_MODEL = QWEN_MODEL.to(device)
|
| 38 |
print("QWEN MODEL LOADED") # Typo fixed from your original code ("LOADED" vs. "LOADED")
|
| 39 |
except Exception as e:
|
|
|
|
| 19 |
def load_models():
|
| 20 |
global MODELS_LOADED, LONGFORMER_TOKENIZER, LONGFORMER_MODEL, QWEN_TOKENIZER, QWEN_MODEL
|
| 21 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 22 |
+
print(f"CUDA Available: {torch.cuda.is_available()}")
|
| 23 |
+
print(f"CUDA Device Name: {torch.cuda.get_device_name(0)}")
|
| 24 |
+
print(f"Free GPU Memory: {torch.cuda.memory_reserved(0)/1e9:.2f} GB")
|
| 25 |
print("DEIVCE=", device)
|
| 26 |
print("WHY DONT PRINT")
|
| 27 |
if not MODELS_LOADED:
|
|
|
|
| 36 |
QWEN_TOKENIZER.pad_token_id = QWEN_TOKENIZER.eos_token_id
|
| 37 |
print("QWEN TOKENIZER LOADED")
|
| 38 |
try:
|
| 39 |
+
QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)
|
| 40 |
QWEN_MODEL = QWEN_MODEL.to(device)
|
| 41 |
print("QWEN MODEL LOADED") # Typo fixed from your original code ("LOADED" vs. "LOADED")
|
| 42 |
except Exception as e:
|