Changes to be committed:
  modified: Dockerfile
  modified: create_app.py

- Dockerfile      +1 -1
- create_app.py   +8 -12
Dockerfile

@@ -13,4 +13,4 @@ COPY --chown=user . /app
 
 RUN python -m spacy download en_core_web_sm
 
-CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--timeout", "360", "
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--timeout", "360", "app:app"]
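The updated CMD launches the Space with gunicorn bound to port 7860 and a 360-second worker timeout, serving the WSGI callable "app" from a module named "app". A minimal sketch of an entrypoint that would satisfy that target (the Flask wiring and the load_models import are assumptions for illustration, not part of this commit):

    # app.py -- hypothetical entrypoint matching the gunicorn target "app:app"
    from flask import Flask, jsonify

    from create_app import load_models

    app = Flask(__name__)

    # Load the Longformer scorer and the Qwen model once at worker start-up,
    # so the long --timeout only has to cover the first boot, not every request.
    load_models()

    @app.route("/health")
    def health():
        # Liveness probe; the scoring/generation routes would sit alongside this.
        return jsonify(status="ok")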
create_app.py

@@ -19,32 +19,28 @@ MODEL_SESSION = None
 def load_models():
     global MODELS_LOADED, LONGFORMER_TOKENIZER, LONGFORMER_MODEL, QWEN_TOKENIZER, QWEN_MODEL
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    print(f"CUDA Available: {torch.cuda.is_available()}")
-    print(f"CUDA Device Name: {torch.cuda.get_device_name(0)}")
-    print(f"Free GPU Memory: {torch.cuda.memory_reserved(0)/1e9:.2f} GB")
     print("DEIVCE=", device)
-    print("WHY DONT PRINT")
     if not MODELS_LOADED:
-        LONGFORMER_TOKENIZER = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096', device='auto')
-        config = LongformerConfig.from_json_file("Longformer_checkpoint/config.json")
-        LONGFORMER_MODEL = CustomLongformerForSequenceClassification(config).from_pretrained('SFM2001/LongFormerScorer')
-        LONGFORMER_MODEL = LONGFORMER_MODEL.to(device)
-        LONGFORMER_MODEL.eval()
         print("START TO GET QWEN")
-        model_name = 'Qwen/Qwen3-
+        model_name = 'Qwen/Qwen3-0.6B'
         QWEN_TOKENIZER = AutoTokenizer.from_pretrained(model_name, device='auto')
         QWEN_TOKENIZER.pad_token_id = QWEN_TOKENIZER.eos_token_id
         print("QWEN TOKENIZER LOADED")
         try:
-            QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name,
+            QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
             QWEN_MODEL = QWEN_MODEL.to(device)
-            print("QWEN MODEL LOADED"
+            print("QWEN MODEL LOADED", flush=True)
         except Exception as e:
             print(f"ERROR LOADING QWEN MODEL: {str(e)}")
             raise  # Re-raise to see full traceback
         # QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name).half()
         # QWEN_MODEL = QWEN_MODEL.to(device)
         print("QWEN MODEL LOADED")
+        LONGFORMER_TOKENIZER = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096', device='auto')
+        config = LongformerConfig.from_json_file("Longformer_checkpoint/config.json")
+        LONGFORMER_MODEL = CustomLongformerForSequenceClassification(config).from_pretrained('SFM2001/LongFormerScorer')
+        LONGFORMER_MODEL = LONGFORMER_MODEL.to(device)
+        LONGFORMER_MODEL.eval()
         MODELS_LOADED = True
         print("LOAD ENDED")
 
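The net effect of this change to load_models() is that the Qwen checkpoint is pinned to Qwen/Qwen3-0.6B and loaded with device_map="auto" before the Longformer scorer, the GPU-introspection prints are dropped, and the MODELS_LOADED flag still guards against a second load. A rough smoke test of the globals the function populates, assuming it is called from the serving process (the run_qwen helper and the prompt are illustrative, not from this diff):

    # hypothetical check that the module-level globals are usable after loading
    import torch
    import create_app

    create_app.load_models()  # guarded by MODELS_LOADED, so repeat calls skip the heavy work

    def run_qwen(prompt: str, max_new_tokens: int = 32) -> str:
        tok, model = create_app.QWEN_TOKENIZER, create_app.QWEN_MODEL
        inputs = tok(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            out = model.generate(**inputs, max_new_tokens=max_new_tokens)
        return tok.decode(out[0], skip_special_tokens=True)

    print(run_qwen("Say hello."))

One thing worth watching in the Space logs: the diff keeps QWEN_MODEL.to(device) immediately after loading with device_map="auto", which is usually redundant on a single GPU and can be rejected by accelerate when weights have been offloaded.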