SFM2001 committed on
Commit
0daa81d
·
1 Parent(s): 0dae8fe

Changes to be committed:

Browse files

modified: Dockerfile
modified: create_app.py

Files changed (2) hide show
  1. Dockerfile +1 -1
  2. create_app.py +8 -12
Dockerfile CHANGED
@@ -13,4 +13,4 @@ COPY --chown=user . /app
13
 
14
  RUN python -m spacy download en_core_web_sm
15
 
16
- CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--timeout", "360", "--workers", "1", "--log-level", "debug", "app:app"]
 
13
 
14
  RUN python -m spacy download en_core_web_sm
15
 
16
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--timeout", "360", "app:app"]
create_app.py CHANGED
@@ -19,32 +19,28 @@ MODEL_SESSION = None
19
  def load_models():
20
  global MODELS_LOADED, LONGFORMER_TOKENIZER, LONGFORMER_MODEL, QWEN_TOKENIZER, QWEN_MODEL
21
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22
- print(f"CUDA Available: {torch.cuda.is_available()}")
23
- print(f"CUDA Device Name: {torch.cuda.get_device_name(0)}")
24
- print(f"Free GPU Memory: {torch.cuda.memory_reserved(0)/1e9:.2f} GB")
25
  print("DEIVCE=", device)
26
- print("WHY DONT PRINT")
27
  if not MODELS_LOADED:
28
- LONGFORMER_TOKENIZER = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096', device='auto')
29
- config = LongformerConfig.from_json_file("Longformer_checkpoint/config.json")
30
- LONGFORMER_MODEL = CustomLongformerForSequenceClassification(config).from_pretrained('SFM2001/LongFormerScorer')
31
- LONGFORMER_MODEL = LONGFORMER_MODEL.to(device)
32
- LONGFORMER_MODEL.eval()
33
  print("START TO GET QWEN")
34
- model_name = 'Qwen/Qwen3-1.7B'
35
  QWEN_TOKENIZER = AutoTokenizer.from_pretrained(model_name, device='auto')
36
  QWEN_TOKENIZER.pad_token_id = QWEN_TOKENIZER.eos_token_id
37
  print("QWEN TOKENIZER LOADED")
38
  try:
39
- QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)
40
  QWEN_MODEL = QWEN_MODEL.to(device)
41
- print("QWEN MODEL LOADED") # Typo fixed from your original code ("LOADED" vs. "LOADED")
42
  except Exception as e:
43
  print(f"ERROR LOADING QWEN MODEL: {str(e)}")
44
  raise # Re-raise to see full traceback
45
  # QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name).half()
46
  # QWEN_MODEL = QWEN_MODEL.to(device)
47
  print("QWEN MODEL LOADED")
 
 
 
 
 
48
  MODELS_LOADED = True
49
  print("LOAD ENDED")
50
 
 
19
  def load_models():
20
  global MODELS_LOADED, LONGFORMER_TOKENIZER, LONGFORMER_MODEL, QWEN_TOKENIZER, QWEN_MODEL
21
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
22
  print("DEIVCE=", device)
 
23
  if not MODELS_LOADED:
 
 
 
 
 
24
  print("START TO GET QWEN")
25
+ model_name = 'Qwen/Qwen3-0.6B'
26
  QWEN_TOKENIZER = AutoTokenizer.from_pretrained(model_name, device='auto')
27
  QWEN_TOKENIZER.pad_token_id = QWEN_TOKENIZER.eos_token_id
28
  print("QWEN TOKENIZER LOADED")
29
  try:
30
+ QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
31
  QWEN_MODEL = QWEN_MODEL.to(device)
32
+ print("QWEN MODEL LOADED", flush=True)
33
  except Exception as e:
34
  print(f"ERROR LOADING QWEN MODEL: {str(e)}")
35
  raise # Re-raise to see full traceback
36
  # QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name).half()
37
  # QWEN_MODEL = QWEN_MODEL.to(device)
38
  print("QWEN MODEL LOADED")
39
+ LONGFORMER_TOKENIZER = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096', device='auto')
40
+ config = LongformerConfig.from_json_file("Longformer_checkpoint/config.json")
41
+ LONGFORMER_MODEL = CustomLongformerForSequenceClassification(config).from_pretrained('SFM2001/LongFormerScorer')
42
+ LONGFORMER_MODEL = LONGFORMER_MODEL.to(device)
43
+ LONGFORMER_MODEL.eval()
44
  MODELS_LOADED = True
45
  print("LOAD ENDED")
46