MGZON committed on
Commit
e9688d3
·
verified ·
1 Parent(s): 007f931

Optimize startup with reduced threads and timing logs

Browse files
Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import logging
 
3
  from fastapi import FastAPI, HTTPException
4
  from pydantic import BaseModel
5
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
@@ -37,6 +38,8 @@ async def health_check():
37
  # Async function to load models
38
  async def load_models():
39
  global t5_tokenizer, t5_model, mistral, models_loaded
 
 
40
  try:
41
  # Load T5 model from local cache
42
  T5_MODEL_PATH = os.path.join(CACHE_DIR, "models--MGZON--mgzon-flan-t5-base/snapshots")
@@ -45,13 +48,13 @@ async def load_models():
45
  T5_MODEL_PATH,
46
  local_files_only=True
47
  )
48
- logger.info(f"Successfully loaded tokenizer for MGZON/mgzon-flan-t5-base")
49
  logger.info(f"Loading model for MGZON/mgzon-flan-t5-base from {T5_MODEL_PATH}")
50
  t5_model = AutoModelForSeq2SeqLM.from_pretrained(
51
  T5_MODEL_PATH,
52
  local_files_only=True
53
  )
54
- logger.info(f"Successfully loaded model for MGZON/mgzon-flan-t5-base")
55
 
56
  # Load Mistral GGUF model
57
  gguf_path = os.path.abspath("models/mistral-7b-instruct-v0.1.Q4_K_M.gguf")
@@ -66,14 +69,18 @@ async def load_models():
66
  mistral = Llama(
67
  model_path=gguf_path,
68
  n_ctx=2048,
69
- n_threads=8,
70
- # إذا كان لديك GPU، يمكنك إضافة: n_gpu_layers=35
 
71
  )
72
- logger.info(f"Successfully loaded Mistral model from {gguf_path}")
73
  models_loaded = True
74
  except Exception as e:
75
  logger.error(f"Failed to load models: {str(e)}")
76
  raise RuntimeError(f"Failed to load models: {str(e)}")
 
 
 
77
 
78
  # Run model loading in the background
79
  @app.on_event("startup")
@@ -108,7 +115,7 @@ async def ask(req: AskRequest):
108
  else:
109
  # نموذج Mistral
110
  logger.info("Using Mistral-7B-GGUF model")
111
- out = mistral(prompt=q, max_tokens=req.max_new_tokens)
112
  answer = out["choices"][0]["text"].strip()
113
  model_name = "Mistral-7B-GGUF"
114
  logger.info(f"Response generated by {model_name}: {answer}")
 
1
  import os
2
  import logging
3
+ import time
4
  from fastapi import FastAPI, HTTPException
5
  from pydantic import BaseModel
6
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
38
  # Async function to load models
39
  async def load_models():
40
  global t5_tokenizer, t5_model, mistral, models_loaded
41
+ start_time = time.time()
42
+ logger.info(f"Starting model loading at {start_time}")
43
  try:
44
  # Load T5 model from local cache
45
  T5_MODEL_PATH = os.path.join(CACHE_DIR, "models--MGZON--mgzon-flan-t5-base/snapshots")
 
48
  T5_MODEL_PATH,
49
  local_files_only=True
50
  )
51
+ logger.info(f"Successfully loaded tokenizer for MGZON/mgzon-flan-t5-base in {time.time() - start_time} seconds")
52
  logger.info(f"Loading model for MGZON/mgzon-flan-t5-base from {T5_MODEL_PATH}")
53
  t5_model = AutoModelForSeq2SeqLM.from_pretrained(
54
  T5_MODEL_PATH,
55
  local_files_only=True
56
  )
57
+ logger.info(f"Successfully loaded model for MGZON/mgzon-flan-t5-base in {time.time() - start_time} seconds")
58
 
59
  # Load Mistral GGUF model
60
  gguf_path = os.path.abspath("models/mistral-7b-instruct-v0.1.Q4_K_M.gguf")
 
69
  mistral = Llama(
70
  model_path=gguf_path,
71
  n_ctx=2048,
72
+ n_threads=4, # قللنا عدد الـ threads عشان نقلل الحمل
73
+ n_batch=512,
74
+ verbose=True
75
  )
76
+ logger.info(f"Successfully loaded Mistral model from {gguf_path} in {time.time() - start_time} seconds")
77
  models_loaded = True
78
  except Exception as e:
79
  logger.error(f"Failed to load models: {str(e)}")
80
  raise RuntimeError(f"Failed to load models: {str(e)}")
81
+ finally:
82
+ end_time = time.time()
83
+ logger.info(f"Model loading completed in {end_time - start_time} seconds")
84
 
85
  # Run model loading in the background
86
  @app.on_event("startup")
 
115
  else:
116
  # نموذج Mistral
117
  logger.info("Using Mistral-7B-GGUF model")
118
+ out = mistral(prompt=q, max_tokens=req.max_new_tokens, temperature=0.7)
119
  answer = out["choices"][0]["text"].strip()
120
  model_name = "Mistral-7B-GGUF"
121
  logger.info(f"Response generated by {model_name}: {answer}")