AI-Talent-Force Claude Sonnet 4.5 committed on
Commit
c8d6960
·
1 Parent(s): 6fdb30f

Add detailed progress tracking for model initialization

Browse files

- Shows 4-step progress: tokenizer → quantization → base model → LoRA
- Added visual indicators (✓, ⏳, 🚀, 🎯) for each stage
- Includes time estimate for base model loading (2-3 minutes)
- Added status indicator in UI showing model is ready
- Makes startup process transparent to users watching logs

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +21 -5
app.py CHANGED
@@ -9,11 +9,15 @@ BASE_MODEL = "unsloth/qwen3-30b-a3b"
9
  LORA_ADAPTER_PATH = "AI-Talent-Force/ceo-voice-lora-qwen3-30b"
10
 
11
  # Load model and tokenizer at startup (once)
12
- print("Initializing CEO AI Executive...")
13
- print("Loading tokenizer...")
 
 
 
14
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
 
15
 
16
- print("Loading base model...")
17
  # Use 4-bit quantization to fit in GPU memory
18
  quantization_config = BitsAndBytesConfig(
19
  load_in_4bit=True,
@@ -21,19 +25,27 @@ quantization_config = BitsAndBytesConfig(
21
  bnb_4bit_use_double_quant=True,
22
  bnb_4bit_quant_type="nf4"
23
  )
 
24
 
 
 
25
  model = AutoModelForCausalLM.from_pretrained(
26
  BASE_MODEL,
27
  quantization_config=quantization_config,
28
  device_map="auto",
29
  trust_remote_code=True
30
  )
 
31
 
32
- print("Loading LoRA adapter...")
33
  model = PeftModel.from_pretrained(model, LORA_ADAPTER_PATH)
34
  model.eval()
 
35
 
36
- print("Model loaded successfully!")
 
 
 
37
 
38
  @spaces.GPU
39
  def chat_with_ceo(message, history):
@@ -89,6 +101,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
89
  Ask questions about business strategy, leadership, technology, or any topic your CEO writes about.
90
 
91
  **Note:** This AI responds based on patterns learned from the CEO's blog posts and writings.
 
 
 
 
92
  """
93
  )
94
 
 
9
  LORA_ADAPTER_PATH = "AI-Talent-Force/ceo-voice-lora-qwen3-30b"
10
 
11
  # Load model and tokenizer at startup (once)
12
+ print("=" * 60)
13
+ print("🚀 INITIALIZING CEO AI EXECUTIVE")
14
+ print("=" * 60)
15
+
16
+ print("\n[1/4] Loading tokenizer...")
17
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
18
+ print("✓ Tokenizer loaded successfully!")
19
 
20
+ print("\n[2/4] Configuring 4-bit quantization...")
21
  # Use 4-bit quantization to fit in GPU memory
22
  quantization_config = BitsAndBytesConfig(
23
  load_in_4bit=True,
 
25
  bnb_4bit_use_double_quant=True,
26
  bnb_4bit_quant_type="nf4"
27
  )
28
+ print("✓ Quantization config ready!")
29
 
30
+ print("\n[3/4] Loading base model (Qwen3-30B)...")
31
+ print("⏳ This may take 2-3 minutes - downloading and quantizing 30B parameters...")
32
  model = AutoModelForCausalLM.from_pretrained(
33
  BASE_MODEL,
34
  quantization_config=quantization_config,
35
  device_map="auto",
36
  trust_remote_code=True
37
  )
38
+ print("✓ Base model loaded successfully!")
39
 
40
+ print("\n[4/4] Loading LoRA adapter (CEO fine-tuning)...")
41
  model = PeftModel.from_pretrained(model, LORA_ADAPTER_PATH)
42
  model.eval()
43
+ print("✓ LoRA adapter loaded successfully!")
44
 
45
+ print("\n" + "=" * 60)
46
+ print("🎯 CEO AI EXECUTIVE IS READY!")
47
+ print("=" * 60)
48
+ print("Model is loaded in memory and ready for fast inference.\n")
49
 
50
  @spaces.GPU
51
  def chat_with_ceo(message, history):
 
101
  Ask questions about business strategy, leadership, technology, or any topic your CEO writes about.
102
 
103
  **Note:** This AI responds based on patterns learned from the CEO's blog posts and writings.
104
+
105
+ ---
106
+
107
+ ✅ **Model Status:** Loaded and ready! The model is kept in memory for fast responses.
108
  """
109
  )
110