AI-Talent-Force Claude Sonnet 4.5 commited on
Commit Β·
c8d6960
1
Parent(s): 6fdb30f
Add detailed progress tracking for model initialization
Browse files- Shows 4-step progress: tokenizer → quantization → base model → LoRA
- Added visual indicators (✅, ⏳, 🚀, 🎯) for each stage
- Includes time estimate for base model loading (2-3 minutes)
- Added status indicator in UI showing model is ready
- Makes startup process transparent to users watching logs
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -9,11 +9,15 @@ BASE_MODEL = "unsloth/qwen3-30b-a3b"
|
|
| 9 |
LORA_ADAPTER_PATH = "AI-Talent-Force/ceo-voice-lora-qwen3-30b"
|
| 10 |
|
| 11 |
# Load model and tokenizer at startup (once)
|
| 12 |
-
print("
|
| 13 |
-
print("
|
|
|
|
|
|
|
|
|
|
| 14 |
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
|
|
|
|
| 15 |
|
| 16 |
-
print("
|
| 17 |
# Use 4-bit quantization to fit in GPU memory
|
| 18 |
quantization_config = BitsAndBytesConfig(
|
| 19 |
load_in_4bit=True,
|
|
@@ -21,19 +25,27 @@ quantization_config = BitsAndBytesConfig(
|
|
| 21 |
bnb_4bit_use_double_quant=True,
|
| 22 |
bnb_4bit_quant_type="nf4"
|
| 23 |
)
|
|
|
|
| 24 |
|
|
|
|
|
|
|
| 25 |
model = AutoModelForCausalLM.from_pretrained(
|
| 26 |
BASE_MODEL,
|
| 27 |
quantization_config=quantization_config,
|
| 28 |
device_map="auto",
|
| 29 |
trust_remote_code=True
|
| 30 |
)
|
|
|
|
| 31 |
|
| 32 |
-
print("Loading LoRA adapter...")
|
| 33 |
model = PeftModel.from_pretrained(model, LORA_ADAPTER_PATH)
|
| 34 |
model.eval()
|
|
|
|
| 35 |
|
| 36 |
-
print("
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
@spaces.GPU
|
| 39 |
def chat_with_ceo(message, history):
|
|
@@ -89,6 +101,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 89 |
Ask questions about business strategy, leadership, technology, or any topic your CEO writes about.
|
| 90 |
|
| 91 |
**Note:** This AI responds based on patterns learned from the CEO's blog posts and writings.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
"""
|
| 93 |
)
|
| 94 |
|
|
|
|
| 9 |
LORA_ADAPTER_PATH = "AI-Talent-Force/ceo-voice-lora-qwen3-30b"
|
| 10 |
|
| 11 |
# Load model and tokenizer at startup (once)
|
| 12 |
+
print("=" * 60)
|
| 13 |
+
print("🚀 INITIALIZING CEO AI EXECUTIVE")
|
| 14 |
+
print("=" * 60)
|
| 15 |
+
|
| 16 |
+
print("\n[1/4] Loading tokenizer...")
|
| 17 |
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
|
| 18 |
+
print("✅ Tokenizer loaded successfully!")
|
| 19 |
|
| 20 |
+
print("\n[2/4] Configuring 4-bit quantization...")
|
| 21 |
# Use 4-bit quantization to fit in GPU memory
|
| 22 |
quantization_config = BitsAndBytesConfig(
|
| 23 |
load_in_4bit=True,
|
|
|
|
| 25 |
bnb_4bit_use_double_quant=True,
|
| 26 |
bnb_4bit_quant_type="nf4"
|
| 27 |
)
|
| 28 |
+
print("✅ Quantization config ready!")
|
| 29 |
|
| 30 |
+
print("\n[3/4] Loading base model (Qwen3-30B)...")
|
| 31 |
+
print("⏳ This may take 2-3 minutes - downloading and quantizing 30B parameters...")
|
| 32 |
model = AutoModelForCausalLM.from_pretrained(
|
| 33 |
BASE_MODEL,
|
| 34 |
quantization_config=quantization_config,
|
| 35 |
device_map="auto",
|
| 36 |
trust_remote_code=True
|
| 37 |
)
|
| 38 |
+
print("✅ Base model loaded successfully!")
|
| 39 |
|
| 40 |
+
print("\n[4/4] Loading LoRA adapter (CEO fine-tuning)...")
|
| 41 |
model = PeftModel.from_pretrained(model, LORA_ADAPTER_PATH)
|
| 42 |
model.eval()
|
| 43 |
+
print("✅ LoRA adapter loaded successfully!")
|
| 44 |
|
| 45 |
+
print("\n" + "=" * 60)
|
| 46 |
+
print("🎯 CEO AI EXECUTIVE IS READY!")
|
| 47 |
+
print("=" * 60)
|
| 48 |
+
print("Model is loaded in memory and ready for fast inference.\n")
|
| 49 |
|
| 50 |
@spaces.GPU
|
| 51 |
def chat_with_ceo(message, history):
|
|
|
|
| 101 |
Ask questions about business strategy, leadership, technology, or any topic your CEO writes about.
|
| 102 |
|
| 103 |
**Note:** This AI responds based on patterns learned from the CEO's blog posts and writings.
|
| 104 |
+
|
| 105 |
+
---
|
| 106 |
+
|
| 107 |
+
✅ **Model Status:** Loaded and ready! The model is kept in memory for fast responses.
|
| 108 |
"""
|
| 109 |
)
|
| 110 |
|